In [1]:
import requests
from bs4 import BeautifulSoup
import xmltodict
import json
import pandas as pd

In [2]:
def parse(item):
    try:
        applicantName = item.find('applicantName').get_text()
        applicationDate = item.find('applicationDate').get_text()
        applicationNumber = item.find('applicationNumber').get_text()
        astrtCont = item.find('astrtCont').get_text()
        bigDrawing = item.find('bigDrawing').get_text()
        drawing = item.find('drawing').get_text()
        indexNo = item.find('indexNo').get_text()
        inventionTitle = item.find('inventionTitle').get_text()
        ipcNumber = item.find('ipcNumber').get_text()
        openDate = item.find('openDate').get_text()
        openNumber = item.find('openNumber').get_text()
        publicationDate = item.find('publicationDate').get_text()
        publicationNumber = item.find('publicationNumber').get_text()
        registerDate = item.find('registerDate').get_text()
        registerNumber = item.find('registerNumber').get_text()
        registerStatus = item.find('registerStatus').get_text()
        return {
            '일련번호':indexNo,
            '등록상태':registerStatus,
            '발명의명칭(한글)':inventionTitle,
            'IPC번호':ipcNumber,
            '등록번호':registerNumber,
            '등록일자':registerDate,
            '출원번호':applicationNumber,
            '출원일자':applicationDate,
            '공개번호':openNumber,
            '공개일자':openDate,
            '공고번호':publicationNumber,
            '공고일자':publicationDate,
            '초록':astrtCont,
            '큰 이미지 경로':bigDrawing,
            '이미지 경로':drawing,
            '출원인':applicantName
        }

    except AttributeError as e:
        return {
            '일련번호':None,
            '등록상태':None,
            '발명의명칭(한글)':None,
            'IPC번호':None,
            '등록번호':None,
            '등록일자':None,
            '출원번호':None,
            '출원일자':None,
            '공개번호':None,
            '공개일자':None,
            '공고번호':None,
            '공고일자':None,
            '초록':None,
            '큰 이미지 경로':None,
            '이미지 경로':None,
            '출원인':None
        }

In [7]:
def send_api(word, year, patent, utility, pageNo, method, numOfRows=500):
    host_url = "http://plus.kipris.or.kr/kipo-api/kipi/patUtiModInfoSearchSevice/getWordSearch?word={}".format(word)
    svc_key = "ServiceKey=ulibXju3ktntAkv/h0O7zPqXi6DB==HotliWIfndqgE="
    year_url = "year={}".format(year)
    patent_url = "patent={}".format(patent)
    utility_url = "utility={}".format(utility)
    numOfRows_url = "numOfRows={}".format(numOfRows)
    pageNo_url = "pageNo={}".format(pageNo)

    if year != "":
        request_url = host_url + "&" + year_url
    if patent != "":
        request_url = request_url + "&" + patent_url
    if utility != "":
        request_url = request_url + "&" + utility_url
    if numOfRows != "":
        request_url = request_url + "&" + numOfRows_url
    if pageNo != "":
        request_url = request_url + "&" + pageNo_url

    request_url = request_url + "&" + svc_key
    print(request_url)

    try:
        if method == 'GET':
            response = requests.get(request_url)
        elif method == 'POST':
            response = requests.post(request_url)
        print("response status {}".format(response.status_code))
        # print("response text %r" % response.text)

        soup = BeautifulSoup(response.text, 'lxml-xml')
        err = soup.find('resultMsg').text
        total_cnt = soup.find('totalCount').text
        num_rows = soup.find('numOfRows').text
        # pagenum = (int(total_count) // int(num_rows)) + 1
    
        print("Total search results: {}".format(total_cnt))

        dic = xmltodict.parse(str(soup))
        json_obj = json.dumps(dic, ensure_ascii=False)
        # <count><numOfRows>500</numOfRows><pageNo>1</pageNo><totalCount>7276</totalCount></count>

        items = soup.find_all('item')
        row = []
        for item in items:
            row.append(parse(item))

        df = pd.DataFrame(row)  

        # if total_cnt > 500:
        #     for i in range(2, pagenum):


        if err == 'INVALID REQUEST PARAMETER ERROR.':
            print('Wrong Parameter!!!!!!!!!')
        else:
            return response.text, soup, df, dic, json_obj

    except Exception as ex:
        print('exception')
        print(ex)

In [8]:
### Usage
### send_api(word, year, patent, utility, numOfRows, pageNo, method)
### numOfRows는 최대 500까지 가능
resp_text, soup, df, dic, json_obj = send_api(word="유중수", year=10, patent="", utility="", numOfRows="500", pageNo="1", method="GET")
# soup, df = send_api(word="유중수", year=10, patent="", utility="", numOfRows="20", pageNo="1", method="GET")
# send_api("유중수", 2, "", "", 100, 10, "GET")
# soup, df = send_api("유중수", "GET")

http://plus.kipris.or.kr/kipo-api/kipi/patUtiModInfoSearchSevice/getWordSearch?word=유중수&year=10&numOfRows=500&pageNo=1&ServiceKey=ulibXju3ktntAkv/h0O7zPqXi6DB==HotliWIfndqgE=
response status 200
Total search results: 7283
