In [1]:
import sys
import urllib.request
import datetime
import time
import json
import os
from dotenv import load_dotenv

load_dotenv()
client_id = os.getenv('CLIENT_ID')
client_secret = os.getenv('CLIENT_SECRET')

#[CODE 1]
def getRequestUrl(url):
    req = urllib.request.Request(url)
    req.add_header("X-Naver-Client-Id", client_id)
    req.add_header("X-Naver-Client-Secret", client_secret)

    try:
        response = urllib.request.urlopen(req)
        if response.getcode() == 200:
            print("[%s] Url Request Success" % datetime.datetime.now())
            return response.read().decode('utf-8')

    except Exception as e:
        print(e)
        print("[%s] Error for URL : %s" % (datetime.datetime.now(), url))
        return None

In [2]:
#[CODE 2]
def getNaverSearch(node, srcText, start, display):
    base = "https://openapi.naver.com/v1/search"
    node = "/%s.json" % node
    parameters = "?query=%s&start=%s&display=%s" %(urllib.parse.quote(srcText), start, display)
    url = base + node + parameters
    responseDecode = getRequestUrl(url) #[CODE 1]
    if (responseDecode == None):
        return None
    else:
        return json.loads(responseDecode)


In [3]:
#[CODE 3]
def getPostData(post, jsonResult, cnt):
    title = post['title']
    description = post['description']
    org_link = post['bloggerlink']
    link = post['link']
    pDate = datetime.datetime.strptime(post['postdate'], '%Y%m%d')
    pDate = pDate.strftime('%Y-%m-%d')
    jsonResult.append({'cnt':cnt, 'title':title, 'description': description,
    'org_link':org_link, 'link': link, 'pDate':pDate})
    return

In [4]:
#[CODE 0]
def main():
    node = 'blog' #크롤링할 대상
    srcText = input('검색어를 입력하세요: ')
    cnt = 0
    jsonResult = []
    jsonResponse = getNaverSearch(node, srcText, 1, 100) #[CODE 2]
    total = jsonResponse['total']
    while ((jsonResponse != None) and (jsonResponse['display'] != 0)):
        for post in jsonResponse['items']:
            cnt += 1
            getPostData(post, jsonResult, cnt) #[CODE 3]
        start = jsonResponse['start'] + jsonResponse['display']
        jsonResponse = getNaverSearch(node, srcText, start, 100) #[CODE 2]
    print('전체 검색 : %d 건' %total)
    with open('%s_naver_%s.json' % (srcText, node), 'w', encoding='utf8') as outfile:
        jsonFile = json.dumps(jsonResult, indent = 4, sort_keys = True, ensure_ascii = False)
        outfile.write(jsonFile)
    print("가져온 데이터 : %d 건" %(cnt))
    print('%s_naver_%s.json SAVED' % (srcText, node))

if __name__ == '__main__':
    main()

[2025-02-19 17:02:30.938539] Url Request Success
[2025-02-19 17:02:31.221440] Url Request Success
[2025-02-19 17:02:31.519144] Url Request Success
[2025-02-19 17:02:31.797407] Url Request Success
[2025-02-19 17:02:32.118736] Url Request Success
[2025-02-19 17:02:32.419728] Url Request Success
[2025-02-19 17:02:32.754763] Url Request Success
[2025-02-19 17:02:33.062904] Url Request Success
[2025-02-19 17:02:33.370402] Url Request Success
[2025-02-19 17:02:33.704192] Url Request Success
HTTP Error 400: Bad Request
[2025-02-19 17:02:33.810245] Error for URL : https://openapi.naver.com/v1/search/blog.json?query=%EC%A7%91%EA%B0%80%EA%B3%A0%EC%8B%B6%EB%8B%A4&start=1001&display=100
전체 검색 : 1145504 건
가져온 데이터 : 1000 건
집가고싶다_naver_blog.json SAVED
