In [17]:
import os
import urllib.request
import datetime
import json
from dotenv import load_dotenv

load_dotenv()

client_id= os.getenv('NAVER_CLIENT_ID')
client_secret= os.getenv('NAVER_CLIENT_SECRET')

#[CODE 1]
def getRequestUrl(url):
    req = urllib.request.Request(url)
    req.add_header("X-Naver-Client-Id", client_id)
    req.add_header("X-Naver-Client-Secret", client_secret)
    try:
        response = urllib.request.urlopen(req)
        if response.getcode() == 200:
            print("[%s] Url Request Success" % datetime.datetime.now())
            return response.read().decode('utf-8')
    except Exception as e:
        print(e)
        print("[%s] Error for URL : %s" % (datetime.datetime.now(), url))
        return None

In [18]:
#[CODE 2]
def getNaverSearch(node, srcText, start, display):
    base = "https://openapi.naver.com/v1/search"
    node = "/%s.json" % node
    parameters = "?query=%s&start=%s&display=%s" % (urllib.parse.quote(srcText), start,display)
    url = base + node + parameters
    responseDecode = getRequestUrl(url) #[CODE 1]
    if (responseDecode == None):
        return None
    else:
        return json.loads(responseDecode)


In [19]:
#[CODE 3]
def getPostData(post, jsonResult, cnt):
    title = post['title']
    description = post['description']
    if 'originallink' not in post:
        org_link = post['bloggerlink']
    else:
        org_link = post['originallink']
    link = post['link']

    if 'pubDate' not in post:
        pdate = datetime.datetime.strptime(post['postdate'], '%Y%m%d')
    else:
        pdate = datetime.datetime.strptime(post['pubDate'], '%a, %d %b %Y %H:%M:%S +0900')
    pdate = pdate.strftime('%Y-%m-%d %H:%M:%S')


    jsonResult.append({'cnt':cnt, 'title':title, 'description': description, 'org_link':org_link, 'link': link, 'pdate':pdate})
    return

In [20]:
#[CODE 0]
def main():
    text = input('검색노드를 입력해주세요 (blog, news, 블로그, 뉴스): ')
    srcText = input('검색어를 입력하세요: ')
    cnt = 0
    jsonResult = []
    if text in ['blog', '블로그']:
        node = 'blog'
    elif text in ['news', '뉴스']:
        node = 'news'
    else:
        print('검색노드를 확인해주세요')
    jsonResponse = getNaverSearch(node, srcText, 1, 100) #[CODE 2]
    total = jsonResponse['total']
    while ((jsonResponse != None) and (jsonResponse['display'] != 0)):
        for post in jsonResponse['items']:
            cnt += 1
            getPostData(post, jsonResult, cnt) #[CODE 3]
        start = jsonResponse['start'] + jsonResponse['display']
        jsonResponse = getNaverSearch(node, srcText, start, 100) #[CODE 2]

    print('전체 검색 : %d 건' %total)

    with open('%s_naver_%s.json' % (srcText, node), 'w', encoding='utf8') as outfile:
        jsonFile = json.dumps(jsonResult, indent = 4, sort_keys = True, ensure_ascii = False)
        outfile.write(jsonFile)

    print("가져온 데이터 : %d 건" %(cnt))
    print('%s_naver_%s.json SAVED' % (srcText, node))

if __name__ == '__main__':
    main()

[2025-07-07 17:12:17.632050] Url Request Success
[2025-07-07 17:12:17.770536] Url Request Success
[2025-07-07 17:12:17.918923] Url Request Success
[2025-07-07 17:12:18.074754] Url Request Success
[2025-07-07 17:12:18.249672] Url Request Success
[2025-07-07 17:12:18.431408] Url Request Success
[2025-07-07 17:12:18.601528] Url Request Success
[2025-07-07 17:12:18.780509] Url Request Success
[2025-07-07 17:12:18.955556] Url Request Success
[2025-07-07 17:12:19.139960] Url Request Success
HTTP Error 400: Bad Request
[2025-07-07 17:12:19.209130] Error for URL : https://openapi.naver.com/v1/search/news.json?query=%EC%9D%B8%EA%B3%B5%EC%A7%80%EB%8A%A5%EC%82%AC%EA%B4%80%ED%95%99%EA%B5%90&start=1001&display=100
전체 검색 : 11981 건
가져온 데이터 : 1000 건
인공지능사관학교_naver_news.json SAVED
