# 네이버 뉴스 검색 api

공식 문서를 참고해보면, datalab의 경우는, 비로그인 방식 오픈 API를 쓴다.  
header에 
- client_id
- client_secret을 전송해서 사용한다.

## 검색 예시

In [9]:
# Request payload holding the search conditions: date range, time unit,
# keyword groups, and the device / age / gender filters.
body = (
    '{"startDate":"2017-01-01","endDate":"2017-04-30","timeUnit":"month",'
    '"keywordGroups":['
    '{"groupName":"한글","keywords":["한글","korean"]},'
    '{"groupName":"영어","keywords":["영어","english"]}'
    '],'
    '"device":"pc","ages":["1","2"],"gender":"f"}'
)

조금 다른 방식으로 한번 request를 해보려고 한다.

In [18]:
import os
import sys
import urllib.request

# Endpoint used for the DataLab search request.
url = "https://openapi.naver.com/v1/datalab/search"

# Build the request with the credential headers and the JSON content type
# supplied up front instead of adding them one at a time.
request = urllib.request.Request(
    url,
    headers={
        "X-Naver-Client-Id": client_id,
        "X-Naver-Client-Secret": client_secret,
        "Content-Type": "application/json",
    },
)

# The payload is sent as UTF-8 bytes; the response object is kept so a later
# cell can read it.
response = urllib.request.urlopen(request, data=body.encode("utf-8"))

In [20]:
# Read the response body and decode the bytes as UTF-8 text.
response.read().decode('utf_8')

'{"startDate":"2017-01-01","endDate":"2017-04-30","timeUnit":"month","results":[{"title":"한글","keywords":["한글","korean"],"data":[{"period":"2017-01-01","ratio":47.00101},{"period":"2017-02-01","ratio":53.23619},{"period":"2017-03-01","ratio":100},{"period":"2017-04-01","ratio":85.327}]},{"title":"영어","keywords":["영어","english"],"data":[{"period":"2017-01-01","ratio":40.0881},{"period":"2017-02-01","ratio":36.69942},{"period":"2017-03-01","ratio":52.11792},{"period":"2017-04-01","ratio":44.4595}]}]}'

무난하게 데이터가 들어오고 있지만, 조금 더 정리해보려고 한다.

## 원하는 데이터를 바탕으로, body 수정

In [26]:
# Payload for the monthly trend of a single keyword group over
# 2019-01-01 .. 2022-02-28.
body = (
    '{"startDate":"2019-01-01","endDate":"2022-02-28","timeUnit":"month",'
    '"keywordGroups":[{"groupName":"코로나","keywords":["코로나"]}]}'
)

In [31]:
# Same endpoint as before; only the payload in `body` differs.
url = "https://openapi.naver.com/v1/datalab/search"

# Headers and the UTF-8 encoded payload are both attached when the request
# object is created, so urlopen only needs the request itself.
request = urllib.request.Request(
    url,
    headers={
        "X-Naver-Client-Id": client_id,
        "X-Naver-Client-Secret": client_secret,
        "Content-Type": "application/json",
    },
    data=body.encode("utf-8"),
)

response = urllib.request.urlopen(request)

In [32]:
# Keep the decoded response text so later cells can parse it.
result = response.read().decode('utf_8')

In [33]:
# Display the raw response text.
result

'{"startDate":"2019-01-01","endDate":"2022-02-28","timeUnit":"month","results":[{"title":"코로나","keywords":["코로나"],"data":[{"period":"2019-01-01","ratio":0.02654},{"period":"2019-02-01","ratio":0.02146},{"period":"2019-03-01","ratio":0.02302},{"period":"2019-04-01","ratio":0.02953},{"period":"2019-05-01","ratio":0.02615},{"period":"2019-06-01","ratio":0.03197},{"period":"2019-07-01","ratio":0.03305},{"period":"2019-08-01","ratio":0.03023},{"period":"2019-09-01","ratio":0.03079},{"period":"2019-10-01","ratio":0.02642},{"period":"2019-11-01","ratio":0.02381},{"period":"2019-12-01","ratio":0.02353},{"period":"2020-01-01","ratio":2.20883},{"period":"2020-02-01","ratio":84.25142},{"period":"2020-03-01","ratio":100},{"period":"2020-04-01","ratio":40.21097},{"period":"2020-05-01","ratio":35.67645},{"period":"2020-06-01","ratio":21.38294},{"period":"2020-07-01","ratio":15.14822},{"period":"2020-08-01","ratio":52.73999},{"period":"2020-09-01","ratio":35.57399},{"period":"2020-10-01","ratio":23.4

return해주는 값은 ratio(비율)로, 구간별 검색량의 상대적 비율을 보여준다.
이를 dataframe으로 정리하거나, 시각화로 정리하려고 한다.

In [35]:
# Parse the response text as JSON so it is easier to inspect.
import json 

# json.loads turns the text held in `result` into Python objects.
result_json = json.loads(result)

In [36]:
# Display the parsed response.
result_json

{'startDate': '2019-01-01',
 'endDate': '2022-02-28',
 'timeUnit': 'month',
 'results': [{'title': '코로나',
   'keywords': ['코로나'],
   'data': [{'period': '2019-01-01', 'ratio': 0.02654},
    {'period': '2019-02-01', 'ratio': 0.02146},
    {'period': '2019-03-01', 'ratio': 0.02302},
    {'period': '2019-04-01', 'ratio': 0.02953},
    {'period': '2019-05-01', 'ratio': 0.02615},
    {'period': '2019-06-01', 'ratio': 0.03197},
    {'period': '2019-07-01', 'ratio': 0.03305},
    {'period': '2019-08-01', 'ratio': 0.03023},
    {'period': '2019-09-01', 'ratio': 0.03079},
    {'period': '2019-10-01', 'ratio': 0.02642},
    {'period': '2019-11-01', 'ratio': 0.02381},
    {'period': '2019-12-01', 'ratio': 0.02353},
    {'period': '2020-01-01', 'ratio': 2.20883},
    {'period': '2020-02-01', 'ratio': 84.25142},
    {'period': '2020-03-01', 'ratio': 100},
    {'period': '2020-04-01', 'ratio': 40.21097},
    {'period': '2020-05-01', 'ratio': 35.67645},
    {'period': '2020-06-01', 'ratio': 21.38294}

json_normalize를 활용하면 다음과 같이 만들 수 있다.

In [43]:
import pandas as pd
# Flatten the 'data' list of the first entry in 'results' into a DataFrame.
pd.json_normalize(result_json['results'][0]['data'])

Unnamed: 0,period,ratio
0,2019-01-01,0.02654
1,2019-02-01,0.02146
2,2019-03-01,0.02302
3,2019-04-01,0.02953
4,2019-05-01,0.02615
5,2019-06-01,0.03197
6,2019-07-01,0.03305
7,2019-08-01,0.03023
8,2019-09-01,0.03079
9,2019-10-01,0.02642
