In [1]:
from selenium import webdriver
import subprocess
import chromedriver_autoinstaller
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import chromedriver_autoinstaller
    
from selenium.webdriver.common.keys import Keys

import time
import datetime as dt
import schedule

import pandas as pd
import numpy as np
from pprint import pprint
import urllib.request
import json
import glob
import sys
import os

import warnings
warnings.filterwarnings(action='ignore')

pd.set_option('display.max_columns', 250)
pd.set_option('display.max_rows', 250)
pd.set_option('display.width', 100)

pd.options.display.float_format = '{:.2f}'.format

class NaverDataLabOpenAPI():
    """
    네이버 데이터랩 오픈 API 컨트롤러 클래스
    """

    def __init__(self, client_id, client_secret):
        """
        인증키 설정 및 검색어 그룹 초기화
        """
        self.client_id = client_id
        self.client_secret = client_secret
        self.keywordGroups = []
        self.url = "https://openapi.naver.com/v1/datalab/search"

    def add_keyword_groups(self, group_dict):
        """
        검색어 그룹 추가
        """

        keyword_gorup = {
            'groupName': group_dict['groupName'],
            'keywords': group_dict['keywords']
        }
        
        self.keywordGroups.append(keyword_gorup)
        print(f">>> Num of keywordGroups: {len(self.keywordGroups)}")
        
    def get_data(self, startDate, endDate, timeUnit, device, ages, gender):
        """
        요청 결과 반환
        timeUnit - 'date', 'week', 'month'
        device - None, 'pc', 'mo'
        ages = [], ['1' ~ '11']
        gender = None, 'm', 'f'
        """

        # Request body
        body = json.dumps({
            "startDate": startDate,
            "endDate": endDate,
            "timeUnit": timeUnit,
            "keywordGroups": self.keywordGroups,
            "device": device,
            "ages": ages,
            "gender": gender
        }, ensure_ascii=False)
        
        # Results
        request = urllib.request.Request(self.url)
        request.add_header("X-Naver-Client-Id",self.client_id)
        request.add_header("X-Naver-Client-Secret",self.client_secret)
        request.add_header("Content-Type","application/json")
        response = urllib.request.urlopen(request, data=body.encode("utf-8"))
        rescode = response.getcode()
        if(rescode==200):
            # Json Result
            result = json.loads(response.read())
            
            period = []
            ratio = []
            title = []
            pprint(result)
            if result['results'][0]['data']:
                for val in result['results']:
                    period.append(val['data'][-1]['period'])
                    ratio.append(val['data'][-1]['ratio'])
                    title.append(val['title'])
            
            df = pd.DataFrame({"title" : title, 'period' : period, "ratio" : ratio})
            
        else:
            print("Error Code:" + rescode)
            
        return df

keyword_group_set = {
    'keyword_group_1': {'groupName': "애플", 'keywords': ["애플"]},
}
def run():
    print("시작시작시작")
    try:
        subprocess.Popen(r'C:\Program Files\Google\Chrome\Application\chrome.exe --remote-debugging-port=9222 --user-data-dir="C:\chrometemp1"')  # 디버거 크롬 구동
    except:
        subprocess.Popen(r'C:\Program Files (x86)\Google\Chrome\Application\chrome.exe --remote-debugging-port=9222 --user-data-dir="C:\chrometemp1"')  # 디버거 크롬 구동
    option = Options()
    option.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
    # option.add_argument("--headless")

    chrome_ver = chromedriver_autoinstaller.get_chrome_version().split('.')[0]
    try:
        driver = webdriver.Chrome(f'./{chrome_ver}/chromedriver.exe', options=option)
    except:
        chromedriver_autoinstaller.install('./')
        driver = webdriver.Chrome(f'./{chrome_ver}/chromedriver.exe', options=option)

    #####크롤링
    
    driver.get('https://blackkiwi.net/service/keyword-analysis?keyword=%ED%94%8C%EB%9D%BC%EC%8A%A4%ED%8B%B1%20%EC%A0%9C%EB%A1%9C&platform=naver')
    time.sleep(10) #혹시 모를 에러 방지를 위한 적절한 wait 넣어주기
    title=driver.find_elements(By.CSS_SELECTOR,'table a')
    title_l=[]
    for i in range(20):
        title_l.append(title[i].text)
        print(title_l[i])
    driver.implicitly_wait(10)
    driver.get('http://surffing.net/MainSeoSearch.do')
    search=driver.find_element(By.ID,'saerchKeyword')
    search.send_keys('플라스틱 제로')
    search.send_keys(Keys.ENTER)
    title2=driver.find_elements(By.CSS_SELECTOR,'tr>td.center:nth-child(1)')
    title_b=[]
    
    for i in range(len(title2)):
        title_b.append(title2[i].text)
        print(title_b[i])
    
    title_l.extend(title_b)
    list_a=title_l
    
    ##날짜지정
    today = dt.date.today()
    monday=today - dt.timedelta(days=today.weekday())
    end=monday.strftime('%Y-%m-%d')
    print(end)
    th=dt.timedelta(days=30)
    start=monday-th
    start=start.strftime('%Y-%m-%d')
        
    #####크롤링end
    cnt=0
    while(True):
        if len(list_a)>0 :
            for idx, (key, val) in enumerate(keyword_group_set.items()):
                val['groupName'] = list_a[idx]
                val['keywords'] = [list_a[idx]]
            del list_a[:1]
            cnt+=1
        else:
            break
    #     pprint(keyword_group_set)

            # API 인증 정보 설정
        client_id = "xTsOEpqU869lYEaqyaU6"
        client_secret = "kkp_ZP2j9T"

        # 요청 파라미터 설정
        startDate = start
        endDate = end
        timeUnit = 'week'
        device = ''
        ages = []
        gender = ''

        # 데이터 프레임 정의
        naver = NaverDataLabOpenAPI(client_id=client_id, client_secret=client_secret)

        naver.add_keyword_groups(keyword_group_set['keyword_group_1'])

        df = naver.get_data(startDate, endDate, timeUnit, device, ages, gender)

        if(cnt>1):
            df1=pd.concat([df1,df])
        else:
            df1=df
#     print(df1)
    driver.close()
    #검색주간과 같은 데이터만 추출
    df1=df1[df1['period']==end]
    #ratio높은순으로 정렬
    df2=df1.sort_values('ratio',ascending=False)
    #ratio 삭제
    df2=df2.drop([df2.columns[2]],axis=1)
    #인덱싱해주기
    df2.reset_index(drop=True,inplace=True)
    #10위까지 저장
    data=df2.loc[:9]
    print(data)
    
    #db에 데이터 저장
    import cx_Oracle

    dsn = cx_Oracle.makedsn("localhost", 1521, service_name = "XE") # 오라클 주소
    connection = cx_Oracle.connect(user="scott", password="tiger", dsn=dsn, encoding="UTF-8") # 오라클 접속
    cur = connection.cursor() # 실행 결과 데이터를 담을 메모리 객체
    #dataframe 변환
    df_list = data.to_records(index=False)
    df_list = data.values.tolist()
    #쿼리
    cur.executemany("delete from plastic")
    cur.executemany("INSERT INTO plastic (title, day) VALUES (:1,:2)",
            df_list, batcherrors = True)
    cur.execute("commit")
    
#     return data

# step3.실행 주기 설정
# schedule.every().monday.at("02:00").do(run)
schedule.every().day.at("21:32").do(run)

# step4.스캐쥴 시작
while True:
    schedule.run_pending()
    time.sleep(1)

시작시작시작
도전! 플라스틱 제로
미세 플라스틱 없는 치약
미세플라스틱치약
플라스틱 프리
플라스틱 줄이기
재생플라스틱
친환경플라스틱
생분해플라스틱
생분해성 플라스틱
바이오 플라스틱
실리콘 빨대
지구의 날
대나무 칫솔
양말목 공예
업사이클링
고체치약
천연 수세미
샴푸바
제로 웨이스트
미니멀 라이프
플라스틱제로
제로웨이스트
양말목공예
업사이클링제품
대나무칫솔
업사이클링
친환경칫솔
친환경브랜드
친환경플라스틱
친환경선물
밀랍랩
친환경쇼핑몰
실리콘빨대
고체치약
생분해플라스틱
천연수세미
2022-08-22
>>> Num of keywordGroups: 1
{'endDate': '2022-08-26',
 'results': [{'data': [], 'keywords': ['도전! 플라스틱 제로'], 'title': '도전! 플라스틱 제로'}],
 'startDate': '2022-07-18',
 'timeUnit': 'week'}
>>> Num of keywordGroups: 1
{'endDate': '2022-08-26',
 'results': [{'data': [{'period': '2022-08-08', 'ratio': 100}],
              'keywords': ['미세 플라스틱 없는 치약'],
              'title': '미세 플라스틱 없는 치약'}],
 'startDate': '2022-07-18',
 'timeUnit': 'week'}
>>> Num of keywordGroups: 1
{'endDate': '2022-08-26',
 'results': [{'data': [{'period': '2022-08-08', 'ratio': 100},
                       {'period': '2022-08-22', 'ratio': 37.5}],
              'keywords': ['미세플라스틱치약'],
              'title': '미세플라스틱치약'}],
 'startDate': '2022-07-

{'endDate': '2022-08-26',
 'results': [{'data': [{'period': '2022-07-18', 'ratio': 78.57722},
                       {'period': '2022-07-25', 'ratio': 86.47068},
                       {'period': '2022-08-01', 'ratio': 80.8673},
                       {'period': '2022-08-08', 'ratio': 97.85609},
                       {'period': '2022-08-15', 'ratio': 100},
                       {'period': '2022-08-22', 'ratio': 63.05018}],
              'keywords': ['샴푸바'],
              'title': '샴푸바'}],
 'startDate': '2022-07-18',
 'timeUnit': 'week'}
>>> Num of keywordGroups: 1
{'endDate': '2022-08-26',
 'results': [{'data': [{'period': '2022-07-18', 'ratio': 90.19522},
                       {'period': '2022-07-25', 'ratio': 77.10773},
                       {'period': '2022-08-01', 'ratio': 75.18438},
                       {'period': '2022-08-08', 'ratio': 100},
                       {'period': '2022-08-15', 'ratio': 91.85827},
                       {'period': '2022-08-22', 'ratio': 62.25596}

{'endDate': '2022-08-26',
 'results': [{'data': [{'period': '2022-07-18', 'ratio': 100},
                       {'period': '2022-07-25', 'ratio': 79.48717},
                       {'period': '2022-08-01', 'ratio': 54.87179},
                       {'period': '2022-08-08', 'ratio': 57.82051},
                       {'period': '2022-08-15', 'ratio': 60.64102},
                       {'period': '2022-08-22', 'ratio': 42.94871}],
              'keywords': ['생분해플라스틱'],
              'title': '생분해플라스틱'}],
 'startDate': '2022-07-18',
 'timeUnit': 'week'}
>>> Num of keywordGroups: 1
{'endDate': '2022-08-26',
 'results': [{'data': [{'period': '2022-07-18', 'ratio': 90.75444},
                       {'period': '2022-07-25', 'ratio': 76.9255},
                       {'period': '2022-08-01', 'ratio': 88.54937},
                       {'period': '2022-08-08', 'ratio': 77.91778},
                       {'period': '2022-08-15', 'ratio': 100},
                       {'period': '2022-08-22', 'ratio': 6

TypeError: function missing required argument 'parameters' (pos 2)