In [20]:
import urllib.request
import json
import datetime
import pandas as pd

class NaverShoppingCrawler:
    def __init__(self, client_id, client_secret, keyWord):
        self.client_id = client_id
        self.client_secret = client_secret
        self.keyWord = keyWord

    def gen_search_url(self, api_node, start_num, disp_num):
        base = 'https://openapi.naver.com/v1/search'
        node = '/' + api_node + '.json'
        param_query = '?query=' + urllib.parse.quote(self.keyWord)
        param_start = '&start=' + str(start_num)
        param_disp = '&display=' + str(disp_num)
        return base + node + param_query + param_disp + param_start

    def get_result_onpage(self, url):
        request = urllib.request.Request(url)
        request.add_header('X-Naver-Client-Id', self.client_id)
        request.add_header('X-Naver-Client-Secret', self.client_secret)
        response = urllib.request.urlopen(request)
        print(f'{datetime.datetime.now()} Url Request Success')
        return json.loads(response.read().decode('utf-8'))

    def delete_tag(self, input_str):
        input_str = input_str.replace('<b>', '')
        input_str = input_str.replace('</b>', '')
        input_str = input_str.replace('\xa0', '')
        return input_str

    def get_fields(self, json_data):
        title = [self.delete_tag(each['title']) for each in json_data['items']]
        link = [each['link'] for each in json_data['items']]
        lprice = [each['lprice'] for each in json_data['items']]
        mall_name = [each['mallName'] for each in json_data['items']]
        result = pd.DataFrame({
            'title': title,
            'link': link,
            'lprice': lprice,
            'mall': mall_name,
        }, columns=['title','lprice','mall','link'])
        return result

    def run(self):
        result_datas = []
        for n in range(1, 1000, 100):
            url = self.gen_search_url('shop', n, 100)
            json_result = self.get_result_onpage(url)
            result = self.get_fields(json_result)
            result_datas.append(result)
        result_datas_concat = pd.concat(result_datas)
        result_datas_concat.reset_index(drop=True, inplace=True)
        result_datas_concat['lprice'] = result_datas_concat['lprice'].str.replace(' ', '').astype(int)
        result_datas_concat.to_csv('/content/Naver_shopping.csv', sep=',', encoding="utf-8")

In [21]:
keyword = '배'
crawler = NaverShoppingCrawler('sWRweonFDwTIbRP88MsS', "yNVbAK8Kya", keyWord)
result = crawler.run()
print(result)

2024-01-09 05:15:24.048003 Url Request Success
2024-01-09 05:15:25.441795 Url Request Success
2024-01-09 05:15:26.860851 Url Request Success
2024-01-09 05:15:28.088707 Url Request Success
2024-01-09 05:15:29.479084 Url Request Success
2024-01-09 05:15:30.833751 Url Request Success
2024-01-09 05:15:32.223384 Url Request Success
2024-01-09 05:15:33.432014 Url Request Success
2024-01-09 05:15:34.828009 Url Request Success
2024-01-09 05:15:36.381776 Url Request Success
None


In [22]:
df = pd.read_csv('Naver_shopping.csv')

In [23]:
df

Unnamed: 0.1,Unnamed: 0,title,lprice,mall,link
0,0,나주배 가정용 선물용 5kg 7.5kg 15kg,21900,나주청년농부,https://search.shopping.naver.com/gate.nhn?id=...
1,1,나주배 5kg 7.5kg 15kg 가정용 선물세트,17900,과일꾼,https://search.shopping.naver.com/gate.nhn?id=...
2,2,못난이배 신고배 업소용 가정용 천안 성환 배 8kg 17kg,37900,천안강남농원,https://search.shopping.naver.com/gate.nhn?id=...
3,3,나주배 배즙용 양념용 가정용 흠집배 가공배 15kg,29900,자연진리,https://search.shopping.naver.com/gate.nhn?id=...
4,4,산지직송 나주배 가정용배 선물용배 과일선물세트 5kg,21900,나리배,https://search.shopping.naver.com/gate.nhn?id=...
...,...,...,...,...,...
995,995,GAP 가정용 하동배 국내산 스위트 센세이션 서양배 유럽배 외국배 아산배 추황배,19800,푸드파머스,https://search.shopping.naver.com/gate.nhn?id=...
996,996,"생명애찬 나주 맑은배즙, 도라지배즙 50팩 100팩",39960,위메프,https://search.shopping.naver.com/gate.nhn?id=...
997,997,배한상자 나주 황금 배 선물세트 15KG,99000,짱구네만물상,https://search.shopping.naver.com/gate.nhn?id=...
998,998,청년농부 나주배 15kg 21-25과(가정용),44120,옥션,https://search.shopping.naver.com/gate.nhn?id=...
