In [1]:
import time
from bs4 import BeautifulSoup
import pandas as pd
import requests
import json

# Number of items requested per category (sent as the pageSize query parameter).
ITEMS_COUNT = 100

In [2]:
# Browser-like headers sent with every request made by this notebook.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'}

# Passed as the ``timeout`` argument of requests.get: (connect, read) in seconds.
timeout_settings = (60, 60)

In [3]:
# Category numbers to scrape; each one is sent as the dispCtgNo query parameter.
middle_category_nums = [
    '367', '596', '2074', '595',
    '980', '981', '1477', '657',
    '1004', '2080', '1003', '1002',
    '2224',
]

In [4]:
def get_response(url, headers):
    """Fetch ``url`` with a GET request and return the body parsed by BeautifulSoup.

    Args:
        url: Address to request.
        headers: HTTP headers passed through to ``requests.get``.

    Returns:
        A ``BeautifulSoup`` object built from the response text with the
        ``lxml`` parser.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    # GET request; the timeout comes from the module-level ``timeout_settings``.
    response = requests.get(url, headers=headers, timeout=timeout_settings)
    # Fail loudly on an error status instead of parsing an error page as data.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    # Short pause after each request so successive calls are throttled.
    time.sleep(0.5)

    return soup


In [5]:
def get_item_info(goods):
    """Extract the fields of interest from one product entry.

    Args:
        goods: Dict describing a single product. The keys ``goodsNo``,
            ``brandNm``, ``norPrc`` and ``salePrc`` are required. The
            ``colorInfo`` list is optional and is read on a best-effort basis.

    Returns:
        dict with the keys ``product_id``, ``brand``, ``fixed_price``,
        ``discounted_price``, ``url`` (image URLs of every color),
        ``colors`` (option name of every color) and ``sizes`` (size codes of
        the first color only). The three list fields are empty, or hold only
        the entries read before the first malformed one, when the color data
        is missing or malformed.

    Raises:
        KeyError: If one of the required keys is absent.
    """
    # Product id
    goodsNo = goods['goodsNo']

    # Brand name
    brandNm = goods['brandNm']

    # List price
    norPrc = goods['norPrc']

    # Sale price
    salePrc = goods['salePrc']

    # A missing or null ``colorInfo`` is treated as "no colors".
    color_infos = goods.get('colorInfo') or []

    # Images: every content URL of every color. Best-effort: stop at the
    # first malformed entry and keep what was collected so far.
    image_urls = []
    try:
        for color in color_infos:
            for content in color['colorContInfo']:
                image_urls.append(content['dispGoodsContUrl'])
    except (KeyError, IndexError, TypeError):
        pass

    # Color information: option name of each color (best-effort as above).
    colors = []
    try:
        for color in color_infos:
            colors.append(color['optnNm'])
    except (KeyError, IndexError, TypeError):
        pass

    # Size information: size codes listed under the first color only.
    # Guarded like the sections above so a product without color data does
    # not abort the whole scrape.
    sizes = []
    try:
        for size in color_infos[0]['colorSizeInfo']:
            sizes.append(size['erpSzCd'])
    except (KeyError, IndexError, TypeError):
        pass

    data = {
        'product_id': goodsNo,
        'brand': brandNm,
        'fixed_price': norPrc,
        'discounted_price': salePrc,
        'url': image_urls,
        'colors': colors,
        'sizes': sizes,
    }

    return data

In [6]:
def get_rank_score(ranking, item_count):
    """Map a 1-based ranking onto a linear score.

    Rank 1 scores 1.0 and rank ``item_count`` scores 0.0, with equal steps
    in between.

    Args:
        ranking: 1-based position of the item in the list.
        item_count: Total number of items in the list.

    Returns:
        float: ``1 - (ranking - 1) / (item_count - 1)``, or ``1.0`` when the
        list holds at most one item.
    """
    # With a single item the scale has no width; the formula would divide by
    # zero, so the lone item simply gets the top score.
    if item_count <= 1:
        return 1.0

    rank_score = 1 - ((ranking - 1) / (item_count - 1))

    return rank_score

In [7]:
def get_items(num):
    """Download one category listing and return its items with rank scores.

    Requests the first page of the category goods list (page size
    ``ITEMS_COUNT``), decodes the JSON text carried by the parsed response,
    and converts every entry of ``payload.goodsList`` with ``get_item_info``.
    Each item additionally receives a ``rank_score`` computed from its
    1-based position in the returned list. Progress is printed per item.

    Args:
        num: Category number inserted as the ``dispCtgNo`` query parameter.

    Returns:
        list of item dicts, in the order the API returned them.
    """
    url = f'https://www.thehandsome.com/api/display/1/ko/category/categoryGoodsList?dispMediaCd=10&sortGbn=20&pageSize={ITEMS_COUNT}&pageNo=1&norOutletGbCd=J&dispCtgNo={num}&productListLayout=4&theditedYn=N'

    page = get_response(url, headers)
    # The response body is JSON; it is read back as the document's single string.
    decoded = json.loads(page.string)
    goods_list = decoded['payload']['goodsList']
    total = len(goods_list)

    collected = []
    position = 0
    for goods in goods_list:
        position += 1
        record = get_item_info(goods)
        record['rank_score'] = get_rank_score(position, total)
        collected.append(record)
        print(len(collected), record)

    return collected

In [8]:
def main():
    """Scrape every category in ``middle_category_nums`` and gather the items.

    Prints the total number of collected items.

    Returns:
        list: The item dicts returned by ``get_items`` for all categories,
        concatenated in category order.
    """
    all_info = []
    for num in middle_category_nums:
        all_info.extend(get_items(num))

    print(len(all_info))

    # Hand the scraped data back to the caller instead of discarding it.
    return all_info

In [9]:
# Run the scraper only when this code is executed as the main program.
if __name__ == '__main__':
    main()  

1 {'product_id': 'TM2E5KCDC81W', 'brand': 'TIME', 'fixed_price': 495000, 'discounted_price': 495000, 'url': ['/TM/2E/SS/TM2E5KCDC81W_LE_C01.jpg', '/TM/2E/SS/TM2E5KCDC81W_LE_W01.jpg?rs=684X1032', '/TM/2E/SS/TM2E5KCDC81W_LE_W02.jpg?rs=684X1032', '/TM/2E/SS/TM2E5KCDC81W_LE_W03.jpg?rs=684X1032', '/TM/2E/SS/TM2E5KCDC81W_LE_W04.jpg?rs=684X1032', '/TM/2E/SS/TM2E5KCDC81W_LE_W05.jpg?rs=684X1032', '/TM/2E/SS/TM2E5KCDC81W_LE_W06.jpg?rs=684X1032', '/TM/2E/SS/TM2E5KCDC81W_LE_W07.jpg?rs=684X1032', '/TM/2E/SS/TM2E5KCDC81W_LE_S01.jpg', '/TM/2E/SS/TM2E5KCDC81W_LE_T01.jpg', '/TM/2E/SS/TM2E5KCDC81W_LE_T02.jpg', '/TM/2E/SS/TM2E5KCDC81W_LE_W01.jpg', '/TM/2E/SS/TM2E5KCDC81W_LE_W02.jpg', '/TM/2E/SS/TM2E5KCDC81W_LE_W03.jpg', '/TM/2E/SS/TM2E5KCDC81W_LE_W04.jpg', '/TM/2E/SS/TM2E5KCDC81W_LE_W05.jpg', '/TM/2E/SS/TM2E5KCDC81W_LE_W06.jpg', '/TM/2E/SS/TM2E5KCDC81W_LE_W07.jpg'], 'colors': ['LIGHT BEIGE'], 'sizes': ['90'], 'rank_score': 1.0}
2 {'product_id': 'SY2E3WJMTE1WM2', 'brand': 'SYSTEM', 'fixed_price': 375000, 

KeyboardInterrupt: 