In [1]:
import json
import os

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Number of best-seller items requested per category; sent as "pageSize" by get_item_cds_fp.
ITEM_COUNTS = 100

In [2]:
# HTTP headers shared by every request in this notebook.
# The Display-Api-Key can be overridden through the WCONCEPT_DISPLAY_API_KEY
# environment variable so the credential does not have to live in the notebook;
# the literal below is only a fallback that keeps existing runs working.
# NOTE(review): consider dropping the fallback once the variable is configured.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
    'Display-Api-Key': os.environ.get(
        'WCONCEPT_DISPLAY_API_KEY',
        'VWmkUPgs6g2fviPZ5JQFQ3pERP4tIXv/J2jppLqSRBk=',
    ),
}

In [3]:
# Middle-category codes to crawl ('10101201' .. '10101212'); each is sent as
# "depth2Code" by get_item_cds_fp.
middle_category_nums = [f'101012{suffix:02d}' for suffix in range(1, 13)]

In [4]:
def get_item_cds_fp(middle_category_num, gender):
    """Fetch the daily best-seller list of one middle category.

    Args:
        middle_category_num: Category code sent as ``depth2Code``.
        gender: ``'men'`` selects the men's ranking; any other value falls
            back to ``'women'``.

    Returns:
        A list with one ``[itemCd, finalPrice, ranking, total_items]`` row per
        product, in the order returned by the API. ``ranking`` starts at 1 and
        ``total_items`` is the number of products in this response. Every
        field is a string, matching the rows the previous numpy-based
        implementation produced (numpy coerced the mixed rows to strings).

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    url = 'https://api-display.wconcept.co.kr/display/api/v2/best/products'

    gender_type = 'men' if gender == 'men' else 'women'

    payload = {
        "custNo": "",
        "dateType": "daily",
        # NOTE(review): "domain" is 'WOMEN' for both genders - confirm the API
        # really ignores it when genderType is 'men'.
        "domain": 'WOMEN',
        "genderType": gender_type,
        "depth1Code": "10101",
        "depth2Code": middle_category_num,
        "pageNo": 1,
        "pageSize": ITEM_COUNTS
    }

    response = requests.post(url, headers=headers, json=payload, timeout=30)
    response.raise_for_status()

    # Decode the JSON body directly. Routing it through an HTML parser breaks
    # as soon as a value contains markup-like text ('<', '&amp;', ...).
    products = response.json()['data']['content']
    total_items = len(products)

    return [
        [str(product['itemCd']), str(product['finalPrice']), str(rank), str(total_items)]
        for rank, product in enumerate(products, start=1)
    ]

In [5]:
def get_item_data(info, item_fp):
    """Flatten one product record and attach its gallery image URLs.

    Args:
        info: Product dict; the keys ``brandNameKr``, ``itemCd``, ``heartCnt``,
            ``statusName``, ``customerPrice``, ``color1``, ``category`` and
            ``itemName`` are read from it.
        item_fp: Final (coupon-applied) price, stored as ``discounted_price``.

    Returns:
        A dict with the keys ``brand``, ``product_id``, ``likes``,
        ``sold_out``, ``fixed_price``, ``discounted_price``, ``product_name``,
        ``url`` (list of image ``src`` values), ``category_per_depth`` and
        ``color``.

    Raises:
        KeyError: If ``info`` or one of its category entries lacks an
            expected key.
    """
    # Brand
    brand_name_kr = info['brandNameKr']

    # Product code
    item_cd = info['itemCd']

    # Likes
    heart_cnt = info['heartCnt']

    # Sale status (e.g. sold out or on sale)
    status_name = info['statusName']

    # Regular price
    fixed_price = info['customerPrice']

    # Coupon-applied price
    discounted_price = item_fp

    # Colors
    color = info['color1']

    # Sizes are not collected.

    # Images: scraped from the gallery on the product detail page.
    page_url = f'https://www.wconcept.co.kr/Product/{item_cd}?rccode=pc_topseller'
    # headers must be passed by keyword: the second positional parameter of
    # requests.get is `params`, which would put the header values (including
    # the API key) into the query string instead of sending them as headers.
    response = requests.get(page_url, headers=headers, timeout=30)
    soup = BeautifulSoup(response.text, 'lxml')
    # Skip <img> tags without a src instead of swallowing every exception.
    image_urls = [
        image['src']
        for image in soup.select('ul#gallery > li > a > img')
        if image.has_attr('src')
    ]

    # Categories: one entry per category, each depth as [name, code].
    category_per_depth = [
        {
            'medium_name': category['mediumName'],
            'category_depthname1': [category['categoryDepthname1'], category['categoryDepth1']],
            'category_depthname2': [category['categoryDepthname2'], category['categoryDepth2']],
            'category_depthname3': [category['categoryDepthname3'], category['categoryDepth3']],
        }
        for category in info['category']
    ]

    # Product name
    item_name = info['itemName']

    return {
        'brand': brand_name_kr,
        'product_id': item_cd,
        'likes': heart_cnt,
        'sold_out': status_name,
        'fixed_price': fixed_price,
        'discounted_price': discounted_price,
        'product_name': item_name,
        'url': image_urls,
        'category_per_depth': category_per_depth,
        'color': color,
    }



In [6]:
def get_item_info(item_cd, item_fp):
    """Fetch the detail record of one product and flatten it.

    Args:
        item_cd: Product code, posted as the ``itemcds`` form field.
        item_fp: Final price, forwarded unchanged to ``get_item_data``.

    Returns:
        The dict built by ``get_item_data`` from the first record of the
        response.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        IndexError: If the endpoint returns no record for ``item_cd``.
    """
    url = 'https://www.wconcept.co.kr/Ajax/GetProductsInfo'
    response = requests.post(url, headers=headers, data={'itemcds': item_cd}, timeout=30)
    response.raise_for_status()

    # Decode the JSON body directly. Routing it through an HTML parser breaks
    # as soon as a value (e.g. a product name) contains markup-like text.
    products = response.json()
    if not products:
        raise IndexError(f'no product info returned for item code {item_cd!r}')

    return get_item_data(products[0], item_fp)



In [7]:
def get_rank_score(ranking, item_count):
    """Map a 1-based ranking onto a linear score between 1 and 0.

    Args:
        ranking: Position in the list, starting at 1.
        item_count: Number of items in the list.

    Returns:
        ``1.0`` for the first rank, ``0.0`` for the last rank
        (``ranking == item_count``), linear in between. A list with at most
        one item has no spread to normalise over, so its score is ``1.0``.
    """
    # Guard the single-item list, which would otherwise divide by zero.
    if item_count <= 1:
        return 1.0

    return 1 - ((ranking - 1) / (item_count - 1))

In [8]:
def main(output_path='wconcept_0504.csv'):
    """Crawl the best-seller lists, enrich every product and save a CSV.

    For each gender and each code in ``middle_category_nums`` the ranked
    product list is fetched with ``get_item_cds_fp``; every product is then
    expanded with ``get_item_info`` and given a ``rank_score``.

    Args:
        output_path: Destination of the CSV file. Defaults to the file name
            this notebook has always written.
    """
    ranked_items = []
    for gender in ['men', 'women']:
        for middle_category_num in middle_category_nums:
            ranked_items.extend(get_item_cds_fp(middle_category_num, gender))

    records = []
    for item_cd, item_fp, ranking, middle_item_count in ranked_items:
        record = get_item_info(item_cd, item_fp)
        # int() makes the score independent of whether the rank and count
        # arrive as strings or as numbers.
        record['rank_score'] = get_rank_score(int(ranking), int(middle_item_count))
        records.append(record)
        print(record)

    pd.DataFrame(records).to_csv(output_path, index=False)
    

In [9]:
# Run the full crawl when this cell is executed (the guard keeps it from
# running if the code is imported as a module instead).
if __name__ == '__main__':
    main()

{'brand': '앤더슨 벨 포 맨', 'product_id': '305706699', 'likes': 3.0, 'sold_out': '판매중', 'fixed_price': 980000.0, 'discounted_price': '980000', 'product_name': 'VINTAGE LEATHER BIKER JACKET awa582m(BLACK)', 'url': ['//product-image.wconcept.co.kr/productimg/image/img0/99/305706699_OT85075.jpg', '//product-image.wconcept.co.kr/productimg/image/img0/99/305706699_add1_YN52638.jpg?thumbnail=60x80', '//product-image.wconcept.co.kr/productimg/image/img0/99/305706699_add2_IL59728.jpg?thumbnail=60x80', '//product-image.wconcept.co.kr/productimg/image/img0/99/305706699_add3_PB77224.jpg?thumbnail=60x80'], 'category_per_depth': [{'medium_name': '남성', 'category_depthname1': ['의류', '001000000'], 'category_depthname2': ['아우터', '001001000'], 'category_depthname3': ['무스탕', '001001012']}], 'color': [], 'rank_score': 1.0}
{'brand': '플루크', 'product_id': '305733727', 'likes': 24.0, 'sold_out': '판매중', 'fixed_price': 159000.0, 'discounted_price': '65300', 'product_name': '플루크 패커블 윈드브레이커 자켓 FJK110 / 4color', 'url'

In [11]:
# Reload the CSV file that main() writes, for inspection.
df = pd.read_csv('wconcept_0504.csv')

In [12]:
# Display the loaded crawl results.
df

Unnamed: 0,brand,product_id,likes,sold_out,fixed_price,discounted_price,product_name,url,category_per_depth,color,rank_score
0,앤더슨 벨 포 맨,305706699,3.0,판매중,980000.0,980000,VINTAGE LEATHER BIKER JACKET awa582m(BLACK),['//product-image.wconcept.co.kr/productimg/im...,"[{'medium_name': '남성', 'category_depthname1': ...",[],1.000000
1,플루크,305733727,24.0,판매중,159000.0,65300,플루크 패커블 윈드브레이커 자켓 FJK110 / 4color,['//product-image.wconcept.co.kr/productimg/im...,"[{'medium_name': '남성', 'category_depthname1': ...",[],0.982143
2,앤더슨 벨 포 맨,305706690,4.0,판매중,598000.0,598000,UNISEX NEW PATCHWORK DENIM JACKET awa615u(BLACK),['//product-image.wconcept.co.kr/productimg/im...,"[{'medium_name': '남성', 'category_depthname1': ...",[],0.964286
3,팔육디,303814555,12.0,판매중,699000.0,529200,Taxi Bomber Leather Jacket (Burgundy),['//product-image.wconcept.co.kr/productimg/im...,"[{'medium_name': '남성', 'category_depthname1': ...",['RED'],0.946429
4,바튼웨어,302686688,5.0,판매중,497000.0,223650,PACKABLE ANORAK_PURPLE,['//product-image.wconcept.co.kr/productimg/im...,"[{'medium_name': '남성', 'category_depthname1': ...",[],0.928571
...,...,...,...,...,...,...,...,...,...,...,...
1529,원더브라,305731336,23.0,판매중,97000.0,23484,풀컵 베이직 노와이어 브라팬티 2종(브라+팬티) 택 1,['//product-image.wconcept.co.kr/productimg/im...,"[{'medium_name': '여성', 'category_depthname1': ...",[],0.040404
1530,아베크부,301422783,135.0,판매중,20900.0,18810,Lace Back Briefs Blanc,['//product-image.wconcept.co.kr/productimg/im...,"[{'medium_name': '여성', 'category_depthname1': ...",[],0.030303
1531,아비스,300474339,377.0,판매중,25000.0,22500,래쉬가드 이너세트 브라/팬티,['//product-image.wconcept.co.kr/productimg/im...,"[{'medium_name': '여성', 'category_depthname1': ...",[],0.020202
1532,컴포트랩,305775787,12.0,판매중,24900.0,24900,매직사이즈 슈퍼플렉스 브라렛,['//product-image.wconcept.co.kr/productimg/im...,"[{'medium_name': '여성', 'category_depthname1': ...","['GREEN', 'NAVY', 'WHITE', 'BLACK']",0.010101
