In [16]:
import json
import os
import time

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Number of products requested per best-seller page (sent as `pageSize`).
ITEM_COUNTS = 100

In [17]:
# HTTP headers shared by the requests made in this notebook.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
    # Prefer a key supplied through the WCONCEPT_DISPLAY_API_KEY environment
    # variable so it can be changed without editing the notebook; the literal
    # is kept only as a fallback so a fresh "Run All" behaves as before.
    'Display-Api-Key': os.environ.get(
        'WCONCEPT_DISPLAY_API_KEY',
        'VWmkUPgs6g2fviPZ5JQFQ3pERP4tIXv/J2jppLqSRBk=',
    ),
}

In [18]:
# Middle-category codes to crawl: the twelve consecutive codes
# '10101201' .. '10101212', kept as strings.
middle_category_nums = [str(code) for code in range(10101201, 10101213)]

In [19]:
def get_item_cds_fp(middle_category_num, gender, timeout=30):
    """Fetch the daily best-seller list for one category and gender.

    Posts one request to the display API's best-products endpoint and returns
    one row per product in the order the API lists them.

    Args:
        middle_category_num: Category code sent as ``depth2Code``.
        gender: ``'men'`` requests the men's ranking; any other value is
            treated as ``'women'``.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of ``[itemCd, finalPrice, ranking, total_items_count]`` rows.
        ``ranking`` starts at 1 and ``total_items_count`` is the number of
        rows returned. The rows are assembled in a NumPy array, so all four
        values share one dtype (they come back as strings when ``itemCd`` is a
        string); callers should convert numeric fields explicitly.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the payload lacks ``data`` / ``content`` or an item lacks
            ``itemCd`` / ``finalPrice``.
    """
    url = 'https://api-display.wconcept.co.kr/display/api/v2/best/products'

    gender_type = 'men' if gender == 'men' else 'women'

    payload = {
        "custNo": "",
        "dateType": "daily",
        # NOTE(review): "domain" is 'WOMEN' for both genders, exactly as in the
        # original request; only "genderType" varies. Confirm against the API.
        "domain": 'WOMEN',
        "genderType": gender_type,
        "depth1Code": "10101",
        "depth2Code": middle_category_num,
        "pageNo": 1,
        "pageSize": ITEM_COUNTS,
    }

    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    response.raise_for_status()

    # Decode the JSON body directly; routing it through an HTML parser first
    # can alter payloads that contain markup characters.
    content = response.json()['data']['content']

    code_and_price = np.array(
        [[item['itemCd'], item['finalPrice']] for item in content]
    )
    item_total = len(code_and_price)
    rankings = np.arange(1, item_total + 1)
    totals = np.full(item_total, item_total)

    # column_stack yields an empty (0, 3) array for an empty page, so the
    # result is simply [] in that case.
    return np.column_stack((code_and_price, rankings, totals)).tolist()

In [20]:
def get_item_data(info, item_fp, timeout=30):
    """Build the flat output record for one product.

    Copies the fields of interest out of the product-info record and adds the
    gallery image URLs scraped from the product page.

    Args:
        info: Product record (dict) providing ``brandNameKr``, ``itemCd``,
            ``heartCnt``, ``statusName``, ``customerPrice`` and ``itemName``.
        item_fp: Final (coupon-applied) price, stored as ``discounted_price``.
        timeout: Seconds to wait for the product page before giving up.

    Returns:
        dict with the keys ``brand``, ``product_id``, ``likes``, ``sold_out``,
        ``fixed_price``, ``discounted_price``, ``product_name`` and ``url``
        (a list of image ``src`` values, empty when none are found).

    Raises:
        KeyError: If ``info`` lacks one of the required fields.
    """
    item_cd = info['itemCd']

    # Read every required field before touching the network so a malformed
    # record fails fast.
    record = {
        'brand': info['brandNameKr'],          # brand
        'product_id': item_cd,                 # product code
        'likes': info['heartCnt'],             # like count
        'sold_out': info['statusName'],        # sale / sold-out status text
        'fixed_price': info['customerPrice'],  # regular price
        'discounted_price': item_fp,           # coupon-applied price
        'product_name': info['itemName'],      # product name
    }

    # TODO: colour and size are not collected yet.

    # Images: scraped from the product page gallery.
    url = f'https://www.wconcept.co.kr/Product/{item_cd}?rccode=pc_topseller'
    # `headers` must be passed by keyword: the second positional parameter of
    # requests.get is `params`, which would put the header values (API key
    # included) into the query string and send no custom headers at all.
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.text, 'lxml')

    # Image collection is best-effort: an <img> without a src is skipped
    # instead of aborting the loop behind a bare `except`.
    image_urls = []
    for image in soup.select('ul#gallery > li > a > img'):
        src = image.get('src')
        if src:
            image_urls.append(src)

    record['url'] = image_urls
    return record



In [21]:
def get_item_info(item_cd, item_fp, timeout=30):
    """Look up one product and return its flattened output record.

    Posts the item code to the ``GetProductsInfo`` endpoint, takes the first
    product in the answer and hands it to ``get_item_data``.

    Args:
        item_cd: Product code sent as the ``itemcds`` form field.
        item_fp: Final price forwarded unchanged to ``get_item_data``.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The dict produced by ``get_item_data`` for the first returned product.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        IndexError: If the endpoint returns no product for ``item_cd``.
    """
    url = 'https://www.wconcept.co.kr/Ajax/GetProductsInfo'
    response = requests.post(
        url, headers=headers, data={'itemcds': item_cd}, timeout=timeout
    )
    response.raise_for_status()

    try:
        # Decode the JSON body directly; an HTML parser can alter payloads
        # that contain markup characters.
        products = response.json()
    except ValueError:
        # Fall back to the previous extraction path in case the body is not
        # bare JSON (e.g. wrapped in markup).
        products = json.loads(BeautifulSoup(response.text, 'lxml').string)

    if not products:
        raise IndexError(f'no product info returned for item code {item_cd!r}')

    return get_item_data(products[0], item_fp)



In [22]:
def get_rank_score(ranking, item_count):
    """Map a 1-based rank onto a score between 1.0 (best) and 0.0 (last).

    The score falls linearly: rank 1 scores 1.0 and rank ``item_count``
    scores 0.0.

    Args:
        ranking: 1-based position of the item in its list.
        item_count: Total number of items in that list.

    Returns:
        ``1 - (ranking - 1) / (item_count - 1)``, or ``1.0`` when the list
        holds at most one item.
    """
    # A list with a single item has no spread to scale over; without this
    # guard the division below raises ZeroDivisionError.
    if item_count <= 1:
        return 1.0

    return 1 - ((ranking - 1) / (item_count - 1))

In [23]:
def main():
    """Crawl the best-seller lists and write the results to ``wconcept.csv``.

    For both genders and every code in ``middle_category_nums`` the ranked
    item list is fetched, each item's details are looked up, a ``rank_score``
    is attached, and all records are saved as one CSV (without the index).
    Each record is printed as it is collected.
    """
    ranked_items = []
    for gender in ['men', 'women']:
        for middle_category_num in middle_category_nums:
            ranked_items += get_item_cds_fp(middle_category_num, gender)

    item_info = []
    for item_cd, item_fp, ranking, middle_item_count in ranked_items:
        try:
            info = get_item_info(item_cd, item_fp)
        except Exception as exc:
            # One product that cannot be fetched or parsed must not abort the
            # whole crawl and discard everything collected so far; report it
            # and move on. KeyboardInterrupt is not an Exception, so a manual
            # stop still interrupts the run.
            print(f'skipping {item_cd}: {exc!r}')
            continue

        # The ranked rows may carry strings, so convert before doing arithmetic.
        info['rank_score'] = get_rank_score(int(ranking), int(middle_item_count))
        item_info.append(info)
        print(info)

    pd.DataFrame(item_info).to_csv('wconcept.csv', index=False)
    

In [24]:
# Start the crawl only when this code runs as the `__main__` module.
if __name__ == '__main__':
    main()

{'brand': '코닥 어패럴', 'product_id': '305685943', 'likes': 19.0, 'sold_out': '판매중', 'fixed_price': 189000.0, 'discounted_price': '170100', 'product_name': '파인더 후드 아노락 자켓 IVORY', 'url': ['//product-image.wconcept.co.kr/productimg/image/img0/43/305685943_NG18313.jpg', '//product-image.wconcept.co.kr/productimg/image/img0/43/305685943_add1_IH16136.jpg?thumbnail=60x80'], 'rank_score': 1.0}
{'brand': '필시크', 'product_id': '305814488', 'likes': 0.0, 'sold_out': '판매중', 'fixed_price': 278000.0, 'discounted_price': '225180', 'product_name': 'MINIMAL SHORT JACKET (BLACK)', 'url': ['//product-image.wconcept.co.kr/productimg/image/img0/88/305814488_UO61336.jpg', '//product-image.wconcept.co.kr/productimg/image/img0/88/305814488_add1_JL79048.jpg?thumbnail=60x80', '//product-image.wconcept.co.kr/productimg/image/img0/88/305814488_add2_QP97836.jpg?thumbnail=60x80', '//product-image.wconcept.co.kr/productimg/image/img0/88/305814488_add3_OO10104.jpg?thumbnail=60x80'], 'rank_score': 0.9855072463768116}
{'br

KeyboardInterrupt: 

In [None]:
# Load the CSV file that main() writes so the results can be inspected.
df = pd.read_csv('wconcept.csv')

In [None]:
# Bare expression: evaluates the loaded frame so it becomes the cell's output.
df

Unnamed: 0,brandNameKr,itemCd,heartCnt,statusName,customerPrice,finalPrice,itemName,imageUrls,rank_score
0,코닥 어패럴,305685943,19.0,판매중,189000.0,170100,파인더 후드 아노락 자켓 IVORY,['//product-image.wconcept.co.kr/productimg/im...,1.000000
1,필시크,305814488,0.0,판매중,278000.0,225180,MINIMAL SHORT JACKET (BLACK),['//product-image.wconcept.co.kr/productimg/im...,0.982456
2,더 그레이티스트,302630480,7.0,판매중,186000.0,167400,Basic Blazer BEIGE,['//product-image.wconcept.co.kr/productimg/im...,0.964912
3,노스페이스,305785964,3.0,판매중,169000.0,152100,NJ3LQ03B 남성 아이스 트랙 자켓,['//product-image.wconcept.co.kr/productimg/im...,0.947368
4,더니트컴퍼니,305744799,0.0,판매중,169000.0,109800,쿨맥스 컴포트 자켓 4color,['//product-image.wconcept.co.kr/productimg/im...,0.929825
...,...,...,...,...,...,...,...,...,...
1583,오끌레르,304823700,202.0,판매중,9000.0,7650,툴리 팬티,['//product-image.wconcept.co.kr/productimg/im...,0.040404
1584,티에스에이치,302646770,713.0,판매중,51800.0,35500,스퀘어넥 리브드 브라탑 4colors 2P,['//product-image.wconcept.co.kr/productimg/im...,0.030303
1585,마른파이브,305809248,38.0,판매중,44900.0,26010,쉬어 브라 볼륨 서포트,['//product-image.wconcept.co.kr/productimg/im...,0.020202
1586,마른파이브,305778407,24.0,판매중,28400.0,20610,슬라인 바디쉐이퍼 힙업 팬티,['//product-image.wconcept.co.kr/productimg/im...,0.010101
