In [160]:
import requests
import random
import pandas as pd
import time

In [161]:
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0",
    "Accept-Language": 'en-US,en;q=0.9',
    "Accept-Encoding": "gzip, deflate, br, zstd",
    "Referer": "https://shopee.vn/",
    "From": "",
    "af-ac-enc-dat": "",
    "x-api-source": "pc"
}

base_image_url = "https://down-vn.img.susercontent.com/file/"


### EXTRACT CATEGORY

In [162]:
URL = "https://shopee.vn/api/v4/official_shop/get_categories"
PARAMS = { 
}

In [163]:
response = requests.get(URL, headers=HEADERS, params=PARAMS)
time.sleep(random.uniform(3.2, 8.7))
if response.status_code == 200:
    data = response.json()["data"]["categories"]
    category_data = []

    for category in data:
        category_id = category["category_id"]
        name = category["name"]
        image = category["image"]

        category_data.append({
            "category_id": category_id,
            "name": name,
            "image": image
        })
    category = pd.DataFrame(category_data)

else :
    print(f"Error: {response.status_code}")

### TRANSFORM CATEGORY

In [164]:
category = category[category["category_id"] != -1]

In [165]:
category["category_logo_url"] = base_image_url + category["image"]
category.columns

Index(['category_id', 'name', 'image', 'category_logo_url'], dtype='object')

In [166]:
category = category.drop(columns=["image"])
category.columns

Index(['category_id', 'name', 'category_logo_url'], dtype='object')

In [167]:
print(f"Total shop fetched: {len(category)}")
category.head()

Unnamed: 0,category_id,name,category_logo_url
1,11035478,Thể Thao & Du Lịch,https://down-vn.img.susercontent.com/file/6cb7...
2,11036793,Ô tô - xe máy - xe đạp,https://down-vn.img.susercontent.com/file/3fb4...
3,11036525,Bách hóa Online,https://down-vn.img.susercontent.com/file/c432...
4,11036670,Nhà Cửa & Đời Sống,https://down-vn.img.susercontent.com/file/24b1...
5,11036279,Sắc Đẹp,https://down-vn.img.susercontent.com/file/ef1f...


### EXTRACT SHOP

In [168]:
categories = category['category_id'].tolist()  # Convert the 'category_id' column to a list

In [169]:
shop_data = []

for cat_id in categories:
    URL = f"https://shopee.vn/api/v4/official_shop/get_shops_by_category?need_zhuyin=0&category_id={cat_id}"
    PARAMS = {}

    response = requests.get(URL, headers=HEADERS)
    time.sleep(random.uniform(3.2, 8.7))
    if response.status_code == 200:
        data = response.json()
        cat_id_res = data["data"]["cat_id"]  # Get the category_id from the response

        brands = data["data"]["brands"]

        for brand in brands:
            brand_ids = brand["brand_ids"]
            for shop in brand_ids:
                username = shop.get("username", "")
                brand_name = shop.get("brand_name", "")
                shopid = shop.get("shopid", "")
                logo = shop.get("logo", "")
                logo_pc = shop.get("logo_pc", "")
                shop_collection_id = shop.get("shop_collection_id", "")
                ctime = shop.get("ctime", "")
                brand_label = shop.get("brand_label", "")
                shop_data.append({
                    "category_id": cat_id_res,
                    "username": username,
                    "brand_name": brand_name,
                    "shop_id": shopid,
                    "logo": logo,
                    "logo_pc": logo_pc,
                    "shop_collection_id": shop_collection_id,
                    "ctime": ctime,
                    "brand_label": brand_label
                })
    else:
        print(f"Error: {response.status_code}")

shop = pd.DataFrame(shop_data)
print(f"Total shop fetched: {len(shop)}")

Total shop fetched: 3587


### TRANSFORM SHOP

In [170]:
shop["creation_time"] = pd.to_datetime(shop['ctime'], unit='s')
shop["shop_logo_url"] = base_image_url + shop["logo_pc"]
shop.columns

Index(['category_id', 'username', 'brand_name', 'shop_id', 'logo', 'logo_pc',
       'shop_collection_id', 'ctime', 'brand_label', 'creation_time',
       'shop_logo_url'],
      dtype='object')

In [171]:
shop = shop.drop(columns=["logo_pc", "logo", "shop_collection_id", "brand_label", "ctime"])
shop.columns

Index(['category_id', 'username', 'brand_name', 'shop_id', 'creation_time',
       'shop_logo_url'],
      dtype='object')

In [187]:
shop.head()

Unnamed: 0,category_id,username,brand_name,shop_id,creation_time,shop_logo_url
0,11035478,adidasofficialstore,Adidas Official Store,71009635,2021-05-14 08:44:55,https://down-vn.img.susercontent.com/file/3a7d...
1,11035478,akisport.net,Aki Sport,432189904,2021-08-30 04:29:27,https://down-vn.img.susercontent.com/file/eea8...
2,11035478,amalife_vietnam,Amalife Việt Nam,551582212,2021-11-04 07:51:47,https://down-vn.img.susercontent.com/file/b8ee...
3,11035478,_andrealin,Andrealin,388588353,2023-06-20 08:25:43,https://down-vn.img.susercontent.com/file/vn-5...
4,11035478,anta.official,Anta Flagship Store,93177350,2021-05-14 08:44:55,https://down-vn.img.susercontent.com/file/7af9...


### EXTRACT SHOP BASE

In [201]:
# Unique value in the 'category_id' column
category_ids = shop["category_id"].unique()

for cat_id in category_ids:
    # Filter the data for the current category_id
    cat_data = shop[shop['category_id'] == cat_id]

    # Create an empty list to store shop details for the current category_id
    shop_details = []

    for idx, row in cat_data.iterrows():
        username = row['username']
        shopid = row['shop_id']

        URL = f"https://shopee.vn/api/v4/shop/get_shop_base?entry_point=&need_cancel_rate=true&request_source=shop_home_page&username={username}&version=2&shopid={shopid}"
        response = requests.get(URL, headers=HEADERS)
        time.sleep(random.uniform(3.2, 8.7))

        if response.status_code == 200:
            print(f"Success - {username}: {response.status_code}")
            data = response.json()['data']
            userid = data['userid']
            follower_count = data['follower_count']
            total_avg_star = data['account']['total_avg_star']
            item_count = data['item_count']
            rating_normal = data['shop_rating']['rating_normal']
            rating_bad = data['shop_rating']['rating_bad']
            rating_good = data['shop_rating']['rating_good']
            response_time = data['response_time']
            ctime = data['ctime']

            shop_details.append({
                'category_id': row['category_id'],
                'username': row['username'],
                'brand_name': row['brand_name'],
                'shop_id': row['shop_id'],
                'creation_time': row['creation_time'],
                'shop_logo_url': row['shop_logo_url'],
                'userid': userid,
                'follower_count': follower_count,
                'total_avg_star': total_avg_star,
                'item_count': item_count,
                'rating_normal': rating_normal,
                'rating_bad': rating_bad,
                'rating_good': rating_good,
                'response_time': response_time,
                'ctime': ctime
            })
        else:
            print(f"Error - {username}: {response.status_code} - https://shopee.vn/api/v4/shop/get_shop_base?entry_point=&need_cancel_rate=true&request_source=shop_home_page&username={username}&version=2&shopid={shopid}")

    # Create a DataFrame for the current category_id
    shop_details_df = pd.DataFrame(shop_details)

    # Save the DataFrame to a CSV file with the desired file name format
    file_name = f"shop_base_category_{cat_id}.csv"
    shop_details_df.to_csv(file_name, index=False)

    print(f"Data for category_id {cat_id} saved to {file_name}")

Success - adidasofficialstore: 200
Error - akisport.net: 403 - https://shopee.vn/api/v4/shop/get_shop_base?entry_point=&need_cancel_rate=true&request_source=shop_home_page&username=akisport.net&version=2&shopid=432189904
Error - amalife_vietnam: 403 - https://shopee.vn/api/v4/shop/get_shop_base?entry_point=&need_cancel_rate=true&request_source=shop_home_page&username=amalife_vietnam&version=2&shopid=551582212
Error - _andrealin: 403 - https://shopee.vn/api/v4/shop/get_shop_base?entry_point=&need_cancel_rate=true&request_source=shop_home_page&username=_andrealin&version=2&shopid=388588353
Error - anta.official: 403 - https://shopee.vn/api/v4/shop/get_shop_base?entry_point=&need_cancel_rate=true&request_source=shop_home_page&username=anta.official&version=2&shopid=93177350
Error - dominvietnam: 403 - https://shopee.vn/api/v4/shop/get_shop_base?entry_point=&need_cancel_rate=true&request_source=shop_home_page&username=dominvietnam&version=2&shopid=239345959
Error - aolikessports: 403 - htt

KeyError: 'response_time'

In [None]:
# import requests

# HEADERS = {
#     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0",
#     "Accept-Language": 'en-US,en;q=0.9',
#     "Accept-Encoding": "gzip, deflate, br, zstd",
#     "Referer": "https://shopee.vn/",
#     "From": "",
#     "af-ac-enc-dat": "",
#     "x-api-source": "pc"
# }

# URL = "https://shopee.vn/api/v4/shop/get_shop_base?entry_point=&need_cancel_rate=true&request_source=shop_home_page&username=adidasofficialstore&version=2&shopid=71009635"

# response = requests.get(URL, headers=HEADERS)
# data = response.json()['data']

# userid = data['userid']
# follower_count = data['follower_count']
# total_avg_star = data['account']['total_avg_star']
# item_count = data['item_count']
# rating_normal = data['shop_rating']['rating_normal']
# rating_bad = data['shop_rating']['rating_bad']
# rating_good = data['shop_rating']['rating_good']
# response_time = data['response_time']
# ctime = data['ctime']

# print(f"userid: {userid}")
# print(f"follower_count: {follower_count}")
# print(f"total_avg_star: {total_avg_star}")
# print(f"item_count: {item_count}")
# print(f"rating_normal: {rating_normal}")
# print(f"rating_bad: {rating_bad}")
# print(f"rating_good: {rating_good}")
# print(f"response_time: {response_time}")
# print(f"ctime: {ctime}")