### LIBRARY

In [40]:
import pandas as pd
import requests
import random
import time

### SET UP THE ENVIRONMENT

In [41]:
# Browser-like request headers passed as `headers=HEADERS` to the
# `requests.get` calls in the cells below.
HEADERS = {
    # The long User-Agent is split across adjacent literals for readability;
    # Python joins them into one string at compile time.
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0"
    ),
    "Accept-Language": "en-US,en;q=0.9",
    # NOTE(review): advertising br/zstd presumably requires the optional
    # brotli/zstandard packages for the response body to be decoded - confirm
    # they are installed in this environment.
    "Accept-Encoding": "gzip, deflate, br, zstd",
    "Referer": "https://tiki.vn/",
    # NOTE(review): the next two headers are sent with empty values; confirm
    # whether the API needs them at all.
    "From": "",
    "af-ac-enc-dat": "",
    "x-api-source": "pc",
}

### EXTRACT GROUP DATA

In [42]:
# Endpoint requested in the next cell; its JSON response is read through
# data["menu_block"]["items"].
URL = (
    "https://api.tiki.vn/raiden/v2/menu-config"
    "?platform=desktop"
)

In [43]:
# Request the menu configuration once. An explicit timeout (seconds) keeps the
# cell from blocking forever if the server never answers; requests applies no
# timeout by default.
response = requests.get(URL, headers=HEADERS, timeout=30)
# Random pause after the call, presumably to space out the requests made by
# the following cells.
time.sleep(random.uniform(3.2, 8.7))
if response.status_code == 200:
    data = response.json()
    # Single quotes inside the replacement field keep this f-string valid on
    # Python versions before 3.12, which reject reusing the outer quote there.
    print(f"Success to fetch {len(data['menu_block']['items'])} groups.")
else:
    # `data` is not assigned on this path, so cells that read it will fail on
    # a fresh kernel (or see a stale value from an earlier run).
    print("Failed to fetch data:", response.status_code)


Success to fetch 26 groups.


### EXTRACT GROUP ID
Keep only the groups whose IDs are 8322, 1846, and 1789.

In [44]:
# IDs of the groups to keep. They are strings because they are compared with
# text sliced out of each menu item's link.
TARGET_GROUP_IDS = {"8322", "1846", "1789"}

# Collect [GroupID, Name] rows for the selected groups, in the order the API
# lists them. The loop variable has its own name so that `group` is only ever
# bound to the final DataFrame.
group_list = []
for menu_item in data["menu_block"]["items"]:
    # The group ID is the last path segment of the link minus its first
    # character. A trailing slash or a query string is dropped first so it
    # cannot end up inside the ID.
    last_segment = menu_item["link"].rstrip("/").rsplit("/", 1)[-1]
    group_id = last_segment.split("?", 1)[0][1:]

    if group_id in TARGET_GROUP_IDS:
        group_list.append([group_id, menu_item["text"]])

group = pd.DataFrame(group_list, columns=["GroupID", "Name"])

In [45]:
# Display the filtered group table (columns: GroupID, Name) built in the previous cell.
group

Unnamed: 0,GroupID,Name
0,8322,Nhà Sách Tiki
1,1789,Điện Thoại - Máy Tính Bảng
2,1846,Laptop - Máy Vi Tính - Linh kiện


### EXTRACT CATEGORY HIERARCHY

In [46]:
def _fetch_child_categories(parent_id):
    """Return the categories listed directly under ``parent_id``.

    Sends one GET request to the categories endpoint with ``HEADERS``, then
    sleeps for a random interval before returning so that consecutive calls
    are spaced out.

    Args:
        parent_id: Value placed in the ``parent_id`` query parameter.

    Returns:
        The value stored under ``"data"`` in the JSON response (callers
        iterate it as a sequence of dicts), or ``None`` when the response
        status is not 200. A failure message is printed in that case so the
        missing branch is visible in the cell output.
    """
    url = f"https://tiki.vn/api/v2/categories?parent_id={parent_id}"
    # Explicit timeout (seconds): requests would otherwise wait indefinitely.
    response = requests.get(url, headers=HEADERS, timeout=30)
    time.sleep(random.uniform(3.2, 8.7))

    if response.status_code != 200:
        print(f"Failed to fetch categories for parent_id={parent_id}:", response.status_code)
        return None
    return response.json()["data"]


# Rows for the three hierarchy levels. Each row is [own ID, parent ID, name].
master_category_list = []
category_list = []
sub_category_list = []

for group_id in group["GroupID"]:
    master_categories = _fetch_child_categories(group_id)
    if master_categories is None:
        # Request failed; nothing is recorded for this group.
        continue
    if not master_categories:
        # The group has no children: keep a placeholder row that carries the
        # group's own name and a missing master-category ID.
        group_name = group[group["GroupID"] == group_id]["Name"].item()
        master_category_list.append([None, group_id, group_name])
        continue

    for master_item in master_categories:
        master_id = master_item["id"]
        master_category_list.append([master_id, group_id, master_item["name"]])

        categories = _fetch_child_categories(master_id)
        if categories is None:
            continue
        if not categories:
            # No children: placeholder row so the master category still
            # appears in the category table.
            category_list.append([None, master_id, None])
            continue

        for category_item in categories:
            category_id = category_item["id"]
            category_list.append([category_id, master_id, category_item["name"]])

            # Third level: the sub categories ("types") under this category.
            sub_categories = _fetch_child_categories(category_id)
            if sub_categories is None:
                continue
            if not sub_categories:
                sub_category_list.append([None, category_id, None])
                continue

            for sub_item in sub_categories:
                # .get() is kept at this level: a missing key yields None
                # instead of raising.
                sub_category_list.append(
                    [sub_item.get("id"), category_id, sub_item.get("name")]
                )

master_category = pd.DataFrame(master_category_list, columns=["MasterCategoryID", "GroupID", "Name"])
category = pd.DataFrame(category_list, columns=["CategoryID", "MasterCategoryID", "Name"])
sub_category = pd.DataFrame(sub_category_list, columns=["SubCategoryID", "CategoryID", "Name"])

In [47]:
# Report how many rows each hierarchy table holds, one line per table.
summary_lines = [
    f"Success to fetch {len(master_category)} master categories.",
    f"Success to fetch {len(category)} categories.",
    f"Success to fetch {len(sub_category)} sub categories.",
]
print(*summary_lines, sep="\n")

Success to fetch 15 master categories.
Success to fetch 136 categories.
Success to fetch 466 sub categories.
