In [None]:
import pandas as pd
import requests
import random
import time
import matplotlib.pyplot as plt

In [None]:
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0",
    "Accept-Language": 'en-US,en;q=0.9',
    "Accept-Encoding": "gzip, deflate, br, zstd",
    "Referer": "https://tiki.vn/",
    "From": "",
    "af-ac-enc-dat": "",
    "x-api-source": "pc"
}

### EXTRACT SUB CATEGORY

In [None]:
URL = "https://api.tiki.vn/raiden/v2/menu-config?platform=desktop"
response = requests.get(URL, headers=HEADERS)
time.sleep(random.uniform(3.2, 8.7))
if response.status_code == 200:
    data = response.json()
else:
    print("Failed to fetch data:", response.status_code)
    
group = data["menu_block"]["items"]
group_list = []
for group in group:
    link = group["link"]
    group_id = link.split("/")[-1][1:]
    text = group["text"]
    
    if group_id in ["1846", "1789"]:
        group_list.append([group_id, text])

group = pd.DataFrame(group_list, columns=["GroupID", "Name"])

In [None]:
category_list = []
for group_id, group_name in zip(group["GroupID"], group["Name"]):
    parent_url = f"https://tiki.vn/api/v2/categories?parent_id={group_id}"
    parent_response = requests.get(parent_url, headers=HEADERS)
    time.sleep(random.uniform(3.2, 8.7))
    if parent_response.status_code == 200:
        parent_data = parent_response.json()
        if not parent_data["data"]:
            category_list.append([group_id, group_name, None, None, None, None, None, None])
        else:
            for parent_category in parent_data["data"]:
                parent_id = parent_category["id"]
                parent_name = parent_category["name"]
                
                child_url = f"https://tiki.vn/api/v2/categories?parent_id={parent_id}"
                child_response = requests.get(child_url, headers=HEADERS)
                time.sleep(random.uniform(3.2, 8.7))
                
                if child_response.status_code == 200:
                    child_data = child_response.json()
                    if not child_data["data"]:
                        category_list.append([group_id, group_name, parent_id, parent_name, None, None, None, None])
                    else:
                        for child_category in child_data["data"]:
                            child_id = child_category["id"]
                            child_name = child_category["name"]
                            
                            type_url = f"https://tiki.vn/api/v2/categories?parent_id={child_id}"
                            type_response = requests.get(type_url, headers=HEADERS)
                            time.sleep(random.uniform(3.2, 8.7))
                            
                            if type_response.status_code == 200:
                                type_data = type_response.json()
                                if type_data["data"]:
                                    for type_item in type_data["data"]:
                                        type_id = type_item.get("id")
                                        type_name = type_item.get("name")
                                        category_list.append([group_id, group_name, parent_id, parent_name, child_id, child_name, type_id, type_name])
                                else:
                                    category_list.append([group_id, group_name, parent_id, parent_name, child_id, child_name, None, None])

category_df = pd.DataFrame(category_list, columns=["GroupID", "GroupName", "MasterCategoryID", "MasterCategoryName", "CategoryID", "CategoryName", "SubCategoryID", "SubCategoryName"])

In [None]:
def remove_single_category(df):
    # Group the DataFrame by MasterCategoryID and count the unique CategoryID values
    category_counts = df.groupby('MasterCategoryID')['CategoryID'].nunique()
    
    # Create a mask for MasterCategoryIDs with only one CategoryID
    single_category_mask = category_counts == 1
    
    # Drop the CategoryID and CategoryName columns for rows where there is only one CategoryID
    df.loc[df['MasterCategoryID'].isin(single_category_mask[single_category_mask].index), ['CategoryID', 'CategoryName']] = [None, None]
    
    return df
  
cleaned_df = remove_single_category(category_df)

def transform_category(row):
    # Handle MasterCategory and Category
    if pd.isna(row['CategoryID']):
        master_category_id = row['MasterCategoryID']
        master_category_name = row['MasterCategoryName']
        category_id = row['MasterCategoryID']
        category_name = row['MasterCategoryName']
        is_category = 0
    else:
        master_category_id = row['MasterCategoryID']
        master_category_name = row['MasterCategoryName']
        category_id = row['CategoryID']
        category_name = row['CategoryName']
        is_category = 1

    # Handle SubCategory
    if pd.isna(row['SubCategoryID']):
        sub_category_id = category_id
        sub_category_name = category_name
        is_sub_category = 0
    else:
        sub_category_id = row['SubCategoryID']
        sub_category_name = row['SubCategoryName']
        is_sub_category = 1

    return pd.Series([
        master_category_id, master_category_name, category_id, category_name,
        is_category, sub_category_id, sub_category_name, is_sub_category
    ])
    
category_df[['MasterCategoryID', 'MasterCategoryName', 'CategoryID', 'CategoryName',
             'isCategory', 'SubCategoryID', 'SubCategoryName', 'isSubCategory']] = category_df.apply(transform_category, axis=1, result_type='expand')

In [None]:
category_df["GroupID"] = category_df["GroupID"].astype(int)
category_df["MasterCategoryID"] = category_df["MasterCategoryID"].astype(int)
category_df["CategoryID"] = category_df["CategoryID"].astype(int)
category_df["SubCategoryID"] = category_df["SubCategoryID"].astype(int)


master_category = category_df[["MasterCategoryID", "GroupID", "MasterCategoryName"]].drop_duplicates()
master_category = master_category.rename(columns={"MasterCategoryName": "Name"})

category = category_df[["CategoryID", "MasterCategoryID", "CategoryName", "isCategory"]].drop_duplicates()
category = category.rename(columns={"CategoryName": "Name"})

sub_category = category_df[["SubCategoryID", "CategoryID", "SubCategoryName", "isSubCategory"]].drop_duplicates()
sub_category = sub_category.rename(columns={"SubCategoryName": "Name"})

In [None]:
def retrieve_product_ids(sub_category_id):
    base_url = "https://tiki.vn/api/personalish/v1/blocks/listings"
    PARAMS = {"category": sub_category_id, "page": 1}
    response = requests.get(base_url, headers=HEADERS, params=PARAMS)
    time.sleep(random.uniform(3.2, 8.7))
    data = response.json()
    total_page = data["paging"]["last_page"]

    # Fetch data from each page
    product_data = []
    for page in range(1, total_page + 1):
        PARAMS = {"category": sub_category_id, "page": page}
        response = requests.get(base_url, headers=HEADERS, params=PARAMS)
        time.sleep(random.uniform(3.2, 8.7))
        data = response.json()
        for item in data["data"]:
            product_id = item["id"]
            brand_name = item.get("brand_name", None)  # Use .get() with a default value in case brand_name is missing
            product_data.append({"product_id": product_id, "brand_name": brand_name})

    return product_data

product_df = []

for sub_category_id in sub_category["SubCategoryID"]:
    product_data = retrieve_product_ids(sub_category_id)
    for product in product_data:
        product_df.append([sub_category_id, product["product_id"], product["brand_name"]])
    
product_df = pd.DataFrame(product_df, columns=["SubCategoryID", "ProductID", "BrandName"])

In [None]:
# # Save it to CSV
product_df = pd.read_csv("data/product_id.csv")

In [None]:
# Count the number of ProductID by BrandName
brand_counts = product_df["BrandName"].value_counts()
brand_counts.head(10)

In [None]:
# brands = ['Apple', 'Samsung', 'Dell', 'HP']
brands = ['Apple']
brand_counts = brand_counts[brands]

fig, ax = plt.subplots(figsize=(8, 6))
brand_counts.plot(kind='bar', ax=ax)

for i, v in enumerate(brand_counts):
    ax.text(i, v, str(v), color='black', fontweight='bold', horizontalalignment='center', verticalalignment='bottom')

plt.title('Number of Products by Brand')
plt.xlabel('Brand')
plt.ylabel('Number of Products')

plt.xticks(range(len(brands)), brands, rotation=0)

plt.tight_layout()
plt.show()

In [None]:
product_df = product_df[product_df['BrandName'].isin(brands)]
product_df

In [None]:
product_df.to_csv("temp.csv", index=False)

### RETRIEVE PRODUCT DATA

In [None]:
# filtered_df = pd.read_csv("data/product_id.csv")

In [10]:
product_data_list = []

for _, row in product_df.iterrows():
    sub_category_id = row['SubCategoryID']
    product_id = row['ProductID']

    URL = f"https://tiki.vn/api/v2/products/{product_id}"
    PARAMS = {}

    response = requests.get(URL, headers=HEADERS, params=PARAMS)
    time.sleep(random.uniform(3.2, 4.7))
    
    data = response.json()

    product_data = {
        'product_id': data['id'],
        'product_name': data.get('name', None),
        'product_url': data.get('short_url', None),
        'pricing_current': data.get('price', None),
        'pricing_original': data.get('original_price', None),
        'product_image_url': data.get('thumbnail_url', None),
        'inventory_status': data.get('inventory_status', None),
        'inventory_type': data.get('inventory_type', None),
        'created_date': data.get('day_ago_created', None),
        'quantity_sold': data.get('all_time_quantity_sold', None),
        'brand_id': data.get('brand', {}).get('id', None),
        'brand_name': data.get('brand', {}).get('name', None),
        'brand_slug': data.get('brand', {}).get('slug', None),
        'seller_id': data.get('current_seller', {}).get('id', 0) if data.get('current_seller') else 0,
        'seller_name': data.get('current_seller', {}).get('name', 0) if data.get('current_seller') else 0,
        'seller_link': data.get('current_seller', {}).get('link', 0) if data.get('current_seller') else 0,
        'seller_image_url': data.get('current_seller', {}).get('logo', 0) if data.get('current_seller') else 0,
        'category_id': data['categories']['id'] if data['categories']['is_leaf'] else data['breadcrumbs'][-2]['category_id'],
        'sub_category_id': sub_category_id,
        'brand_name': row['BrandName']
    }

    product_data_list.append(product_data)

print(f"Success fetching data for {len(product_data_list)} products")
product_df = pd.DataFrame(product_data_list, columns=['product_id', 'product_name', 'product_url', 'pricing_current', 'pricing_original', 'product_image_url', 'inventory_status', 'inventory_type', 'created_date', 'quantity_sold', 'brand_id', 'brand_name', 'brand_slug', 'seller_id', 'seller_name', 'seller_link', 'seller_image_url', 'category_id', 'sub_category_id', 'brand_name'])
product_df.to_csv("product_data_1.csv", index=False, encoding='utf-8-sig')
product_df.head(10)

Success fetching data for 15 products


Unnamed: 0,product_id,product_name,product_url,pricing_current,pricing_original,product_image_url,inventory_status,inventory_type,created_date,quantity_sold,brand_id,brand_name,brand_slug,seller_id,seller_name,seller_link,seller_image_url,category_id,sub_category_id,brand_name.1
0,184036446,Apple iPhone 11,https://tiki.vn/product-p184036446.html?spid=3...,8650000,12990000,https://salt.tikicdn.com/cache/280x280/ts/prod...,available,instock,707,13990,17827,Apple,apple,1,Tiki Trading,https://tiki.vn/cua-hang/tiki-trading,21/ce/5c/b52d0b8576680dc3666474ae31b091ec.jpg,1795,1795,Apple
1,184059211,Apple iPhone 13,https://tiki.vn/product-p184059211.html?spid=1...,13490000,24990000,https://salt.tikicdn.com/cache/280x280/ts/prod...,available,instock,707,5835,17827,Apple,apple,1,Tiki Trading,https://tiki.vn/cua-hang/tiki-trading,21/ce/5c/b52d0b8576680dc3666474ae31b091ec.jpg,1795,1795,Apple
2,123345348,Apple iPhone 12,https://tiki.vn/product-p123345348.html?spid=7...,11990000,16990000,https://salt.tikicdn.com/cache/280x280/ts/prod...,available,instock,965,2811,17827,Apple,apple,1,Tiki Trading,https://tiki.vn/cua-hang/tiki-trading,21/ce/5c/b52d0b8576680dc3666474ae31b091ec.jpg,1795,1795,Apple
3,197214029,Apple iPhone 14,https://tiki.vn/product-p197214029.html?spid=1...,16490000,24990000,https://salt.tikicdn.com/cache/280x280/ts/prod...,available,instock,606,586,17827,Apple,apple,1,Tiki Trading,https://tiki.vn/cua-hang/tiki-trading,21/ce/5c/b52d0b8576680dc3666474ae31b091ec.jpg,1795,1795,Apple
4,271966786,Apple iPhone 15,https://tiki.vn/product-p271966786.html?spid=2...,18990000,22490000,https://salt.tikicdn.com/cache/280x280/ts/prod...,available,instock,236,422,17827,Apple,apple,1,Tiki Trading,https://tiki.vn/cua-hang/tiki-trading,https://vcdn.tikicdn.com/ts/seller/21/ce/5c/b5...,1795,1795,Apple
5,197214015,Apple iPhone 14 Plus,https://tiki.vn/product-p197214015.html?spid=1...,19490000,27990000,https://salt.tikicdn.com/cache/280x280/ts/prod...,available,instock,606,387,17827,Apple,apple,1,Tiki Trading,https://tiki.vn/cua-hang/tiki-trading,21/ce/5c/b52d0b8576680dc3666474ae31b091ec.jpg,1795,1795,Apple
6,271973414,Apple iPhone 15 Pro Max,https://tiki.vn/product-p271973414.html?spid=2...,29490000,33490000,https://salt.tikicdn.com/cache/280x280/ts/prod...,available,instock,236,825,17827,Apple,apple,1,Tiki Trading,https://tiki.vn/cua-hang/tiki-trading,https://vcdn.tikicdn.com/ts/seller/21/ce/5c/b5...,1795,1795,Apple
7,271967379,Apple iPhone 15 Plus,https://tiki.vn/product-p271967379.html?spid=2...,21990000,25990000,https://salt.tikicdn.com/cache/280x280/ts/prod...,available,instock,236,196,17827,Apple,apple,1,Tiki Trading,https://tiki.vn/cua-hang/tiki-trading,https://vcdn.tikicdn.com/ts/seller/21/ce/5c/b5...,1795,1795,Apple
8,271972435,Apple iPhone 15 Pro,https://tiki.vn/product-p271972435.html?spid=2...,24490000,27990000,https://salt.tikicdn.com/cache/280x280/ts/prod...,available,instock,236,344,17827,Apple,apple,1,Tiki Trading,https://tiki.vn/cua-hang/tiki-trading,https://vcdn.tikicdn.com/ts/seller/21/ce/5c/b5...,1795,1795,Apple
9,183330021,"Apple iPad 10.2-inch (9th Gen) Wi-Fi, 2021",https://tiki.vn/product-p183330021.html?spid=1...,6990000,10990000,https://salt.tikicdn.com/cache/280x280/ts/prod...,available,instock,711,3378,17827,Apple,apple,1,Tiki Trading,https://tiki.vn/cua-hang/tiki-trading,21/ce/5c/b52d0b8576680dc3666474ae31b091ec.jpg,1794,1794,Apple


### RETRIEVE FEEDBACK DATA

In [12]:
feedback_data_list = []

for _, row in product_df.iterrows():
    sub_category_id = row['sub_category_id']
    product_id = row['product_id']

    URL = "https://tiki.vn/api/v2/reviews"
    PARAMS = {"limit": 20, "spid": sub_category_id, "product_id": product_id}

    # Get the total number of pages
    response = requests.get(URL, headers=HEADERS, params=PARAMS)
    data = response.json()
    total_pages = data.get("paging", {}).get("last_page", 1)

    # Fetch data from each page
    for page in range(1, total_pages + 1):
        PARAMS["page"] = page
        response = requests.get(URL, headers=HEADERS, params=PARAMS)
        time.sleep(random.uniform(3.2, 4.7))
        data = response.json()

        stars = data.get("stars", {})
        OneStarCount = stars.get("1", {}).get("count", 0)
        TwoStarCount = stars.get("2", {}).get("count", 0)
        ThreeStarCount = stars.get("3", {}).get("count", 0)
        FourStarCount = stars.get("4", {}).get("count", 0)
        FiveStarCount = stars.get("5", {}).get("count", 0)
        reviews_count = data.get("reviews_count", 0)
        review_data = data.get("data", [])

        for review in review_data:
            review_id = review.get("id")
            review_title = review.get("title")
            review_content = review.get("content")
            review_upvote = review.get("thank_count", 0)
            review_rating = review.get("rating")
            review_created_at = review.get("created_at")
            reviewer = review.get("created_by", {})
            user_id = reviewer.get("id")
            username = reviewer.get("name")
            joined_time = reviewer.get("created_time")
            total_reviews = reviewer.get("contribute_info", {}).get("summary", {}).get("total_review", 0)
            total_upvotes = reviewer.get("contribute_info", {}).get("summary", {}).get("total_thank", 0)

            feedback_data_list.append([OneStarCount, TwoStarCount, ThreeStarCount, FourStarCount, FiveStarCount, reviews_count, review_id, review_title, review_content, review_upvote, review_rating, review_created_at, user_id, username, joined_time, total_reviews, total_upvotes])


print(f"Success fetching data for {len(feedback_data_list)} feedbacks")
feedback_df = pd.DataFrame(feedback_data_list, columns=["OneStarCount", "TwoStarCount", "ThreeStarCount", "FourStarCount", "FiveStarCount", "reviews_count", "review_id", "review_title", "review_content", "review_upvote", "review_rating", "review_created_at", "user_id", "username", "joined_time", "total_reviews", "total_upvotes"])
feedback_df.to_csv("feedback_data_1.csv", index=False, encoding='utf-8-sig')
feedback_df.head(10)

Success fetching data for 9099 feedbacks


Unnamed: 0,OneStarCount,TwoStarCount,ThreeStarCount,FourStarCount,FiveStarCount,reviews_count,review_id,review_title,review_content,review_upvote,review_rating,review_created_at,user_id,username,joined_time,total_reviews,total_upvotes
0,61,13,32,183,2700,2989,19708302,Cực kì hài lòng,Đợt này mua tặng 2 cây trắng đen nhưng quên ch...,3,5,1704256818,5178168,Đại Hồng Bào,2017-04-10 14:05:29,273,369
1,61,13,32,183,2700,2989,19708307,Cực kì hài lòng,Đợt này mua tặng 2 cây trắng (giao từ Hanoi) đ...,2,5,1704256916,5178168,Đại Hồng Bào,2017-04-10 14:05:29,273,369
2,61,13,32,183,2700,2989,17356319,Cực kì hài lòng,"5* cho máy còn nguyên seal, hàng chính hãng VN...",184,5,1660043992,945720,muasach,2015-11-07 17:41:14,154,234
3,61,13,32,183,2700,2989,19694107,Rất không hài lòng,"Mua xài mới có 1 ngày mà đã lỗi, có cuộc gọi t...",1,1,1703434010,15052027,TRAN THUY,2019-11-20 17:08:29,39,1
4,61,13,32,183,2700,2989,18794713,Cực kì hài lòng,Chất lượng sản phẩm của Shop rất tốt đẹp full ...,20,5,1676287655,28830179,Minh Phương,2023-02-13 13:39:52,1,20
5,61,13,32,183,2700,2989,19563486,Cực kì hài lòng,Mình mua trả góp (chi tiết hình ảnh đính kèm)\...,3,5,1696170522,28677471,ANH DUY,2023-01-02 20:58:04,3,3
6,61,13,32,183,2700,2989,18695745,Cực kì hài lòng,Tuyệt vời\r\n\r\nApple đã chính thức ra mắt bộ...,3,5,1674967848,688542,Len Nguyen,2015-07-05 12:45:09,5,3
7,61,13,32,183,2700,2989,14487454,Cực kì hài lòng,Hiện tôi đang bị màn hình có kẻ sọc và chuyển ...,150,5,1642298876,10604241,Hang Bui,2019-01-29 22:50:45,14,152
8,61,13,32,183,2700,2989,18244942,Cực kì hài lòng,Tiki giao hàng sp là ip rất tốt. Chế độ bảo mậ...,1,5,1668857673,6954401,Phuong Nguyen,2018-04-18 11:09:08,4,1
9,61,13,32,183,2700,2989,14444430,Hài lòng,"Mình mua sp trên tiki giá trị cao khá nhiều, n...",10,4,1642128504,1063872,Nguyễn Văn Triều,2015-12-20 08:23:48,51,43
