TITLE HERE!

In [61]:
# Import thư viện cần thiết
import polars as pl
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from pathlib import Path
import re


In [62]:
# Đường dẫn thư mục chứa các file parquet
base_path = Path(r"D:\recommendation dataset")

In [63]:
# Tắt cảnh báo không cần thiết
warnings.filterwarnings('ignore')

# Cài đặt style cho biểu đồ
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

In [64]:
# Tìm tất cả file parquet trong thư mục
all_parquet_files = list(base_path.glob("*.parquet"))
print(f"Tìm thấy {len(all_parquet_files)} file parquet trong thư mục.")

# Phân loại file theo tên
sales_item_files = []
sales_purchase_files = []
sales_user_files = []

for file_path in all_parquet_files:
    name = file_path.name.lower()
    
    if "sales_pers.item" in name:
        sales_item_files.append(file_path)
    elif "sales_pers.purchase" in name or "sales_pers.purchase_history_daily" in name:
        sales_purchase_files.append(file_path)
    elif "sales_pers.user" in name:
        sales_user_files.append(file_path)

# In kết quả phân loại
print(f"""
 Tổng kết:
   sales_pers.item: {len(sales_item_files)} files
   sales_pers.purchase: {len(sales_purchase_files)} files
   sales_pers.user: {len(sales_user_files)} files
""")

# Hàm đọc nhóm file parquet
def read_parquet_group(file_list, group_name):
    if not file_list:
        print(f"Không có file nào cho nhóm {group_name}")
        return None
    
    print(f"Đang đọc {len(file_list)} file(s) cho nhóm {group_name}...")
    
    df = pl.read_parquet(file_list)
    
    print(f"{group_name} loaded: {df.shape[0]:,} rows × {df.shape[1]} columns")
    return df

# Đọc dữ liệu từ các nhóm file
sales_item_df = read_parquet_group(sales_item_files, "SALES ITEM")
sales_purchase_df = read_parquet_group(sales_purchase_files, "SALES PURCHASE")
sales_user_df = read_parquet_group(sales_user_files, "SALES USER")

# Tổng kết dữ liệu đã đọc
print("\nTỔNG KẾT DỮ LIỆU:")
if sales_item_df is not None:
    print(f"sales_item_df: {sales_item_df.shape}")
if sales_purchase_df is not None:
    print(f"sales_purchase_df: {sales_purchase_df.shape}")
if sales_user_df is not None:
    print(f"sales_user_df: {sales_user_df.shape}")

Tìm thấy 83 file parquet trong thư mục.

 Tổng kết:
   sales_pers.item: 1 files
   sales_pers.purchase: 72 files
   sales_pers.user: 10 files

Đang đọc 1 file(s) cho nhóm SALES ITEM...
SALES ITEM loaded: 27,332 rows × 34 columns
Đang đọc 72 file(s) cho nhóm SALES PURCHASE...
SALES PURCHASE loaded: 35,729,825 rows × 16 columns
Đang đọc 10 file(s) cho nhóm SALES USER...
SALES USER loaded: 4,573,964 rows × 18 columns

TỔNG KẾT DỮ LIỆU:
sales_item_df: (27332, 34)
sales_purchase_df: (35729825, 16)
sales_user_df: (4573964, 18)


In [65]:
# Tạo dictionary chứa các DataFrame
datasets = {
    "SALES ITEM": sales_item_df,
    "SALES PURCHASE": sales_purchase_df,
    "SALES USER": sales_user_df
}
# Lặp qua từng nhóm dữ liệu trong dictionary 'datasets'
for name, df in datasets.items():
    
    # Nếu không có dữ liệu (None), bỏ qua nhóm đó
    if df is None:
        continue

    # In tiêu đề nhóm dữ liệu
    print(f"\n[ {name} ]")

    # In kích thước của DataFrame: số dòng và số cột
    print(f"Shape: {df.shape[0]:,} rows × {df.shape[1]} columns")

    # In danh sách tên các cột
    print("Columns:", df.columns)

    # Kiểm tra số lượng giá trị thiếu (null) trong từng cột
    print("\nMissing values:")
    null_counts = df.null_count().to_pandas().T
    print(null_counts)

    # Mô tả thống kê cho các cột số
    print("\nMô tả dữ liệu số:")
    display(df.describe())


[ SALES ITEM ]
Shape: 27,332 rows × 34 columns
Columns: ['p_id', 'item_id', 'price', 'category_l1_id', 'category_l1', 'category_l2_id', 'category_l2', 'category_l3_id', 'category_l3', 'category_id', 'category', 'description', 'brand', 'manufacturer', 'creation_timestamp', 'is_deleted', 'created_date', 'updated_date', 'sync_status_id', 'last_sync_date', 'sync_error_message', 'image_url', 'gender_target', 'age_group', 'item_type', 'gp', 'weight', 'color', 'size', 'origin', 'volume', 'material', 'sale_status', 'description_new']

Missing values:
                        0
p_id                    0
item_id                 0
price                   0
category_l1_id          0
category_l1             0
category_l2_id          0
category_l2             0
category_l3_id          0
category_l3             0
category_id             0
category                0
description             0
brand                   0
manufacturer            0
creation_timestamp      0
is_deleted              0
created_

statistic,p_id,item_id,price,category_l1_id,category_l1,category_l2_id,category_l2,category_l3_id,category_l3,category_id,category,description,brand,manufacturer,creation_timestamp,is_deleted,created_date,updated_date,sync_status_id,last_sync_date,sync_error_message,image_url,gender_target,age_group,item_type,gp,weight,color,size,origin,volume,material,sale_status,description_new
str,f64,str,f64,f64,str,f64,str,f64,str,f64,str,str,str,str,f64,f64,str,str,f64,str,str,str,str,str,str,f64,f64,str,str,str,str,str,f64,str
"""count""",27332.0,"""27332""",27332.0,27332.0,"""27332""",27332.0,"""27332""",27332.0,"""27332""",27332.0,"""27332""","""27332""","""27332""","""27332""",27332.0,27332.0,"""27332""","""27332""",26546.0,"""26546""","""0""","""27332""","""27332""","""27332""","""27332""",27332.0,0.0,"""27332""","""27332""","""27332""","""27332""","""27332""",27332.0,"""22317"""
"""null_count""",0.0,"""0""",0.0,0.0,"""0""",0.0,"""0""",0.0,"""0""",0.0,"""0""","""0""","""0""","""0""",0.0,0.0,"""0""","""0""",786.0,"""786""","""27332""","""0""","""0""","""0""","""0""",0.0,27332.0,"""0""","""0""","""0""","""0""","""0""",0.0,"""5015"""
"""mean""",106317.679643,,190456.829467,2836.189192,,3719.039843,,4876.768294,,4920.712718,,,,,1622500000.0,0.0,"""2021-05-31 17:17:33.849833""","""2025-08-30 04:07:07.766157""",2.0,"""2025-07-18 18:00:25.086391""",,,,,,49696.651226,,,,,,,0.167203,
"""std""",25425.734073,,511123.971813,1417.521137,,1935.042044,,2035.952741,,1999.308355,,,,,75666000.0,,,,0.0,,,,,,,144155.68753,,,,,,,0.373164,
"""min""",1675.0,"""0000280000019""",0.0,1.0,"""Babycare""",2.0,"""0-1Y""",14.0,"""A2 milk""",4.0,"""0-12M Bodysuit bé trai đùi""","""Với kiểu dáng đơn giản nhưng …","""137 Degrees""","""(L) Địa điểm kinh doanh 1 - Ch…",1298300000.0,0.0,"""2011-02-21 11:54:02.047000""","""2023-12-26 17:21:11.663000""",2.0,"""2025-07-18 17:59:29.898256""",,"""Không xác định""","""Bé Gái""","""0-10M""","""BCS""",0.0,,"""Bạc""","""0 tháng""","""	Singapore, Thụy Sĩ""","""1 lít""","""	Vải không dệt, bông cellulose…",0.0,"""Chi tiết sản phẩm …"
"""25%""",88905.0,,49000.0,2788.0,,2038.0,,3516.0,,3522.0,,,,,1575500000.0,,"""2019-12-04 18:24:16.260000""","""2025-08-18 09:59:19.847000""",2.0,"""2025-07-18 17:59:29.898256""",,,,,,0.0,,,,,,,0.0,
"""50%""",104510.0,,119000.0,3292.0,,3958.0,,5254.0,,5511.0,,,,,1635800000.0,,"""2021-11-01 13:19:26.080000""","""2025-09-26 08:05:23.153000""",2.0,"""2025-07-18 17:59:29.898256""",,,,,,25047.0,,,,,,,0.0,
"""75%""",120262.0,,199000.0,3292.0,,4571.0,,6984.0,,6988.0,,,,,1669900000.0,,"""2022-12-01 15:40:42.260000""","""2025-09-27 00:05:36.233000""",2.0,"""2025-07-18 17:59:29.898256""",,,,,,65532.0,,,,,,,0.0,
"""max""",167147.0,"""7497000000006""",20990000.0,7376.0,"""Đồ chơi & Sách""",7492.0,"""Đồ uống""",7493.0,"""Đồ hộp""",7501.0,"""Động vật mô hình""","""﻿﻿﻿﻿﻿﻿﻿﻿﻿﻿﻿﻿﻿﻿﻿﻿﻿﻿﻿﻿﻿﻿﻿﻿﻿﻿Bỉm …","""Đức Thành""","""﻿Shantou City Chenghai Jia Ron…",1755900000.0,0.0,"""2025-08-22 17:35:14.377000""","""2025-10-01 08:05:42.990000""",2.0,"""2025-07-21 10:59:04.045058""",,"""Không xác định""","""Unisex""","""[""Từ 6M"", ""Từ 9M"", ""Từ 1Y""]""","""Ứng dụng mẹ và bé""",5541360.0,,"""Đỏ""","""XXL(15-25kg) - 26 miếng""","""Ấn Độ, Bulgaria""","""hộp 3 lọ x 250ml""","""﻿100% cotton﻿<br>""",1.0,"""Xin lỗi, nhưng tôi không thể t…"



[ SALES PURCHASE ]
Shape: 35,729,825 rows × 16 columns
Columns: ['timestamp', 'user_id', 'item_id', 'event_type', 'event_value', 'price', 'date_key', 'quantity', 'customer_id', 'created_date', 'updated_date', 'channel', 'payment', 'location', 'discount', 'is_deleted']

Missing values:
              0
timestamp     0
user_id       0
item_id       0
event_type    0
event_value   0
price         0
date_key      0
quantity      0
customer_id   0
created_date  0
updated_date  0
channel       0
payment       0
location      0
discount      0
is_deleted    0

Mô tả dữ liệu số:


statistic,timestamp,user_id,item_id,event_type,event_value,price,date_key,quantity,customer_id,created_date,updated_date,channel,payment,location,discount,is_deleted
str,f64,str,str,str,f64,f64,f64,f64,f64,str,str,str,str,f64,f64,f64
"""count""",35729825.0,"""35729825""","""35729825""","""35729825""",35729825.0,35729825.0,35729825.0,35729825.0,35729825.0,"""35729825""","""35729825""","""35729825""","""35729825""",35729825.0,35729825.0,35729825.0
"""null_count""",0.0,"""0""","""0""","""0""",0.0,0.0,0.0,0.0,0.0,"""0""","""0""","""0""","""0""",0.0,0.0,0.0
"""mean""",1719900000.0,,,,1.508098,169188.742494,20241000.0,1.508098,4871300.0,"""2024-07-02 17:13:24.220955""","""2024-07-02 18:27:59.561504""",,,448.414508,17235.651914,0.0
"""std""",9156800.0,,,,2.264976,195539.88976,346.231465,2.264976,2269700.0,,,,,247.505432,46673.708499,
"""min""",1704100000.0,"""0000063a29e92d643437aa09266fdc…","""0000280000019""","""Purchase""",1.0,0.0444,20240101.0,1.0,14732.0,"""2024-01-01 06:44:59.037000""","""2024-01-01 06:44:59.233000""","""Android""","""Chuyển khoản""",16.0,0.0,0.0
"""25%""",1712000000.0,,,,1.0,48316.3914,20240401.0,1.0,3066410.0,"""2024-04-01 16:44:10.513000""","""2024-04-01 17:15:12.017000""",,,234.0,0.0,
"""50%""",1719900000.0,,,,1.0,87000.0,20240702.0,1.0,5373736.0,"""2024-07-02 19:06:52.293000""","""2024-07-02 19:42:24.550000""",,,439.0,0.0,
"""75%""",1727900000.0,,,,1.0,244000.0,20241002.0,1.0,6853734.0,"""2024-10-02 19:55:22.447000""","""2024-10-02 20:24:53.813000""",,,653.0,15000.0,
"""max""",1735700000.0,"""fffff7e197ebf5c523fa686e6a305c…","""7352000000004""","""Purchase""",240.0,20990000.0,20241231.0,240.0,8208027.0,"""2024-12-31 22:35:19.510000""","""2025-09-27 21:25:22.960000""","""iOS""","""ZaloPay""",1036.0,7995000.0,0.0



[ SALES USER ]
Shape: 4,573,964 rows × 18 columns
Columns: ['customer_id', 'gender', 'location', 'province', 'membership', 'timestamp', 'created_date', 'updated_date', 'sync_status_id', 'last_sync_date', 'sync_error_message', 'region', 'location_name', 'install_app', 'install_date', 'district', 'user_id', 'is_deleted']

Missing values:
                          0
customer_id               0
gender                    0
location                  0
province                  0
membership                0
timestamp                 0
created_date              0
updated_date              0
sync_status_id       338285
last_sync_date       338285
sync_error_message  4573964
region                    0
location_name             0
install_app               0
install_date              0
district                  0
user_id                   0
is_deleted                0

Mô tả dữ liệu số:


statistic,customer_id,gender,location,province,membership,timestamp,created_date,updated_date,sync_status_id,last_sync_date,sync_error_message,region,location_name,install_app,install_date,district,user_id,is_deleted
str,f64,str,f64,str,str,f64,str,str,f64,str,str,str,str,str,f64,str,str,f64
"""count""",4573964.0,"""4573964""",4573964.0,"""4573964""","""4573964""",4573964.0,"""4573964""","""4573964""",4235679.0,"""4235679""","""0""","""4573964""","""4573964""","""4573964""",4573964.0,"""4573964""","""4573964""",4573964.0
"""null_count""",0.0,"""0""",0.0,"""0""","""0""",0.0,"""0""","""0""",338285.0,"""338285""","""4573964""","""0""","""0""","""0""",0.0,"""0""","""0""",0.0
"""mean""",6126600.0,,554.053518,,,1671200000.0,"""2022-12-16 22:48:44.025646""","""2025-06-11 05:52:40.770554""",2.0,"""2025-07-16 15:20:34.188640""",,,,,1684100000.0,,,0.0
"""std""",2441200.0,,295.523361,,,79743000.0,,,0.0,,,,,,71374000.0,,,
"""min""",14732.0,"""Khác""",42.0,"""An Giang""","""Diamond""",1306400000.0,"""2011-05-25 21:11:51.677000""","""2019-06-30 08:38:47.100000""",2.0,"""2025-07-16 11:54:29.816986""",,"""Bắc Trung Bộ""","""AGI - 110 Quốc Lộ 91""","""Android""",1306300000.0,""" Vũng Liêm""","""000004697bc1f0ecab42b6a3c2f0bc…",0.0
"""25%""",4532680.0,,302.0,,,1634300000.0,"""2021-10-15 10:09:34.280000""","""2025-07-07 15:33:10.201316""",2.0,"""2025-07-16 11:58:58.022193""",,,,,1655000000.0,,,
"""50%""",6843955.0,,547.0,,,1696000000.0,"""2023-09-29 16:33:25.217000""","""2025-07-07 15:33:10.201316""",2.0,"""2025-07-16 12:03:34.550186""",,,,,1705400000.0,,,
"""75%""",8029649.0,,765.0,,,1731200000.0,"""2024-11-09 14:38:22.223000""","""2025-08-02 10:54:03.860000""",2.0,"""2025-07-16 12:11:53.888687""",,,,,1736600000.0,,,
"""max""",9264159.0,"""Nữ""",1310.0,"""Đồng Tháp""","""Standard""",1759300000.0,"""2025-09-30 22:00:36.623000""","""2025-09-30 23:22:08.957000""",2.0,"""2025-07-21 11:19:29.058389""",,"""Đồng bằng sông Cửu Long""","""ĐTH - Trần Hưng Đạo""","""iOS""",1759200000.0,"""Ứng Hòa""","""fffffbd469d500229dd000e9fc48ed…",0.0


TASK 1:

In [74]:
# LOẠI BỎ CÁC CỘT KHÔNG CẦN THIẾT

# Danh sách các cột cần loại bỏ
cols_to_drop = [
    # SALES ITEM
    "is_deleted",
    "last_sync_date",
    "sync_error_message",
    "image_url",
    "description_new",
    "weight",
    "sync_status_id",
    "p_id",
    "item_id",
    "category_l1_id",
    "category_l2_id",
    "category_l3_id",
    "category_id",
    "description",
    "manufacturer",
    "created_date",
    "updated_date",

    # SALES PURCHASE
    "is_deleted",
    "event_type",
    "user_id",
    "item_id",
    "customer_id",
    "created_date",
    "updated_date",

    # SALES USER
    "is_deleted",
    "sync_status_id",
    "last_sync_date",
    "sync_error_message",
    "customer_id",
    "user_id",
    "created_date",
    "updated_date"

]

print("\nBẮT ĐẦU LOẠI BỎ CÁC CỘT KHÔNG CẦN THIẾT...\n")

# Lặp qua từng DataFrame trong datasets
for name, df in datasets.items():
    if df is None:
        continue

    print(f"\n>>> {name}")
    existing_cols = [c for c in cols_to_drop if c in df.columns]

    if existing_cols:
        print(f" - Các cột sẽ bị loại bỏ: {existing_cols}")
        df = df.drop(existing_cols)
        datasets[name] = df  # Cập nhật lại DataFrame sau khi loại bỏ
    else:
        print(" - Không có cột nào trong danh sách cần loại bỏ.")

    print(f" - Kích thước mới: {df.shape[0]:,} rows × {df.shape[1]} columns")

print("\nHOÀN TẤT VIỆC LOẠI BỎ CÁC CỘT KHÔNG CẦN THIẾT.")



BẮT ĐẦU LOẠI BỎ CÁC CỘT KHÔNG CẦN THIẾT...


>>> SALES ITEM
 - Các cột sẽ bị loại bỏ: ['description', 'manufacturer', 'created_date', 'updated_date', 'created_date', 'updated_date', 'created_date', 'updated_date']
 - Kích thước mới: 27,332 rows × 17 columns

>>> SALES PURCHASE
 - Các cột sẽ bị loại bỏ: ['created_date', 'updated_date', 'created_date', 'updated_date', 'created_date', 'updated_date']
 - Kích thước mới: 35,729,825 rows × 9 columns

>>> SALES USER
 - Các cột sẽ bị loại bỏ: ['created_date', 'updated_date', 'created_date', 'updated_date', 'created_date', 'updated_date']
 - Kích thước mới: 4,573,964 rows × 10 columns

HOÀN TẤT VIỆC LOẠI BỎ CÁC CỘT KHÔNG CẦN THIẾT.


TASK 2.1: NULL

TASK 2.2: OUTLIER


>>> Đang xử lý nhóm SALES ITEM
 - category_l1: null -> Khác
 - category_l2: null -> Khác
 - category_l3: null -> Khác
 - category: null -> Khác
 - description: null -> Không xác định
 - brand: null -> Không rõ
 - manufacturer: null -> Không rõ
 - created_date: null -> 1970-01-01
 - gender_target: null -> Unisex
 - age_group: null -> Không xác định
 - item_type: null -> Khác
 - color: null -> Không xác định
 - size: null -> Free size
 - origin: null -> Không rõ
 - volume: null -> Không xác định
 - material: null -> Không rõ

Null count sau khi điền:
                    0
price               0
category_l1         0
category_l2         0
category_l3         0
category            0
description         0
brand               0
manufacturer        0
creation_timestamp  0
created_date        0
updated_date        0
gender_target       0
age_group           0
item_type           0
gp                  0
color               0
size                0
origin              0
volume              0
mate

TASK 3:

TASK 4:

TASK 5:

TEST
IN RA DU LIEU

In [75]:
# In ra Test
# Lặp qua từng nhóm dữ liệu trong dictionary 'datasets'
for name, df in datasets.items():
    
    # Nếu không có dữ liệu (None), bỏ qua nhóm đó
    if df is None:
        continue

    # In tiêu đề nhóm dữ liệu
    print(f"\n[ {name} ]")

    # In kích thước của DataFrame: số dòng và số cột
    print(f"Shape: {df.shape[0]:,} rows × {df.shape[1]} columns")

    # In danh sách tên các cột
    print("Columns:", df.columns)

    # Kiểm tra số lượng giá trị thiếu (null) trong từng cột
    print("\nMissing values:")
    null_counts = df.null_count().to_pandas().T
    print(null_counts)

    # Mô tả thống kê cho các cột số
    print("\nMô tả dữ liệu số:")
    display(df.describe())


[ SALES ITEM ]
Shape: 27,332 rows × 17 columns
Columns: ['price', 'category_l1', 'category_l2', 'category_l3', 'category', 'brand', 'creation_timestamp', 'gender_target', 'age_group', 'item_type', 'gp', 'color', 'size', 'origin', 'volume', 'material', 'sale_status']

Missing values:
                    0
price               0
category_l1         0
category_l2         0
category_l3         0
category            0
brand               0
creation_timestamp  0
gender_target       0
age_group           0
item_type           0
gp                  0
color               0
size                0
origin              0
volume              0
material            0
sale_status         0

Mô tả dữ liệu số:


statistic,price,category_l1,category_l2,category_l3,category,brand,creation_timestamp,gender_target,age_group,item_type,gp,color,size,origin,volume,material,sale_status
str,f64,str,str,str,str,str,f64,str,str,str,f64,str,str,str,str,str,f64
"""count""",27332.0,"""27332""","""27332""","""27332""","""27332""","""27332""",27332.0,"""27332""","""27332""","""27332""",27332.0,"""27332""","""27332""","""27332""","""27332""","""27332""",27332.0
"""null_count""",0.0,"""0""","""0""","""0""","""0""","""0""",0.0,"""0""","""0""","""0""",0.0,"""0""","""0""","""0""","""0""","""0""",0.0
"""mean""",190456.829467,,,,,,1622500000.0,,,,49696.651226,,,,,,0.167203
"""std""",511123.971813,,,,,,75666000.0,,,,144155.68753,,,,,,0.373164
"""min""",0.0,"""Babycare""","""0-1Y""","""A2 milk""","""0-12M Bodysuit bé trai đùi""","""137 Degrees""",1298300000.0,"""Bé Gái""","""0-10M""","""BCS""",0.0,"""Bạc""","""0 tháng""","""	Singapore, Thụy Sĩ""","""1 lít""","""	Vải không dệt, bông cellulose…",0.0
"""25%""",49000.0,,,,,,1575500000.0,,,,0.0,,,,,,0.0
"""50%""",119000.0,,,,,,1635800000.0,,,,25047.0,,,,,,0.0
"""75%""",199000.0,,,,,,1669900000.0,,,,65532.0,,,,,,0.0
"""max""",20990000.0,"""Đồ chơi & Sách""","""Đồ uống""","""Đồ hộp""","""Động vật mô hình""","""Đức Thành""",1755900000.0,"""Unisex""","""[""Từ 6M"", ""Từ 9M"", ""Từ 1Y""]""","""Ứng dụng mẹ và bé""",5541360.0,"""Đỏ""","""XXL(15-25kg) - 26 miếng""","""Ấn Độ, Bulgaria""","""hộp 3 lọ x 250ml""","""﻿100% cotton﻿<br>""",1.0



[ SALES PURCHASE ]
Shape: 35,729,825 rows × 9 columns
Columns: ['timestamp', 'event_value', 'price', 'date_key', 'quantity', 'channel', 'payment', 'location', 'discount']

Missing values:
             0
timestamp    0
event_value  0
price        0
date_key     0
quantity     0
channel      0
payment      0
location     0
discount     0

Mô tả dữ liệu số:


statistic,timestamp,event_value,price,date_key,quantity,channel,payment,location,discount
str,f64,f64,f64,f64,f64,str,str,f64,f64
"""count""",35729825.0,35729825.0,35729825.0,35729825.0,35729825.0,"""35729825""","""35729825""",35729825.0,35729825.0
"""null_count""",0.0,0.0,0.0,0.0,0.0,"""0""","""0""",0.0,0.0
"""mean""",1719900000.0,1.508098,169188.742494,20241000.0,1.508098,,,448.414508,17235.651914
"""std""",9156800.0,2.264976,195539.88976,346.231465,2.264976,,,247.505432,46673.708499
"""min""",1704100000.0,1.0,0.0444,20240101.0,1.0,"""Android""","""Chuyển khoản""",16.0,0.0
"""25%""",1712000000.0,1.0,48316.3914,20240401.0,1.0,,,234.0,0.0
"""50%""",1719900000.0,1.0,87000.0,20240702.0,1.0,,,439.0,0.0
"""75%""",1727900000.0,1.0,244000.0,20241002.0,1.0,,,653.0,15000.0
"""max""",1735700000.0,240.0,20990000.0,20241231.0,240.0,"""iOS""","""ZaloPay""",1036.0,7995000.0



[ SALES USER ]
Shape: 4,573,964 rows × 10 columns
Columns: ['gender', 'location', 'province', 'membership', 'timestamp', 'region', 'location_name', 'install_app', 'install_date', 'district']

Missing values:
               0
gender         0
location       0
province       0
membership     0
timestamp      0
region         0
location_name  0
install_app    0
install_date   0
district       0

Mô tả dữ liệu số:


statistic,gender,location,province,membership,timestamp,region,location_name,install_app,install_date,district
str,str,f64,str,str,f64,str,str,str,f64,str
"""count""","""4573964""",4573964.0,"""4573964""","""4573964""",4573964.0,"""4573964""","""4573964""","""4573964""",4573964.0,"""4573964"""
"""null_count""","""0""",0.0,"""0""","""0""",0.0,"""0""","""0""","""0""",0.0,"""0"""
"""mean""",,554.053518,,,1671200000.0,,,,1684100000.0,
"""std""",,295.523361,,,79743000.0,,,,71374000.0,
"""min""","""Khác""",42.0,"""An Giang""","""Diamond""",1306400000.0,"""Bắc Trung Bộ""","""AGI - 110 Quốc Lộ 91""","""Android""",1306300000.0,""" Vũng Liêm"""
"""25%""",,302.0,,,1634300000.0,,,,1655000000.0,
"""50%""",,547.0,,,1696000000.0,,,,1705400000.0,
"""75%""",,765.0,,,1731200000.0,,,,1736600000.0,
"""max""","""Nữ""",1310.0,"""Đồng Tháp""","""Standard""",1759300000.0,"""Đồng bằng sông Cửu Long""","""ĐTH - Trần Hưng Đạo""","""iOS""",1759200000.0,"""Ứng Hòa"""
