In [2]:
import pandas as pd
import numpy as np

# 1. LOAD DỮ LIỆU
customer_df = pd.read_csv('customer_rating_clean.csv')
order_df = pd.read_csv('order_info_clean.csv') 
shipper_df = pd.read_csv('shipper_info_clean.csv')

# Merge dữ liệu
order_shipper = pd.merge(order_df, shipper_df, on='shipper_id', how='left')


In [3]:
# 2. PIVOT THỜI GIAN GIAO THEO REGION × SHIPPER
delivery_pivot = order_shipper.pivot_table(
    values='delivery_time_min', 
    index='region', 
    columns='shipper_id', 
    aggfunc='mean',
    fill_value=0
).round(1)
print("Pivot thời gian giao theo region × shipper:")
print(delivery_pivot)

Pivot thời gian giao theo region × shipper:
shipper_id  S101  S102  S103  S105  S107  S111  S116
region                                              
Bắc         32.5  25.0  33.8   0.0   0.0   0.0   0.0
Nam          0.0   0.0   0.0  18.7  17.5   5.0   0.0
Trung        0.0   0.0   0.0   0.0   0.0   0.0  30.7


In [4]:

# 3. PIVOT RATING THEO SHIPPER × THÁNG
order_shipper['month'] = order_shipper['order_id'].str.extract(r'OD(\d{4})')[0].astype(int) % 100
rating_data = pd.merge(customer_df, order_shipper[['order_id', 'shipper_id', 'month']], on='order_id', how='left')

rating_pivot = rating_data.pivot_table(
    values='rating', 
    index='shipper_id', 
    columns='month', 
    aggfunc='mean',
    fill_value=0
).round(1)
print("\nPivot rating theo shipper × tháng:")
print(rating_pivot)



Pivot rating theo shipper × tháng:
month        0    1    2    4    7    8    9    11   12   14  ...   64   66  \
shipper_id                                                    ...             
 S104       0.0  0.0  0.0  0.0  3.3  0.0  0.0  0.0  3.0  0.0  ...  0.0  0.0   
 S106       0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  3.0   
 S114       0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   
S100        0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   
S101        0.0  0.0  0.0  0.0  0.0  0.0  2.0  0.0  0.0  0.0  ...  0.0  0.0   
S102        0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   
S103        0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   
S105        0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  5.0  0.0   
S107        0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   
S108        0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   
S109        0.0 

In [5]:

# 4. STACK/UNSTACK
stacked = delivery_pivot.stack().reset_index(name='avg_time_min')
print("\nStacked format:")
print(stacked.head(10))

unstacked = stacked.pivot(index='region', columns='shipper_id', values='avg_time_min')
print("\nUnstacked format:")
print(unstacked.head())


Stacked format:
  region shipper_id  avg_time_min
0    Bắc       S101          32.5
1    Bắc       S102          25.0
2    Bắc       S103          33.8
3    Bắc       S105           0.0
4    Bắc       S107           0.0
5    Bắc       S111           0.0
6    Bắc       S116           0.0
7    Nam       S101           0.0
8    Nam       S102           0.0
9    Nam       S103           0.0

Unstacked format:
shipper_id  S101  S102  S103  S105  S107  S111  S116
region                                              
Bắc         32.5  25.0  33.8   0.0   0.0   0.0   0.0
Nam          0.0   0.0   0.0  18.7  17.5   5.0   0.0
Trung        0.0   0.0   0.0   0.0   0.0   0.0  30.7


In [None]:
#NHẬN XÉT SHIPPER CÓ HIỆU SUẤT TỐT NHẤT:
#Shipper s111 S111 dẫn đầu tuyệt đối với tốc độ giao hàng siêu nhanh (chỉ 5 phút trung bình) và đánh giá ổn định, vượt trội toàn diện so với các shipper khác