# Library

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

from model import *
from utils import *

# Data

In [2]:
df = pd.read_csv(
    r'../../data/cleaned/clean3.csv',
    encoding='utf-8',
)

In [3]:
df.head()

Unnamed: 0,price,availability,number_of_products_sold,revenue_generated,stock_levels,lead_times,order_quantities,shipping_times,shipping_costs,lead_time,...,product_type,sku,customer_demographics,shipping_carriers,location,inspection_results,transportation_modes,routes,shipping_times_category,supplier_name
0,69.808006,55,802,8661.996792,58,7,96,4,2.956572,29,...,haircare,SKU0,Non-binary,Carrier B,Mumbai,Pending,Road,Route B,Standard,Supplier 3
1,14.843523,95,736,7460.900065,53,30,37,2,9.716575,23,...,skincare,SKU1,Female,Carrier A,Mumbai,Pending,Road,Route B,Express,Supplier 3
2,11.319683,34,8,9577.749626,1,10,88,2,8.054479,12,...,haircare,SKU2,Unknown,Carrier B,Mumbai,Pending,Air,Route C,Express,Supplier 1
3,61.163343,68,83,7766.836426,23,13,59,6,1.729569,24,...,skincare,SKU3,Non-binary,Carrier C,Kolkata,Fail,Rail,Route A,Standard,Supplier 5
4,4.805496,26,871,2686.505152,5,3,56,8,3.890548,5,...,skincare,SKU4,Non-binary,Carrier A,Delhi,Fail,Air,Route A,Economy,Supplier 1


# Load metadata

In [4]:
df = load_metadata(
    df,
    r"../../data/metadata/metadata3.json",
)

# Overview

In [5]:
print(f"Num of varibles:", df.shape[1])
print(f"Num of rows:", df.shape[0])

Num of varibles: 25
Num of rows: 100


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 25 columns):
 #   Column                   Non-Null Count  Dtype   
---  ------                   --------------  -----   
 0   price                    100 non-null    float64 
 1   availability             100 non-null    int64   
 2   number_of_products_sold  100 non-null    int64   
 3   revenue_generated        100 non-null    float64 
 4   stock_levels             100 non-null    int64   
 5   lead_times               100 non-null    int64   
 6   order_quantities         100 non-null    int64   
 7   shipping_times           100 non-null    int64   
 8   shipping_costs           100 non-null    float64 
 9   lead_time                100 non-null    int64   
 10  production_volumes       100 non-null    int64   
 11  manufacturing_lead_time  100 non-null    int64   
 12  manufacturing_costs      100 non-null    float64 
 13  defect_rates             100 non-null    float64 
 14  costs      

In [7]:
df.isnull().sum()

price                      0
availability               0
number_of_products_sold    0
revenue_generated          0
stock_levels               0
lead_times                 0
order_quantities           0
shipping_times             0
shipping_costs             0
lead_time                  0
production_volumes         0
manufacturing_lead_time    0
manufacturing_costs        0
defect_rates               0
costs                      0
product_type               0
sku                        0
customer_demographics      0
shipping_carriers          0
location                   0
inspection_results         0
transportation_modes       0
routes                     0
shipping_times_category    0
supplier_name              0
dtype: int64

# Tiêu Chí Đánh Giá Hiệu Suất Nhà Cung Cấp

| Tiêu Chí | Biến Đại Diện | Hướng Đánh Giá |
| :--- | :--- | :--- |
| **Chi phí** | `manufacturing_costs`, `shipping_costs`, `costs` | **Tối thiểu hóa (Minimize)** |
| **Chất lượng và độ tin cậy** | `inspection_results`, `defect_rates` | **Tối đa hóa (Maximize)** |
| **Thời gian, tốc độ giao hàng** | `lead_times`, `manufacturing_lead_time`, `shipping_times` | **Tối thiểu hóa (Minimize)** |
| **Năng lực, độ sẵn sàng cung ứng** | `production_volumes`, `availability`, `stock_levels`, `order_quantities` | **Tối đa hóa (Maximize)** |
| **Hiệu suất kinh doanh** | `revenue_generated`, `number_of_products_sold` | **Tối đa hóa (Maximize)** |

In [8]:
print(df["inspection_results"].unique())

# Chuyển thành dạng ordinal
map = {
    "Fail": 0,
    "Pending": 1,
    "Pass": 2,
}
df["inspection_results"] = df["inspection_results"].map(map)

df["inspection_results"] =  pd.to_numeric(df["inspection_results"], errors='coerce')

print(df["inspection_results"].unique())

['Pending', 'Fail', 'Pass']
Categories (3, object): ['Fail', 'Pass', 'Pending']
[1 0 2]


# Tổng hợp dữ liệu về các nhà cung cấp

In [9]:
agg_dict = {
    # Mean đặc trưng hiệu suất
    'manufacturing_costs': 'mean',
    'shipping_costs': 'mean',
    'costs': 'mean',
    'defect_rates': 'mean',
    'lead_times': 'mean',
    'manufacturing_lead_time': 'mean',
    'shipping_times': 'mean',
    'availability': 'mean',
    'stock_levels': 'mean',
    "inspection_results": "mean",
    
    # Sum quy mô
    'production_volumes': 'sum',
    'order_quantities': 'sum',
    'revenue_generated': 'sum',
    'number_of_products_sold': 'sum'
}

In [10]:
df_supplier = df.groupby('supplier_name').agg(agg_dict).reset_index()

In [11]:
df_supplier.head()

Unnamed: 0,supplier_name,manufacturing_costs,shipping_costs,costs,defect_rates,lead_times,manufacturing_lead_time,shipping_times,availability,stock_levels,inspection_results,production_volumes,order_quantities,revenue_generated,number_of_products_sold
0,Supplier 1,45.254027,5.512302,574.851139,1.80363,16.777778,12.592593,6.074074,46.407407,42.296296,1.259259,13545,1458,157528.995039,11080
1,Supplier 2,41.622514,5.739178,515.027241,2.36275,16.227273,15.590909,5.5,39.681818,46.454545,0.863636,14105,1022,125467.418605,11068
2,Supplier 3,43.634121,4.788771,468.800102,2.465786,14.333333,14.933333,5.2,48.933333,43.6,0.933333,7997,632,97795.979638,8083
3,Supplier 4,62.709727,5.759573,521.810418,2.337397,17.0,15.333333,5.555556,57.944444,58.944444,0.333333,11756,842,86468.961799,7206
4,Supplier 5,44.768243,5.789832,536.02273,2.665408,14.722222,16.333333,6.222222,52.055556,49.888889,0.777778,9381,968,110343.463656,8662


In [12]:
df_supplier.index = df_supplier['supplier_name']
df_supplier = df_supplier.drop(columns=['supplier_name'])

In [13]:
df_supplier.head()

Unnamed: 0_level_0,manufacturing_costs,shipping_costs,costs,defect_rates,lead_times,manufacturing_lead_time,shipping_times,availability,stock_levels,inspection_results,production_volumes,order_quantities,revenue_generated,number_of_products_sold
supplier_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Supplier 1,45.254027,5.512302,574.851139,1.80363,16.777778,12.592593,6.074074,46.407407,42.296296,1.259259,13545,1458,157528.995039,11080
Supplier 2,41.622514,5.739178,515.027241,2.36275,16.227273,15.590909,5.5,39.681818,46.454545,0.863636,14105,1022,125467.418605,11068
Supplier 3,43.634121,4.788771,468.800102,2.465786,14.333333,14.933333,5.2,48.933333,43.6,0.933333,7997,632,97795.979638,8083
Supplier 4,62.709727,5.759573,521.810418,2.337397,17.0,15.333333,5.555556,57.944444,58.944444,0.333333,11756,842,86468.961799,7206
Supplier 5,44.768243,5.789832,536.02273,2.665408,14.722222,16.333333,6.222222,52.055556,49.888889,0.777778,9381,968,110343.463656,8662


# Topsis

In [14]:
df_topsis = df_supplier.copy()

In [15]:
df_topsis.head()

Unnamed: 0_level_0,manufacturing_costs,shipping_costs,costs,defect_rates,lead_times,manufacturing_lead_time,shipping_times,availability,stock_levels,inspection_results,production_volumes,order_quantities,revenue_generated,number_of_products_sold
supplier_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Supplier 1,45.254027,5.512302,574.851139,1.80363,16.777778,12.592593,6.074074,46.407407,42.296296,1.259259,13545,1458,157528.995039,11080
Supplier 2,41.622514,5.739178,515.027241,2.36275,16.227273,15.590909,5.5,39.681818,46.454545,0.863636,14105,1022,125467.418605,11068
Supplier 3,43.634121,4.788771,468.800102,2.465786,14.333333,14.933333,5.2,48.933333,43.6,0.933333,7997,632,97795.979638,8083
Supplier 4,62.709727,5.759573,521.810418,2.337397,17.0,15.333333,5.555556,57.944444,58.944444,0.333333,11756,842,86468.961799,7206
Supplier 5,44.768243,5.789832,536.02273,2.665408,14.722222,16.333333,6.222222,52.055556,49.888889,0.777778,9381,968,110343.463656,8662


In [16]:
criteria_types = {
    "revenue_generated": "max",
    "number_of_products_sold": "max",
    "production_volumes": "max",
    "availability": "max",
    "stock_levels": "max",
    "order_quantities": "max",
    "lead_times": "min",
    "manufacturing_lead_time": "min",
    "shipping_times": "min",
    "inspection_results": "max",
    "defect_rates": "max",
    "manufacturing_costs": "min",
    "shipping_costs": "min",
    "costs": "min",
}

In [17]:
model = TOPSIS(df_topsis, criteria_types)

# Output

In [18]:
model.normalize() # Chuẩn hóa dữ liệu

Unnamed: 0_level_0,manufacturing_costs,shipping_costs,costs,defect_rates,lead_times,manufacturing_lead_time,shipping_times,availability,stock_levels,inspection_results,production_volumes,order_quantities,revenue_generated,number_of_products_sold
supplier_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Supplier 1,0.419793,0.445715,0.490219,0.344041,0.473411,0.375181,0.474646,0.420323,0.389182,0.636122,0.522286,0.638415,0.596367,0.52969
Supplier 2,0.386106,0.46406,0.439203,0.450693,0.457878,0.464513,0.429786,0.359408,0.427444,0.436271,0.543879,0.447503,0.474989,0.529116
Supplier 3,0.404766,0.387211,0.399781,0.470347,0.404437,0.444921,0.406343,0.443201,0.401178,0.471479,0.308359,0.276734,0.370232,0.386415
Supplier 4,0.581719,0.465709,0.444987,0.445857,0.479682,0.456838,0.434128,0.524817,0.542368,0.168385,0.453303,0.368687,0.327351,0.34449
Supplier 5,0.415287,0.468155,0.457107,0.508425,0.41541,0.486632,0.486223,0.47148,0.459045,0.392899,0.361725,0.423858,0.417734,0.414095


In [19]:
model.calculate_entropy_weights() # Tính trọng số tiêu chí bằng phương pháp Entropy

manufacturing_costs        0.056308
shipping_costs             0.011219
costs                      0.009982
defect_rates               0.036604
lead_times                 0.011028
manufacturing_lead_time    0.017205
shipping_times             0.010279
availability               0.035679
stock_levels               0.034329
inspection_results         0.330722
production_volumes         0.102372
order_quantities           0.172784
revenue_generated          0.103129
number_of_products_sold    0.068360
dtype: float64

In [20]:
model.weighted_normalize() # Ma trận chuẩn hóa có trọng số

Unnamed: 0_level_0,manufacturing_costs,shipping_costs,costs,defect_rates,lead_times,manufacturing_lead_time,shipping_times,availability,stock_levels,inspection_results,production_volumes,order_quantities,revenue_generated,number_of_products_sold
supplier_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Supplier 1,0.023638,0.005,0.004893,0.012593,0.005221,0.006455,0.004879,0.014997,0.01336,0.21038,0.053467,0.110308,0.061503,0.03621
Supplier 2,0.021741,0.005206,0.004384,0.016497,0.00505,0.007992,0.004418,0.012823,0.014674,0.144285,0.055678,0.077321,0.048985,0.036171
Supplier 3,0.022791,0.004344,0.00399,0.017216,0.00446,0.007655,0.004177,0.015813,0.013772,0.155929,0.031567,0.047815,0.038182,0.026416
Supplier 4,0.032755,0.005225,0.004442,0.01632,0.00529,0.00786,0.004462,0.018725,0.018619,0.055689,0.046405,0.063703,0.033759,0.023549
Supplier 5,0.023384,0.005252,0.004563,0.01861,0.004581,0.008373,0.004998,0.016822,0.015759,0.129941,0.03703,0.073236,0.043081,0.028308


In [21]:
model.calculate_Ci_and_ranking() # Tính chỉ số Ci và xếp hạng

Unnamed: 0_level_0,Ci,Ranking
Supplier,Unnamed: 1_level_1,Unnamed: 2_level_1
Supplier 1,0.947918,1
Supplier 2,0.568326,2
Supplier 3,0.528467,3
Supplier 5,0.463803,4
Supplier 4,0.124374,5
