# Library

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Suppresses every warning category for the rest of the session, including
# deprecation and dtype warnings raised by the libraries imported above.
warnings.filterwarnings('ignore')

# Star imports: helpers used later without a visible definition in these cells
# (load_metadata, group_columns_by_type, TOPSIS) presumably come from these two
# project modules — confirm, and consider importing them by name.
# NOTE(review): a later cell's recorded output is
# "NameError: name 'normalize' is not defined", so that name was not provided
# by either star import in the recorded run.
from model import *
from utils import *

# Data

In [None]:
# Read the cleaned dataset (path is relative to the notebook's working directory).
df = pd.read_csv(r'../../data/cleaned/clean4.csv', encoding='utf-8')

In [None]:
# Preview the first five rows of the freshly loaded frame.
df.head(5)

Unnamed: 0,price,availability,number_of_products_sold,revenue_generated,stock_levels,lead_times,order_quantities,shipping_times,shipping_costs,lead_time,...,sku,customer_demographics,shipping_carriers,location,inspection_results,transportation_modes,routes,shipping_times_category,supplier_name,cluster
0,69.808006,55,802,8661.996792,58,7,96,4,2.956572,29,...,SKU0,Non-binary,Carrier B,Mumbai,Pending,Road,Route B,Standard,2.0,0
1,14.843523,95,736,7460.900065,53,30,37,2,9.716575,23,...,SKU1,Female,Carrier A,Mumbai,Pending,Road,Route B,Express,2.0,1
2,11.319683,34,8,9577.749626,1,10,88,2,8.054479,12,...,SKU2,Unknown,Carrier B,Mumbai,Pending,Air,Route C,Express,0.0,1
3,61.163343,68,83,7766.836426,23,13,59,6,1.729569,24,...,SKU3,Non-binary,Carrier C,Kolkata,Fail,Rail,Route A,Standard,4.0,1
4,4.805496,26,871,2686.505152,5,3,56,8,3.890548,5,...,SKU4,Non-binary,Carrier A,Delhi,Fail,Air,Route A,Economy,0.0,1


# Load metadata

In [None]:
# Hand the frame and the metadata JSON path to the project helper and rebind
# `df` to whatever it returns. The helper is not defined in the visible cells;
# presumably it applies per-column dtypes/metadata — confirm in its source.
# Note that `df` is both input and output, so re-running this cell feeds the
# already-processed frame back into the helper.
df = load_metadata(df, r"../../data/metadata/metadata4.json")

# Overview

## Check

In [None]:
# Report the frame's dimensions: column count first, then row count.
n_rows, n_cols = df.shape
print("Num of varibles:", n_cols)
print("Num of rows:", n_rows)

Num of varibles: 26
Num of rows: 100


In [None]:
# Print the per-column non-null counts and dtypes of the frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 26 columns):
 #   Column                   Non-Null Count  Dtype   
---  ------                   --------------  -----   
 0   price                    100 non-null    float64 
 1   availability             100 non-null    int64   
 2   number_of_products_sold  100 non-null    int64   
 3   revenue_generated        100 non-null    float64 
 4   stock_levels             100 non-null    int64   
 5   lead_times               100 non-null    int64   
 6   order_quantities         100 non-null    int64   
 7   shipping_times           100 non-null    int64   
 8   shipping_costs           100 non-null    float64 
 9   lead_time                100 non-null    int64   
 10  production_volumes       100 non-null    int64   
 11  manufacturing_lead_time  100 non-null    int64   
 12  manufacturing_costs      100 non-null    float64 
 13  defect_rates             100 non-null    float64 
 14  costs      

In [None]:
# Count missing values per column (isna is the canonical name; isnull is its alias).
df.isna().sum()

price                      0
availability               0
number_of_products_sold    0
revenue_generated          0
stock_levels               0
lead_times                 0
order_quantities           0
shipping_times             0
shipping_costs             0
lead_time                  0
production_volumes         0
manufacturing_lead_time    0
manufacturing_costs        0
defect_rates               0
costs                      0
product_type               0
sku                        0
customer_demographics      0
shipping_carriers          0
location                   0
inspection_results         0
transportation_modes       0
routes                     0
shipping_times_category    0
supplier_name              0
cluster                    0
dtype: int64

# Grouping Variables by their respective data types

In [None]:
# Split the column names into numeric and categorical groups using the project
# helper; its third return value is discarded. display_info=True makes the
# helper print the summary shown in the recorded output.
# NOTE(review): that recorded output counts `supplier_name` and `cluster` as
# numeric although they look like labels — confirm this is intended.
numerical_cols, categorical_cols, _ = group_columns_by_type(
    df,
    display_info=True,
)

Total numeric columns: 17
Numeric columns: ['price', 'availability', 'number_of_products_sold', 'revenue_generated', 'stock_levels', 'lead_times', 'order_quantities', 'shipping_times', 'shipping_costs', 'lead_time', 'production_volumes', 'manufacturing_lead_time', 'manufacturing_costs', 'defect_rates', 'costs', 'supplier_name', 'cluster']

Total categorical columns: 9
Categorical columns: ['product_type', 'sku', 'customer_demographics', 'shipping_carriers', 'location', 'inspection_results', 'transportation_modes', 'routes', 'shipping_times_category']



# Tiêu Chí Đánh Giá Hiệu Suất Nhà Cung Cấp

| Tiêu Chí | Biến Đại Diện | Hướng Đánh Giá |
| :--- | :--- | :--- |
| **Chi phí** | `manufacturing_costs`, `shipping_costs`, `costs` | **Tối thiểu hóa (Minimize)** |
| **Chất lượng và Độ tin cậy** | `inspection_results`, `defect_rates` | **Tối đa hóa (Maximize)** chất lượng; riêng `defect_rates` càng thấp càng tốt nên cần **Tối thiểu hóa (Minimize)** |
| **Thời gian, Tốc độ giao hàng** | `lead_times`, `manufacturing_lead_time`, `shipping_times` | **Tối thiểu hóa (Minimize)** |
| **Năng lực, Độ sẵn sàng cung ứng** | `production_volumes`, `availability`, `stock_levels`, `order_quantities` | **Tối đa hóa (Maximize)** |
| **Hiệu suất kinh doanh** | `revenue_generated`, `number_of_products_sold` | **Tối đa hóa (Maximize)** |

In [None]:
# Every column used as an evaluation criterion, grouped by the criterion family
# it represents in the table above. The resulting order is significant: it is
# the column order of any frame selected with this list.
criteria_types = (
    # Business performance
    ["revenue_generated", "number_of_products_sold"]
    # Capacity and supply readiness
    + ["production_volumes", "availability", "stock_levels", "order_quantities"]
    # Time and delivery speed
    + ["lead_times", "manufacturing_lead_time", "shipping_times"]
    # Quality and reliability
    + ["inspection_results", "defect_rates"]
    # Cost
    + ["manufacturing_costs", "shipping_costs", "costs"]
)

# Tổng hợp dữ liệu về các nhà cung cấp

In [None]:
# Column -> aggregation mapping used when collapsing rows per supplier.
# Key order is kept as-is because it determines the column order of the
# aggregated frame.
agg_dict = {
    # Per-unit performance characteristics are averaged.
    **dict.fromkeys(
        [
            'manufacturing_costs',
            'shipping_costs',
            'costs',
            'defect_rates',
            'lead_times',
            'manufacturing_lead_time',
            'shipping_times',
            'availability',
            'stock_levels',
        ],
        'mean',
    ),
    # Scale/volume measures are totalled.
    **dict.fromkeys(
        [
            'production_volumes',
            'order_quantities',
            'revenue_generated',
            'number_of_products_sold',
        ],
        'sum',
    ),
}

In [None]:
# Collapse the rows to one per supplier using the aggregation mapping, then
# turn the group key back into an ordinary column.
df_supplier = (
    df
    .groupby('supplier_name')
    .agg(agg_dict)
    .reset_index()
)

In [None]:
# Preview the first five aggregated supplier rows.
df_supplier.head(5)

Unnamed: 0,supplier_name,manufacturing_costs,shipping_costs,costs,defect_rates,lead_times,manufacturing_lead_time,shipping_times,availability,stock_levels,production_volumes,order_quantities,revenue_generated,number_of_products_sold
0,0.0,45.254027,5.512302,574.851139,1.80363,16.777778,12.592593,6.074074,46.407407,42.296296,13545,1458,157528.995039,11080
1,1.0,41.622514,5.739178,515.027241,2.36275,16.227273,15.590909,5.5,39.681818,46.454545,14105,1022,125467.418605,11068
2,2.0,43.634121,4.788771,468.800102,2.465786,14.333333,14.933333,5.2,48.933333,43.6,7997,632,97795.979638,8083
3,3.0,62.709727,5.759573,521.810418,2.337397,17.0,15.333333,5.555556,57.944444,58.944444,11756,842,86468.961799,7206
4,4.0,44.768243,5.789832,536.02273,2.665408,14.722222,16.333333,6.222222,52.055556,49.888889,9381,968,110343.463656,8662


In [None]:
# Promote `supplier_name` from a regular column to the row index so each row is
# addressed by its supplier label. set_index does the "use as index and drop
# the column" step in one call. The membership guard keeps the cell
# re-runnable: once the column has been moved into the index, running the cell
# again is a no-op instead of raising KeyError on the missing column.
if 'supplier_name' in df_supplier.columns:
    df_supplier = df_supplier.set_index('supplier_name')

In [None]:
# Preview the first five rows, now indexed by supplier.
df_supplier.head(5)

Unnamed: 0_level_0,manufacturing_costs,shipping_costs,costs,defect_rates,lead_times,manufacturing_lead_time,shipping_times,availability,stock_levels,production_volumes,order_quantities,revenue_generated,number_of_products_sold
supplier_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0.0,45.254027,5.512302,574.851139,1.80363,16.777778,12.592593,6.074074,46.407407,42.296296,13545,1458,157528.995039,11080
1.0,41.622514,5.739178,515.027241,2.36275,16.227273,15.590909,5.5,39.681818,46.454545,14105,1022,125467.418605,11068
2.0,43.634121,4.788771,468.800102,2.465786,14.333333,14.933333,5.2,48.933333,43.6,7997,632,97795.979638,8083
3.0,62.709727,5.759573,521.810418,2.337397,17.0,15.333333,5.555556,57.944444,58.944444,11756,842,86468.961799,7206
4.0,44.768243,5.789832,536.02273,2.665408,14.722222,16.333333,6.222222,52.055556,49.888889,9381,968,110343.463656,8662


# Preprocessing

In [None]:
# Benefit criteria: a larger raw value is better.
# NOTE(review): "inspection_results" holds text labels in the data (e.g.
# Pending / Fail), so it has to be encoded numerically before any scaling —
# confirm the intended mapping.
criteria_types_maximize = [
    "inspection_results",
    "production_volumes",
    "availability",
    "stock_levels",
    "order_quantities",
    "revenue_generated",
    "number_of_products_sold",
]

# Cost criteria: a smaller raw value is better.
# `defect_rates` belongs here: the quality criterion is to be maximized, which
# for this variable means driving the defect rate down, not up.
criteria_types_minimize = [
    "defect_rates",
    "manufacturing_costs",
    "shipping_costs",
    "costs",
    "lead_times",
    "manufacturing_lead_time",
    "shipping_times",
]

In [None]:
# Take an independent copy of just the criterion columns so later in-place
# edits to df_topsis do not touch `df`.
df_topsis = df.loc[:, criteria_types].copy()

In [None]:
# Preview the first five rows of the criterion-only frame.
df_topsis.head(5)

Unnamed: 0,revenue_generated,number_of_products_sold,production_volumes,availability,stock_levels,order_quantities,lead_times,manufacturing_lead_time,shipping_times,inspection_results,defect_rates,manufacturing_costs,shipping_costs,costs
0,8661.996792,802,215,55,58,96,7,29,4,Pending,0.22641,46.279879,2.956572,187.752075
1,7460.900065,736,517,95,53,37,30,30,2,Pending,4.854068,33.616769,9.716575,503.065579
2,9577.749626,8,971,34,1,88,10,27,2,Pending,4.580593,30.688019,8.054479,141.920282
3,7766.836426,83,937,68,23,59,13,18,6,Fail,4.746649,35.624741,1.729569,254.776159
4,2686.505152,871,414,26,5,56,3,3,8,Fail,3.14558,92.065161,3.890548,923.440632


## Maximize

In [None]:
# Preview the first five rows of the columns listed as "maximize" criteria.
df_topsis.loc[:, criteria_types_maximize].head(5)

Unnamed: 0,inspection_results,defect_rates,production_volumes,availability,stock_levels,order_quantities,revenue_generated,number_of_products_sold
0,Pending,0.22641,215,55,58,96,8661.996792,802
1,Pending,4.854068,517,95,53,37,7460.900065,736
2,Pending,4.580593,971,34,1,88,9577.749626,8
3,Fail,4.746649,937,68,23,59,7766.836426,83
4,Fail,3.14558,414,26,5,56,2686.505152,871


In [None]:
# Overwrite the "maximize" criterion columns of df_topsis with the result of
# normalize(..., method="Max").
# NOTE(review): the recorded output of this cell is
# "NameError: name 'normalize' is not defined" — the name was not in scope in
# the recorded run; import or define it before this cell.
# NOTE(review): criteria_types_maximize includes "inspection_results", which
# holds text labels in df_topsis — presumably it must be encoded numerically
# before it can be scaled; confirm.
df_topsis[criteria_types_maximize] = normalize(
    df_topsis[criteria_types_maximize], method="Max"
)

NameError: name 'normalize' is not defined

## Minimize

In [None]:
# TODO: scale the cost ("minimize") criteria listed in criteria_types_minimize
# here, mirroring the Maximize cell above. Left as a comment-only placeholder
# so the notebook still runs top to bottom until the step is written.

# Topsis

In [None]:
# Run the TOPSIS steps one after another on the same `topsis` object, keeping
# each intermediate result: normalization, entropy weights, weighted matrix,
# then the closeness scores and ranking.
# NOTE(review): the recorded output of this cell is
# "ValueError: Cannot cast object dtype to float64". `criteria_types` contains
# "inspection_results", whose values are text labels, and `df` itself still
# carries other text columns — presumably one of these reaches a float cast
# inside the TOPSIS implementation; confirm there and encode or drop the
# non-numeric columns before this call.
# NOTE(review): the raw per-row `df` is passed here rather than the
# per-supplier `df_supplier` built earlier — confirm which table is meant to
# be ranked.
topsis = TOPSIS(df, criteria_types)
norm = topsis.normalize()
weights = topsis.calculate_entropy_weights()
weighted_norm = topsis.weighted_normalize()
Ci_ranking = topsis.calculate_Ci_and_ranking()

ValueError: Cannot cast object dtype to float64

In [None]:
# Show the normalization result through the TOPSIS object's own printer
# (its implementation is not in the visible cells).
topsis.print_normalization()

In [None]:
# Show the entropy-derived criterion weights through the TOPSIS object's own
# printer (its implementation is not in the visible cells).
topsis.print_entropy_weights()

In [None]:
# Show the weighted normalized matrix through the TOPSIS object's own printer
# (its implementation is not in the visible cells).
topsis.print_weighted_normalize()

In [None]:
# Show the closeness scores (Ci) and the resulting ranking through the TOPSIS
# object's own printer (its implementation is not in the visible cells).
topsis.print_Ci_ranking()