In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error,r2_score

In [None]:
# Colab-only step: mount Google Drive so the dataset path used by the next
# cell (under /content/drive) is reachable.
# NOTE(review): this import lives outside the notebook's import cell and only
# resolves inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Read the supply-chain CSV from the mounted Drive and preview the first rows.
csv_path = r'/content/drive/MyDrive/Datasets/supply_chain_data.csv'
df = pd.read_csv(csv_path)
df.head(5)

Unnamed: 0,Product type,SKU,Price,Availability,Number of products sold,Revenue generated,Customer demographics,Stock levels,Lead times,Order quantities,...,Location,Lead time,Production volumes,Manufacturing lead time,Manufacturing costs,Inspection results,Defect rates,Transportation modes,Routes,Costs
0,haircare,SKU0,69.808006,55,802,8661.996792,Non-binary,58,7,96,...,Mumbai,29,215,29,46.279879,Pending,0.22641,Road,Route B,187.752075
1,skincare,SKU1,14.843523,95,736,7460.900065,Female,53,30,37,...,Mumbai,23,517,30,33.616769,Pending,4.854068,Road,Route B,503.065579
2,haircare,SKU2,11.319683,34,8,9577.749626,Unknown,1,10,88,...,Mumbai,12,971,27,30.688019,Pending,4.580593,Air,Route C,141.920282
3,skincare,SKU3,61.163343,68,83,7766.836426,Non-binary,23,13,59,...,Kolkata,24,937,18,35.624741,Fail,4.746649,Rail,Route A,254.776159
4,skincare,SKU4,4.805496,26,871,2686.505152,Non-binary,5,3,56,...,Delhi,5,414,3,92.065161,Fail,3.14558,Air,Route A,923.440632


In [None]:
# List the column labels of the loaded frame.
df.columns

Index(['Product type', 'SKU', 'Price', 'Availability',
       'Number of products sold', 'Revenue generated', 'Customer demographics',
       'Stock levels', 'Lead times', 'Order quantities', 'Shipping times',
       'Shipping carriers', 'Shipping costs', 'Supplier name', 'Location',
       'Lead time', 'Production volumes', 'Manufacturing lead time',
       'Manufacturing costs', 'Inspection results', 'Defect rates',
       'Transportation modes', 'Routes', 'Costs'],
      dtype='object')

In [None]:
# Per-column non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 24 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Product type             100 non-null    object 
 1   SKU                      100 non-null    object 
 2   Price                    100 non-null    float64
 3   Availability             100 non-null    int64  
 4   Number of products sold  100 non-null    int64  
 5   Revenue generated        100 non-null    float64
 6   Customer demographics    100 non-null    object 
 7   Stock levels             100 non-null    int64  
 8   Lead times               100 non-null    int64  
 9   Order quantities         100 non-null    int64  
 10  Shipping times           100 non-null    int64  
 11  Shipping carriers        100 non-null    object 
 12  Shipping costs           100 non-null    float64
 13  Supplier name            100 non-null    object 
 14  Location                 10

In [None]:
# Count missing values per column.
df.isna().sum()

Unnamed: 0,0
Product type,0
SKU,0
Price,0
Availability,0
Number of products sold,0
Revenue generated,0
Customer demographics,0
Stock levels,0
Lead times,0
Order quantities,0


In [None]:
# Demand forecast: trailing mean of units sold over `window_size` rows.
window_size = 7

units_sold = df['Number of products sold']
rolling_mean = units_sold.rolling(window=window_size).mean()

# The first `window_size - 1` rows have no complete window and come back as
# NaN; fall back to the expanding (cumulative) mean for those rows.
# The result is assigned back to the column rather than calling
# `df[col].fillna(..., inplace=True)`, which pandas warns operates on a
# temporary copy and will stop working in pandas 3.0.
df['Demand Forecast'] = rolling_mean.fillna(units_sold.expanding().mean())

# NOTE(review): `threshold_stock` is not used by any statement in this cell;
# kept in case a later cell reads it.
threshold_stock = 50

# Flag rows whose forecast demand exceeds the stock currently on hand.
df['Restock Needed'] = df['Demand Forecast'] > df['Stock levels']

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Demand Forecast'].fillna(df['Number of products sold'].expanding().mean(), inplace=True)


In [None]:
# Preview the frame with the two derived columns appended.
df.head(5)

Unnamed: 0,Product type,SKU,Price,Availability,Number of products sold,Revenue generated,Customer demographics,Stock levels,Lead times,Order quantities,...,Production volumes,Manufacturing lead time,Manufacturing costs,Inspection results,Defect rates,Transportation modes,Routes,Costs,Demand Forecast,Restock Needed
0,haircare,SKU0,69.808006,55,802,8661.996792,Non-binary,58,7,96,...,215,29,46.279879,Pending,0.22641,Road,Route B,187.752075,802.0,True
1,skincare,SKU1,14.843523,95,736,7460.900065,Female,53,30,37,...,517,30,33.616769,Pending,4.854068,Road,Route B,503.065579,769.0,True
2,haircare,SKU2,11.319683,34,8,9577.749626,Unknown,1,10,88,...,971,27,30.688019,Pending,4.580593,Air,Route C,141.920282,515.333333,True
3,skincare,SKU3,61.163343,68,83,7766.836426,Non-binary,23,13,59,...,937,18,35.624741,Fail,4.746649,Rail,Route A,254.776159,407.25,True
4,skincare,SKU4,4.805496,26,871,2686.505152,Non-binary,5,3,56,...,414,3,92.065161,Fail,3.14558,Air,Route A,923.440632,500.0,True


In [None]:
# Columns removed before modelling.
# NOTE(review): re-running this cell on the already-trimmed frame raises
# KeyError because the columns are gone.
columns_to_drop = [
    'SKU',
    'Inspection results',
    'Supplier name',
    'Routes',
]
df = df.drop(columns=columns_to_drop)

In [None]:
# Integer-encode every object-dtype column in place.
categorical_cols = df.select_dtypes(include='object').columns
print(categorical_cols)

# A fresh encoder is fitted per column; after the loop `le` holds the encoder
# for the last column only.
for col in categorical_cols:
    le = LabelEncoder()
    le.fit(df[col])
    df[col] = le.transform(df[col])


Index(['Product type', 'Customer demographics', 'Shipping carriers',
       'Location', 'Transportation modes'],
      dtype='object')


In [None]:
# Row count before and after one-hot encoding (expected to match).
# NOTE(review): the previous cell already integer-encoded the object columns,
# so this call presumably has nothing left to expand — confirm.
rows_before = len(df)
print(rows_before)
df = pd.get_dummies(df, drop_first=True)
len(df)

100


100

In [None]:
# Standardise every float64/int64 column in place.
# NOTE(review): the scaler is fitted on the full frame before any train/test
# split, and the selection includes the column later used as the target, so
# downstream metrics are in standardised units — confirm this is intended.
numerical_cols = df.select_dtypes(include=['float64', 'int64']).columns
scaler = StandardScaler()
scaled_values = scaler.fit_transform(df[numerical_cols])
df[numerical_cols] = scaled_values

In [None]:
# Preview the scaled frame.
df.head(5)

Unnamed: 0,Product type,Price,Availability,Number of products sold,Revenue generated,Customer demographics,Stock levels,Lead times,Order quantities,Shipping times,...,Location,Lead time,Production volumes,Manufacturing lead time,Manufacturing costs,Defect rates,Transportation modes,Costs,Demand Forecast,Restock Needed
0,-0.174945,0.656055,0.215762,1.128211,1.061345,0.342997,0.327757,-1.024965,1.755336,-0.645607,...,1.311482,1.354252,-1.348115,1.60469,-0.03422,-1.410379,0.602574,-1.328733,3.141245,True
1,1.074664,-1.116309,1.523413,0.909854,0.619626,-1.371989,0.167563,1.606084,-0.458534,-1.383444,...,1.311482,0.672581,-0.194247,1.717458,-0.473338,1.772241,0.602574,-0.101866,2.835336,True
2,-0.174945,-1.229938,-0.470754,-1.49869,1.398125,1.20049,-1.498456,-0.681785,1.45515,-1.383444,...,1.311482,-0.577148,1.540376,1.379154,-0.574899,1.584161,-1.31036,-1.507062,0.483854,True
3,1.074664,0.377302,0.640749,-1.250557,0.732138,0.342997,-0.793602,-0.338605,0.366977,0.09223,...,0.610155,0.786193,1.41047,0.364241,-0.403708,1.698365,-0.353893,-1.067946,-0.518075,True
4,1.074664,-1.439992,-0.732284,1.356493,-1.136219,0.342997,-1.370301,-1.482539,0.254407,0.830066,...,-0.091173,-1.37243,-0.587785,-1.327281,1.553476,0.597247,-1.31036,1.533791,0.341714,True


In [None]:
# Feature matrix: every column except the units-sold count.
# Target: the units-sold count itself. The original cell read
# 'Number of products to be restocked', a column that is neither in the
# loaded data nor created by any earlier cell, so it raised KeyError on a
# fresh run.
TARGET_COL = 'Number of products sold'

X = df.drop(columns=TARGET_COL)
y = df[TARGET_COL]

# NOTE(review): 'Demand Forecast' and 'Restock Needed' are computed from the
# target in an earlier cell and are still part of X — consider dropping them
# to avoid target leakage.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Impute remaining gaps in place: numeric columns with their median first,
# then anything still missing with the per-column mode.
# NOTE(review): this runs after X and y were already taken from df, so it
# presumably does not reach the modelling inputs — confirm the cell order.
numeric_medians = df.median(numeric_only=True)
df.fillna(numeric_medians, inplace=True)

most_frequent = df.mode().iloc[0]
df.fillna(most_frequent, inplace=True)


In [None]:
# 80/20 split with a fixed seed (same call as the earlier split cell).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit an RBF-kernel support vector regressor and predict on the held-out set.
svm_model = SVR(kernel='rbf').fit(X_train, y_train)
y_pred_svm = svm_model.predict(X_test)

In [None]:
# Score the SVM predictions: MAE, RMSE (square root of MSE) and R^2.
mse_svm = mean_squared_error(y_test, y_pred_svm)
rmse_svm = np.sqrt(mse_svm)
mae_svm = mean_absolute_error(y_test, y_pred_svm)
r2_svm = r2_score(y_test, y_pred_svm)

print(f'SVM - MAE: {mae_svm}, RMSE: {rmse_svm}, R2: {r2_svm}')

SVM - MAE: 1.0486685241695588, RMSE: 1.2094918224849187, R2: -0.40045048596511434


In [None]:
# Fit a 100-tree random forest with a fixed seed and predict on the held-out set.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

In [None]:
# Score the random-forest predictions: MAE, RMSE (square root of MSE) and R^2.
mae_rf = mean_absolute_error(y_test, y_pred_rf)
mse_rf = mean_squared_error(y_test, y_pred_rf)
rmse_rf = np.sqrt(mse_rf)
r2_rf = r2_score(y_test, y_pred_rf)

# Report line uses the same ", "-separated layout as the SVM report
# (the original was missing the space before "R2").
print(f'Random Forest - MAE: {mae_rf}, RMSE: {rmse_rf}, R2: {r2_rf}')

Random Forest - MAE: 1.0731434402442077, RMSE: 1.2109486099612026,R2: -0.4038260976698713
