In [76]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

#modeling
from sklearn.ensemble import RandomForestRegressor,AdaBoostRegressor
from sklearn.linear_model import LinearRegression,Ridge,Lasso
from sklearn.neighbors import KNeighborsRegressor
from catboost import CatBoostRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error,r2_score,mean_squared_error,mean_absolute_percentage_error
import openpyxl

In [77]:
df = pd.read_excel('data/Replenishment.xlsx')

In [9]:
df.head()

Unnamed: 0,uniware_sku_id,style_id,pd_size,fit,sub_category,sub_brand,store_category,warehouse,opening_total_inventory,weekly_opening_inventory,...,replen_flag,possible_inward,store_capacity,total_styles,sales_cover,po_qty,updated_at,store_name,style_broken_flag,Remark
0,1350162,641008,2xl,oversized fit,hoodies,bewakoof,female-hoodies,bk_ebo_008,0,0,...,Replen,3,0,0,0.084286,3,2025-12-03,bk_ebo_008,0,NON EOSS
1,1442508,691840,2xl,oversized fit,hoodies,bewakoof heavy duty,male-hoodies,bk_ebo_008,0,0,...,Replen,1,0,0,0.099089,1,2025-12-03,bk_ebo_008,0,NON EOSS
2,1305688,624277,34,baggy fit,jeans,bewakoof,male-jeans,bk_ebo_008,0,0,...,Replen,3,0,0,0.050608,3,2025-12-03,bk_ebo_008,1,NON EOSS
3,1305687,624277,32,baggy fit,jeans,bewakoof,male-jeans,bk_ebo_008,0,0,...,Replen,2,0,0,0.088564,2,2025-12-03,bk_ebo_008,1,NON EOSS
4,1305689,624277,36,baggy fit,jeans,bewakoof,male-jeans,bk_ebo_008,0,1,...,Replen,1,0,0,0.105433,1,2025-12-03,bk_ebo_008,1,NON EOSS


In [14]:
df1 = df.drop(['Remark','store_name','updated_at','style_broken_flag','total_styles','sub_category',
               'expected_demand_in_16_days'],axis=1)

In [15]:
df1.columns

Index(['uniware_sku_id', 'style_id', 'pd_size', 'fit', 'sub_brand',
       'store_category', 'warehouse', 'opening_total_inventory',
       'weekly_opening_inventory', 'current_total_inventory',
       'intransit_inventory', 'unbroken_days', 'L45_sold_qty', 'L30_sold_qty',
       'L15_sold_qty', 'L7_sold_qty', 'l30_ros', 'unbroken_l30_ros',
       'broken_flag', 'style_ros', 'sales_size_ratio', 'cap_ros',
       'final_unbroken_l30_ros', 'inventory', 'inventory_need_to_inward',
       'available_inventory_to_inward', 'replen_flag', 'possible_inward',
       'store_capacity', 'sales_cover', 'po_qty'],
      dtype='object')

In [16]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2563 entries, 0 to 2562
Data columns (total 31 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   uniware_sku_id                 2563 non-null   int64  
 1   style_id                       2563 non-null   int64  
 2   pd_size                        2563 non-null   object 
 3   fit                            2563 non-null   object 
 4   sub_brand                      2563 non-null   object 
 5   store_category                 2563 non-null   object 
 6   warehouse                      2563 non-null   object 
 7   opening_total_inventory        2563 non-null   int64  
 8   weekly_opening_inventory       2563 non-null   int64  
 9   current_total_inventory        2563 non-null   int64  
 10  intransit_inventory            2563 non-null   int64  
 11  unbroken_days                  2563 non-null   int64  
 12  L45_sold_qty                   2563 non-null   i

In [17]:
X = df1.drop(['po_qty'],axis=1)
y = df1['po_qty']

## Transformation of data

In [25]:
cat_features = X.select_dtypes(include='O').columns
num_features = X.select_dtypes(exclude='O').columns

In [None]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer

std_scaler = StandardScaler()
one_coder = OneHotEncoder()

preprocessor = ColumnTransformer(
    [
        ("Onehotencoder",one_coder,cat_features),
        ("StandardScaler",std_scaler,num_features)
    ]
)

In [27]:
X = preprocessor.fit_transform(X)
X.shape

(2563, 105)

### Train test split

In [30]:
from sklearn.model_selection import train_test_split

In [32]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train.shape, X_test.shape

((2050, 105), (513, 105))

### Evaluation metrics

In [55]:
def eval(actual,pred):
    r2_ = r2_score(actual,pred)
    mape = mean_absolute_percentage_error(actual,pred)
    mae = mean_absolute_error(actual,pred)
    mse = mean_squared_error(actual,pred)

    return r2_,mape,mae,mse

## Model development

In [85]:
models = {
    "Linear Regression": LinearRegression(),
    "Lasso": Lasso(),
    "Ridge": Ridge(),
    "K-Neighbors Regressor": KNeighborsRegressor(),
    "Random Forest Regressor": RandomForestRegressor(),
    "XGBRegressor": XGBRegressor(), 
    "CatBoosting Regressor": CatBoostRegressor(verbose=False),
    "AdaBoost Regressor": AdaBoostRegressor()
}

dict1={}
r2_list=[]
m=[]

for i in range(0,len(models)-1):
    model = list(models.values())[i]
    model.fit(X_train,y_train)

    #Model prediction
    pred_y_train = model.predict(X_train)
    pred_y_test = model.predict(X_test)

    #Finding evaluation metrics
    r2_train,mape_train,mae_train,mse_train = eval(y_train,pred_y_train)
    r2_test,mape_test,mae_test,mse_test = eval(y_test,pred_y_test)

    print(list(models.keys())[i])
    print("The Evaluation metrics for the Training Data")
    print(f"The R2 value is: {r2_train}") 
    print(f"The MAPE value is: {mape_train}") 
    print(f"The MAE value is: {mae_train}") 
    print(f"The MSE value is: {mse_train}") 
    print("---------------------------------------------") 
    print("\n")

    print("The Evaluation metrics for the Test Data")
    print(f"The R2 value is: {r2_test}") 
    print(f"The MAPE value is: {mape_test}") 
    print(f"The MAE value is: {mae_test}") 
    print(f"The MSE value is: {mse_test}") 

    print('='*30)
    print('\n')

    r2_list.append(r2_test)
    m.append(list(models.keys())[i])
    dict1[list(models.keys())[i]]=r2_test


Linear Regression
The Evaluation metrics for the Training Data
The R2 value is: 0.9999999998770273
The MAPE value is: 4.030543029493482e-06
The MAE value is: 5.866891702820154e-06
The MSE value is: 1.2261386073691888e-10
---------------------------------------------


The Evaluation metrics for the Test Data
The R2 value is: 0.9999999997760166
The MAPE value is: 4.968144212402491e-06
The MAE value is: 6.915775127300244e-06
The MSE value is: 2.2818018660480629e-10


Lasso
The Evaluation metrics for the Training Data
The R2 value is: 0.0
The MAPE value is: 0.47532195688507406
The MAE value is: 0.7330312908982749
The MSE value is: 0.9970817370612729
---------------------------------------------


The Evaluation metrics for the Test Data
The R2 value is: -0.0006349114284069923
The MAPE value is: 0.4743054383927298
The MAE value is: 0.7462653924784862
The MSE value is: 1.0193838170679523


Ridge
The Evaluation metrics for the Training Data
The R2 value is: 0.9999887556454244
The MAPE value 

In [83]:
pd.DataFrame({
    "Model":m,
    "R2_score":r2_list
})

Unnamed: 0,Model,R2_score
0,Linear Regression,1.0
1,Lasso,-0.000635
2,Ridge,0.99999
3,K-Neighbors Regressor,0.894224
4,Random Forest Regressor,0.999923
5,XGBRegressor,1.0
6,CatBoosting Regressor,0.999771


In [95]:
r2_df = pd.DataFrame(dict1.items(),columns=['Model','R2_score'])
r2_df.sort_values(by=['R2_score'],ascending=False)

Unnamed: 0,Model,R2_score
5,XGBRegressor,1.0
0,Linear Regression,1.0
2,Ridge,0.99999
4,Random Forest Regressor,0.999848
6,CatBoosting Regressor,0.999771
3,K-Neighbors Regressor,0.894224
1,Lasso,-0.000635


In [97]:
model = XGBRegressor()
model.fit(X,y)
prd = model.predict(X)
prd

array([2.9999776, 1.0000186, 2.9999776, ..., 1.0000186, 1.0000186,
       1.0000186], shape=(2563,), dtype=float32)

In [102]:
df['pred'] = prd

In [105]:
df['pred'] = df['pred'].map(lambda x: round(x,0))

In [106]:
df.to_csv("replen_model.csv")

In [108]:
import os
os.getcwd()

'c:\\Python\\ML_Project\\notebook'