In [1]:
import os

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load datasets
# Each raw CSV under Datasets/ is read into its own DataFrame. The names on
# the left and the paths on the right are listed in the same order.
(calendar, inventory, orders, pricing, product, sales, supplier) = map(
    pd.read_csv,
    [
        "Datasets/calendar.csv",
        "Datasets/inventory.csv",
        "Datasets/orders.csv",
        "Datasets/pricing.csv",
        "Datasets/product.csv",
        "Datasets/sales.csv",
        "Datasets/supplier.csv",
    ],
)

# Encode and clean
# Holiday flag: "Yes" -> 1, "No" -> 0. Series.map leaves NaN for any value
# that is not a key of the mapping.
holiday_codes = {"Yes": 1, "No": 0}
calendar["holiday"] = calendar["holiday"].map(holiday_codes)

# Integer-encode the categorical calendar columns, each with its own fresh
# LabelEncoder. "season" is written to a new column, while "month" and
# "day_of_week" are overwritten in place.
for source_col, encoded_col in (
    ("season", "season_encoded"),
    ("month", "month"),
    ("day_of_week", "day_of_week"),
):
    calendar[encoded_col] = LabelEncoder().fit_transform(calendar[source_col])

# Discount columns: cast to str, drop literal '%' characters, parse as float.
for frame in (pricing, sales):
    discount_text = frame["discount"].astype(str)
    frame["discount"] = discount_text.str.replace('%', '', regex=False).astype(float)

# -------- 1. Demand Forecasting Model --------
# Regress units_sold on price/discount plus the calendar and product columns
# merged onto each sales row.

# joblib.dump does not create missing directories, so make sure the output
# folder exists before the first model is saved.
os.makedirs("models", exist_ok=True)

df1 = sales.merge(calendar, on="date", how="left")
df1 = df1.merge(product[["product_id", "category", "shelf_life_days", "supplier_id"]], on="product_id", how="left")
# Encoder is fit on this frame only and is not persisted with the model.
df1["category_encoded"] = LabelEncoder().fit_transform(df1["category"])
X1 = df1[["price", "discount", "day_of_week", "month", "is_weekend", "is_festival", "season_encoded", "category_encoded", "shelf_life_days"]]
y1 = df1["units_sold"]
# Fixed seed: without it the forest's bootstrap samples differ on every run,
# so the saved model and its predictions would not be reproducible.
m1 = RandomForestRegressor(random_state=42)
m1.fit(X1, y1)
joblib.dump(m1, "models/demand_forecast_model.pkl")
print("✅ Model 1: Demand Forecasting Improved")

# -------- 2. Dynamic Pricing Model --------
# Ordinary least squares fit of price on discount, competitor price, margin,
# cost, encoded category and the supplier's "preferred" column.
pricing_feature_cols = [
    "discount",
    "competitor_price",
    "profit_margin",
    "cost_price",
    "category_encoded",
    "preferred",
]
df2 = (
    pricing
    .merge(product[["product_id", "category", "cost_price", "supplier_id"]], on="product_id", how="left")
    .merge(supplier[["supplier_id", "preferred"]], on="supplier_id", how="left")
)
# Encoder is fit on this frame only and is not persisted with the model.
df2["category_encoded"] = LabelEncoder().fit_transform(df2["category"])
X2, y2 = df2[pricing_feature_cols], df2["price"]
m2 = LinearRegression()
m2.fit(X2, y2)
joblib.dump(m2, "models/dynamic_pricing_model.pkl")
print("✅ Model 2: Dynamic Pricing")

# -------- 3. Reorder Level Predictor --------
# Regress reorder_level on stock/expiry columns from inventory, shelf life
# from product, and the supplier's return_rate.
df3 = inventory.merge(product[["product_id", "category", "shelf_life_days", "supplier_id"]], on="product_id", how="left")
df3 = df3.merge(supplier[["supplier_id", "return_rate"]], on="supplier_id", how="left")
# Encoder is fit on this frame only and is not persisted with the model.
df3["category_encoded"] = LabelEncoder().fit_transform(df3["category"])
X3 = df3[["current_stock", "days_to_expire", "shelf_life_days", "return_rate", "category_encoded"]]
y3 = df3["reorder_level"]
# Fixed seed so the per-tree random state (used e.g. when permuting features
# at each split) is the same on every run and the saved model is reproducible.
m3 = GradientBoostingRegressor(random_state=42)
m3.fit(X3, y3)
joblib.dump(m3, "models/reorder_level_model.pkl")
print("✅ Model 3: Reorder Level")

# -------- 4. Stockout Risk Classifier --------
# Binary classifier for a "stockout_risk" flag derived from units_sold.
stockout_feature_cols = [
    "units_sold",
    "discount",
    "day_number",
    "is_festival",
    "category_encoded",
    "preferred",
    "reorder_level",
]
df4 = (
    sales
    .merge(calendar, on="date", how="left")
    .merge(product[["product_id", "category", "supplier_id"]], on="product_id", how="left")
    .merge(supplier[["supplier_id", "preferred"]], on="supplier_id", how="left")
    .merge(inventory[["product_id", "reorder_level"]], on="product_id", how="left")
)
# Label is 1 when more than 90 units were sold on the row, else 0.
# NOTE(review): units_sold is also one of the features below, so the target
# is a deterministic function of an input (target leakage). The feature set
# is left as-is here because downstream cells call m4 with these 7 columns.
df4["stockout_risk"] = df4["units_sold"].gt(90).astype(int)
# Encoder is fit on this frame only and is not persisted with the model.
df4["category_encoded"] = LabelEncoder().fit_transform(df4["category"])
X4, y4 = df4[stockout_feature_cols], df4["stockout_risk"]
m4 = LogisticRegression(max_iter=1000)
m4.fit(X4, y4)
joblib.dump(m4, "models/stockout_model.pkl")
print("✅ Model 4: Stockout Classifier")

# -------- 5. Supplier Delay Predictor --------
# Regress on_time_delivery_rate directly on the supplier table's columns
# (no merge needed: features and target live in the same frame).
X5 = supplier[["default_lead_days", "avg_delivery_delay", "quality_score", "return_rate", "preferred", "supply_cost_index"]]
y5 = supplier["on_time_delivery_rate"]
# Fixed seed so the per-tree random state is the same on every run and the
# saved model is reproducible.
m5 = GradientBoostingRegressor(random_state=42)
m5.fit(X5, y5)
joblib.dump(m5, "models/supplier_delay_model.pkl")
print("✅ Model 5: Supplier Delay")

# -------- 6. Profit Margin Estimator --------
# Ordinary least squares fit of profit_margin on price, competitor price,
# discount, supplier quality score and encoded category.
margin_feature_cols = [
    "price",
    "competitor_price",
    "discount",
    "quality_score",
    "category_encoded",
]
df6 = (
    pricing
    .merge(product[["product_id", "category", "supplier_id"]], on="product_id", how="left")
    .merge(supplier[["supplier_id", "quality_score"]], on="supplier_id", how="left")
)
# Encoder is fit on this frame only and is not persisted with the model.
df6["category_encoded"] = LabelEncoder().fit_transform(df6["category"])
X6, y6 = df6[margin_feature_cols], df6["profit_margin"]
m6 = LinearRegression()
m6.fit(X6, y6)
joblib.dump(m6, "models/profit_margin_model.pkl")
print("✅ Model 6: Profit Margin")

# -------- 7. Calendar Holiday Classifier --------
# Binary classifier for the 0/1 "holiday" column using calendar-only features.
X7 = calendar[["day_number", "is_weekend", "is_festival", "season_encoded", "week_of_year"]]
y7 = calendar["holiday"]
# Same iteration budget as the stockout classifier (Model 4): the features
# are unscaled, and the default max_iter=100 can stop the solver before it
# has converged.
m7 = LogisticRegression(max_iter=1000)
m7.fit(X7, y7)
joblib.dump(m7, "models/calendar_holiday_model.pkl")
print("✅ Model 7: Holiday Classifier")

# -------- 8. Order Delay Predictor --------
# Regress delay_days on the ordered quantity, the supplier's average delay
# and quality score, and the encoded product category.
df8 = orders.merge(product[["product_id", "category", "supplier_id"]], on="product_id", how="left")
df8 = df8.merge(supplier[["supplier_id", "avg_delivery_delay", "quality_score"]], on="supplier_id", how="left")
# Encoder is fit on this frame only and is not persisted with the model.
df8["category_encoded"] = LabelEncoder().fit_transform(df8["category"])
X8 = df8[["quantity_ordered", "avg_delivery_delay", "quality_score", "category_encoded"]]
y8 = df8["delay_days"]
# Fixed seed: without it the forest's bootstrap samples differ on every run,
# so the saved model and its predictions would not be reproducible.
m8 = RandomForestRegressor(random_state=42)
m8.fit(X8, y8)
joblib.dump(m8, "models/order_delay_model.pkl")
print("✅ Model 8: Order Delay")

print("\n🎯 All enhanced models trained and saved successfully!")


✅ Model 1: Demand Forecasting Improved
✅ Model 2: Dynamic Pricing
✅ Model 3: Reorder Level
✅ Model 4: Stockout Classifier
✅ Model 5: Supplier Delay
✅ Model 6: Profit Margin
✅ Model 7: Holiday Classifier
✅ Model 8: Order Delay

🎯 All enhanced models trained and saved successfully!


In [2]:
# -------- Show Inputs and Outputs of All Models --------
# One (heading, input line, output line) triple per model, printed in order.
# The lists here are hand-maintained text; they are not read from the
# fitted models.
model_io_lines = (
    (
        "1️⃣ Demand Forecasting:",
        "   📥 INPUT  - price, discount, day_of_week, month, is_weekend, is_festival, season_encoded, category_encoded, shelf_life_days",
        "   📤 OUTPUT - units_sold",
    ),
    (
        "2️⃣ Dynamic Pricing:",
        "   📥 INPUT  - discount, competitor_price, profit_margin, cost_price, category_encoded, preferred",
        "   📤 OUTPUT - price",
    ),
    (
        "3️⃣ Reorder Predictor:",
        "   📥 INPUT  - current_stock, days_to_expire, shelf_life_days, return_rate, category_encoded",
        "   📤 OUTPUT - reorder_level",
    ),
    (
        "4️⃣ Stockout Classifier:",
        "   📥 INPUT  - units_sold, discount, day_number, is_festival, category_encoded, preferred, reorder_level",
        "   📤 OUTPUT - stockout_risk (0 or 1)",
    ),
    (
        "5️⃣ Supplier Delay:",
        "   📥 INPUT  - default_lead_days, avg_delivery_delay, quality_score, return_rate, preferred, supply_cost_index",
        "   📤 OUTPUT - on_time_delivery_rate",
    ),
    (
        "6️⃣ Profit Margin:",
        "   📥 INPUT  - price, competitor_price, discount, quality_score, category_encoded",
        "   📤 OUTPUT - profit_margin",
    ),
    (
        "7️⃣ Holiday Classifier:",
        "   📥 INPUT  - day_number, is_weekend, is_festival, season_encoded, week_of_year",
        "   📤 OUTPUT - holiday (0 or 1)",
    ),
    (
        "8️⃣ Order Delay:",
        "   📥 INPUT  - quantity_ordered, avg_delivery_delay, quality_score, category_encoded",
        "   📤 OUTPUT - delay_days",
    ),
)

print("\n📥 INPUTS and 📤 OUTPUTS for each model (Enhanced Version):")
for io_triple in model_io_lines:
    for io_line in io_triple:
        print(io_line)



📥 INPUTS and 📤 OUTPUTS for each model (Enhanced Version):
1️⃣ Demand Forecasting:
   📥 INPUT  - price, discount, day_of_week, month, is_weekend, is_festival, season_encoded, category_encoded, shelf_life_days
   📤 OUTPUT - units_sold
2️⃣ Dynamic Pricing:
   📥 INPUT  - discount, competitor_price, profit_margin, cost_price, category_encoded, preferred
   📤 OUTPUT - price
3️⃣ Reorder Predictor:
   📥 INPUT  - current_stock, days_to_expire, shelf_life_days, return_rate, category_encoded
   📤 OUTPUT - reorder_level
4️⃣ Stockout Classifier:
   📥 INPUT  - units_sold, discount, day_number, is_festival, category_encoded, preferred, reorder_level
   📤 OUTPUT - stockout_risk (0 or 1)
5️⃣ Supplier Delay:
   📥 INPUT  - default_lead_days, avg_delivery_delay, quality_score, return_rate, preferred, supply_cost_index
   📤 OUTPUT - on_time_delivery_rate
6️⃣ Profit Margin:
   📥 INPUT  - price, competitor_price, discount, quality_score, category_encoded
   📤 OUTPUT - profit_margin
7️⃣ Holiday Classifier:
 

In [4]:
# -------- Test Cell to Predict using All Models --------
# Each model was fitted on a DataFrame with named columns. Passing a bare
# list to .predict() makes scikit-learn warn about missing feature names and
# would silently mispredict if the positional order ever drifted from the
# training order. Each sample is therefore a single-row DataFrame whose
# columns are spelled out in the same order as the training features.


def _predict_one(model, features):
    """Return the model's prediction for one sample.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``.
    features : dict mapping feature name -> value, listed in the same order
        as the columns the model was trained on.

    Returns
    -------
    The first (and only) element of ``model.predict`` for the one-row frame.
    """
    return model.predict(pd.DataFrame([features]))[0]


print("\n🔍 Sample predictions from each model (Enhanced Version):")

# 1️⃣ Demand Forecasting
print("Demand Forecast →", _predict_one(m1, {
    "price": 120.0,
    "discount": 10.0,
    "day_of_week": 2,
    "month": 5,
    "is_weekend": 0,
    "is_festival": 1,
    "season_encoded": 3,
    "category_encoded": 1,
    "shelf_life_days": 30,
}))

# 2️⃣ Dynamic Pricing
print("Dynamic Pricing →", _predict_one(m2, {
    "discount": 5.0,
    "competitor_price": 130.0,
    "profit_margin": 12.5,
    "cost_price": 100.0,
    "category_encoded": 2,
    "preferred": 1,
}))

# 3️⃣ Reorder Level
print("Reorder Level →", _predict_one(m3, {
    "current_stock": 50,
    "days_to_expire": 10,
    "shelf_life_days": 60,
    "return_rate": 0.05,
    "category_encoded": 2,
}))

# 4️⃣ Stockout Risk
print("Stockout Risk →", _predict_one(m4, {
    "units_sold": 100,
    "discount": 10.0,
    "day_number": 15,
    "is_festival": 1,
    "category_encoded": 2,
    "preferred": 1,
    "reorder_level": 40,
}))

# 5️⃣ Supplier Delay
print("Supplier Delay →", _predict_one(m5, {
    "default_lead_days": 7,
    "avg_delivery_delay": 2.0,
    "quality_score": 0.85,
    "return_rate": 0.03,
    "preferred": 1,
    "supply_cost_index": 1.1,
}))

# 6️⃣ Profit Margin
print("Profit Margin →", _predict_one(m6, {
    "price": 150.0,
    "competitor_price": 130.0,
    "discount": 5.0,
    "quality_score": 0.9,
    "category_encoded": 2,
}))

# 7️⃣ Holiday Classifier
print("Holiday Classifier →", _predict_one(m7, {
    "day_number": 3,
    "is_weekend": 1,
    "is_festival": 1,
    "season_encoded": 2,
    "week_of_year": 12,
}))

# 8️⃣ Order Delay
print("Order Delay →", _predict_one(m8, {
    "quantity_ordered": 25,
    "avg_delivery_delay": 2.0,
    "quality_score": 0.9,
    "category_encoded": 2,
}))



🔍 Sample predictions from each model (Enhanced Version):
Demand Forecast → 69.33
Dynamic Pricing → 68.89888332928938
Reorder Level → 67.44342777841547
Stockout Risk → 1
Supplier Delay → 89.28288121491829
Profit Margin → 9.742784907948302
Holiday Classifier → 0
Order Delay → 2.24


