In [1]:
from pathlib import Path

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load datasets
# Each table is read from a CSV under Datasets/, relative to the notebook's
# working directory.
calendar = pd.read_csv("Datasets/calendar.csv")
inventory = pd.read_csv("Datasets/inventory.csv")
orders = pd.read_csv("Datasets/orders.csv")
pricing = pd.read_csv("Datasets/pricing.csv")
product = pd.read_csv("Datasets/product.csv")
sales = pd.read_csv("Datasets/sales.csv")
supplier = pd.read_csv("Datasets/supplier.csv")

# The training sections pickle their models into models/. joblib.dump does not
# create missing parent directories, so make sure the folder exists up front.
Path("models").mkdir(parents=True, exist_ok=True)


def _percent_to_float(column):
    """Return `column` as floats after removing any literal '%' characters.

    Values are first cast to str, so numeric input passes through unchanged
    and missing values come back as NaN.
    """
    return column.astype(str).str.replace('%', '', regex=False).astype(float)


# Encode and clean
# "Yes"/"No" -> 1/0; any other value is left as NaN by Series.map.
calendar["holiday"] = calendar["holiday"].map({"Yes": 1, "No": 0})
# LabelEncoder numbers classes in sorted order. The fitted encoders are not
# kept, so the value -> code mapping is not stored anywhere for inference.
# NOTE(review): if month / day_of_week hold names, the codes are alphabetical
# rather than chronological — confirm that is intended.
calendar["season_encoded"] = LabelEncoder().fit_transform(calendar["season"])
calendar["month"] = LabelEncoder().fit_transform(calendar["month"])
calendar["day_of_week"] = LabelEncoder().fit_transform(calendar["day_of_week"])
pricing["discount"] = _percent_to_float(pricing["discount"])
sales["discount"] = _percent_to_float(sales["discount"])

# -------- 1. Demand Forecasting Model --------
# Sales rows enriched with calendar attributes (joined on date) and product
# attributes (joined on product_id); the target is units_sold.
df1 = sales.merge(calendar, on="date", how="left")
df1 = df1.merge(product[["product_id", "category", "shelf_life_days", "supplier_id"]], on="product_id", how="left")
df1["category_encoded"] = LabelEncoder().fit_transform(df1["category"])
X1 = df1[["price", "discount", "day_of_week", "month", "is_weekend", "is_festival", "season_encoded", "category_encoded", "shelf_life_days"]]
y1 = df1["units_sold"]
# Fixed seed: the forest draws random bootstrap samples, so without it every
# re-run of this cell would pickle a different model.
m1 = RandomForestRegressor(random_state=42)
m1.fit(X1, y1)
joblib.dump(m1, "models/demand_forecast_model.pkl")
print("‚úÖ Model 1: Demand Forecasting Improved")

# -------- 2. Dynamic Pricing Model --------
# Pricing rows joined to their product attributes (on product_id) and then to
# the supplier's `preferred` column (on supplier_id); the target is price.
df2 = (
    pricing
    .merge(product[["product_id", "category", "cost_price", "supplier_id"]], on="product_id", how="left")
    .merge(supplier[["supplier_id", "preferred"]], on="supplier_id", how="left")
)
df2["category_encoded"] = LabelEncoder().fit_transform(df2["category"])
pricing_feature_columns = ["discount", "competitor_price", "profit_margin", "cost_price", "category_encoded", "preferred"]
X2 = df2[pricing_feature_columns]
y2 = df2["price"]
# fit() returns the estimator itself, so m2 is the fitted model.
m2 = LinearRegression().fit(X2, y2)
joblib.dump(m2, "models/dynamic_pricing_model.pkl")
print("‚úÖ Model 2: Dynamic Pricing")

# -------- 3. Reorder Level Predictor --------
# Inventory rows joined to product attributes (on product_id) and to the
# supplier's return_rate (on supplier_id); the target is reorder_level.
df3 = inventory.merge(product[["product_id", "category", "shelf_life_days", "supplier_id"]], on="product_id", how="left")
df3 = df3.merge(supplier[["supplier_id", "return_rate"]], on="supplier_id", how="left")
df3["category_encoded"] = LabelEncoder().fit_transform(df3["category"])
X3 = df3[["current_stock", "days_to_expire", "shelf_life_days", "return_rate", "category_encoded"]]
y3 = df3["reorder_level"]
# Pin the estimator's random_state so re-running the cell reproduces the same
# pickled model.
m3 = GradientBoostingRegressor(random_state=42)
m3.fit(X3, y3)
joblib.dump(m3, "models/reorder_level_model.pkl")
print("‚úÖ Model 3: Reorder Level")

# -------- 4. Stockout Risk Classifier --------
# Sales rows enriched, in order, with calendar attributes (on date), product
# category and supplier_id (on product_id), the supplier's `preferred` column
# (on supplier_id) and the inventory reorder_level (on product_id).
# NOTE(review): if inventory holds more than one row per product_id, the last
# join repeats sales rows — confirm the inventory grain.
df4 = (
    sales
    .merge(calendar, on="date", how="left")
    .merge(product[["product_id", "category", "supplier_id"]], on="product_id", how="left")
    .merge(supplier[["supplier_id", "preferred"]], on="supplier_id", how="left")
    .merge(inventory[["product_id", "reorder_level"]], on="product_id", how="left")
)
# Label is 1 when a row sold more than 90 units, otherwise 0.
# NOTE(review): units_sold is also one of the features below, so the label is
# a direct function of an input (target leakage). Dropping it would change the
# feature count that existing predict() callers pass, so it is only flagged.
df4["stockout_risk"] = df4["units_sold"].gt(90).astype(int)
df4["category_encoded"] = LabelEncoder().fit_transform(df4["category"])
stockout_feature_columns = ["units_sold", "discount", "day_number", "is_festival", "category_encoded", "preferred", "reorder_level"]
X4 = df4[stockout_feature_columns]
y4 = df4["stockout_risk"]
# fit() returns the estimator itself, so m4 is the fitted classifier.
m4 = LogisticRegression(max_iter=1000).fit(X4, y4)
joblib.dump(m4, "models/stockout_model.pkl")
print("‚úÖ Model 4: Stockout Classifier")

# -------- 5. Supplier Delay Predictor --------
# Trained on the supplier table alone. Despite the section name, the target
# this model regresses is on_time_delivery_rate.
X5 = supplier[["default_lead_days", "avg_delivery_delay", "quality_score", "return_rate", "preferred", "supply_cost_index"]]
y5 = supplier["on_time_delivery_rate"]
# Pin the estimator's random_state so re-running the cell reproduces the same
# pickled model.
m5 = GradientBoostingRegressor(random_state=42)
m5.fit(X5, y5)
joblib.dump(m5, "models/supplier_delay_model.pkl")
print("‚úÖ Model 5: Supplier Delay")

# -------- 6. Profit Margin Estimator --------
# Pricing rows joined to product category and supplier_id (on product_id) and
# to the supplier's quality_score (on supplier_id); the target is profit_margin.
df6 = (
    pricing
    .merge(product[["product_id", "category", "supplier_id"]], on="product_id", how="left")
    .merge(supplier[["supplier_id", "quality_score"]], on="supplier_id", how="left")
)
df6["category_encoded"] = LabelEncoder().fit_transform(df6["category"])
margin_feature_columns = ["price", "competitor_price", "discount", "quality_score", "category_encoded"]
X6 = df6[margin_feature_columns]
y6 = df6["profit_margin"]
# fit() returns the estimator itself, so m6 is the fitted model.
m6 = LinearRegression().fit(X6, y6)
joblib.dump(m6, "models/profit_margin_model.pkl")
print("‚úÖ Model 6: Profit Margin")

# -------- 7. Calendar Holiday Classifier --------
# Trained directly on the calendar table; `holiday` was mapped to 0/1 and
# `season_encoded` was added in the "Encode and clean" step.
X7 = calendar[["day_number", "is_weekend", "is_festival", "season_encoded", "week_of_year"]]
y7 = calendar["holiday"]
# The features are fed in unscaled, and the solver's default iteration cap
# (100) can stop it before convergence. Use the same max_iter as the stockout
# classifier so both logistic models are configured consistently.
m7 = LogisticRegression(max_iter=1000)
m7.fit(X7, y7)
joblib.dump(m7, "models/calendar_holiday_model.pkl")
print("‚úÖ Model 7: Holiday Classifier")

# -------- 8. Order Delay Predictor --------
# Order rows joined to product category and supplier_id (on product_id) and to
# the supplier's avg_delivery_delay / quality_score (on supplier_id); the
# target is delay_days.
df8 = orders.merge(product[["product_id", "category", "supplier_id"]], on="product_id", how="left")
df8 = df8.merge(supplier[["supplier_id", "avg_delivery_delay", "quality_score"]], on="supplier_id", how="left")
df8["category_encoded"] = LabelEncoder().fit_transform(df8["category"])
X8 = df8[["quantity_ordered", "avg_delivery_delay", "quality_score", "category_encoded"]]
y8 = df8["delay_days"]
# Fixed seed: the forest draws random bootstrap samples, so without it every
# re-run of this cell would pickle a different model.
m8 = RandomForestRegressor(random_state=42)
m8.fit(X8, y8)
joblib.dump(m8, "models/order_delay_model.pkl")
print("‚úÖ Model 8: Order Delay")

print("\nüéØ All enhanced models trained and saved successfully!")


‚úÖ Model 1: Demand Forecasting Improved
‚úÖ Model 2: Dynamic Pricing
‚úÖ Model 3: Reorder Level
‚úÖ Model 4: Stockout Classifier
‚úÖ Model 5: Supplier Delay
‚úÖ Model 6: Profit Margin
‚úÖ Model 7: Holiday Classifier
‚úÖ Model 8: Order Delay

üéØ All enhanced models trained and saved successfully!


In [2]:
# -------- Show Inputs and Outputs of All Models --------
# One (heading, input line, output line) triple per trained model, in training
# order. The loop at the bottom prints each triple verbatim, one line per item.
MODEL_IO_SUMMARY = (
    (
        "1Ô∏è‚É£ Demand Forecasting:",
        "   üì• INPUT  - price, discount, day_of_week, month, is_weekend, is_festival, season_encoded, category_encoded, shelf_life_days",
        "   üì§ OUTPUT - units_sold",
    ),
    (
        "2Ô∏è‚É£ Dynamic Pricing:",
        "   üì• INPUT  - discount, competitor_price, profit_margin, cost_price, category_encoded, preferred",
        "   üì§ OUTPUT - price",
    ),
    (
        "3Ô∏è‚É£ Reorder Predictor:",
        "   üì• INPUT  - current_stock, days_to_expire, shelf_life_days, return_rate, category_encoded",
        "   üì§ OUTPUT - reorder_level",
    ),
    (
        "4Ô∏è‚É£ Stockout Classifier:",
        "   üì• INPUT  - units_sold, discount, day_number, is_festival, category_encoded, preferred, reorder_level",
        "   üì§ OUTPUT - stockout_risk (0 or 1)",
    ),
    (
        "5Ô∏è‚É£ Supplier Delay:",
        "   üì• INPUT  - default_lead_days, avg_delivery_delay, quality_score, return_rate, preferred, supply_cost_index",
        "   üì§ OUTPUT - on_time_delivery_rate",
    ),
    (
        "6Ô∏è‚É£ Profit Margin:",
        "   üì• INPUT  - price, competitor_price, discount, quality_score, category_encoded",
        "   üì§ OUTPUT - profit_margin",
    ),
    (
        "7Ô∏è‚É£ Holiday Classifier:",
        "   üì• INPUT  - day_number, is_weekend, is_festival, season_encoded, week_of_year",
        "   üì§ OUTPUT - holiday (0 or 1)",
    ),
    (
        "8Ô∏è‚É£ Order Delay:",
        "   üì• INPUT  - quantity_ordered, avg_delivery_delay, quality_score, category_encoded",
        "   üì§ OUTPUT - delay_days",
    ),
)

print("\nüì• INPUTS and üì§ OUTPUTS for each model (Enhanced Version):")
for summary_lines in MODEL_IO_SUMMARY:
    # sep="\n" puts each of the three strings on its own line, exactly as
    # three separate print() calls would.
    print(*summary_lines, sep="\n")



üì• INPUTS and üì§ OUTPUTS for each model (Enhanced Version):
1Ô∏è‚É£ Demand Forecasting:
   üì• INPUT  - price, discount, day_of_week, month, is_weekend, is_festival, season_encoded, category_encoded, shelf_life_days
   üì§ OUTPUT - units_sold
2Ô∏è‚É£ Dynamic Pricing:
   üì• INPUT  - discount, competitor_price, profit_margin, cost_price, category_encoded, preferred
   üì§ OUTPUT - price
3Ô∏è‚É£ Reorder Predictor:
   üì• INPUT  - current_stock, days_to_expire, shelf_life_days, return_rate, category_encoded
   üì§ OUTPUT - reorder_level
4Ô∏è‚É£ Stockout Classifier:
   üì• INPUT  - units_sold, discount, day_number, is_festival, category_encoded, preferred, reorder_level
   üì§ OUTPUT - stockout_risk (0 or 1)
5Ô∏è‚É£ Supplier Delay:
   üì• INPUT  - default_lead_days, avg_delivery_delay, quality_score, return_rate, preferred, supply_cost_index
   üì§ OUTPUT - on_time_delivery_rate
6Ô∏è‚É£ Profit Margin:
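   üì• INPUT  - price, competitor_price, discount, quality_score, category_encoded
   üì§ OUTPUT - profit_margin
7Ô∏è‚É£ Holiday Classifier:
   üì• INPUT  - day_number, is_weekend, is_festival, season_encoded, week_of_year
   üì§ OUTPUT - holiday (0 or 1)
8Ô∏è‚É£ Order Delay:
   üì• INPUT  - quantity_ordered, avg_delivery_delay, quality_score, category_encoded
   üì§ OUTPUT - delay_days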

In [4]:
# -------- Test Cell to Predict using All Models --------
def _predict_single(model, features):
    """Return `model`'s prediction for a single sample.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``
        One of the models m1..m8 trained earlier in the notebook.
    features : dict
        Maps each training column name to the sample's value, listed in the
        same order as the columns the model was fitted on.

    Returns
    -------
    The first (and only) element of ``model.predict`` for the one-row frame.
    """
    # The models were fitted on DataFrames, so predict on a one-row DataFrame
    # with the same column names. A bare nested list carries no names, which
    # makes scikit-learn warn and hides column-order mistakes.
    return model.predict(pd.DataFrame([features]))[0]


print("\nüîç Sample predictions from each model (Enhanced Version):")

# 1. Demand Forecasting
print("Demand Forecast ‚Üí", _predict_single(m1, {
    "price": 120.0,
    "discount": 10.0,
    "day_of_week": 2,
    "month": 5,
    "is_weekend": 0,
    "is_festival": 1,
    "season_encoded": 3,
    "category_encoded": 1,
    "shelf_life_days": 30,
}))

# 2. Dynamic Pricing
print("Dynamic Pricing ‚Üí", _predict_single(m2, {
    "discount": 5.0,
    "competitor_price": 130.0,
    "profit_margin": 12.5,
    "cost_price": 100.0,
    "category_encoded": 2,
    "preferred": 1,
}))

# 3. Reorder Level
print("Reorder Level ‚Üí", _predict_single(m3, {
    "current_stock": 50,
    "days_to_expire": 10,
    "shelf_life_days": 60,
    "return_rate": 0.05,
    "category_encoded": 2,
}))

# 4. Stockout Risk
print("Stockout Risk ‚Üí", _predict_single(m4, {
    "units_sold": 100,
    "discount": 10.0,
    "day_number": 15,
    "is_festival": 1,
    "category_encoded": 2,
    "preferred": 1,
    "reorder_level": 40,
}))

# 5. Supplier Delay (the model's target is on_time_delivery_rate)
print("Supplier Delay ‚Üí", _predict_single(m5, {
    "default_lead_days": 7,
    "avg_delivery_delay": 2.0,
    "quality_score": 0.85,
    "return_rate": 0.03,
    "preferred": 1,
    "supply_cost_index": 1.1,
}))

# 6. Profit Margin
print("Profit Margin ‚Üí", _predict_single(m6, {
    "price": 150.0,
    "competitor_price": 130.0,
    "discount": 5.0,
    "quality_score": 0.9,
    "category_encoded": 2,
}))

# 7. Holiday Classifier
print("Holiday Classifier ‚Üí", _predict_single(m7, {
    "day_number": 3,
    "is_weekend": 1,
    "is_festival": 1,
    "season_encoded": 2,
    "week_of_year": 12,
}))

# 8. Order Delay
print("Order Delay ‚Üí", _predict_single(m8, {
    "quantity_ordered": 25,
    "avg_delivery_delay": 2.0,
    "quality_score": 0.9,
    "category_encoded": 2,
}))



üîç Sample predictions from each model (Enhanced Version):
Demand Forecast ‚Üí 69.33
Dynamic Pricing ‚Üí 68.89888332928938
Reorder Level ‚Üí 67.44342777841547
Stockout Risk ‚Üí 1
Supplier Delay ‚Üí 89.28288121491829
Profit Margin ‚Üí 9.742784907948302
Holiday Classifier ‚Üí 0
Order Delay ‚Üí 2.24


