In [2]:
# 📦 Cell 1: Imports
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from datetime import datetime


In [6]:
# 📄 Cell 2: Load the CSV
# Read the pricing snapshot (with price-history columns) from the working
# directory and preview the first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer='pricing_data_with_history.csv')
df.head(n=5)


Unnamed: 0,productId,supplierId,supplierName,name,sku,category,currentPrice,originalPrice,cost,stock,...,minStockLevel,isPerishable,expirationDate,daysUntilExpiry,status,lastUpdated,updatedAt,history_price,history_reason,history_date
0,199d91c8-fcb9-4f9c-bd80-27d1d48aff67,ea7897fa-e382-4b3d-9a3a-44d0f9295540,Supplier 9,Chicken Breast,CHI-645,Meat,5.81,6.46,4.55,272,...,19,True,2025-07-21T14:28:43.699830,18.0,stable,2025-07-02T14:28:43.699852,2025-07-02T14:28:43.699852,5.81,Initial creation,2025-07-02T14:28:43.699852
1,e99ceca2-7fe7-405d-b943-ac93e6864c12,3995e366-8aaf-490c-b7b0-2b34d652ff2b,Supplier 7,Baguette,BAG-755,Bakery,3.2,3.76,2.39,0,...,46,False,,,low_stock,2025-07-02T14:28:43.700003,2025-07-02T14:28:43.700003,3.2,Initial creation,2025-07-02T14:28:43.700003
2,1c0fde24-71b1-4109-aebe-b95a6d08e723,4c670941-8123-471c-bfdd-7960ca0ca95e,Supplier 9,Bagels,BAG-313,Bakery,4.62,4.75,3.59,268,...,33,False,,,stable,2025-07-02T14:28:43.700168,2025-07-02T14:28:43.700168,4.62,Initial creation,2025-07-02T14:28:43.700168
3,fb8498bf-0063-4033-ad14-6cce74b132ad,69660683-81b0-46e7-9069-72ef30a411be,Supplier 7,Bagels,BAG-571,Bakery,2.96,3.36,1.92,127,...,20,False,,,stable,2025-07-02T14:28:43.700713,2025-07-02T14:28:43.700713,2.96,Initial creation,2025-07-02T14:28:43.700713
4,0ac1f0a3-3646-4fd9-ba7b-fb4ffe2ae0a4,05cb99c7-2c92-48a3-b85f-be3fc7659e29,Supplier 5,Orange Juice,ORA-759,Beverages,6.75,7.64,5.3,27,...,40,False,,,low_stock,2025-07-02T14:28:43.700885,2025-07-02T14:28:43.700885,6.75,Initial creation,2025-07-02T14:28:43.700885


In [8]:
# 🧠 Cell 3: Feature Engineering
# Encode the boolean perishability flag as 0/1 so it can serve as a numeric feature.
df['isPerishable'] = df['isPerishable'].astype(int)

# Parse the timestamp strings; rows with a blank expiration date become NaT.
df['expirationDate'] = pd.to_datetime(df['expirationDate'])

# Measure remaining shelf life relative to each row's own `lastUpdated`
# timestamp instead of the wall clock. The previous pd.Timestamp.now()
# reference made this training feature depend on the day the notebook was
# executed, so a later Restart & Run All silently produced different data
# (and, once the dates had passed, all zeros after clipping).
# In the previewed first row this reproduces the value shipped in the CSV (18).
# NOTE(review): assumes `lastUpdated` is the moment the row was captured — confirm.
snapshot_time = pd.to_datetime(df['lastUpdated'])
df['daysUntilExpiry'] = (df['expirationDate'] - snapshot_time).dt.days.clip(lower=0)

# Rows without an expiration date keep NaN in `daysUntilExpiry`.
# NOTE(review): the model is later fitted on this column as-is, which presumably
# needs a scikit-learn release whose random forest accepts missing values — confirm.

# Optional: check for nulls
df.isnull().sum()


productId           0
supplierId          0
supplierName        0
name                0
sku                 0
category            0
currentPrice        0
originalPrice       0
cost                0
stock               0
maxStock            0
minStockLevel       0
isPerishable        0
expirationDate     28
daysUntilExpiry    28
status              0
lastUpdated         0
updatedAt           0
history_price       0
history_reason      0
history_date        0
dtype: int64

In [10]:
# 🏗️ Cell 4: Define Features and Target
# Numeric columns fed to the model, one per line for easy editing.
features = [
    'cost',
    'stock',
    'maxStock',
    'minStockLevel',
    'isPerishable',
    'daysUntilExpiry',
]
# Column the model learns to predict.
target = 'currentPrice'

# Feature matrix and target vector taken from the prepared frame.
X, y = df[features], df[target]


In [12]:
# 🔀 Cell 5: Train-Test Split
# Hold out 20% of the rows for evaluation; the seed is fixed so the split
# does not change between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [14]:
# 🤖 Cell 6: Train Model
# Random forest with 100 trees and a fixed seed, fitted on the training split.
# The fit call is the last expression, so the fitted estimator is shown as the
# cell output.
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
)
model.fit(X_train, y_train)


In [16]:
# ✅ Cell 7: Evaluate (Optional)
# Print the estimator's score on the training split first, then on the
# held-out split, one line each.
for label, split_X, split_y in (
    ("Training Score:", X_train, y_train),
    ("Test Score:", X_test, y_test),
):
    print(label, model.score(split_X, split_y))


Training Score: 0.9854466290597028
Test Score: 0.8950454476156018


In [18]:
# 💾 Cell 8: Save Model
# Serialize the fitted estimator to the working directory, then confirm.
joblib.dump(value=model, filename='price_optimizer.pkl')
print("✅ Model trained and saved as 'price_optimizer.pkl'")


✅ Model trained and saved as 'price_optimizer.pkl'
