In [2]:
from datetime import datetime
from pathlib import Path

import pandas as pd


In [4]:
# Load the raw inventory table (path is relative to the notebook's directory).
df = pd.read_csv("../data/inventory.csv")

# Convert dates
# dayfirst=True makes pandas read ambiguous strings day-first
# (e.g. 03/04/2024 -> 3 April, not 4 March).
df["purchase_date"] = pd.to_datetime(df["purchase_date"], dayfirst=True)
df["expiry_date"] = pd.to_datetime(df["expiry_date"], dayfirst=True)

# Calculate days to expiry
# Today is normalized to midnight before subtracting. Without that, the
# current time of day makes the difference fractional, and because `.dt.days`
# floors, an item expiring today would get -1 and one expiring tomorrow 0.
# NOTE(review): this feature depends on the date the notebook is run, so
# re-running on another day yields different values for the same rows.
# Assumes expiry_date holds plain dates (midnight) -- TODO confirm.
df["days_to_expiry"] = (df["expiry_date"] - pd.Timestamp.today().normalize()).dt.days

In [5]:
from sklearn.preprocessing import LabelEncoder

# Each categorical column gets its own encoder so its label -> integer
# mapping stays available separately from the other column's.
le_cat = LabelEncoder()
le_storage = LabelEncoder()

# (encoder, source column, encoded column) -- processed in this order so the
# new columns are appended to df in the same order as before.
for encoder, source_col, encoded_col in (
    (le_cat, "category", "category_encoded"),
    (le_storage, "storage_condition", "storage_encoded"),
):
    # fit_transform learns the distinct labels and returns their integer codes
    df[encoded_col] = encoder.fit_transform(df[source_col])


In [6]:
# Model inputs: the derived expiry feature, two quantity columns, and the
# integer-encoded categorical columns.
features = [
    "days_to_expiry",
    "quantity",
    "used_quantity",
    "category_encoded",
    "storage_encoded",
]

# Prediction target
y = df["waste_risk"]

# Feature matrix, columns in the order listed above
X = df[features]


In [8]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import joblib

# Split
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable between runs on the same data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate
# NOTE(review): in the recorded output the held-out set has only 6 rows, one
# of them in class 1, so the perfect scores are weak evidence of real
# performance -- consider cross-validation before relying on this model.
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

# Save model
# Create the output directory first: joblib.dump raises FileNotFoundError
# when "../models" does not exist (e.g. on a fresh checkout), which would
# throw away the training run above.
# NOTE(review): only the classifier is persisted; le_cat / le_storage are not
# saved here, and anything loading this pickle needs the same encoding --
# confirm whether they are stored elsewhere.
model_path = "../models/waste_model.pkl"
Path(model_path).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(model, model_path)


              precision    recall  f1-score   support

           0       1.00      1.00      1.00         5
           1       1.00      1.00      1.00         1

    accuracy                           1.00         6
   macro avg       1.00      1.00      1.00         6
weighted avg       1.00      1.00      1.00         6



['../models/waste_model.pkl']