In [41]:
import pickle

import joblib
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder


In [42]:
# Load the raw table from the CSV that sits next to the notebook's parent folder.
items_csv_path = "../data/items.csv"
df = pd.read_csv(items_csv_path)

In [43]:
# Normalise the two text columns: cast to str, then trim surrounding whitespace.
for text_col in ("Food Item", "Storage"):
    df[text_col] = df[text_col].astype(str).str.strip()

# The target column is cast to integers.
df["Expiry (days)"] = df["Expiry (days)"].astype(int)


In [44]:
# Show the first rows, then summary statistics of the target, one after the other.
print(df.head(), df["Expiry (days)"].describe(), sep="\n")

  Food Item    Storage  Expiry (days)
0      Milk  Room Temp              2
1      Milk     Fridge              7
2      Milk    Freezer             90
3      Eggs  Room Temp              0
4      Eggs     Fridge             35
count    174.000000
mean     105.609195
std      135.732426
min        0.000000
25%        5.000000
50%       14.000000
75%      180.000000
max      365.000000
Name: Expiry (days), dtype: float64


In [45]:
# Split the frame into model inputs (two categorical columns) and the target.
feature_columns = ["Food Item", "Storage"]
target_column = "Expiry (days)"

X = df[feature_columns]
y = df[target_column]

In [46]:
# Sorted array of the distinct expiry values present in the data
# (np.unique returns its result already sorted).
VALID_DAYS = np.unique(df["Expiry (days)"].to_numpy())
VALID_DAYS

array([  0,   1,   2,   4,   5,   6,   7,  10,  14,  21,  30,  35,  60,
        90, 180, 300, 365])

In [47]:
# One-hot encode both categorical inputs; categories unseen at fit time are
# ignored at transform time instead of raising (handle_unknown="ignore").
categorical_columns = ["Food Item", "Storage"]
one_hot = OneHotEncoder(handle_unknown="ignore")

preprocessor = ColumnTransformer(
    transformers=[("cat", one_hot, categorical_columns)],
)

# The encoder is fitted on the full feature frame X and applied in one step.
X_encoded = preprocessor.fit_transform(X)

In [48]:
# Hold out 20% of the encoded rows for evaluation; the seed is fixed.
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, **split_options)


In [49]:
# Random-forest regressor with 200 trees and a fixed seed, trained on the training split.
forest_params = {"n_estimators": 200, "random_state": 42}
model = RandomForestRegressor(**forest_params)
model.fit(X_train, y_train)

In [50]:
# Score the held-out split and report the mean absolute error.
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print("Mean Absolute Error:", mae)

Mean Absolute Error: 42.804


In [51]:
# Example query used by the cells below: (food item, storage method).
sample_food, sample_storage = "Milk", "Fridge"

In [52]:
# Build a one-row frame with the same column names the encoder was fitted on,
# then encode it with the already-fitted preprocessor.
sample_input = pd.DataFrame(
    {
        "Food Item": [sample_food],
        "Storage": [sample_storage],
    }
)
sample_encoded = preprocessor.transform(sample_input)

In [53]:
# Predict for the encoded sample and take the first (only) element of the result.
prediction = model.predict(sample_encoded)
first_estimate = prediction[0]

# Round to a whole number of days for display.
prediction_rounded = int(round(first_estimate))

summary_line = f"\nPrediction for {sample_food} stored in {sample_storage}: {prediction_rounded} days"
print(summary_line)



Prediction for Milk stored in Fridge: 7 days


In [54]:
# Persist the fitted model and the fitted preprocessor with joblib.
# NOTE(review): "preprocessor.pkl" is written again by the pickle-based cell
# further down, which replaces the file produced here.
joblib.dump(value=model, filename="food_expiry_model.pkl")
joblib.dump(value=preprocessor, filename="preprocessor.pkl")

['preprocessor.pkl']

In [55]:
# Persist the fitted model and preprocessor in plain pickle format.
# `pickle` is imported in the notebook's top import cell so that every
# dependency is visible in one place.
#
# NOTE(review): "preprocessor.pkl" is also written by the joblib.dump cell
# above; the write below replaces that file with a plain-pickle version.
with open("train_model_betterversion.pkl", "wb") as f:
    pickle.dump(model, f)

with open("preprocessor.pkl", "wb") as f:
    pickle.dump(preprocessor, f)

print("Model and preprocessor saved successfully!")


Model and preprocessor saved successfully!
