In [35]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score
from sklearn.preprocessing import StandardScaler
import numpy as np
import joblib

In [40]:
# Read the vegetable measurements from disk and preview the first five rows.
df = pd.read_csv(filepath_or_buffer='vegetable_expiry_data.csv')
df.head(n=5)

Unnamed: 0,Name,Temperature (°C),Humidity (%),pH,Microbial Count (CFU/g),Expiry Date (Days)
0,Potato,17.62,57.31,4.79,6404152,0
1,Tomato,19.77,61.77,7.9,4989421,0
2,Cucumber,8.32,36.4,5.05,8594029,4
3,Onion,28.85,71.51,4.6,7700396,0
4,Potato,10.6,83.06,7.83,2763088,0


In [5]:
# Distinct vegetable names, in order of first appearance.
pd.unique(df["Name"])

array(['Potato', 'Tomato', 'Cucumber', 'Onion', 'Carrot', 'Garlic',
       'Broccoli', 'Lettuce', 'Pepper', 'Spinach'], dtype=object)

In [7]:
# Assign each vegetable name an integer code based on its order of first
# appearance in the "Name" column.
map_dict = {
    veg_name: code for code, veg_name in enumerate(df["Name"].unique())
}

In [9]:
# Replace each name with its integer code. The lookup is strict: a value that is
# not a key of map_dict raises KeyError rather than silently becoming NaN.
df["Name"] = df["Name"].map(map_dict.__getitem__)

In [10]:
# Distinct values of the encoded "Name" column, in order of first appearance.
df["Name"].drop_duplicates().to_numpy()

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int64)

In [11]:
# Features and target variable
feature_columns = [
    "Name",
    "Temperature (°C)",
    "Humidity (%)",
    "pH",
    "Microbial Count (CFU/g)",
]
target_column = "Expiry Date (Days)"

X = df[feature_columns]  # model inputs
y = df[target_column]    # value to predict

In [12]:
# Standardize the features (optional but recommended for some models).
# The scaler is fitted on X and then used to transform that same X.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

In [13]:
# Split the raw features first, then fit the scaler on the training rows only,
# so that no mean/variance information from the held-out rows leaks into the
# preprocessing. `scaler` is rebound here to the training-only fit; this is the
# object that should be persisted alongside the model.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

In [None]:
# Step 4: Model - Random Forest Regressor
# Hyper-parameters are collected in one place; the seed keeps runs repeatable.
rf_params = {"n_estimators": 100, "random_state": 42}
model = RandomForestRegressor(**rf_params)
model.fit(X=X_train, y=y_train)

In [36]:
# Step 5: Predictions
# Continuous predictions for the held-out rows.
y_pred = model.predict(X=X_test)

In [32]:
# Round every prediction to the nearest whole number and store the results as a
# plain list of Python ints.
y_pred_ = [int(rounded) for rounded in np.round(y_pred)]

In [33]:
# Side-by-side look at the first five true values and rounded predictions.
(y_test.iloc[:5], y_pred_[:5])

(521    0
 737    0
 740    9
 660    4
 411    8
 Name: Expiry Date (Days), dtype: int64,
 [0, 2, 5, 2, 7])

In [34]:
# Fraction of rows whose rounded prediction equals the true value exactly.
accuracy_score(y_true=y_test, y_pred=y_pred_)

0.375

In [37]:
# Step 6: Evaluate the model
# The regression metrics use the raw predictions; accuracy uses the rounded ones.
ac = accuracy_score(y_true=y_test, y_pred=y_pred_)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

(mse, r2, ac)

(5.2032359999999995, 0.31517445609971184, 0.375)

In [38]:
# Persist the fitted regressor to disk.
joblib.dump(value=model, filename='vegetable_expiry_model.pkl')

['vegetable_expiry_model.pkl']

In [39]:
# Persist the fitted scaler so new inputs can be transformed the same way.
joblib.dump(value=scaler, filename='scaler.pkl')

['scaler.pkl']

In [48]:
# Reload the persisted artefacts and score one hand-written sample.
# NOTE(review): joblib.load unpickles the file; only load artefacts you trust.
loaded_model = joblib.load('vegetable_expiry_model.pkl')
loaded_scaler = joblib.load('scaler.pkl')

# Build the sample as a one-row DataFrame with the same column names and order
# as the training features. This keeps the column order explicit and avoids the
# feature-name mismatch warning raised when a scaler fitted on a DataFrame is
# given a bare ndarray. A separate variable name is used so the held-out X_test
# matrix from the train/test split is not overwritten.
new_sample = pd.DataFrame(
    [
        {
            "Name": 0,  # integer code that map_dict assigns to 'Potato'
            "Temperature (°C)": 17.62,
            "Humidity (%)": 57.31,
            "pH": 4.79,
            "Microbial Count (CFU/g)": 6404152,
        }
    ]
)
new_sample_scaled = loaded_scaler.transform(new_sample)  # Don't forget to scale the new data!

# `y_pred` is reused on purpose: the following cell rounds it for display.
y_pred = loaded_model.predict(new_sample_scaled)



In [52]:
# Show the prediction rounded to the nearest whole number.
y_pred.round()

array([0.])