In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib  # 🔥 for saving models

# Load the cleaned dataset (path is resolved relative to the notebook's working directory).
df = pd.read_csv("Cleaned_Walmart_Stock_Analysis.csv")

# Preprocess: parse the day-month-year date strings into real datetimes.
df['Date'] = pd.to_datetime(df['Date'], format='%d-%m-%Y')

# Fit each encoder exactly once and keep the fitted objects: they are saved
# below so inference code can apply the same category -> integer mapping.
city_encoder = LabelEncoder()
product_encoder = LabelEncoder()
city_codes = city_encoder.fit_transform(df['City'])
product_codes = product_encoder.fit_transform(df['Product line'])

# 'City_encoder' / 'Product_encoder' were previously produced by separate
# throwaway encoders; they hold the same codes, so reuse the arrays instead of
# fitting twice. Kept only in case other cells still read these column names.
df['City_encoder'] = city_codes
df['Product_encoder'] = product_codes
df['City_encoded'] = city_codes
df['Product_encoded'] = product_codes

# Features and target.
# NOTE(review): 'Quantity' is used as a feature while the target is
# 'Predicted Quantity' - confirm this is not target leakage.
FEATURE_COLUMNS = [
    'Year', 'Month', 'Day', 'Weekday',
    'City_encoded', 'Product_encoded',
    'Unit price', 'Quantity',
]
TARGET_COLUMN = 'Predicted Quantity'
X = df[FEATURE_COLUMNS]
y = df[TARGET_COLUMN]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a 100-tree random forest (seeded so reruns build the same forest).
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = model.predict(X_test)
print("MAE:", mean_absolute_error(y_test, y_pred))
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))
print("R² Score:", r2_score(y_test, y_pred))

# Persist the model together with the fitted encoders it was trained with.
joblib.dump(model, "model.pkl")
joblib.dump(city_encoder, "city_encoder.pkl")
joblib.dump(product_encoder, "product_encoder.pkl")


MAE: 7.02895
RMSE: 8.22299157849502
R² Score: 0.8834388430108585


['product_encoder.pkl']

In [3]:
import joblib

# Save the trained model together with the encoders that were already fitted
# when the training features were built. Dumping those exact objects (instead
# of refitting fresh LabelEncoders on df) guarantees the persisted
# category -> integer mapping is the one the model was trained on, even if df
# has been modified since, and avoids a redundant fit.
joblib.dump(model, "clean_model.pkl")
joblib.dump(product_encoder, "clean_product_encoder.pkl")
joblib.dump(city_encoder, "clean_city_encoder.pkl")


['clean_city_encoder.pkl']

In [4]:
import joblib

# Reload the persisted artifacts from disk, in order: regressor, city encoder,
# product-line encoder. This rebinds the in-memory names to the saved copies.
model, city_encoder, product_encoder = [
    joblib.load(artifact_path)
    for artifact_path in (
        "clean_model.pkl",
        "clean_city_encoder.pkl",
        "clean_product_encoder.pkl",
    )
]
