In [None]:
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

In [None]:
# Set your folder path
folder_path = "data/"

#load  dataset
df=pd.read_csv(folder_path+ "inventory_delivery_forecast_data.csv")



In [None]:
# Put the rows in chronological order.
# The dates come out of the CSV as plain strings, so they are parsed to real
# datetimes first: sorting the raw strings is only correct for ISO-style
# formats and would silently scramble the lag/rolling features and the
# chronological train/test split built later on.
# A stable sort (mergesort) keeps rows that share a date in their file order.
df = (
    df.assign(delivery_date=pd.to_datetime(df["delivery_date"]))
    .sort_values("delivery_date", kind="mergesort")
    .reset_index(drop=True)
)

df.head()

In [None]:
# 2. Feature Engineering
# Work on a copy so the sorted source frame `df` stays untouched.
df_fe = df.copy()

# Make sure the date column holds datetimes (needed for the .dt accessors below)
df_fe["delivery_date"] = pd.to_datetime(df_fe["delivery_date"])

# Every column except the date gets lag and rolling features
value_cols = [name for name in df.columns if name != "delivery_date"]

# Lag features: the value observed one and two rows (deliveries) earlier
for col in value_cols:
    history = df_fe[col]
    for steps_back in (1, 2):
        df_fe[f"{col}_lag{steps_back}"] = history.shift(steps_back)

# Rolling average of the previous two deliveries; the shift(1) keeps the
# current row's own value out of its feature.
for col in value_cols:
    two_row_mean = df_fe[col].rolling(window=2).mean()
    df_fe[f"{col}_roll2"] = two_row_mean.shift(1)

# Calendar features derived from the delivery date
delivery_dates = df_fe["delivery_date"]
df_fe["day_of_week"] = delivery_dates.dt.dayofweek
df_fe["month"] = delivery_dates.dt.month

# Rows containing NaN (e.g. the first rows, which have no lag history) are removed
df_fe = df_fe.dropna()
df_fe = df_fe.reset_index(drop=True)

df_fe.head()

In [None]:
# 3. Train/Test Split
# Quantities the models predict (one output column per item)
target_cols = ["wings", "tenders", "fries_reg", "fries_large", "veggies", "dips", "drinks", "flavours"]

# Features are everything except the date and the targets themselves
non_feature_cols = ["delivery_date", *target_cols]
X = df_fe.drop(columns=non_feature_cols)
y = df_fe[target_cols]

# Hold out the last 20% of rows for testing; no shuffling, row order is preserved
split_index = int(len(df_fe) * 0.8)
train_rows = slice(None, split_index)
test_rows = slice(split_index, None)

X_train, y_train = X.iloc[train_rows], y.iloc[train_rows]
X_test, y_test = X.iloc[test_rows], y.iloc[test_rows]

X_train.shape, X_test.shape


In [None]:
# 4. Train Linear Regression
# fit() returns the estimator itself, so construction and fitting can be chained.
lr_model = LinearRegression().fit(X_train, y_train)
lr_model




In [None]:
# 4b. Train Ridge Regression
# alpha is the regularisation strength passed to Ridge.
ridge_model = Ridge(alpha=1.0).fit(X_train, y_train)
ridge_model

In [None]:
# 5. Evaluation Function
def evaluate_model(name, model, X_test, y_test):
    """Print a short error report for a fitted model and return its predictions.

    Parameters
    ----------
    name : str
        Label used in the report header.
    model : object
        Fitted estimator exposing ``predict``.
    X_test : array-like
        Features to predict on.
    y_test : array-like
        True target values matching ``X_test``.

    Returns
    -------
    The value returned by ``model.predict(X_test)``.

    The report shows MAE and RMSE with two decimals and R² with three,
    followed by a 40-character separator line.
    """
    y_hat = model.predict(X_test)

    # RMSE is derived from the mean squared error
    mse = mean_squared_error(y_test, y_hat)

    report_lines = [
        f"{name} Performance:",
        f"  MAE:  {mean_absolute_error(y_test, y_hat):.2f}",
        f"  RMSE: {np.sqrt(mse):.2f}",
        f"  R²:   {r2_score(y_test, y_hat):.3f}",
        "-" * 40,
    ]
    print("\n".join(report_lines))
    return y_hat

# Report test-set metrics for each fitted model and keep the predictions for later cells
lr_preds = evaluate_model(
    name="Linear Regression", model=lr_model, X_test=X_test, y_test=y_test
)
ridge_preds = evaluate_model(
    name="Ridge Regression", model=ridge_model, X_test=X_test, y_test=y_test
)


In [None]:
# Quick sanity check: true "wings" values next to each model's first output column
# ("wings" is the first entry of target_cols, so it is column 0 of the predictions).
wings_actual = y_test["wings"].values
print("y_test['wings'] values:", wings_actual)

lr_first_col = lr_preds[:, 0]
print("LR Predictions shape and values:", lr_preds.shape, lr_first_col)

ridge_first_col = ridge_preds[:, 0]
print("Ridge Predictions shape and values:", ridge_preds.shape, ridge_first_col)


In [None]:
# Wings forecast for the Linear Regression model.
# The actual series, a title and axis labels are included so the figure can be
# read on its own; column 0 of the predictions is "wings", the first target.
plt.figure(figsize=(10, 5))
plt.plot(y_test["wings"].values, label="Actual Wings", marker='o')
plt.plot(lr_preds[:, 0], label="LR Prediction", marker='x')
plt.title("Wings Forecast - Actual vs Predicted (Linear Regression)")
plt.xlabel("Test Period")
plt.ylabel("Quantity")
plt.legend()
plt.show()


In [None]:
# 6. Forecast Visualization for one target (e.g., wings)
# Explicit figure/axes interface; column 0 of the predictions is the "wings" target.
fig, ax = plt.subplots(figsize=(10, 5))

wings_true = y_test["wings"].values
wings_ridge = ridge_preds[:, 0]

ax.plot(wings_true, label="Actual Wings", marker='o')
ax.plot(wings_ridge, label="Ridge Prediction", marker='s')

ax.set_title("Wings Forecast - Actual vs Predicted")
ax.set_xlabel("Test Period")
ax.set_ylabel("Quantity")
ax.legend()

plt.show()


In [None]:
# R² (coefficient of determination) of the Linear Regression model on the test split.
# Note: a regressor's .score() reports R², not a classification accuracy.
lr_r2 = lr_model.score(X_test, y_test)
lr_r2

In [None]:
# R² (coefficient of determination) of the Ridge model on the test split.
# Note: a regressor's .score() reports R², not a classification accuracy.
ridge_r2 = ridge_model.score(X_test, y_test)
ridge_r2

In [None]:
# Save the fitted Linear Regression model to disk.
# joblib is imported with the other dependencies in the notebook's first cell,
# so no cell depends on this one having run just to get the import.
joblib.dump(lr_model, 'lr_model.pkl')


In [None]:
# Save the fitted Ridge model to disk
joblib.dump(value=ridge_model, filename='ridge_model.pkl')