In [None]:
from pathlib import Path

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

In [None]:
# Load the combined dataset; the path is relative to the notebook's working directory.
combined_csv = "../../data/processed/combined_data.csv"
df = pd.read_csv(combined_csv)

In [None]:
# Target column: the summed "order" quantity of each itemID, broadcast back
# onto every row belonging to that item.
orders_by_item = df.groupby("itemID")["order"]
df["demand"] = orders_by_item.transform("sum")

In [None]:
# Preview the first five values of the freshly built target column.
df["demand"].head(n=5)

In [None]:
# Random forest with 100 trees; the fixed seed makes training repeatable.
rf_settings = {"n_estimators": 100, "random_state": 42}
model = RandomForestRegressor(**rf_settings)

In [None]:
# Parse the raw timestamp column, expand it into numeric calendar features,
# then drop the original column so only the derived parts remain.
df['time'] = pd.to_datetime(df['time'])
for part in ('year', 'month', 'day', 'dayofweek', 'hour'):
    # Each name is an attribute of the pandas `.dt` accessor (dayofweek: Monday=0).
    df[part] = getattr(df['time'].dt, part)
df = df.drop(columns=['time'])

In [None]:
# Features are every column except the target. "order" is excluded as well:
# "demand" is the per-item sum of "order", so keeping it among the features
# leaks part of the target into the model and inflates the evaluation scores.
# NOTE(review): "itemID" still determines "demand" exactly, and a random row
# split places rows of the same item in both train and test — consider a
# group-wise split by item if the goal is to generalise; confirm intent.
X = df.drop(columns=["demand", "order"])
y = df["demand"]

In [None]:
# Hold out 20% of the rows for evaluation; the seed fixes the shuffle.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Train the forest on the training split (the fitted estimator is the cell output).
model.fit(X=X_train, y=y_train)

In [None]:
# Predict demand for the held-out rows.
y_pred = model.predict(X=X_test)

In [None]:
# Score the held-out predictions with mean squared error and R^2, in that order.
mse, r2 = (metric(y_test, y_pred) for metric in (mean_squared_error, r2_score))

In [None]:
# Report both evaluation metrics, one per line.
print(f"Mean Squared Error: {mse}", f"R2 Score: {r2}", sep="\n")

In [None]:
# Persist the fitted model. The target folder is created first so the dump does
# not fail with FileNotFoundError when the models directory does not exist yet.
model_path = "../../models/random_forest_model.pkl"
Path(model_path).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(model, model_path)
