In [None]:
from pathlib import Path

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

# Train a random-forest regressor that predicts `hourly_sales` from the
# preprocessed data, report its hold-out mean absolute error, and save the
# fitted model to disk.

# Load the preprocessed data for training.
# The path is assembled with pathlib so the same relative location resolves on
# Windows and on POSIX systems (a backslash is only a separator on Windows).
# It is relative to the current working directory and kept as a plain `str`,
# as in the original, in case other cells use the name as text.
preprocessed_data_path = str(Path('..', 'src', 'utils', 'preprocessed_data.csv'))
data = pd.read_csv(preprocessed_data_path)

# Define features and target variable
target = 'hourly_sales'
all_features = ['item_code', 'hour', 'avg_hourly_sales', 'category', 'hour_of_day', 'day_of_week', 'month']
# NOTE(review): assumes every feature column is already numerically encoded in
# the preprocessed CSV (e.g. `item_code`, `category`) — confirm.

# Fail early with a clear message if the target column is absent, rather than
# with a bare KeyError when the frame is indexed below.
if target not in data.columns:
    raise ValueError(f"Target column '{target}' not found in '{preprocessed_data_path}'.")

# Keep only the candidate features that actually exist in the data, and say
# which ones were dropped so a schema mismatch does not go unnoticed.
features = [feature for feature in all_features if feature in data.columns]
missing_features = [feature for feature in all_features if feature not in data.columns]
if missing_features:
    print(f'Warning: features not found in the data and skipped: {missing_features}')

# Ensure that you have enough features to proceed
if len(features) < 2:
    raise ValueError("Insufficient features for training. Please check your data and feature selection.")

# Split the data into training and testing sets (80% / 20%); the fixed seed
# makes the split reproducible between runs.
X = data[features]
y = data[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Random Forest model (100 trees, fixed seed) and fit it on the
# training portion only.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
mae = mean_absolute_error(y_test, y_pred)
print(f'Mean Absolute Error: {mae}')

# Save the trained model to a file.
# The target directory is created first so that a missing folder does not
# throw away the training work with a FileNotFoundError at the very end.
model_path = str(Path('..', 'src', 'models', 'sales_forecasting_model.pkl'))
Path(model_path).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(model, model_path)
