In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import joblib

In [None]:
# Load the trained model
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only point this at model artifacts from a trusted source.
MODEL_PATH = '../models/rf_model_2023.pkl'
rf_model = joblib.load(MODEL_PATH)

In [None]:
# Load validation data
# Features and target live in two sibling CSV files; read both in one pass
# and bind them in (features, target) order.
X_val, y_val = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/processed/validation_data.csv',
        '../data/processed/validation_target.csv',
    )
)

In [None]:
# Predictions
# Score every validation row with the loaded model; y_pred is reused by the
# error metric and the error-bar plot in the cells below.
y_pred = rf_model.predict(X_val)

In [None]:
# Mean Squared Error
# Compare the held-out targets against the model's predictions and report
# the average squared residual.
mse = mean_squared_error(y_true=y_val, y_pred=y_pred)
print(f"Mean Squared Error: {mse}")

In [None]:
# Feature importance
# Pair each validation column name with the importance the fitted model
# assigned to it and draw them as a horizontal bar chart.
features = X_val.columns
importances = rf_model.feature_importances_

fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(features, importances, color='skyblue')
ax.set_xlabel('Importance')
ax.set_ylabel('Feature')
ax.set_title('Feature Importance for Random Forest Model')
plt.show()

In [None]:
# Confidence intervals
# Estimate per-row uncertainty from the disagreement between the individual
# trees of the forest.
#
# The forest hands its trees a validated NumPy array during fit, so the trees
# carry no feature names. Passing the DataFrame straight to tree.predict makes
# scikit-learn emit an "X has feature names, but ... was fitted without
# feature names" UserWarning once per tree. Convert once, up front, and reuse
# the array for every tree.
X_val_array = X_val.to_numpy()
predictions_std = np.std(
    [tree.predict(X_val_array) for tree in rf_model.estimators_],
    axis=0,
)
# 1.96 is the two-sided 95% z-value for a normal distribution.
# NOTE(review): this is a spread-of-trees band, not a calibrated confidence
# interval -- treat it as a rough uncertainty indicator.
confidence_intervals = 1.96 * predictions_std

In [None]:
# Plot predictions with confidence intervals
# Show at most the first five predictions. The count is clamped to the number
# of available predictions: a fixed range(5) paired with a shorter y_pred
# slice would make errorbar fail with a shape mismatch.
n_shown = min(5, len(y_pred))

fig, ax = plt.subplots()
ax.errorbar(
    range(n_shown),
    y_pred[:n_shown],
    yerr=confidence_intervals[:n_shown],  # half-width of each error bar
    fmt='o',
    color='blue',
    ecolor='lightgray',
    elinewidth=3,
    capsize=5,
)
# Renders the same title as before whenever five points are available.
ax.set_title(f'Predictions with Confidence Intervals (First {n_shown})')
ax.set_xlabel('Prediction Index')
ax.set_ylabel('Predicted Sales')
plt.show()