# In [None]:



import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


# Load the demand dataset from the hosted CSV file into a DataFrame.
url = 'https://github.com/CharlesCLuo/Application-of-AI-in-Supply-Chain-Risk-Management-Series/raw/main/Demand_Forecsting/random_forest.csv'
data = pd.read_csv(url)

# Print a quick overview of the data: summary statistics first, then the
# leading rows. Both tables are rounded to two decimals for display only;
# `data` itself is left untouched.
for heading, table in (
    ("Descriptive statistics of the loaded data:", data.describe()),
    ("First few rows of the loaded data:", data.head()),
):
    print(heading)
    print(table.round(2))



# Figure 5.2: demand together with its candidate drivers, all plotted
# against the week number on a single shared axis.
plt.figure(num='Figure 5.2', figsize=(12, 6))
weeks = data['Week']

# Demand is the series of interest, so it is drawn as a solid line.
plt.plot(weeks, data['Demand'], label='Demand', color='blue')

# The influencing factors are drawn dashed: (column, legend label, color).
for column, legend_label, line_color in (
    ('Price_Index', 'Price Index', 'orange'),
    ('Weather_Spikes', 'Weather Spikes', 'green'),
    ('Seasonality', 'Seasonality', 'red'),
    ('Economic_Indicator', 'Economic Indicator', 'purple'),
):
    plt.plot(weeks, data[column], label=legend_label, linestyle='--', color=line_color)

plt.title('Figure 5.2 Demand Data and Influencing Factors Over Time')
plt.xlabel('Week')
plt.ylabel('Value')
plt.legend()
plt.grid(True)
plt.show()



# Predictors are every column except the week counter and the target itself.
X = data.drop(columns=['Week', 'Demand'])
y = data['Demand']

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
# NOTE(review): the split uses train_test_split's default shuffling, so test
# rows are not the most recent weeks -- confirm that is intended for this
# demand-forecasting exercise.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest on the training rows (fit() returns the
# estimator itself, so construction and training can be chained).
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict demand for the held-out rows.
y_pred = rf_model.predict(X_test)



# Score the held-out predictions against the actual demand values.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is derived from MSE rather than recomputed
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Assemble the report line by line and emit it in one go; every metric is
# shown with two decimals.
metric_lines = [
    "Model Evaluation Metrics:",
    f"Mean Absolute Error (MAE): {mae:.2f}",
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"Root Mean Squared Error (RMSE): {rmse:.2f}",
    f"R-squared (R2 Score): {r2:.2f}",
]
print("\n".join(metric_lines))



# Figure 5.3: actual versus predicted demand for the held-out rows.
# The x-axis is simply the position of each sample within the test set
# (both series are plotted without explicit x values).
plt.figure(num='Figure 5.3', figsize=(10, 6))

# (values, line styling) for each curve: actuals solid, predictions dashed.
for values, line_style in (
    (y_test.values, {'label': 'Actual Demand', 'color': 'blue'}),
    (y_pred, {'label': 'Predicted Demand', 'linestyle': '--', 'color': 'red'}),
):
    plt.plot(values, **line_style)

plt.title('Figure 5.3 Actual vs Predicted Demand')
plt.xlabel('Data Points')
plt.ylabel('Demand')
plt.legend()
plt.grid(True)
plt.show()



# Pair each predictor column with the importance score reported by the
# fitted forest, ranked from most to least important.
feature_importances = rf_model.feature_importances_
features = X.columns
importance_df = (
    pd.DataFrame({'Feature': features, 'Importance': feature_importances})
    .sort_values(by='Importance', ascending=False)
)

print("\nFeature Importance Scores:")
print(importance_df)

# Figure 5.4: horizontal bar chart of the ranked importances.
plt.figure(num='Figure 5.4', figsize=(10, 5))
plt.barh(
    y=importance_df['Feature'],
    width=importance_df['Importance'],
    color='skyblue',
)
plt.title('Figure 5.4 Feature Importance for Random Forest Model')
plt.xlabel('Importance')
plt.ylabel('Feature')
# barh draws the first row at the bottom; flip the axis so the most
# important feature ends up at the top of the chart.
plt.gca().invert_yaxis()
plt.show()

