In [None]:
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import pandas as pd
import shap
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor

In [None]:
# Read the merged dataset from its CSV file into a DataFrame; the later
# cells take the feature columns and the target from this frame.
merged_data = pd.read_csv(filepath_or_buffer="data/merged_data.csv")

In [None]:
# Feature matrix: the six input columns the regressor is trained on.
X = merged_data[
    [
        "carbon_emissions",
        "diversity_ratio",
        "governance_score",
        "sentiment",
        "volatility",
        "carbon_tax_rate",
    ]
]
# Regression target.
y = merged_data["risk_score"]

# Hold out 15% of the rows for evaluation; the fixed seed makes the split
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=42,
)

In [None]:
# Gradient-boosted regressor: 100 boosting rounds with a learning rate of 0.1.
# random_state is pinned to the same seed as the train/test split so the fit
# stays reproducible if row/column subsampling is enabled later.
model = XGBRegressor(
    n_estimators=100,
    learning_rate=0.1,
    random_state=42,
)
# Fit on the training split only; the test split is kept for evaluation.
model.fit(X_train, y_train)

In [None]:
# Predict on the held-out rows and report the mean squared error between
# those predictions and the true targets.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"Model MSE: {mse}")

In [None]:
# Compute SHAP values for the held-out rows using the fitted tree model.
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_test)

# show=False keeps the figure open so it can be written to disk instead of
# only being displayed.
shap.summary_plot(shap_values, X_test, show=False)

# Create the output directory first: savefig raises FileNotFoundError when
# the target directory does not exist (e.g. on a fresh checkout).
Path("static").mkdir(parents=True, exist_ok=True)
plt.savefig("static/shap_summary.png")
# Close the figure so it does not linger in the pyplot state.
plt.close()

In [None]:
# Persist the fitted model to disk. The output directory is created first
# because joblib.dump fails when the target directory does not exist.
Path("models").mkdir(parents=True, exist_ok=True)
joblib.dump(model, "models/esg_risk_model.pkl")