In [2]:
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.model_selection import train_test_split

In [25]:
# Load data
df = pd.read_csv("data_with_spread.csv")

# Define target variable
target = "monthly_return"

# Drop non-numeric columns
X = df.select_dtypes(include=["number"]).drop(columns=[target], errors="ignore")
y = df[target]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Random Forest Model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Predictions (In-Sample)
y_train_pred_rf = rf_model.predict(X_train)

# Predictions (Out-of-Sample)
y_test_pred_rf = rf_model.predict(X_test)

# Evaluate Model - In-Sample (Training)
mse_rf_train = mean_squared_error(y_train, y_train_pred_rf)
mae_rf_train = mean_absolute_error(y_train, y_train_pred_rf)
r2_rf_train = r2_score(y_train, y_train_pred_rf)

# Evaluate Model - Out-of-Sample (Testing)
mse_rf_test = mean_squared_error(y_test, y_test_pred_rf)
mae_rf_test = mean_absolute_error(y_test, y_test_pred_rf)
r2_rf_test = r2_score(y_test, y_test_pred_rf)

print(f"\n📊 Random Forest (General Model) - Training Set:")
print(f"MSE: {mse_rf_train:.4f}, MAE: {mae_rf_train:.4f}, R²: {r2_rf_train:.4f}")

print(f"\n📊 Random Forest (General Model) - Test Set:")
print(f"MSE: {mse_rf_test:.4f}, MAE: {mae_rf_test:.4f}, R²: {r2_rf_test:.4f}")


📊 Random Forest (General Model) - Training Set:
MSE: 0.0052, MAE: 0.0426, R²: 0.8961

📊 Random Forest (General Model) - Test Set:
MSE: 0.0283, MAE: 0.1129, R²: 0.3216


In [26]:
sector_models = {}  # Dictionary to store trained models
sector_results_rf = []  # List to store sector results

# Loop through each sector and train a separate model
for sector in df["Sector"].unique():
    print(f"\n🌍 Training Random Forest for Sector: {sector}")

    # Filter data for this sector
    sector_data = df[df["Sector"] == sector]

    # Define X and y
    X_sector = sector_data.select_dtypes(include=["number"]).drop(columns=[target], errors="ignore")
    y_sector = sector_data[target]

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X_sector, y_sector, test_size=0.2, random_state=42)

    # Train Random Forest Model
    rf_sector = RandomForestRegressor(n_estimators=100, random_state=42)
    rf_sector.fit(X_train, y_train)

    # Store trained model and data for later use
    sector_models[sector] = {"rf_model": rf_sector, "X_train": X_train}

    # Predictions - In-Sample
    y_train_pred_rf_sector = rf_sector.predict(X_train)

    # Predictions - Out-of-Sample
    y_test_pred_rf_sector = rf_sector.predict(X_test)

    # Evaluate Model - In-Sample (Training)
    mse_rf_train_sector = mean_squared_error(y_train, y_train_pred_rf_sector)
    mae_rf_train_sector = mean_absolute_error(y_train, y_train_pred_rf_sector)
    r2_rf_train_sector = r2_score(y_train, y_train_pred_rf_sector)

    # Evaluate Model - Out-of-Sample (Testing)
    mse_rf_test_sector = mean_squared_error(y_test, y_test_pred_rf_sector)
    mae_rf_test_sector = mean_absolute_error(y_test, y_test_pred_rf_sector)
    r2_rf_test_sector = r2_score(y_test, y_test_pred_rf_sector)

    print(f"\n📊 Sector: {sector} → Training Set:")
    print(f"MSE: {mse_rf_train_sector:.4f}, MAE: {mae_rf_train_sector:.4f}, R²: {r2_rf_train_sector:.4f}")

    print(f"\n📊 Sector: {sector} → Test Set:")
    print(f"MSE: {mse_rf_test_sector:.4f}, MAE: {mae_rf_test_sector:.4f}, R²: {r2_rf_test_sector:.4f}")

    # Store results
    sector_results_rf.append({
        "Sector": sector,
        "MSE_Train": mse_rf_train_sector,
        "MAE_Train": mae_rf_train_sector,
        "R²_Train": r2_rf_train_sector,
        "MSE_Test": mse_rf_test_sector,
        "MAE_Test": mae_rf_test_sector,
        "R²_Test": r2_rf_test_sector
    })

# Convert results to DataFrame and save
rf_results_df = pd.DataFrame(sector_results_rf)
rf_results_df.to_csv("random_forest_sector_results_initial.csv", index=False)

print("\n✅ 📂 Sector-Specific Random Forest Models Trained & Stored Successfully!")



🌍 Training Random Forest for Sector: Health Care

📊 Sector: Health Care → Training Set:
MSE: 0.0043, MAE: 0.0470, R²: 0.8610

📊 Sector: Health Care → Test Set:
MSE: 0.0269, MAE: 0.1281, R²: 0.1680

🌍 Training Random Forest for Sector: Information Technology

📊 Sector: Information Technology → Training Set:
MSE: 0.0056, MAE: 0.0519, R²: 0.8967

📊 Sector: Information Technology → Test Set:
MSE: 0.0410, MAE: 0.1411, R²: 0.1858

🌍 Training Random Forest for Sector: Financials

📊 Sector: Financials → Training Set:
MSE: 0.0026, MAE: 0.0351, R²: 0.9281

📊 Sector: Financials → Test Set:
MSE: 0.0221, MAE: 0.1003, R²: 0.4796

🌍 Training Random Forest for Sector: Consumer Staples

📊 Sector: Consumer Staples → Training Set:
MSE: 0.0030, MAE: 0.0394, R²: 0.8547

📊 Sector: Consumer Staples → Test Set:
MSE: 0.0135, MAE: 0.0888, R²: -0.1087

🌍 Training Random Forest for Sector: Industrials

📊 Sector: Industrials → Training Set:
MSE: 0.0120, MAE: 0.0485, R²: 0.8640

📊 Sector: Industrials → Test Set:
M

In [28]:
import pandas as pd

# 🛠️ **Ensure Sector-Based Results Exist**
if "rf_results_df" not in globals():
    raise ValueError("🚨 `rf_results_df` is missing. Make sure the sector-based results are stored correctly.")

# 🛠️ **Create General Model Results DataFrame**
general_rf_results = {
    "Sector": ["General Model"],
    "MSE_Train": [mse_rf_train],
    "MAE_Train": [mae_rf_train],
    "R²_Train": [r2_rf_train],
    "MSE_Test": [mse_rf_test],
    "MAE_Test": [mae_rf_test],
    "R²_Test": [r2_rf_test]
}

general_rf_df = pd.DataFrame(general_rf_results)

# 🛠️ **Ensure Column Names Match Before Merging**
rf_results_df.rename(columns={"MSE_Train": "MSE_Train", "MAE_Train": "MAE_Train", "R²_Train": "R²_Train",
                              "MSE_Test": "MSE_Test", "MAE_Test": "MAE_Test", "R²_Test": "R²_Test"}, inplace=True)
general_rf_df.rename(columns={"MSE_Train": "MSE_Train", "MAE_Train": "MAE_Train", "R²_Train": "R²_Train",
                              "MSE_Test": "MSE_Test", "MAE_Test": "MAE_Test", "R²_Test": "R²_Test"}, inplace=True)

# 🛠️ **Merge General & Sector-Specific Results**
full_rf_results_df = pd.concat([general_rf_df, rf_results_df], ignore_index=True)

# 🛠️ **Save All Results**
full_rf_results_df.to_csv("random_forest_all_results_initial.csv", index=False)

print("\n✅ 📂 Initial Random Forest results (with in-sample and out-of-sample metrics) saved successfully!")

# 🛠️ **Load and Check Merged Results**
df_check = pd.read_csv("random_forest_all_results_initial.csv")
print("\n📊 Merged Results Preview:")
print(df_check.head())



✅ 📂 Initial Random Forest results (with in-sample and out-of-sample metrics) saved successfully!

📊 Merged Results Preview:
                   Sector  MSE_Train  MAE_Train  R²_Train  MSE_Test  MAE_Test  \
0           General Model   0.005176   0.042584  0.896052  0.028256  0.112904   
1             Health Care   0.004335   0.047012  0.860980  0.026873  0.128071   
2  Information Technology   0.005640   0.051891  0.896744  0.040978  0.141100   
3              Financials   0.002629   0.035142  0.928080  0.022137  0.100258   
4        Consumer Staples   0.003013   0.039352  0.854687  0.013550  0.088757   

    R²_Test  
0  0.321601  
1  0.167992  
2  0.185823  
3  0.479578  
4 -0.108669  


In [17]:
# Compute feature importance for the general model
general_feature_importance = pd.Series(rf_model.feature_importances_, index=X.columns)
general_feature_importance = general_feature_importance.sort_values(ascending=False)

# Plot general model feature importance
plt.figure(figsize=(12, 6))
sns.barplot(x=general_feature_importance.values[:15], y=general_feature_importance.index[:15])
plt.xlabel("Feature Importance")
plt.ylabel("Features")
plt.title("Top 15 Feature Importances - General Model")

# Save the figure
plt.savefig(f"general_model_feature_importance.png", bbox_inches="tight")
plt.close()

print("\n✅ 📂 General Model Feature Importance Graph Saved Successfully!")


✅ 📂 General Model Feature Importance Graph Saved Successfully!


In [19]:
import pandas as pd

# Compute feature importance for the general model
general_feature_importance = pd.Series(rf_model.feature_importances_, index=X.columns)
general_feature_importance = general_feature_importance.sort_values(ascending=False)

# Convert to DataFrame
general_feature_importance_df = general_feature_importance.reset_index()
general_feature_importance_df.columns = ["Feature", "Importance"]

# Save to CSV
general_feature_importance_df.to_csv("general_feature_importance.csv", index=False)

print("\n✅ 📂 General Model Feature Importance CSV Saved Successfully!")


✅ 📂 General Model Feature Importance CSV Saved Successfully!


In [14]:
import os

# Define folder path
save_path = "sector_feature_importance"

# Create the folder if it doesn't exist
os.makedirs(save_path, exist_ok=True)

sector_feature_importance = {}

# Loop through each sector and compute feature importance
for sector, model_data in sector_models.items():
    print(f"\n📊 Computing Feature Importance for Sector: {sector}")

    # Get trained sector model and feature set
    rf_sector = model_data["rf_model"]
    X_sector = model_data["X_train"]

    # Compute feature importance
    feature_importance_sector = pd.Series(rf_sector.feature_importances_, index=X_sector.columns)
    feature_importance_sector = feature_importance_sector.sort_values(ascending=False)

    # Save for later analysis
    sector_feature_importance[sector] = feature_importance_sector

    # Plot sector feature importance
    plt.figure(figsize=(12, 6))
    sns.barplot(x=feature_importance_sector.values[:15], y=feature_importance_sector.index[:15])
    plt.xlabel("Feature Importance")
    plt.ylabel("Features")
    plt.title(f"Top 15 Feature Importances - {sector}")

    # Save the figure
    plt.savefig(f"{save_path}/{sector}_feature_importance.png", bbox_inches="tight")
    plt.close()  # Close plot to prevent display overflow

print("\n✅ 📂 Feature Importance Graphs for Each Sector Saved Successfully!")



📊 Computing Feature Importance for Sector: Health Care

📊 Computing Feature Importance for Sector: Information Technology

📊 Computing Feature Importance for Sector: Financials

📊 Computing Feature Importance for Sector: Consumer Staples

📊 Computing Feature Importance for Sector: Industrials

📊 Computing Feature Importance for Sector: Utilities

📊 Computing Feature Importance for Sector: Materials

📊 Computing Feature Importance for Sector: Real Estate

📊 Computing Feature Importance for Sector: Consumer Discretionary

📊 Computing Feature Importance for Sector: Energy

📊 Computing Feature Importance for Sector: Communication Services

✅ 📂 Feature Importance Graphs for Each Sector Saved Successfully!


In [3]:
from xgboost import XGBRegressor

# Load dataset
df = pd.read_csv("data_with_spread.csv")

# Define target variable
target = "monthly_return"

# Drop non-numeric columns
X = df.select_dtypes(include=["number"]).drop(columns=[target], errors="ignore")
y = df[target]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train XGBoost Model
xgb_model = XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
xgb_model.fit(X_train, y_train)

# Predictions - In-Sample
y_train_pred_xgb = xgb_model.predict(X_train)

# Predictions - Out-of-Sample
y_test_pred_xgb = xgb_model.predict(X_test)

# Evaluate Model - In-Sample
mse_train_xgb = mean_squared_error(y_train, y_train_pred_xgb)
mae_train_xgb = mean_absolute_error(y_train, y_train_pred_xgb)
r2_train_xgb = r2_score(y_train, y_train_pred_xgb)

# Evaluate Model - Out-of-Sample
mse_test_xgb = mean_squared_error(y_test, y_test_pred_xgb)
mae_test_xgb = mean_absolute_error(y_test, y_test_pred_xgb)
r2_test_xgb = r2_score(y_test, y_test_pred_xgb)

print(f"📊 General XGBoost Model - MSE (Train): {mse_train_xgb:.4f}, MSE (Test): {mse_test_xgb:.4f}")
print(f"📊 General XGBoost Model - MAE (Train): {mae_train_xgb:.4f}, MAE (Test): {mae_test_xgb:.4f}")
print(f"📊 General XGBoost Model - R² (Train): {r2_train_xgb:.4f}, R² (Test): {r2_test_xgb:.4f}")

# Store general results
general_xgb_results = pd.DataFrame([["General Model", mse_train_xgb, mae_train_xgb, r2_train_xgb,
                                      mse_test_xgb, mae_test_xgb, r2_test_xgb]],
                                   columns=["Sector", "MSE_Train", "MAE_Train", "R²_Train",
                                            "MSE_Test", "MAE_Test", "R²_Test"])



📊 General XGBoost Model - MSE (Train): 0.0069, MSE (Test): 0.0263
📊 General XGBoost Model - MAE (Train): 0.0631, MAE (Test): 0.1109
📊 General XGBoost Model - R² (Train): 0.8610, R² (Test): 0.3682


In [4]:
sector_models_xgb = {}  # Dictionary to store trained models
sector_results_xgb = []

# Loop through each sector and train a separate model
for sector in df["Sector"].unique():
    print(f"\n🚀 Training XGBoost for Sector: {sector}")

    # Filter data for this sector
    sector_data = df[df["Sector"] == sector]

    # Define X and y
    X_sector = sector_data.select_dtypes(include=["number"]).drop(columns=[target], errors="ignore")
    y_sector = sector_data[target]

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X_sector, y_sector, test_size=0.2, random_state=42)

    # Train XGBoost Model
    xgb_sector = XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
    xgb_sector.fit(X_train, y_train)

    # Store trained model
    sector_models_xgb[sector] = {"xgb_model": xgb_sector, "X_train": X_train}

    # Predictions - In-Sample
    y_train_pred_xgb = xgb_sector.predict(X_train)

    # Predictions - Out-of-Sample
    y_test_pred_xgb = xgb_sector.predict(X_test)

    # Evaluate Model - In-Sample
    mse_train = mean_squared_error(y_train, y_train_pred_xgb)
    mae_train = mean_absolute_error(y_train, y_train_pred_xgb)
    r2_train = r2_score(y_train, y_train_pred_xgb)

    # Evaluate Model - Out-of-Sample
    mse_test = mean_squared_error(y_test, y_test_pred_xgb)
    mae_test = mean_absolute_error(y_test, y_test_pred_xgb)
    r2_test = r2_score(y_test, y_test_pred_xgb)

    print(f"📊 Sector: {sector} → MSE (Train): {mse_train:.4f}, MSE (Test): {mse_test:.4f}")
    print(f"📊 Sector: {sector} → MAE (Train): {mae_train:.4f}, MAE (Test): {mae_test:.4f}")
    print(f"📊 Sector: {sector} → R² (Train): {r2_train:.4f}, R² (Test): {r2_test:.4f}")

    # Store results
    sector_results_xgb.append([sector, mse_train, mae_train, r2_train, mse_test, mae_test, r2_test])

# Convert results to DataFrame
xgb_results_df = pd.DataFrame(sector_results_xgb,
                              columns=["Sector", "MSE_Train", "MAE_Train", "R²_Train",
                                       "MSE_Test", "MAE_Test", "R²_Test"])



🚀 Training XGBoost for Sector: Health Care
📊 Sector: Health Care → MSE (Train): 0.0003, MSE (Test): 0.0243
📊 Sector: Health Care → MAE (Train): 0.0135, MAE (Test): 0.1156
📊 Sector: Health Care → R² (Train): 0.9895, R² (Test): 0.2488

🚀 Training XGBoost for Sector: Information Technology
📊 Sector: Information Technology → MSE (Train): 0.0009, MSE (Test): 0.0401
📊 Sector: Information Technology → MAE (Train): 0.0231, MAE (Test): 0.1401
📊 Sector: Information Technology → R² (Train): 0.9828, R² (Test): 0.2042

🚀 Training XGBoost for Sector: Financials
📊 Sector: Financials → MSE (Train): 0.0007, MSE (Test): 0.0227
📊 Sector: Financials → MAE (Train): 0.0202, MAE (Test): 0.1014
📊 Sector: Financials → R² (Train): 0.9800, R² (Test): 0.4654

🚀 Training XGBoost for Sector: Consumer Staples
📊 Sector: Consumer Staples → MSE (Train): 0.0000, MSE (Test): 0.0157
📊 Sector: Consumer Staples → MAE (Train): 0.0050, MAE (Test): 0.0934
📊 Sector: Consumer Staples → R² (Train): 0.9978, R² (Test): -0.2855

🚀 

In [5]:
# Merge sector-specific XGBoost results with general model
xgb_comparison_df = pd.concat([xgb_results_df, general_xgb_results], ignore_index=True)

# Format numbers for better readability
for col in ["MSE_Train", "MSE_Test", "MAE_Train", "MAE_Test", "R²_Train", "R²_Test"]:
    xgb_comparison_df[col] = xgb_comparison_df[col].apply(lambda x: f"{x:.6f}")

# Save as CSV
xgb_comparison_df.to_csv("xgb_sector_vs_general.csv", index=False)

# Display results
print("\n✅ 📂 XGBoost results saved successfully!")
print(xgb_comparison_df)


✅ 📂 XGBoost results saved successfully!
                    Sector MSE_Train MAE_Train  R²_Train  MSE_Test  MAE_Test  \
0              Health Care  0.000327  0.013535  0.989524  0.024262  0.115586   
1   Information Technology  0.000941  0.023084  0.982773  0.040055  0.140084   
2               Financials  0.000733  0.020222  0.979953  0.022739  0.101429   
3         Consumer Staples  0.000045  0.005040  0.997849  0.015712  0.093415   
4              Industrials  0.001457  0.029098  0.983514  0.027321  0.115482   
5                Utilities  0.000038  0.004546  0.998616  0.010904  0.068900   
6                Materials  0.000067  0.006097  0.998427  0.030943  0.134314   
7              Real Estate  0.000070  0.006264  0.997601  0.030891  0.114346   
8   Consumer Discretionary  0.000253  0.012424  0.995701  0.049882  0.152323   
9                   Energy  0.000078  0.006862  0.999071  0.061534  0.174629   
10  Communication Services  0.000032  0.004292  0.999278  0.045502  0.139324   

In [13]:
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler

# Scale Data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train MLP Neural Network with improved parameters
mlp_model = MLPRegressor(hidden_layer_sizes=(64, 32), activation='relu', solver='adam',
                         alpha=0.01, learning_rate_init=0.01, max_iter=1000, early_stopping=True,
                         random_state=42)
mlp_model.fit(X_train_scaled, y_train)

# Predictions - In-Sample
y_train_pred_mlp = mlp_model.predict(X_train_scaled)

# Predictions - Out-of-Sample
y_test_pred_mlp = mlp_model.predict(X_test_scaled)

# Evaluate Model - In-Sample
mse_train_mlp = mean_squared_error(y_train, y_train_pred_mlp)
mae_train_mlp = mean_absolute_error(y_train, y_train_pred_mlp)
r2_train_mlp = r2_score(y_train, y_train_pred_mlp)

# Evaluate Model - Out-of-Sample
mse_test_mlp = mean_squared_error(y_test, y_test_pred_mlp)
mae_test_mlp = mean_absolute_error(y_test, y_test_pred_mlp)
r2_test_mlp = r2_score(y_test, y_test_pred_mlp)

print(f"📊 General MLP Model - MSE (Train): {mse_train_mlp:.4f}, MSE (Test): {mse_test_mlp:.4f}")
print(f"📊 General MLP Model - MAE (Train): {mae_train_mlp:.4f}, MAE (Test): {mae_test_mlp:.4f}")
print(f"📊 General MLP Model - R² (Train): {r2_train_mlp:.4f}, R² (Test): {r2_test_mlp:.4f}")

# Store general results
general_mlp_results = pd.DataFrame([["General Model", mse_train_mlp, mae_train_mlp, r2_train_mlp,
                                      mse_test_mlp, mae_test_mlp, r2_test_mlp]],
                                   columns=["Sector", "MSE_Train", "MAE_Train", "R²_Train",
                                            "MSE_Test", "MAE_Test", "R²_Test"])


📊 General MLP Model - MSE (Train): 0.4140, MSE (Test): 0.6531
📊 General MLP Model - MAE (Train): 0.4382, MAE (Test): 0.5326
📊 General MLP Model - R² (Train): 0.0473, R² (Test): -0.4488


In [15]:
sector_models_mlp = {}  # Dictionary to store trained models
sector_results_mlp = []

# Loop through each sector and train a separate model
for sector in df["Sector"].unique():
    print(f"\n🚀 Training MLP Neural Network for Sector: {sector}")

    # Filter data for this sector
    sector_data = df[df["Sector"] == sector]

    # Define X and y
    X_sector = sector_data.select_dtypes(include=["number"]).drop(columns=[target], errors="ignore")
    y_sector = sector_data[target]

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X_sector, y_sector, test_size=0.2, random_state=42)

    # Scale Data (Sector-Specific)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train MLP Neural Network
    mlp_sector = MLPRegressor(hidden_layer_sizes=(64, 32), activation='relu', solver='adam',
                              alpha=0.01, learning_rate_init=0.01, max_iter=1000, early_stopping=True,
                              random_state=42)
    mlp_sector.fit(X_train_scaled, y_train)

    # Store trained model
    sector_models_mlp[sector] = {"mlp_model": mlp_sector, "X_train": X_train}

    # Predictions - In-Sample
    y_train_pred_mlp = mlp_sector.predict(X_train_scaled)

    # Predictions - Out-of-Sample
    y_test_pred_mlp = mlp_sector.predict(X_test_scaled)

    # Evaluate Model - In-Sample
    mse_train = mean_squared_error(y_train, y_train_pred_mlp)
    mae_train = mean_absolute_error(y_train, y_train_pred_mlp)
    r2_train = r2_score(y_train, y_train_pred_mlp)

    # Evaluate Model - Out-of-Sample
    mse_test = mean_squared_error(y_test, y_test_pred_mlp)
    mae_test = mean_absolute_error(y_test, y_test_pred_mlp)
    r2_test = r2_score(y_test, y_test_pred_mlp)

    print(f"📊 Sector: {sector} → MSE (Train): {mse_train:.4f}, MSE (Test): {mse_test:.4f}")
    print(f"📊 Sector: {sector} → MAE (Train): {mae_train:.4f}, MAE (Test): {mae_test:.4f}")
    print(f"📊 Sector: {sector} → R² (Train): {r2_train:.4f}, R² (Test): {r2_test:.4f}")

    # Store results
    sector_results_mlp.append([sector, mse_train, mae_train, r2_train, mse_test, mae_test, r2_test])

# Convert results to DataFrame
mlp_results_df = pd.DataFrame(sector_results_mlp,
                              columns=["Sector", "MSE_Train", "MAE_Train", "R²_Train",
                                       "MSE_Test", "MAE_Test", "R²_Test"])



🚀 Training MLP Neural Network for Sector: Health Care
📊 Sector: Health Care → MSE (Train): 0.2186, MSE (Test): 0.3397
📊 Sector: Health Care → MAE (Train): 0.3266, MAE (Test): 0.3765
📊 Sector: Health Care → R² (Train): 0.1180, R² (Test): -0.0788

🚀 Training MLP Neural Network for Sector: Information Technology
📊 Sector: Information Technology → MSE (Train): 0.4816, MSE (Test): 1.0463
📊 Sector: Information Technology → MAE (Train): 0.4527, MAE (Test): 0.4886
📊 Sector: Information Technology → R² (Train): 0.0127, R² (Test): -0.0119

🚀 Training MLP Neural Network for Sector: Consumer Discretionary
📊 Sector: Consumer Discretionary → MSE (Train): 0.5810, MSE (Test): 21.4687
📊 Sector: Consumer Discretionary → MAE (Train): 0.4450, MAE (Test): 0.7827
📊 Sector: Consumer Discretionary → R² (Train): 0.0508, R² (Test): -42.2080

🚀 Training MLP Neural Network for Sector: Financials
📊 Sector: Financials → MSE (Train): 0.3513, MSE (Test): 0.3064
📊 Sector: Financials → MAE (Train): 0.3457, MAE (Test):