#### Importing required libraries

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import ExtraTreesRegressor, RandomForestRegressor
import xgboost as xgb
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [None]:
# Load the train/test feature and target tables from their CSV files.
# The files are read in the order listed: x_train, y_train, x_test, y_test.
X_train, Y_train, X_test, Y_test = (
    pd.read_csv(csv_name)
    for csv_name in (
        "x_train_final.csv",
        "y_train_final.csv",
        "x_test_final.csv",
        "y_test_final.csv",
    )
)

In [None]:
# Helper function to evaluate the model
def evaluate_model(model, output, X_train, X_test, Y_train, Y_test):
    """Score an already-fitted model on the train and test splits.

    Args:
        model: Object exposing ``predict(X)``.
        output: Name of the target column to score against; it is looked up
            in both ``Y_train`` and ``Y_test``.
        X_train: Features passed to ``model.predict`` for the train split.
        X_test: Features passed to ``model.predict`` for the test split.
        Y_train: Table of training targets, indexed by column name.
        Y_test: Table of test targets, indexed by column name.

    Returns:
        dict with the key ``"output"`` followed by ``Train_MSE``,
        ``Train_MAE``, ``Train_R2``, ``Test_MSE``, ``Test_MAE`` and
        ``Test_R2``, in that order.
    """
    # Run both predictions up front, before any metric is computed.
    predictions = {
        "Train": model.predict(X_train),
        "Test": model.predict(X_test),
    }
    targets = {"Train": Y_train[output], "Test": Y_test[output]}
    scorers = (
        ("MSE", mean_squared_error),
        ("MAE", mean_absolute_error),
        ("R2", r2_score),
    )

    report = {"output": output}
    for split in ("Train", "Test"):
        for label, scorer in scorers:
            report[f"{split}_{label}"] = scorer(targets[split], predictions[split])
    return report

#### Traditional ML Models

In [None]:
# Candidate regressors, keyed by the display name used in progress messages
# and in the results table. The tree-based models share random_state=42.
models = {
    "Linear Regression": LinearRegression(),
    "Extra Tree Regressor": ExtraTreesRegressor(
        n_estimators=100,
        max_depth=5,
        max_features=50,
        random_state=42,
    ),
    "Random Forest Regressor": RandomForestRegressor(
        n_estimators=50,
        max_depth=5,
        max_features=50,
        random_state=42,
    ),
    "XGB Regressor": xgb.XGBRegressor(
        n_estimators=100,
        max_depth=5,
        learning_rate=0.01,
        objective='reg:squarederror',
        random_state=42,
    ),
}

In [None]:
# Target columns; one model is trained and scored for each of them.
output_labels = [
    'RET_ff4',
    'exret_ff4',
    'exret_ff3',
    'exret_mkt',
]

In [None]:
# Metrics for every (model, target) pair, keyed as "<model> [<target>]".
results = {}

# Train and evaluate traditional ML models
for name, model in models.items():
    for output in output_labels:
        print(f"Training {name}... for... {output}")
        # Fit on the current target column only.
        model.fit(X_train, Y_train[output])
        # Key by model AND target. Keying by the model name alone made each
        # target overwrite the previous one, so only the last target's
        # metrics were kept per model.
        results[f"{name} [{output}]"] = evaluate_model(
            model, output, X_train, X_test, Y_train, Y_test
        )

#### MLP Models

In [None]:
# Define NN1 to NN5 architectures
def _build_mlp(hidden_units):
    """Build and compile a fully connected regression network.

    Args:
        hidden_units: Sizes of the ReLU hidden layers, in order from the
            first hidden layer to the last.

    Returns:
        A compiled ``Sequential`` model: one ``Dense`` ReLU layer per entry
        in ``hidden_units`` followed by a single linear output unit,
        compiled with the Adam optimizer, MSE loss and MAE as a metric.
    """
    layers = [
        Dense(units, kernel_initializer='normal', activation='relu')
        for units in hidden_units
    ]
    layers.append(Dense(1, kernel_initializer='normal', activation='linear'))

    model = Sequential(layers)
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model


def build_nn1():
    """Return a compiled MLP with one hidden layer: 50 units."""
    return _build_mlp([50])


def build_nn2():
    """Return a compiled MLP with two hidden layers: 100, 50 units."""
    return _build_mlp([100, 50])


def build_nn3():
    """Return a compiled MLP with three hidden layers: 150, 100, 50 units."""
    return _build_mlp([150, 100, 50])


def build_nn4():
    """Return a compiled MLP with four hidden layers: 150, 100, 50, 25 units."""
    return _build_mlp([150, 100, 50, 25])


def build_nn5():
    """Return a compiled MLP with five hidden layers: 150, 100, 50, 25, 12 units."""
    return _build_mlp([150, 100, 50, 25, 12])

# Train and evaluate NN1 to NN5
# Map each architecture name to its builder so a fresh, untrained network can
# be created for every target. Re-using one model instance across targets
# would carry the weights learned on the previous target into the next fit.
nn_builders = {
    "NN1": build_nn1,
    "NN2": build_nn2,
    "NN3": build_nn3,
    "NN4": build_nn4,
    "NN5": build_nn5,
}

# Most recently trained model per architecture, so `nn_models[name]` is still
# a fitted Keras model after this cell runs.
nn_models = {}

# Train and evaluate each NN
for name, build_model in nn_builders.items():
    for output in output_labels:
        print(f"Training {name}... for... {output}")
        nn_model = build_model()
        nn_model.fit(X_train, Y_train[output], epochs=50, batch_size=32, verbose=0, validation_split=0.1)
        nn_models[name] = nn_model

        # Key by architecture AND target. Keying by the name alone made each
        # target overwrite the previous one, so only the last target's
        # metrics were kept. evaluate_model computes the same six metrics
        # that were previously duplicated inline here.
        results[f"{name} [{output}]"] = evaluate_model(
            nn_model, output, X_train, X_test, Y_train, Y_test
        )

# Combine results into one table: one row per entry in `results`, one column
# per metric, with the row index labelled "Model".
results_df = pd.DataFrame(results).transpose().rename_axis("Model")