In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from Classes import LinearRegressionModel, NeuralNetworkModel, SVRModel, XGBoostModel

# CSV files to process; each entry is handled as "Dataset_<position>" by main().
dataset_paths = [
    "daily_dbs/sortino_daily_HighCap.csv",
]

# Load and split data
def load_and_split_data(file_path, dataset_name):
    """Load a dataset CSV, split it 70/15/15 and standardize the features.

    The columns 'Portfolio_Returns', 'Unnamed: 0' and 'Date' are dropped,
    'Sortino_Ratio' is used as the target and every remaining column is a
    feature. The fitted scaler is pickled to
    ``{dataset_name}_scaler_mc.pkl`` in the current working directory.

    Args:
        file_path: Path of the CSV file to read.
        dataset_name: Prefix used for the saved scaler file name.

    Returns:
        Tuple ``(X_train, X_val, X_test, y_train, y_val, y_test)``. The X
        parts are the scaled feature arrays returned by the scaler; the y
        parts are the corresponding slices of the 'Sortino_Ratio' column.
    """
    df = pd.read_csv(file_path).drop(columns=['Portfolio_Returns', 'Unnamed: 0', 'Date'])

    # Split into features (X) and target (y)
    X = df.drop(columns='Sortino_Ratio')
    y = df['Sortino_Ratio']

    # Split data into train (70%), validation (15%), and test (15%).
    # This is done BEFORE scaling so that the scaler never sees validation
    # or test rows; fitting it on the full dataset would leak their
    # mean/variance into training and make the evaluation optimistic.
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

    # Normalize features: fit on the training split only, then apply the
    # same transformation to the held-out splits.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)
    X_test = scaler.transform(X_test)

    # Save the scaler for future use
    scaler_path = f"{dataset_name}_scaler_mc.pkl"
    with open(scaler_path, "wb") as f:
        pickle.dump(scaler, f)
    print(f"Saved scaler to {scaler_path}")

    return X_train, X_val, X_test, y_train, y_val, y_test

# Train and save models
def train_models(X_train, y_train, X_val, y_val, dataset_name):
    """Fit the SVR model, evaluate it on the validation split and pickle it.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training targets passed to ``fit``.
        X_val: Validation features passed to ``evaluate``.
        y_val: Validation targets passed to ``evaluate``.
        dataset_name: Prefix used for the saved model file names.

    Returns:
        Dict mapping the model name ("SVR") to the fitted model object.
    """
    print("\nTraining SVR Model...")
    svr = SVRModel()
    svr.fit(X_train, y_train)
    svr.evaluate(X_val, y_val)

    trained = {"SVR": svr}

    # Persist every trained model as <dataset>_<model>_mc.pkl
    for label, estimator in trained.items():
        target_file = f"{dataset_name}_{label}_mc.pkl"
        with open(target_file, "wb") as handle:
            pickle.dump(estimator, handle)
        print(f"Saved {label} model to {target_file}")

    return trained

# Plot predictions vs actuals
def plot_model_results(models, X_test, y_test, dataset_name):
    """Draw one predicted-vs-actual scatter plot per model and show the figure.

    Args:
        models: Mapping of model name to an object exposing ``predict``.
        X_test: Features passed to each model's ``predict``.
        y_test: Actual target values; must support ``min()`` and ``max()``.
        dataset_name: Label used in the subplot titles and the figure title.
    """
    n_cols = 2
    # Keep the original 2x2 layout for up to four models, and add rows when
    # there are more so that no model is silently left out of the figure.
    n_rows = max(2, (len(models) + n_cols - 1) // n_cols)
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(12, 5 * n_rows))
    axes = axes.flatten()

    # End points of the "perfect prediction" diagonal, shared by all subplots.
    y_low, y_high = y_test.min(), y_test.max()

    for ax, (name, model) in zip(axes, models.items()):
        preds = model.predict(X_test)
        ax.scatter(y_test, preds, alpha=0.6, label='Predictions')
        ax.plot([y_low, y_high], [y_low, y_high], 'r--', label='Ideal')
        ax.set_xlabel("Actual Sortino Ratio")
        ax.set_ylabel("Predicted Sortino Ratio")
        ax.set_title(f"{name} - {dataset_name}")
        ax.legend()

    # Hide grid cells that have no model so empty frames are not rendered.
    for unused_ax in axes[len(models):]:
        unused_ax.set_visible(False)

    plt.suptitle(f"Model Predictions vs Actual Sortino Ratio - {dataset_name}")
    plt.tight_layout()
    plt.show()

# Main process
def main():
    """Run load -> train -> test evaluation -> plot for every dataset path.

    Datasets are named "Dataset_1", "Dataset_2", ... following their order
    in ``dataset_paths``.
    """
    for position, dataset_path in enumerate(dataset_paths, start=1):
        dataset_name = f"Dataset_{position}"
        print(f"\nProcessing {dataset_name}...\n")

        splits = load_and_split_data(dataset_path, dataset_name)
        X_train, X_val, X_test, y_train, y_val, y_test = splits

        models = train_models(X_train, y_train, X_val, y_val, dataset_name)

        # Report each trained model's performance on the held-out test split
        print(f"\nFinal evaluation on test set for {dataset_name}:")
        for name in models:
            print(f"\n{name} Model:")
            models[name].evaluate(X_test, y_test)

        plot_model_results(models, X_test, y_test, dataset_name)

# Run the pipeline only when this file is executed as the main program,
# not when it is imported as a module.
if __name__ == "__main__":
    main()



Processing Dataset_1...

Saved scaler to Dataset_1_scaler_mc.pkl

Training SVR Model...
