## Import library

In [2]:
import numpy as np
import pandas as pd

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchmetrics import Accuracy

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error 

from kan import *
import warnings
import sys
sys.path.append('../utils')
from treasury_base import *

warnings.filterwarnings("ignore")

torch.set_default_dtype(torch.float64)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


## Retrieve data

In [None]:
WINDOW_LIST = [5, 10]
LAG = 1

def train_mse():
    predictions = model(dataset['train_input'])  # Model predictions
    mse = F.mse_loss(predictions, dataset['train_label'], reduction='mean')  # Compute MSE
    return mse ** 0.5  # Return scalar MSE value

def test_mse():
    predictions = model(dataset['test_input']) # Model predictions
    mse = F.mse_loss(predictions, dataset['test_label'], reduction='mean')  # Compute MSE
    return mse ** 0.5
    
df_ma = ma_data_retrieval(window_list=WINDOW_LIST, lag=LAG)
df_ma.head()

## KAN model training

In [25]:
TEST_SIZE = 1
LENGTH = len(df_ma)
TARGETS = df_ma.columns[:12]

# Store results for each fold
fold_results = {'train_mse': [], 'test_mse': [], 'naive_mse': []}

for cnt in range(0, 20, 5):
    print()
    print('WINDOW SLIDING: ', cnt)

    df_window = df_ma[(LENGTH-cnt-250):(LENGTH-cnt)]
    # Prepare data
    X, y = df_window.drop(columns=TARGETS), df_window[TARGETS]

    # scaler = StandardScaler()
    # X = pd.DataFrame(scaler.fit_transform(X))

    n_inputs = X.shape[1]
    n_outputs = y.shape[1]

    X_train, X_test = X[:-TEST_SIZE], X[-TEST_SIZE:]
    y_train, y_test = y[:-TEST_SIZE], y[-TEST_SIZE:]

    dataset = dict()
    dtype = torch.get_default_dtype()
    dataset['train_input'] = torch.from_numpy(X_train.values).type(dtype).to(device)
    dataset['train_label'] = torch.from_numpy(y_train.values).type(dtype).to(device)
    dataset['test_input'] = torch.from_numpy(X_test.values).type(dtype).to(device)
    dataset['test_label'] = torch.from_numpy(y_test.values).type(dtype).to(device)

    # Initialize the model
    model = KAN(width=[n_inputs, 48, 64, n_outputs], grid=4, k=2, seed=42, device=device)

    # Train the model and compute metrics
    results = model.fit(dataset, opt="Adam", lamb=0.0001, lr=0.001, steps=500, metrics=(train_mse, test_mse))
    df_naive = pd.DataFrame([y_train.iloc[-1]] * TEST_SIZE, columns=y_train.columns)
        
    # Store the metrics
    train_error = results['train_mse'][-1]
    test_error = results['test_mse'][-1]
    naive_error = mean_squared_error(df_naive, y_test, squared=False)

    fold_results['train_mse'].append(train_error)
    fold_results['test_mse'].append(test_error)
    fold_results['naive_mse'].append(naive_error)

    # Calculate average metrics across all windows
    print(f'Fold Train MSE: {train_error}')
    print(f'Fold Test MSE: {test_error}')
    print(f'Naive Test MSE: {naive_error}')

avg_train_mse = np.mean(fold_results['train_mse'])
avg_test_mse = np.mean(fold_results['test_mse'])
avg_naive_mse = np.mean(fold_results['naive_mse'])

print()
print("Sliding Window Cross-Validation Results")
print(f"Average Train MSE: {avg_train_mse}")
print(f"Average Test MSE: {avg_test_mse}")
print(f"Average Naive MSE: {avg_naive_mse}")

In [4]:
WINDOW_LIST = [5, 10]
TEST_SIZE = 1
LENGTH = len(df_ma)
TARGETS = df_ma.columns[:12]

# Store results for each fold
fold_results = {'train_mse': [], 'test_mse': [], 'naive_mse': []}

for LAG in range(1, 2):
    df_ma = ma_data_retrieval(window_list=WINDOW_LIST, lag=LAG)

    for cnt in range(0, 2):
        print()
        print(f'WINDOW SLIDING: {cnt}, LAG: {LAG}')

        df_window = df_ma[(LENGTH-cnt-100):(LENGTH-cnt)]
        # Prepare data
        X, y = df_window.drop(columns=TARGETS), df_window[TARGETS]

        # scaler = StandardScaler()
        # X = pd.DataFrame(scaler.fit_transform(X))

        n_inputs = X.shape[1]
        n_outputs = y.shape[1]

        X_train, X_test = X[:-TEST_SIZE], X[-TEST_SIZE:]
        y_train, y_test = y[:-TEST_SIZE], y[-TEST_SIZE:]

        dataset = dict()
        dtype = torch.get_default_dtype()
        dataset['train_input'] = torch.from_numpy(X_train.values).type(dtype).to(device)
        dataset['train_label'] = torch.from_numpy(y_train.values).type(dtype).to(device)
        dataset['test_input'] = torch.from_numpy(X_test.values).type(dtype).to(device)
        dataset['test_label'] = torch.from_numpy(y_test.values).type(dtype).to(device)

        # Initialize the model
        model = KAN(width=[n_inputs, 48, 64, n_outputs], grid=4, k=2, seed=42, device=device)

        # Train the model and compute metrics
        results = model.fit(dataset, opt="Adam", lamb=0.0001, lr=0.001, steps=10, metrics=(train_mse, test_mse))
        df_naive = pd.DataFrame([y_train.iloc[-LAG]] * TEST_SIZE, columns=y_train.columns)
            
        # Store the metrics
        train_error = results['train_mse'][-1]
        test_error = results['test_mse'][-1]
        naive_error = mean_squared_error(df_naive, y_test, squared=False)

        fold_results['train_mse'].append(train_error)
        fold_results['test_mse'].append(test_error)
        fold_results['naive_mse'].append(naive_error)

        # Calculate average metrics across all windows
        print(f'Fold Train MSE: {train_error}')
        print(f'Fold Test MSE: {test_error}')
        print(f'Naive Test MSE: {naive_error}')

avg_train_mse = np.mean(fold_results['train_mse'])
avg_test_mse = np.mean(fold_results['test_mse'])
avg_naive_mse = np.mean(fold_results['naive_mse'])

print()
print("Sliding Window Cross-Validation Results")
print(f"Average Train MSE: {avg_train_mse}")
print(f"Average Test MSE: {avg_test_mse}")
print(f"Average Naive MSE: {avg_naive_mse}")


WINDOW SLIDING: 0, LAG: 1
checkpoint directory created: ./model
saving model version 0.0


| train_loss: 3.74e+00 | test_loss: 3.54e+00 | reg: 2.19e+02 | : 100%|█| 10/10 [00:01<00:00,  7.88it


saving model version 0.1
Fold Train MSE: 3.6459810856273243
Fold Test MSE: 3.5396966201798845
Naive Test MSE: 0.031666666666666655

WINDOW SLIDING: 1, LAG: 1
checkpoint directory created: ./model
saving model version 0.0


| train_loss: 3.75e+00 | test_loss: 3.57e+00 | reg: 2.18e+02 | : 100%|█| 10/10 [00:00<00:00, 11.91it


saving model version 0.1
Fold Train MSE: 3.651486329433173
Fold Test MSE: 3.5667371309929905
Naive Test MSE: 0.01500000000000005

Sliding Window Cross-Validation Results
Average Train MSE: 3.6487337075302486
Average Test MSE: 3.5532168755864375
Average Naive MSE: 0.023333333333333352


## Optuna training

In [None]:
import optuna

def objective(trial):
    x = trial.suggest_float('x', -10, 10)
    return (x - 2) ** 2

study = optuna.create_study()
study.optimize(objective, n_trials=100)

study.best_params  # E.g. {'x': 2.002108042}

In [None]:
import optuna
import torch

def train_mse(model, dataset):
    predictions = model(dataset['train_input'])  # Model predictions
    loss = torch.nn.functional.mse_loss(predictions, dataset['train_label'])
    return loss

def test_mse(model, dataset):
    predictions = model(dataset['test_input'])  # Model predictions
    loss = torch.nn.functional.mse_loss(predictions, dataset['test_label'])
    return loss

# Define the objective function for Optuna
def objective(trial):
    # Define the hyperparameter search space
    n_layers = trial.suggest_int('n_layers', 1, 2)  # Number of layers in the network
    layer_sizes = [trial.suggest_int(f'n_units_l{i}', 16, 64, step=16) for i in range(n_layers)]
    grid = trial.suggest_int('grid', 2, 4)          # Example parameter for KAN
    lamb = trial.suggest_float('lamb', 1e-4, 1e-2, log=True)  # Regularization rate
    lr = trial.suggest_float('lr', 1e-4, 1e-2, log=True)       # Learning rate
    steps = trial.suggest_int('steps', 500, 2000, step=500)   # Training steps

    # Model architecture
    width = [n_inputs] + layer_sizes + [n_outputs]

    # Initialize dataset
    dataset = dict()
    dtype = torch.get_default_dtype()
    dataset['train_input'] = torch.from_numpy(X_train.values).type(dtype).to(device)
    dataset['train_label'] = torch.from_numpy(y_train.values).type(dtype).to(device)
    dataset['test_input'] = torch.from_numpy(X_test.values).type(dtype).to(device)
    dataset['test_label'] = torch.from_numpy(y_test.values).type(dtype).to(device)

    # Initialize the model
    model = KAN(width=width, grid=grid, k=2, seed=42, device=device)

    # Train the model
    results = model.fit(
        dataset, 
        opt="Adam", 
        lamb=lamb, 
        lr=lr, 
        steps=steps, 
        metrics=(lambda: train_mse(model, dataset), lambda: test_mse(model, dataset))
    )

    # Retrieve the metric (e.g., test MSE) from the results
    test_mse_value = results['test_loss'][-1]
    return test_mse_value  # Minimize test MSE

# Create an Optuna study
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)

# Best parameters and results
print("Best parameters:", study.best_params)
print("Best test MSE:", study.best_value)
