In [1]:
import numpy as np
import pandas as pd

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchmetrics import Accuracy

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error 

from kan import *
import warnings

# `torch` used to be in scope only as a side effect of `from kan import *`.
# Import it explicitly so this cell does not depend on what kan re-exports.
import torch

# NOTE(review): this silences *every* warning, including deprecation notices
# from scikit-learn / pandas. Consider narrowing the filter.
warnings.filterwarnings("ignore")

# Make float64 the default floating-point dtype for newly created tensors.
torch.set_default_dtype(torch.float64)
# Prefer a CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [10]:
def treasury_data_retrieval(path='../data/us_treasury_rates_large.csv'):
    """Load the treasury-rates CSV and return it ordered by date.

    Args:
        path: CSV file to read. It must contain a 'Date' column that
            ``pd.to_datetime`` can parse. Defaults to the file this notebook
            has always read, so existing zero-argument calls are unchanged.

    Returns:
        pandas.DataFrame with 'Date' converted to datetime, rows sorted by
        ascending 'Date', and a fresh 0..n-1 integer index.
    """
    df = pd.read_csv(path)
    df['Date'] = pd.to_datetime(df['Date'])
    # Chained (non-inplace) sort + renumbering; later cells rely on positional
    # row order, so the old index is discarded.
    return df.sort_values(by='Date', ascending=True).reset_index(drop=True)

df = treasury_data_retrieval()

n = len(df)
h = 10          # look-back window: number of consecutive past rows flattened into one feature row
n_targets = 12  # rate columns (positions 1..12 of df) used as prediction targets

# Feature matrix: for every row position `end` in [h, n), flatten the h
# preceding rows of all rate columns (everything after the first column) into
# one vector, oldest row first and columns in their original order.
# All windows are collected first and the frame is built once, instead of
# growing it with pd.concat on every iteration (quadratic). Working on the raw
# array also keeps every value at a fixed position: the previous
# DataFrame.stack() call could drop NaN cells (pandas-version dependent),
# which shifted later values into the wrong feature columns.
rates = df.iloc[:, 1:].to_numpy()
df_flat = pd.DataFrame([rates[end - h:end].ravel() for end in range(h, n)])

# Targets: the rates observed on the row right after each window.
for col in range(1, n_targets + 1):
    df_flat[f'y_{col}'] = df.iloc[h:, col].values

df_flat['Date'] = df['Date'].iloc[h:].values
# Feature columns are integers up to here; make every column label a string.
df_flat.columns = df_flat.columns.astype(str)
df_flat = df_flat.set_index('Date')

df_flat.tail()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,y_3,y_4,y_5,y_6,y_7,y_8,y_9,y_10,y_11,y_12
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-12-02,4.7,4.67,4.6,4.44,4.34,4.31,4.27,4.3,4.36,4.43,...,4.51,4.43,4.3,4.17,4.11,4.08,4.13,4.19,4.46,4.36
2024-12-03,4.7,4.65,4.63,4.44,4.33,4.29,4.25,4.28,4.35,4.42,...,4.49,4.4,4.27,4.17,4.13,4.11,4.17,4.23,4.5,4.4
2024-12-04,4.67,4.63,4.62,4.44,4.34,4.27,4.24,4.25,4.32,4.39,...,4.47,4.38,4.23,4.13,4.09,4.07,4.13,4.19,4.45,4.35
2024-12-05,4.68,4.63,4.62,4.44,4.37,4.31,4.26,4.28,4.34,4.41,...,4.46,4.38,4.23,4.15,4.1,4.07,4.12,4.17,4.43,4.33
2024-12-06,4.72,4.67,4.63,4.45,4.39,4.34,4.3,4.3,4.36,4.43,...,4.42,4.34,4.19,4.1,4.05,4.03,4.09,4.15,4.42,4.34


In [3]:
def train_mse():
    """Root-mean-squared error of the global ``model`` on the training split.

    Despite the ``_mse`` name, the value returned is the square root of the
    mean squared error between ``model(dataset['train_input'])`` and
    ``dataset['train_label']``. Reads the module-level ``model`` and
    ``dataset``; takes no arguments so it can be passed in ``metrics=``.
    """
    train_pred = model(dataset['train_input'])
    # Mean over all elements, then square root -> RMSE (0-dim tensor).
    return F.mse_loss(train_pred, dataset['train_label'], reduction='mean') ** 0.5

def test_mse():
    predictions = model(dataset['test_input']) # Model predictions
    mse = F.mse_loss(predictions, dataset['test_label'], reduction='mean')  # Compute MSE
    return mse ** 0.5

In [None]:
# Single hold-out evaluation: train on everything except the last `test_size`
# rows of df_flat, then forecast those rows recursively.
# (numpy is already imported as `np` in the first cell.)
test_size = 10

# Per-fold metrics. The keys say "mse", but the stored values are RMSEs:
# train_mse()/test_mse() return the square root of the MSE.
fold_results = {'train_mse': [], 'test_mse': []}

# Prepare data: the last 12 columns of df_flat are the targets (y_1..y_12).
X, y = df_flat.iloc[:, :-12], df_flat.iloc[:, -12:]
n_inputs = X.shape[1]
n_outputs = y.shape[1]

X_train, X_test = X[:-test_size], X[-test_size:]
y_train, y_test = y[:-test_size], y[-test_size:]

dtype = torch.get_default_dtype()
# Only the FIRST test row is handed to the model as test data: it is the seed
# of the recursive forecast below and the sample the per-step test metric
# is evaluated on during training.
dataset = {
    'train_input': torch.from_numpy(X_train.values).type(dtype).to(device),
    'train_label': torch.from_numpy(y_train.values).type(dtype).to(device),
    'test_input': torch.from_numpy(X_test.iloc[0, :].values.reshape(1, -1)).type(dtype).to(device),
    'test_label': torch.from_numpy(y_test.iloc[0, :].values.reshape(1, -1)).type(dtype).to(device),
}

# Initialize the model
model = KAN(width=[n_inputs, 20, n_outputs], grid=3, k=2, seed=42, device=device)

# Train the model and compute metrics
results = model.fit(dataset, opt="Adam", lamb=0.001, lr=0.001, steps=1000, metrics=(train_mse, test_mse))

# Recursive multi-step forecast: predict one step, drop the oldest n_outputs
# values from the feature vector and append the prediction as the newest ones.
# NOTE(review): assumes each feature row is a sequence of n_outputs-wide
# blocks, oldest first - confirm against how df_flat is built.
feature = dataset['test_input']
output_list = list()
for step in range(test_size):
    new = model(feature).cpu().detach().numpy().flatten()
    output_list.append(new)

    old = feature.cpu().detach().numpy().flatten()[n_outputs:]
    feature = torch.from_numpy(np.append(old, new).reshape(1, -1)).type(dtype).to(device)

# Multi-step forecast error over the whole hold-out block, as an RMSE (per
# target column, averaged) so it is on the same scale as the train metric and
# matches the sliding-window cell. Previously the one-step value from the
# training log was stored, `avg_test_mse` was never printed, and a plain MSE
# was printed next to an RMSE.
test_error = np.sqrt(mean_squared_error(y_test, output_list, multioutput='raw_values')).mean()

# Store the metrics
fold_results['train_mse'].append(results['train_mse'][-1])
fold_results['test_mse'].append(test_error)

# Calculate average metrics across all windows (a single one here)
avg_train_mse = np.mean(fold_results['train_mse'])
avg_test_mse = np.mean(fold_results['test_mse'])

print("Sliding Window Cross-Validation Results")
print(f"Average Train MSE: {avg_train_mse}")
print(f"Average Test MSE: {avg_test_mse}")

In [16]:
# Sliding-window evaluation settings.
# (numpy is already imported as `np` in the first cell.)
test_size = 10      # trailing rows of each window held out for the recursive forecast
window_size = 250   # rows of df_flat per window (train + test)
df_length = len(df_flat)


def _column_avg_rmse(y_true, y_pred):
    """RMSE computed per target column, then averaged over the columns.

    This is the value ``mean_squared_error(..., squared=False)`` produced for
    multi-output data. The ``squared`` keyword was deprecated in scikit-learn
    1.4 and removed in 1.6, so the square root is taken explicitly here.
    """
    per_column_mse = mean_squared_error(y_true, y_pred, multioutput='raw_values')
    return np.sqrt(per_column_mse).mean()


# Per-fold metrics. Keys and printed labels say "MSE", but every value is an
# RMSE (train_mse() returns a square root; the test/naive errors below too).
# The labels are kept as-is so the printed output keeps its format.
fold_results = {'train_mse': [], 'test_mse': [], 'naive_mse': []}

# NOTE: range(0, 10, 10) yields only cnt = 0, i.e. one window ending at the
# last row of df_flat. Raise the stop value to evaluate earlier windows too.
for cnt in range(0, 10, 10):
    print()
    print('WINDOW SLIDING: ', cnt)

    # Clamp the start at 0 so a short df_flat yields a shorter window instead
    # of a negative start index wrapping around.
    window_end = df_length - cnt
    window_start = max(0, window_end - window_size)
    df_window = df_flat.iloc[window_start:window_end]

    # Prepare data: the last 12 columns are the targets (y_1..y_12).
    X, y = df_window.iloc[:, :-12], df_window.iloc[:, -12:]

    n_inputs = X.shape[1]
    n_outputs = y.shape[1]

    X_train, X_test = X[:-test_size], X[-test_size:]
    y_train, y_test = y[:-test_size], y[-test_size:]

    dtype = torch.get_default_dtype()
    # Only the first test row is given to the model as test data; it seeds
    # the recursive forecast below.
    dataset = {
        'train_input': torch.from_numpy(X_train.values).type(dtype).to(device),
        'train_label': torch.from_numpy(y_train.values).type(dtype).to(device),
        'test_input': torch.from_numpy(X_test.iloc[0, :].values.reshape(1, -1)).type(dtype).to(device),
        'test_label': torch.from_numpy(y_test.iloc[0, :].values.reshape(1, -1)).type(dtype).to(device),
    }

    # Initialize the model
    model = KAN(width=[n_inputs, 64, 32, n_outputs], grid=4, k=2, seed=42, device=device)

    # Train the model and compute metrics
    results = model.fit(dataset, opt="Adam", lamb=0.0001, lr=0.001, steps=1000, metrics=(train_mse, test_mse))

    # Recursive multi-step forecast: predict one step, drop the oldest
    # n_outputs values from the feature vector, append the prediction.
    # NOTE(review): assumes each feature row is a sequence of n_outputs-wide
    # blocks, oldest first - confirm against how df_flat is built.
    feature = dataset['test_input']
    output_list = list()
    for step in range(test_size):
        new = model(feature).cpu().detach().numpy().flatten()
        output_list.append(new)

        old = feature.cpu().detach().numpy().flatten()[n_outputs:]
        feature = torch.from_numpy(np.append(old, new).reshape(1, -1)).type(dtype).to(device)

    # Naive baseline: repeat the last training-row targets for every test step.
    df_naive = pd.DataFrame([y_train.iloc[-1]] * test_size, columns=y_train.columns)

    # Store the metrics (y_true first, y_pred second, as scikit-learn expects).
    train_error = results['train_mse'][-1]
    test_error = _column_avg_rmse(y_test, output_list)
    naive_error = _column_avg_rmse(y_test, df_naive)

    fold_results['train_mse'].append(train_error)
    fold_results['test_mse'].append(test_error)
    fold_results['naive_mse'].append(naive_error)

    print(f'Fold Train MSE: {train_error}')
    print(f'Fold Test MSE: {test_error}')
    print(f'Naive Test MSE: {naive_error}')

# Calculate average metrics across all windows
avg_train_mse = np.mean(fold_results['train_mse'])
avg_test_mse = np.mean(fold_results['test_mse'])
avg_naive_mse = np.mean(fold_results['naive_mse'])

print()
print("Sliding Window Cross-Validation Results")
print(f"Average Train MSE: {avg_train_mse}")
print(f"Average Test MSE: {avg_test_mse}")
print(f"Average Naive MSE: {avg_naive_mse}")


WINDOW SLIDING:  0
checkpoint directory created: ./model
saving model version 0.0


| train_loss: 3.42e-02 | test_loss: 3.01e-02 | reg: 1.10e+02 | : 100%|█| 1000/1000 [03:45<00:00,  4.


saving model version 0.1
Fold Train MSE: 0.03485524219351308
Fold Test MSE: 0.16285927184746915
Naive Test MSE: 0.1503864698485291

Sliding Window Cross-Validation Results
Average Train MSE: 0.03485524219351308
Average Test MSE: 0.16285927184746915
Average Naive MSE: 0.1503864698485291


In [21]:
# Naive baseline: the last training-row targets repeated once per test step,
# scored against the held-out targets with a plain (squared) MSE.
last_known_targets = y_train.iloc[-1]
df_naive = pd.DataFrame(
    [last_known_targets for _ in range(test_size)],
    columns=y_train.columns,
)
mean_squared_error(df_naive, y_test)

np.float64(0.02249166666666667)

In [6]:
# Last five rows of the recursive forecasts collected in output_list
# (one row per forecast step, one column per model output).
pd.DataFrame(output_list).tail()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,4.794193,4.793798,4.68003,4.497216,4.341764,4.15488,4.119141,4.048332,4.084687,4.176087,4.504862,4.417732
1,4.786509,4.773281,4.673364,4.459898,4.292001,4.097328,4.065272,4.017815,4.109991,4.191843,4.513828,4.388622
2,4.802932,4.749233,4.659901,4.447083,4.281151,4.108349,4.090338,4.086611,4.107097,4.209507,4.541863,4.455868
3,4.781459,4.705803,4.639386,4.406309,4.264029,4.101513,4.065945,4.098537,4.146627,4.261815,4.578426,4.459857
4,4.796099,4.738114,4.659247,4.429075,4.273547,4.108882,4.059323,4.083745,4.138639,4.240941,4.565598,4.466382


In [7]:
# Last five rows of the held-out targets (y_1..y_12) from the most recent split.
y_test.tail()

Unnamed: 0_level_0,y_1,y_2,y_3,y_4,y_5,y_6,y_7,y_8,y_9,y_10,y_11,y_12
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2024-12-02,4.75,4.63,4.51,4.43,4.3,4.17,4.11,4.08,4.13,4.19,4.46,4.36
2024-12-03,4.66,4.56,4.49,4.4,4.27,4.17,4.13,4.11,4.17,4.23,4.5,4.4
2024-12-04,4.65,4.54,4.47,4.38,4.23,4.13,4.09,4.07,4.13,4.19,4.45,4.35
2024-12-05,4.59,4.53,4.46,4.38,4.23,4.15,4.1,4.07,4.12,4.17,4.43,4.33
2024-12-06,4.57,4.5,4.42,4.34,4.19,4.1,4.05,4.03,4.09,4.15,4.42,4.34


In [8]:
# Last five rows of the naive baseline: the final training-row targets,
# repeated for every test step (hence identical rows).
df_naive.tail()

Unnamed: 0,y_1,y_2,y_3,y_4,y_5,y_6,y_7,y_8,y_9,y_10,y_11,y_12
2024-11-29,4.76,4.69,4.58,4.42,4.3,4.13,4.1,4.05,4.1,4.18,4.45,4.36
2024-11-29,4.76,4.69,4.58,4.42,4.3,4.13,4.1,4.05,4.1,4.18,4.45,4.36
2024-11-29,4.76,4.69,4.58,4.42,4.3,4.13,4.1,4.05,4.1,4.18,4.45,4.36
2024-11-29,4.76,4.69,4.58,4.42,4.3,4.13,4.1,4.05,4.1,4.18,4.45,4.36
2024-11-29,4.76,4.69,4.58,4.42,4.3,4.13,4.1,4.05,4.1,4.18,4.45,4.36


In [88]:
# Interactive lookup of the signature/docstring of the KAN model's fit method.
# NOTE(review): exploration aid only - consider removing from the final notebook.
help(model.fit)

Help on method fit in module kan.MultKAN:

fit(dataset, opt='LBFGS', steps=100, log=1, lamb=0.0, lamb_l1=1.0, lamb_entropy=2.0, lamb_coef=0.0, lamb_coefdiff=0.0, update_grid=True, grid_update_num=10, loss_fn=None, lr=1.0, start_grid_update_step=-1, stop_grid_update_step=50, batch=-1, metrics=None, save_fig=False, in_vars=None, out_vars=None, beta=3, save_fig_freq=1, img_folder='./video', singularity_avoiding=False, y_th=1000.0, reg_metric='edge_forward_spline_n', display_metrics=None) method of kan.MultKAN.MultKAN instance
    training

    Args:
    -----
        dataset : dic
            contains dataset['train_input'], dataset['train_label'], dataset['test_input'], dataset['test_label']
        opt : str
            "LBFGS" or "Adam"
        steps : int
            training steps
        log : int
            logging frequency
        lamb : float
            overall penalty strength
        lamb_l1 : float
            l1 penalty strength
        lamb_entropy : float
            ent