In [1]:
import warnings

import numpy as np
import pandas as pd

# Imported explicitly: the notebook uses `torch.*` directly and previously only
# received the name as a side effect of `from kan import *`.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchmetrics import Accuracy

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

# Kept last so that names exported by the star import resolve exactly as before.
from kan import *

# Silence every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# Make float64 the default floating-point dtype for newly created tensors.
torch.set_default_dtype(torch.float64)

# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')


In [2]:
def treasury_data_retrieval(path='../data/us_treasury_rates_large.csv'):
    """Load the treasury-rate table and return it sorted by date.

    Parameters
    ----------
    path : str, path-like or file-like, optional
        Anything accepted by ``pd.read_csv``. Defaults to the CSV the
        notebook has always used, so existing no-argument calls are unchanged.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with the ``Date`` column parsed by
        ``pd.to_datetime``, rows sorted by ascending ``Date`` and a fresh
        0..n-1 integer index.
    """
    df = pd.read_csv(path)
    df['Date'] = pd.to_datetime(df['Date'])
    # Chained (non in-place) form: returns a new, re-indexed frame.
    return df.sort_values(by='Date', ascending=True).reset_index(drop=True)

# Raw table: a 'Date' column followed by the rate columns.
df = treasury_data_retrieval()

n = len(df)
h = 5  # number of preceding rows flattened into each feature vector

# Build the lagged feature matrix in one pass instead of growing a DataFrame
# with pd.concat inside a loop (quadratic). Row k holds the rate columns of
# rows k .. k+h-1 laid out oldest row first, i.e. the h rows that precede
# target row k+h. Unlike the previous `.stack()` based version, missing values
# stay in place as NaN instead of being dropped (which shifted later values left).
rates = df.iloc[:, 1:].to_numpy()
n_rates = rates.shape[1]
lagged_rows = [rates[start:start + h].ravel() for start in range(n - h)]
df_flat = pd.DataFrame(lagged_rows, columns=[str(j) for j in range(h * n_rates)])

# Targets: columns 1..12 of the row that follows each window.
# The experiment cells below slice the last 12 columns of df_flat as labels.
for j in range(1, 13):
    df_flat[f'y_{j}'] = df.iloc[h:, j].values

# Index every sample by the date of its target row.
df_flat['Date'] = df['Date'].iloc[h:].values
df_flat = df_flat.set_index('Date')

df_flat.tail()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,y_3,y_4,y_5,y_6,y_7,y_8,y_9,y_10,y_11,y_12
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-12-02,4.72,4.67,4.63,4.46,4.42,4.37,4.32,4.3,4.35,4.41,...,4.51,4.43,4.3,4.17,4.11,4.08,4.13,4.19,4.46,4.36
2024-12-03,4.74,4.67,4.62,4.46,4.37,4.21,4.21,4.17,4.21,4.27,...,4.49,4.4,4.27,4.17,4.13,4.11,4.17,4.23,4.5,4.4
2024-12-04,4.74,4.67,4.61,4.45,4.37,4.21,4.21,4.17,4.24,4.3,...,4.47,4.38,4.23,4.13,4.09,4.07,4.13,4.19,4.45,4.35
2024-12-05,4.76,4.7,4.6,4.43,4.34,4.19,4.17,4.11,4.17,4.25,...,4.46,4.38,4.23,4.15,4.1,4.07,4.12,4.17,4.43,4.33
2024-12-06,4.76,4.69,4.58,4.42,4.3,4.13,4.1,4.05,4.1,4.18,...,4.42,4.34,4.19,4.1,4.05,4.03,4.09,4.15,4.42,4.34


In [3]:
# Date-indexed copy of the raw table; its columns at this point are the targets.
df_ma = df.set_index('Date')
targets = df_ma.columns

# For every target column, 5- and 10-row rolling means of the values shifted
# down by one row, so each mean only covers rows strictly before the current one.
moving_averages = {
    f'{col}_MA{size}': df_ma[col].shift(1).rolling(window=size).mean()
    for col in targets
    for size in [5, 10]
}

# Append the new columns, then drop the rows that contain NaN.
df_ma = df_ma.assign(**moving_averages).dropna()
df_ma

Unnamed: 0_level_0,1 Mo,2 Mo,3 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,...,5 Yr_MA5,5 Yr_MA10,7 Yr_MA5,7 Yr_MA10,10 Yr_MA5,10 Yr_MA10,20 Yr_MA5,20 Yr_MA10,30 Yr_MA5,30 Yr_MA10
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-01-16,2.41,2.40,2.43,2.49,2.57,2.55,2.53,2.54,2.62,2.73,...,2.542,2.517,2.616,2.587,2.724,2.694,2.910,2.870,3.054,3.013
2019-01-17,2.41,2.41,2.42,2.50,2.57,2.56,2.55,2.58,2.66,2.75,...,2.536,2.522,2.612,2.593,2.722,2.701,2.914,2.879,3.062,3.023
2019-01-18,2.40,2.40,2.41,2.50,2.60,2.62,2.60,2.62,2.70,2.79,...,2.540,2.543,2.618,2.615,2.724,2.720,2.916,2.897,3.064,3.038
2019-01-22,2.38,2.40,2.43,2.51,2.59,2.58,2.55,2.57,2.65,2.74,...,2.560,2.556,2.638,2.629,2.740,2.732,2.926,2.909,3.074,3.049
2019-01-23,2.37,2.38,2.41,2.51,2.59,2.58,2.57,2.59,2.66,2.76,...,2.568,2.560,2.648,2.634,2.746,2.736,2.926,2.914,3.074,3.056
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-02,4.75,4.63,4.51,4.43,4.30,4.17,4.11,4.08,4.13,4.19,...,4.160,4.221,4.214,4.280,4.282,4.349,4.546,4.613,4.466,4.531
2024-12-03,4.66,4.56,4.49,4.40,4.27,4.17,4.13,4.11,4.17,4.23,...,4.116,4.199,4.170,4.257,4.238,4.325,4.504,4.589,4.418,4.507
2024-12-04,4.65,4.54,4.47,4.38,4.23,4.13,4.09,4.07,4.13,4.19,...,4.104,4.182,4.162,4.239,4.230,4.306,4.498,4.569,4.408,4.486
2024-12-05,4.59,4.53,4.46,4.38,4.23,4.15,4.10,4.07,4.12,4.17,...,4.084,4.164,4.140,4.220,4.208,4.286,4.476,4.548,4.382,4.464


In [4]:
def train_mse():
    """Root-mean-squared error of the global ``model`` on the training split.

    Reads ``dataset['train_input']`` and ``dataset['train_label']`` from the
    notebook namespace. Despite the name, the value returned is the square
    root of the mean squared error (an RMSE), as a zero-dimensional tensor.
    """
    inputs, labels = dataset['train_input'], dataset['train_label']
    return F.mse_loss(model(inputs), labels, reduction='mean') ** 0.5

def test_mse():
    """Root-mean-squared error of the global ``model`` on the test split.

    Reads ``dataset['test_input']`` and ``dataset['test_label']`` from the
    notebook namespace. Despite the name, the value returned is the square
    root of the mean squared error (an RMSE), as a zero-dimensional tensor.
    """
    inputs, labels = dataset['test_input'], dataset['test_label']
    return F.mse_loss(model(inputs), labels, reduction='mean') ** 0.5

In [None]:
# Hold-out experiment: fit one KAN on everything except the last `test_size`
# rows of df_flat, then forecast those rows recursively.
test_size = 10

# Store results for each fold (keys match the metric function names passed to fit)
fold_results = {'train_mse': [], 'test_mse': []}

# Prepare data: the last 12 columns of df_flat are the labels, the rest are features
X, y = df_flat.iloc[:, :-12], df_flat.iloc[:, -12:]
n_inputs = X.shape[1]
n_outputs = y.shape[1]

X_train, X_test = X.iloc[:-test_size], X.iloc[-test_size:]
y_train, y_test = y.iloc[:-test_size], y.iloc[-test_size:]

dataset = dict()
dtype = torch.get_default_dtype()
dataset['train_input'] = torch.from_numpy(X_train.values).type(dtype).to(device)
dataset['train_label'] = torch.from_numpy(y_train.values).type(dtype).to(device)
# Only the first held-out row is passed to fit(); it is also the seed of the
# recursive forecast below.
dataset['test_input'] = torch.from_numpy(X_test.iloc[0, :].values.reshape(1, -1)).type(dtype).to(device)
dataset['test_label'] = torch.from_numpy(y_test.iloc[0, :].values.reshape(1, -1)).type(dtype).to(device)

# Initialize the model
model = KAN(width=[n_inputs, 20, n_outputs], grid=3, k=2, seed=42, device=device)

# Train the model and compute metrics
results = model.fit(dataset, opt="Adam", lamb=0.001, lr=0.001, steps=1000, metrics=(train_mse, test_mse))

# Recursive forecast: after each step drop the oldest `n_outputs` feature
# values and append the prediction just made, so later steps feed on the
# model's own outputs. (`step` rather than `id`, which shadowed the builtin.)
feature = dataset['test_input']
output_list = list()
for step in range(test_size):
    new = model(feature).cpu().detach().numpy().flatten()
    output_list.append(new)

    old = feature.cpu().detach().numpy().flatten()[n_outputs:]
    feature = torch.from_numpy(np.append(old, new).reshape(1, -1)).type(dtype).to(device)

# Error of the recursive forecast over all held-out rows and outputs.
# Square-rooted so it is on the same scale as train_mse(), which returns the
# root of the mean squared error; previously a plain MSE was printed next to
# the train RMSE under the same "MSE" label.
forecast_rmse = np.sqrt(mean_squared_error(y_test, output_list))

# Store the metrics
fold_results['train_mse'].append(results['train_mse'][-1])
fold_results['test_mse'].append(forecast_rmse)

# Calculate average metrics across all windows (a single one here)
avg_train_mse = np.mean(fold_results['train_mse'])
avg_test_mse = np.mean(fold_results['test_mse'])

print("Sliding Window Cross-Validation Results")
print(f"Average Train RMSE: {avg_train_mse}")
print(f"Average Test RMSE: {avg_test_mse}")

In [30]:
# Parameters for the sliding window
test_size = 20
df_length = len(df_flat)

# Store results for each fold (keys kept as-is; the stored values are RMSEs)
fold_results = {'train_mse': [], 'test_mse': [], 'naive_mse': []}


def _mean_column_rmse(y_true, y_pred):
    """Mean over output columns of each column's RMSE.

    Gives the same number as ``mean_squared_error(..., squared=False)`` did,
    without the ``squared`` argument that scikit-learn 1.6 removed.
    """
    per_column_mse = mean_squared_error(y_true, y_pred, multioutput='raw_values')
    return np.sqrt(per_column_mse).mean()


# range(0, 20, 20) yields a single window (cnt = 0).
for cnt in range(0, 20, 20):
    print()
    print('WINDOW SLIDING: ', cnt)

    # Last 1000 rows ending `cnt` rows before the end of df_flat. The start is
    # clamped at 0 so a short frame cannot turn it into a negative
    # (wrap-around) slice index.
    window_end = df_length - cnt
    window_start = max(0, window_end - 1000)
    df_window = df_flat.iloc[window_start:window_end]

    # Prepare data: the last 12 columns are the labels, the rest are features
    X, y = df_window.iloc[:, :-12], df_window.iloc[:, -12:]

    n_inputs = X.shape[1]
    n_outputs = y.shape[1]

    X_train, X_test = X.iloc[:-test_size], X.iloc[-test_size:]
    y_train, y_test = y.iloc[:-test_size], y.iloc[-test_size:]

    dataset = dict()
    dtype = torch.get_default_dtype()
    dataset['train_input'] = torch.from_numpy(X_train.values).type(dtype).to(device)
    dataset['train_label'] = torch.from_numpy(y_train.values).type(dtype).to(device)
    # Only the first held-out row is passed to fit(); it also seeds the
    # recursive forecast below.
    dataset['test_input'] = torch.from_numpy(X_test.iloc[0, :].values.reshape(1, -1)).type(dtype).to(device)
    dataset['test_label'] = torch.from_numpy(y_test.iloc[0, :].values.reshape(1, -1)).type(dtype).to(device)

    # Initialize the model
    model = KAN(width=[n_inputs, 48, 64, n_outputs], grid=4, k=2, seed=42, device=device)

    # Train the model and compute metrics
    results = model.fit(dataset, opt="Adam", lamb=0.0001, lr=0.001, steps=1000, metrics=(train_mse, test_mse))

    # Recursive forecast: drop the oldest `n_outputs` feature values and append
    # the newest prediction after every step.
    # (`step` rather than `id`, which shadowed the builtin.)
    feature = dataset['test_input']
    output_list = list()
    for step in range(test_size):
        new = model(feature).cpu().detach().numpy().flatten()
        output_list.append(new)

        old = feature.cpu().detach().numpy().flatten()[n_outputs:]
        feature = torch.from_numpy(np.append(old, new).reshape(1, -1)).type(dtype).to(device)

    # Naive baseline: repeat the last training label row for every test row
    df_naive = pd.DataFrame([y_train.iloc[-1]] * test_size, columns=y_train.columns)

    # Fold metrics. train_error comes from train_mse() (root of the MSE over all
    # elements); test and naive errors are the mean of per-column RMSEs, so
    # those two are directly comparable with each other.
    train_error = results['train_mse'][-1]
    test_error = _mean_column_rmse(y_test, output_list)
    naive_error = _mean_column_rmse(y_test, df_naive)

    fold_results['train_mse'].append(train_error)
    fold_results['test_mse'].append(test_error)
    fold_results['naive_mse'].append(naive_error)

    # Report this fold (the values are RMSEs, hence the labels)
    print(f'Fold Train RMSE: {train_error}')
    print(f'Fold Test RMSE: {test_error}')
    print(f'Naive Test RMSE: {naive_error}')

# Calculate average metrics across all windows
avg_train_mse = np.mean(fold_results['train_mse'])
avg_test_mse = np.mean(fold_results['test_mse'])
avg_naive_mse = np.mean(fold_results['naive_mse'])

print()
print("Sliding Window Cross-Validation Results")
print(f"Average Train RMSE: {avg_train_mse}")
print(f"Average Test RMSE: {avg_test_mse}")
print(f"Average Naive RMSE: {avg_naive_mse}")


WINDOW SLIDING:  0
checkpoint directory created: ./model
saving model version 0.0


| train_loss: 6.74e-02 | test_loss: 5.32e-02 | reg: 2.83e+02 | : 100%|█| 1000/1000 [09:20<00:00,  1.


saving model version 0.1
Fold Train MSE: 0.06757737575083289
Fold Test MSE: 0.1515845604083666
Naive Test MSE: 0.10253294753385823

Sliding Window Cross-Validation Results
Average Train MSE: 0.06757737575083289
Average Test MSE: 0.1515845604083666
Average Naive MSE: 0.10253294753385823


In [None]:
# Parameters for the sliding window
test_size = 1
df_length = len(df_ma)

# Store results for each fold (keys kept as-is; the stored values are RMSEs)
fold_results = {'train_mse': [], 'test_mse': [], 'naive_mse': []}

for cnt in range(0, 20, 5):
    print()
    print('WINDOW SLIDING: ', cnt)

    # Last 120 rows ending `cnt` rows before the end of df_ma. The start is
    # clamped at 0 so a short frame cannot turn it into a negative
    # (wrap-around) slice index.
    window_end = df_length - cnt
    window_start = max(0, window_end - 120)
    df_window = df_ma.iloc[window_start:window_end]

    # Prepare data: moving-average columns are the features, raw columns the labels
    X, y = df_window.drop(columns=targets), df_window[targets]

    n_inputs = X.shape[1]
    n_outputs = y.shape[1]

    X_train, X_test = X.iloc[:-test_size], X.iloc[-test_size:]
    y_train, y_test = y.iloc[:-test_size], y.iloc[-test_size:]

    dataset = dict()
    dtype = torch.get_default_dtype()
    dataset['train_input'] = torch.from_numpy(X_train.values).type(dtype).to(device)
    dataset['train_label'] = torch.from_numpy(y_train.values).type(dtype).to(device)
    dataset['test_input'] = torch.from_numpy(X_test.values).type(dtype).to(device)
    dataset['test_label'] = torch.from_numpy(y_test.values).type(dtype).to(device)

    # Initialize the model
    model = KAN(width=[n_inputs, 32, n_outputs], grid=3, k=2, seed=42, device=device)

    # Train the model and compute metrics
    results = model.fit(dataset, opt="Adam", lamb=0.001, lr=0.001, steps=1500, metrics=(train_mse, test_mse))

    # Naive baseline: repeat the last training label row for every test row
    df_naive = pd.DataFrame([y_train.iloc[-1]] * test_size, columns=y_train.columns)

    # Fold metrics. The model errors come from train_mse()/test_mse(), i.e. the
    # root of the MSE taken over all elements, so the naive error uses that
    # same definition. It was previously computed with `squared=False`, which
    # averages per-column RMSEs (a different quantity) and which
    # scikit-learn 1.6 no longer accepts.
    train_error = results['train_mse'][-1]
    test_error = results['test_mse'][-1]
    naive_error = np.sqrt(mean_squared_error(y_test, df_naive))

    fold_results['train_mse'].append(train_error)
    fold_results['test_mse'].append(test_error)
    fold_results['naive_mse'].append(naive_error)

    # Report this fold (the values are RMSEs, hence the labels)
    print(f'Fold Train RMSE: {train_error}')
    print(f'Fold Test RMSE: {test_error}')
    print(f'Naive Test RMSE: {naive_error}')

# Calculate average metrics across all windows
avg_train_mse = np.mean(fold_results['train_mse'])
avg_test_mse = np.mean(fold_results['test_mse'])
avg_naive_mse = np.mean(fold_results['naive_mse'])

print()
print("Sliding Window Cross-Validation Results")
print(f"Average Train RMSE: {avg_train_mse}")
print(f"Average Test RMSE: {avg_test_mse}")
print(f"Average Naive RMSE: {avg_naive_mse}")

In [21]:
# Naive baseline: the last training label row, repeated once per test row.
repeated_last_row = [y_train.iloc[-1] for _unused in range(test_size)]
df_naive = pd.DataFrame(repeated_last_row, columns=y_train.columns)

# Plain (not square-rooted) mean squared error of that baseline against y_test.
mean_squared_error(df_naive, y_test)

np.float64(0.02249166666666667)

In [31]:
# Last five entries of output_list, shown as a frame with one row per forecast step.
pd.DataFrame(output_list).tail()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
15,4.646886,4.680236,4.636659,4.515665,4.320821,4.039805,3.971619,3.877077,3.89289,3.955074,4.273912,4.153266
16,4.659703,4.69358,4.653065,4.523063,4.308589,4.015352,3.940596,3.847665,3.867639,3.930352,4.253082,4.13564
17,4.673981,4.708522,4.671241,4.531381,4.295849,3.990843,3.908433,3.817734,3.842965,3.906796,4.233208,4.119601
18,4.688969,4.724609,4.690947,4.540435,4.282377,3.966684,3.87573,3.788116,3.819424,3.884815,4.214976,4.105567
19,4.703806,4.741495,4.712026,4.550225,4.268009,3.94308,3.843262,3.759596,3.797617,3.864783,4.199188,4.094111


In [32]:
# Last five rows of the held-out label frame y_test.
y_test.tail()

Unnamed: 0_level_0,y_1,y_2,y_3,y_4,y_5,y_6,y_7,y_8,y_9,y_10,y_11,y_12
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2024-12-02,4.75,4.63,4.51,4.43,4.3,4.17,4.11,4.08,4.13,4.19,4.46,4.36
2024-12-03,4.66,4.56,4.49,4.4,4.27,4.17,4.13,4.11,4.17,4.23,4.5,4.4
2024-12-04,4.65,4.54,4.47,4.38,4.23,4.13,4.09,4.07,4.13,4.19,4.45,4.35
2024-12-05,4.59,4.53,4.46,4.38,4.23,4.15,4.1,4.07,4.12,4.17,4.43,4.33
2024-12-06,4.57,4.5,4.42,4.34,4.19,4.1,4.05,4.03,4.09,4.15,4.42,4.34


In [33]:
# Last five rows of the naive baseline frame (the final training label row, repeated).
df_naive.tail()

Unnamed: 0,y_1,y_2,y_3,y_4,y_5,y_6,y_7,y_8,y_9,y_10,y_11,y_12
2024-11-06,4.68,4.71,4.64,4.41,4.31,4.27,4.2,4.27,4.37,4.42,4.71,4.6
2024-11-06,4.68,4.71,4.64,4.41,4.31,4.27,4.2,4.27,4.37,4.42,4.71,4.6
2024-11-06,4.68,4.71,4.64,4.41,4.31,4.27,4.2,4.27,4.37,4.42,4.71,4.6
2024-11-06,4.68,4.71,4.64,4.41,4.31,4.27,4.2,4.27,4.37,4.42,4.71,4.6
2024-11-06,4.68,4.71,4.64,4.41,4.31,4.27,4.2,4.27,4.37,4.42,4.71,4.6


In [88]:
# Print the signature and docstring of the current model's fit method.
help(model.fit)

Help on method fit in module kan.MultKAN:

fit(dataset, opt='LBFGS', steps=100, log=1, lamb=0.0, lamb_l1=1.0, lamb_entropy=2.0, lamb_coef=0.0, lamb_coefdiff=0.0, update_grid=True, grid_update_num=10, loss_fn=None, lr=1.0, start_grid_update_step=-1, stop_grid_update_step=50, batch=-1, metrics=None, save_fig=False, in_vars=None, out_vars=None, beta=3, save_fig_freq=1, img_folder='./video', singularity_avoiding=False, y_th=1000.0, reg_metric='edge_forward_spline_n', display_metrics=None) method of kan.MultKAN.MultKAN instance
    training

    Args:
    -----
        dataset : dic
            contains dataset['train_input'], dataset['train_label'], dataset['test_input'], dataset['test_label']
        opt : str
            "LBFGS" or "Adam"
        steps : int
            training steps
        log : int
            logging frequency
        lamb : float
            overall penalty strength
        lamb_l1 : float
            l1 penalty strength
        lamb_entropy : float
            ent