In [1]:
from kan import *
import pandas as pd
import numpy as np
import torch
from gridsearch import gridsearch

In [2]:
import os

# Move the working directory one level up from where the kernel started so
# that relative paths such as "Dataset/iris.csv" resolve.
# A bare os.chdir("..") is relative to the *current* directory, so re-running
# this cell would climb one more level each time. Remember the start directory
# on first execution and always change to its parent, which makes the cell
# idempotent within a kernel session.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(_NOTEBOOK_START_DIR, os.pardir))
os.getcwd()

'd:\\Documents\\GitHub\\crux-inductions\\Task 1'

In [3]:
# Load the iris dataset and prepare it for the KAN model.

df = pd.read_csv("Dataset/iris.csv")

# Replace the categorical labels with numbers.
# The result is assigned back to the column instead of calling
# df["variety"].replace(..., inplace=True): that chained in-place form acts on
# a temporary selection, which pandas warns about and which no longer updates
# df under Copy-on-Write. An explicit map also yields a float column directly.
label_codes = {"Setosa": 0., "Versicolor": 1., "Virginica": 2.}
df["variety"] = df["variety"].map(label_codes)
if df["variety"].isna().any():
    # map() turns anything outside label_codes into NaN; fail loudly rather
    # than converting NaN into a meaningless integer class id further down.
    raise ValueError("Dataset/iris.csv contains a missing or unexpected 'variety' label")

df = df.sample(frac=1, random_state=0).reset_index(drop=True)  # shuffle the dataset (fixed seed)

# Every column except the last is a feature; the last column is the class label.
X = np.array(df.iloc[:, :-1].values)
Y = np.array(df.iloc[:, -1].values)
X = torch.from_numpy(X).float()  # KAN needs its data as float torch tensors
Y = torch.from_numpy(Y).type(torch.LongTensor)  # CrossEntropyLoss needs integer class labels

In [4]:
# Fraction of the (already shuffled) samples used for training; the remainder is the test set.
train_split = 0.85

# X and Y are built from the same frame and have the same number of rows,
# so a single cut index splits both consistently.
n_train = int(len(X) * train_split)

X_train, X_test = X[:n_train], X[n_train:]
Y_train, Y_test = Y[:n_train], Y[n_train:]

# Dictionary layout passed to model.train(): inputs and labels for each split.
dataset = {
    "train_input": X_train,
    "test_input": X_test,
    "train_label": Y_train,
    "test_label": Y_test,
}

In [5]:
"""
KAN model with 4 inputs, one hidden layer of 5 nodes, and 3 outputs (width=[4,5,3]), grid size of 3, and k=3
"""
# width lists the node count of every layer, input and output included:
# 4 iris features -> 5 hidden nodes -> 3 class outputs.
# grid and k configure the spline activations (number of grid intervals and
# spline order, per the pykan documentation); seed fixes the initialization.
model = KAN(width=[4,5,3], grid=3, k=3, seed=0)

In [6]:
"""
define training and testing accuracy functions which will be used as metrics for KAN.train() method
"""

def train_acc():
    return torch.mean((torch.argmax(model(dataset['train_input']), dim=1) == dataset['train_label']).float())

def test_acc():
    return torch.mean((torch.argmax(model(dataset['test_input']), dim=1) == dataset['test_label']).float())

# Fit the model with LBFGS for 20 steps using cross-entropy loss, recording
# both accuracy metrics; the trailing semicolon suppresses the cell's output repr.
results = model.train(
    dataset,
    opt="LBFGS",
    steps=20,
    metrics=(train_acc, test_acc),
    loss_fn=torch.nn.CrossEntropyLoss(),
);

  from .autonotebook import tqdm as notebook_tqdm
train loss: 2.27e-04 | test loss: 5.52e+00 | reg: 4.91e+02 : 100%|██| 20/20 [00:12<00:00,  1.64it/s]


In [7]:
# Report the accuracies recorded at the last training step.
final_train_acc = results['train_acc'][-1]
final_test_acc = results['test_acc'][-1]
print(f"Train Accuracy: {final_train_acc}")
print(f"Test Accuracy: {final_test_acc}")

Train Accuracy: 1.0
Test Accuracy: 0.9130434989929199


In [16]:
"""
Pykan also allows us to see the symbolic formula of the model. In this case we have 3 formulas, each corresponding to a class.
Each formula has 4 variables, which correspond to the 4 features in the iris dataset. Plugging in the values of these variables
gives the model's raw score (logit) for that class. Because the model was trained with CrossEntropyLoss, a softmax over the
three scores is needed to turn them into class probabilities.

Note that the generated formulas are not the same as the function that generated the data (the ideal function).
They are just an approximation of the function that the model has learned, and are accurate only up to some approximation error
(the Kolmogorov-Arnold representation theorem is what motivates representing the function in this form).
"""

# Library of candidate functions that auto_symbolic may choose from.
symbols = [
    'x', 'x^2', 'x^3', 'x^4',
    'exp', 'log', 'sqrt', 'tanh', 'sin', 'abs',
]
model.auto_symbolic(lib=symbols)  # generate the symbolic formula

# The first element of symbolic_formula() is unpacked into three expressions,
# one per class.
class_formulas = model.symbolic_formula()[0]
formula1, formula2, formula3 = class_formulas
print(f"Formula 1 :{formula1}\n\nFormula 2:{formula2}\n\nFormula 3:{formula3}")

fixing (0,0,0) with log, r2=0.9991572499275208
fixing (0,0,1) with x^3, r2=0.8089573979377747
fixing (0,0,2) with sin, r2=0.998892068862915
fixing (0,0,3) with sin, r2=0.9976062178611755
fixing (0,0,4) with sin, r2=0.9994670748710632
fixing (0,1,0) with sin, r2=0.9999797344207764
fixing (0,1,1) with sin, r2=1.0000001192092896
fixing (0,1,2) with sin, r2=0.9999666810035706
fixing (0,1,3) with exp, r2=0.9998781681060791
fixing (0,1,4) with sin, r2=0.9995633363723755
fixing (0,2,0) with sin, r2=0.9999454021453857
fixing (0,2,1) with sin, r2=0.9999930262565613
fixing (0,2,2) with sqrt, r2=0.9999890923500061
fixing (0,2,3) with sin, r2=0.999992311000824
fixing (0,2,4) with x^2, r2=0.9999874830245972
fixing (0,3,0) with tanh, r2=0.9998378753662109
fixing (0,3,1) with sin, r2=0.9999805092811584
fixing (0,3,2) with tanh, r2=0.9999189972877502
fixing (0,3,3) with tanh, r2=0.9999874234199524
fixing (0,3,4) with sin, r2=0.9999833106994629
fixing (1,0,0) with x^2, r2=0.9976719617843628
fixing (1,0

In [None]:
# Grid search over KAN hyperparameters: each combination of layer widths,
# grid size and k listed below is trained and scored by gridsearch().

params = {
    'width': [[4, 3], [4, 10, 3]],
    'grid': [3, 10],
    'k': [3, 4],
}

result = gridsearch(df, params, classifier=KAN)

# Pick the combination with the highest mean test score
# (np.argmax returns the first maximum when several combinations tie).
mean_test_scores = result['mean_test_score']
best_params = result['params'][np.argmax(mean_test_scores)]
best_accuracy = np.max(mean_test_scores)


Training model with width: [4, 3], grid: 3, k: 3


  from .autonotebook import tqdm as notebook_tqdm
train loss: 4.07e-04 | test loss: 2.13e+00 | reg: 2.49e+03 : 100%|██| 20/20 [00:04<00:00,  4.07it/s]
train loss: 7.72e-05 | test loss: 2.36e+00 | reg: 2.57e+03 : 100%|██| 20/20 [00:03<00:00,  5.83it/s]
train loss: 4.46e-05 | test loss: 0.00e+00 | reg: 2.49e+03 : 100%|██| 20/20 [00:03<00:00,  5.20it/s]
train loss: 4.64e-04 | test loss: 0.00e+00 | reg: 2.41e+03 : 100%|██| 20/20 [00:01<00:00, 10.67it/s]
train loss: 8.34e-05 | test loss: 1.26e-04 | reg: 2.38e+03 : 100%|██| 20/20 [00:01<00:00, 10.12it/s]


Training model with width: [4, 3], grid: 3, k: 4


train loss: 2.75e-04 | test loss: 0.00e+00 | reg: 8.22e+02 : 100%|██| 20/20 [00:04<00:00,  4.63it/s]
train loss: 2.11e-04 | test loss: 7.35e-01 | reg: 8.40e+02 : 100%|██| 20/20 [00:02<00:00,  7.74it/s]
train loss: 1.84e-04 | test loss: 0.00e+00 | reg: 8.32e+02 : 100%|██| 20/20 [00:03<00:00,  5.91it/s]
train loss: 2.50e-04 | test loss: 1.26e-04 | reg: 8.37e+02 : 100%|██| 20/20 [00:03<00:00,  6.66it/s]
train loss: 4.46e-05 | test loss: 3.13e-02 | reg: 8.25e+02 : 100%|██| 20/20 [00:05<00:00,  3.67it/s]


Training model with width: [4, 3], grid: 10, k: 3


train loss: 3.15e-05 | test loss: 0.00e+00 | reg: 2.36e+02 : 100%|██| 20/20 [00:03<00:00,  5.05it/s]
train loss: 0.00e+00 | test loss: 6.30e-05 | reg: 2.46e+02 : 100%|██| 20/20 [00:03<00:00,  5.64it/s]
train loss: 0.00e+00 | test loss: 0.00e+00 | reg: 2.40e+02 : 100%|██| 20/20 [00:01<00:00, 12.13it/s]
train loss: 5.46e-05 | test loss: 0.00e+00 | reg: 2.38e+02 : 100%|██| 20/20 [00:03<00:00,  5.62it/s]
train loss: 5.46e-05 | test loss: 6.30e-05 | reg: 2.33e+02 : 100%|██| 20/20 [00:04<00:00,  4.07it/s]


Training model with width: [4, 3], grid: 10, k: 4


train loss: 4.46e-05 | test loss: 0.00e+00 | reg: 2.14e+02 : 100%|██| 20/20 [00:04<00:00,  4.41it/s]
train loss: 0.00e+00 | test loss: 1.80e+00 | reg: 2.24e+02 : 100%|██| 20/20 [00:01<00:00, 16.23it/s]
train loss: 0.00e+00 | test loss: 0.00e+00 | reg: 2.29e+02 : 100%|██| 20/20 [00:04<00:00,  4.17it/s]
train loss: 0.00e+00 | test loss: 2.37e+00 | reg: 2.27e+02 : 100%|██| 20/20 [00:03<00:00,  5.45it/s]
train loss: 0.00e+00 | test loss: 9.48e-03 | reg: 2.24e+02 : 100%|██| 20/20 [00:03<00:00,  6.33it/s]


Training model with width: [4, 10, 3], grid: 3, k: 3


train loss: 3.92e-01 | test loss: 0.00e+00 | reg: 2.84e+02 : 100%|██| 20/20 [00:08<00:00,  2.39it/s]
train loss: 2.69e-04 | test loss: 1.12e+00 | reg: 2.94e+02 : 100%|██| 20/20 [00:06<00:00,  3.19it/s]
train loss: 5.76e-04 | test loss: 0.00e+00 | reg: 2.81e+02 : 100%|██| 20/20 [00:13<00:00,  1.49it/s]
train loss: 1.22e-04 | test loss: 3.38e-02 | reg: 2.83e+02 : 100%|██| 20/20 [00:11<00:00,  1.76it/s]
train loss: 1.09e-04 | test loss: 1.83e-03 | reg: 2.78e+02 : 100%|██| 20/20 [00:13<00:00,  1.44it/s]


Training model with width: [4, 10, 3], grid: 3, k: 4


train loss: 1.51e-04 | test loss: 0.00e+00 | reg: 2.08e+02 : 100%|██| 20/20 [00:19<00:00,  1.04it/s]
train loss: 0.00e+00 | test loss: 4.77e-01 | reg: 2.06e+02 : 100%|██| 20/20 [00:09<00:00,  2.06it/s]
train loss: 5.46e-05 | test loss: 8.17e-01 | reg: 2.14e+02 : 100%|██| 20/20 [00:11<00:00,  1.70it/s]
train loss: 9.97e-05 | test loss: 0.00e+00 | reg: 2.12e+02 : 100%|██| 20/20 [00:09<00:00,  2.08it/s]
train loss: 9.97e-05 | test loss: 2.60e-04 | reg: 2.09e+02 : 100%|██| 20/20 [00:09<00:00,  2.08it/s]


Training model with width: [4, 10, 3], grid: 10, k: 3


train loss: 2.32e-04 | test loss: 0.00e+00 | reg: 4.21e+02 : 100%|██| 20/20 [00:14<00:00,  1.41it/s]
train loss: 0.00e+00 | test loss: 8.91e-05 | reg: 4.36e+02 : 100%|██| 20/20 [00:03<00:00,  6.31it/s]
train loss: 0.00e+00 | test loss: 0.00e+00 | reg: 4.29e+02 : 100%|██| 20/20 [00:04<00:00,  4.31it/s]
train loss: 5.46e-05 | test loss: 0.00e+00 | reg: 4.27e+02 : 100%|██| 20/20 [00:05<00:00,  3.47it/s]
train loss: 0.00e+00 | test loss: 0.00e+00 | reg: 4.17e+02 : 100%|██| 20/20 [00:10<00:00,  1.83it/s]


Training model with width: [4, 10, 3], grid: 10, k: 4


train loss: 1.89e-03 | test loss: 0.00e+00 | reg: 2.30e+02 : 100%|██| 20/20 [00:10<00:00,  1.92it/s]
train loss: 4.46e-05 | test loss: 1.77e-01 | reg: 2.39e+02 : 100%|██| 20/20 [00:11<00:00,  1.79it/s]
train loss: 1.26e-04 | test loss: 2.29e+00 | reg: 2.35e+02 : 100%|██| 20/20 [00:13<00:00,  1.46it/s]
train loss: 1.05e-04 | test loss: 1.37e+00 | reg: 2.33e+02 : 100%|██| 20/20 [00:11<00:00,  1.78it/s]
train loss: 3.15e-05 | test loss: 6.23e-01 | reg: 2.30e+02 : 100%|██| 20/20 [00:13<00:00,  1.54it/s]


In [None]:
# Dump the raw grid-search summary: the (width, grid, k) combinations tried,
# together with their mean train and mean test scores.
print(result)

{'params': [([4, 3], 3, 3), ([4, 3], 3, 4), ([4, 3], 10, 3), ([4, 3], 10, 4), ([4, 10, 3], 3, 3), ([4, 10, 3], 3, 4), ([4, 10, 3], 10, 3), ([4, 10, 3], 10, 4)], 'mean_train_score': [1.0, 1.0, 1.0, 1.0, 0.996666669845581, 1.0, 1.0, 1.0], 'mean_test_score': [0.9733333230018616, 0.9933333277702332, 1.0, 0.9866666555404663, 0.9866666674613953, 0.9866666555404663, 1.0, 0.9666666507720947]}


In [None]:
# Report the (width, grid, k) combination selected with np.argmax over
# mean_test_score; np.argmax returns the first maximum, so when several
# combinations tie the earliest one tried is reported.
print("Best parameters: ", best_params)

Best parameters:  ([4, 3], 10, 3)
