In [1]:
import matplotlib.pyplot as plt
import torch
import numpy as np
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
import pickle
from sklearn.metrics import f1_score, accuracy_score
from kan.KAN import KAN
import itertools
from rdkit import Chem
from rdkit.Chem import MACCSkeys

## Preparing data

In [2]:
df = pd.read_csv('MACCS_melanin_classes.csv')

In [3]:
X = np.array(df.iloc[:, 0:166])
y = np.array(df['Class'])

In [4]:
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=10)

## Train KAN

In [5]:
dataset = {} 
dataset['train_input'] = torch.tensor(Xtrain, dtype=torch.int64) 
dataset['train_label'] = torch.tensor(ytrain.reshape(-1, 1), dtype=torch.int64) 
dataset['test_input'] = torch.tensor(Xtest, dtype=torch.int64) 
dataset['test_label'] = torch.tensor(ytest.reshape(-1, 1), dtype=torch.int64) 
 
X = dataset['train_input'] 
y = dataset['train_label']

In [9]:
model = KAN(width=[166,1,2], seed=2024)

def train_acc(): 
    return torch.mean((torch.round(model(dataset['train_input'])[:,0]) == dataset['train_label'][:,0]).float()) 
 
def test_acc(): 
    return torch.mean((torch.round(model(dataset['test_input'])[:,0]) == dataset['test_label'][:,0]).float()) 
 
results = model.train(dataset, opt="LBFGS", steps=10, metrics=(train_acc, test_acc), lamb = 0.1, lamb_entropy = 0.0) 
results['train_acc'][-1], results['test_acc'][-1]

train loss: 4.42e-01 | test loss: 4.71e-01 | reg: 5.98e+00 : 100%|██| 10/10 [00:12<00:00,  1.22s/it]


(0.6971153616905212, 0.6410256624221802)

## Hyperparameter optimization

In [19]:
k_values =  [2, 3, 5]
grid_values = [1, 5, 10]  

best_accuracy = 0
best_params = {}

for grid_val, k_val in itertools.product(grid_values, k_values):
    model = KAN(width=[166,1,2], grid=grid_val, seed=2024)
    results = model.train(dataset, opt="LBFGS", steps=10, metrics=(train_acc, test_acc), lamb =  0.1, lamb_entropy = 0.0)
    
    train_accuracy = results['train_acc'][-1]
    test_accuracy = results['test_acc'][-1]
    
    if test_accuracy > best_accuracy:
        best_accuracy = test_accuracy
        best_params = {'grid': grid_val, 'k_val': k_val}

print("Best parameters found:")
print(best_params)
print("Best test accuracy:", best_accuracy)

train loss: 5.86e-01 | test loss: 5.96e-01 | reg: 7.23e+00 : 100%|██| 10/10 [00:08<00:00,  1.18it/s]
train loss: 5.86e-01 | test loss: 5.96e-01 | reg: 7.23e+00 : 100%|██| 10/10 [00:09<00:00,  1.07it/s]
train loss: 5.86e-01 | test loss: 5.96e-01 | reg: 7.23e+00 : 100%|██| 10/10 [00:08<00:00,  1.15it/s]
train loss: 3.94e-01 | test loss: 4.23e-01 | reg: 3.15e+00 : 100%|██| 10/10 [00:24<00:00,  2.45s/it]
train loss: 3.91e-01 | test loss: 4.20e-01 | reg: 3.35e+00 : 100%|██| 10/10 [00:21<00:00,  2.20s/it]
train loss: 3.94e-01 | test loss: 4.23e-01 | reg: 3.15e+00 : 100%|██| 10/10 [00:27<00:00,  2.74s/it]
train loss: 3.86e-01 | test loss: 4.21e-01 | reg: 1.82e+00 : 100%|██| 10/10 [00:24<00:00,  2.43s/it]
train loss: 3.86e-01 | test loss: 4.19e-01 | reg: 2.56e+00 : 100%|██| 10/10 [00:17<00:00,  1.75s/it]
train loss: 3.86e-01 | test loss: 4.19e-01 | reg: 2.56e+00 : 100%|██| 10/10 [00:17<00:00,  1.75s/it]

Best parameters found:
{'grid': 5, 'k_val': 2}
Best test accuracy: 0.7564102411270142





In [22]:
opt_model = KAN(width=[166,1,2], k=2, grid=5, seed=2024)

def train_acc(): 
    return torch.mean((torch.round(model(dataset['train_input'])[:,0]) == dataset['train_label'][:,0]).float()) 
 
def test_acc(): 
    return torch.mean((torch.round(model(dataset['test_input'])[:,0]) == dataset['test_label'][:,0]).float()) 
 
results = opt_model.train(dataset, opt="LBFGS", steps=10, metrics=(train_acc, test_acc), lamb = 0.1, lamb_entropy = 0.0) 
results['train_acc'][-1], results['test_acc'][-1]

train loss: 3.84e-01 | test loss: 4.14e-01 | reg: 2.88e+00 : 100%|██| 10/10 [00:16<00:00,  1.64s/it]


(0.7884615659713745, 0.7564102411270142)

## Save model

In [46]:
torch.save(model.state_dict(), 'KAN_melanin.pth')