In [1]:
!git clone https://github.com/AthanasiosDelis/faster-kan.git
!cd faster-kan && pip install .

Cloning into 'faster-kan'...
remote: Enumerating objects: 356, done.[K
remote: Counting objects: 100% (146/146), done.[K
remote: Compressing objects: 100% (86/86), done.[K
remote: Total 356 (delta 86), reused 111 (delta 58), pack-reused 210[K
Receiving objects: 100% (356/356), 955.13 KiB | 25.13 MiB/s, done.
Resolving deltas: 100% (167/167), done.
Processing /content/faster-kan
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pytest>=8.2.0 (from efficient-kan==0.1.0)
  Downloading pytest-8.2.2-py3-none-any.whl (339 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m339.9/339.9 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=2.3.0->efficient-kan==0.1.0)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.

In [2]:
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


In [3]:
!pip install --upgrade scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.3/13.3 MB[0m [31m72.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.2.2
    Uninstalling scikit-learn-1.2.2:
      Successfully uninstalled scikit-learn-1.2.2
Successfully installed scikit-learn-1.5.0


# ***Libraries :-***

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from tqdm import tqdm

import math
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tabulate import tabulate
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, TargetEncoder, LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

from fasterkan import fasterkan as fkan

import warnings
warnings.filterwarnings('ignore')

# ***Preprocessing :-***

In [5]:
filePath = "/content/drive/MyDrive/mainSimulationAccessTraces.csv"
df = pd.read_csv(filePath)

# For Missing Data
df['accessedNodeType'] = df['accessedNodeType'].fillna(value='/Malicious')

# For Unexpected Data
df.loc[df.value=='twenty',"value"] = '20.0'
df.loc[df.value=='false',"value"] = '0'
df.loc[df.value=='true',"value"] = '1'
df.loc[df.value=='none',"value"] = '0'
df.loc[df.value=='0',"value"] = '0.0'
df['value'] = df['value'].fillna(value='60.0')
df = df.drop(df.index[df.value.str.contains("org.*")])
df.value = df.value.astype(float)

# Dropping TimeStamp Column
df = df.drop('timestamp', axis=1)

# Converting Label class from Categorical to Numerical class
def cat2num(x) :
  if x == 'normal' :
    return 0
  else :
    return 1

df['normality'] = df['normality'].apply(cat2num)

df.head()

Unnamed: 0,sourceID,sourceAddress,sourceType,sourceLocation,destinationServiceAddress,destinationServiceType,destinationLocation,accessedNodeAddress,accessedNodeType,operation,value,normality
0,lightcontrol2,/agent2/lightcontrol2,/lightControler,BedroomParents,/agent2/lightcontrol2,/lightControler,BedroomParents,/agent2/lightcontrol2,/lightControler,registerService,0.0,0
1,lightcontrol3,/agent3/lightcontrol3,/lightControler,Dinningroom,/agent3/lightcontrol3,/lightControler,Dinningroom,/agent3/lightcontrol3,/lightControler,registerService,0.0,0
2,lightcontrol1,/agent1/lightcontrol1,/lightControler,BedroomChildren,/agent1/lightcontrol1,/lightControler,BedroomChildren,/agent1/lightcontrol1,/lightControler,registerService,0.0,0
3,lightcontrol4,/agent4/lightcontrol4,/lightControler,Kitchen,/agent4/lightcontrol4,/lightControler,Kitchen,/agent4/lightcontrol4,/lightControler,registerService,0.0,0
4,movement4,/agent4/movement4,/movementSensor,Kitchen,/agent4/movement4,/movementSensor,Kitchen,/agent4/movement4,/movementSensor,registerService,0.0,0


# ***Data Exploration :-***

In [6]:
sourceLocationData = []

for i, j in zip(df.sourceLocation.value_counts().index, df.sourceLocation.value_counts().values):
  sourceLocationData.append([i, j])

sourceLocationData = sorted(sourceLocationData, key=lambda x: x[1], reverse=True)
print(tabulate(sourceLocationData, headers=["Source Location", "Count"], tablefmt="grid"))

+-------------------+---------+
| Source Location   |   Count |
| Garage            |   39499 |
+-------------------+---------+
| Watterroom        |   38368 |
+-------------------+---------+
| Entrance          |   38217 |
+-------------------+---------+
| Bathroom          |   28461 |
+-------------------+---------+
| Showerroom        |   28315 |
+-------------------+---------+
| Kitchen           |   18962 |
+-------------------+---------+
| Dinningroom       |   13429 |
+-------------------+---------+
| BedroomChildren   |   12836 |
+-------------------+---------+
| room_6            |   11642 |
+-------------------+---------+
| Bedroom           |   11009 |
+-------------------+---------+
| BedroomParents    |   10946 |
+-------------------+---------+
| room_8            |   10798 |
+-------------------+---------+
| room_9            |   10752 |
+-------------------+---------+
| Livingroom        |   10620 |
+-------------------+---------+
| room_2            |   10606 |
+-------

In [None]:
destinationLocationData = []

for i, j in zip(df.destinationLocation.value_counts().index, df.destinationLocation.value_counts().values):
  destinationLocationData.append([i, j])

destinationLocationData = sorted(destinationLocationData, key=lambda x: x[1], reverse=True)
print(tabulate(destinationLocationData, headers=["Destination Location", "Count"], tablefmt="grid"))

+------------------------+---------+
| Destination Location   |   Count |
| Garage                 |   57100 |
+------------------------+---------+
| Entrance               |   54448 |
+------------------------+---------+
| Watterroom             |   38524 |
+------------------------+---------+
| Kitchen                |   19437 |
+------------------------+---------+
| BedroomChildren        |   12768 |
+------------------------+---------+
| Bathroom               |   12684 |
+------------------------+---------+
| Showerroom             |   12583 |
+------------------------+---------+
| BedroomParents         |   10938 |
+------------------------+---------+
| room_9                 |   10843 |
+------------------------+---------+
| room_2                 |   10828 |
+------------------------+---------+
| room_1                 |   10805 |
+------------------------+---------+
| Livingroom             |   10781 |
+------------------------+---------+
| Dinningroom            |   10759 |
+

In [None]:
accessedNodeTypeData = []

for i, j in zip(df.accessedNodeType.value_counts().index, df.accessedNodeType.value_counts().values):
  accessedNodeTypeData.append([i, j])

accessedNodeTypeData = sorted(accessedNodeTypeData, key=lambda x: x[1], reverse=True)
print(tabulate(accessedNodeTypeData, headers=["Accessed Node Type", "Count"], tablefmt="grid"))

+----------------------+---------+
| Accessed Node Type   |   Count |
| /sensorService       |  130155 |
+----------------------+---------+
| /derived/boolean     |   94725 |
+----------------------+---------+
| /basic/number        |   90010 |
+----------------------+---------+
| /basic/text          |   42428 |
+----------------------+---------+
| /thermostat          |     313 |
+----------------------+---------+
| /Malicious           |     148 |
+----------------------+---------+
| /basic/composed      |     100 |
+----------------------+---------+
| /lightControler      |      23 |
+----------------------+---------+
| /movementSensor      |      22 |
+----------------------+---------+
| /batteryService      |       6 |
+----------------------+---------+
| /doorLockService     |       5 |
+----------------------+---------+
| /washingService      |       3 |
+----------------------+---------+
| /smartPhone          |       3 |
+----------------------+---------+


In [None]:
operationData = []

for i, j in zip(df.operation.value_counts().index, df.operation.value_counts().values):
  operationData.append([i, j])

operationData = sorted(operationData, key=lambda x: x[1], reverse=True)
print(tabulate(operationData, headers=["Accessed Node Type", "Count"], tablefmt="grid"))

+----------------------+---------+
| Accessed Node Type   |   Count |
| read                 |  248061 |
+----------------------+---------+
| write                |  109648 |
+----------------------+---------+
| lockSubtree          |     148 |
+----------------------+---------+
| registerService      |      84 |
+----------------------+---------+


In [None]:
normalityData = []

for i, j in zip(df.normality.value_counts().index, df.normality.value_counts().values):
  normalityData.append([i, j])

normalityData = sorted(normalityData, key=lambda x: x[1], reverse=True)
print(tabulate(normalityData, headers=["Normality", "Count"], tablefmt="grid"))

+-------------------------------+---------+
| Normality                     |   Count |
| normal                        |  347924 |
+-------------------------------+---------+
| anomalous(DoSattack)          |    5780 |
+-------------------------------+---------+
| anomalous(scan)               |    1547 |
+-------------------------------+---------+
| anomalous(malitiousControl)   |     889 |
+-------------------------------+---------+
| anomalous(malitiousOperation) |     805 |
+-------------------------------+---------+
| anomalous(spying)             |     532 |
+-------------------------------+---------+
| anomalous(dataProbing)        |     342 |
+-------------------------------+---------+
| anomalous(wrongSetUp)         |     122 |
+-------------------------------+---------+


# ***Kolmogorov-Arnold-Network Implementation :-***

* Kolmogorov-Arnold Representation Theorem :-
  * $f(x) = f(x_{1},...,x_{n}) = ∑_{q=1}^{2n + 1}𝚽_{q}(∑_{p=1}^{n}Φ_{q, p}(x_{p}))$
* Gausian Radial Basis Function (RBF) :-
  * Computationally efficient and easy to calculate, with significant speed-ups in forward and backward passes.
  * $b_{i}(u) = e^{(-(u - u_{i})^{2}/h)}$
* Reflextional Switch Activation Function (RSWAF) :-
  * Uses function which have reflextionary symmetry, allows us to retain performance while reducing computation time.
  * $b_{i}(u) = 1 - (tanh((u - u_{i})/h))^{2}$

In [7]:
class KANLayer(nn.Module):
  def __init__(self,
               in_features,
               out_features,
               grid_size=5,
               spline_order=3,
               scale_noise=0.1,
               scale_base=1.0,
               scale_spline=1.0,
               enable_standalone_scale_spline=True,
               base_activation=torch.nn.SiLU,
               grid_eps=0.2,
               grid_range=[-1, 1],) :

    super(KANLayer, self).__init__()
    self.in_features = in_features
    self.out_features = out_features
    self.grid_size = grid_size
    self.spline_order = spline_order

    h = (grid_range[1] - grid_range[0])/grid_size

    grid = ((torch.arange(-spline_order, grid_size + spline_order + 1) * h + grid_range[0]).expand(in_features, -1).contiguous())
    self.register_buffer("grid", grid)

    self.base_weight = torch.nn.Parameter(torch.Tensor(out_features, in_features))
    self.spline_weight = torch.nn.Parameter(torch.Tensor(out_features, in_features, grid_size + spline_order))

    if enable_standalone_scale_spline:
      self.spline_scaler = torch.nn.Parameter(torch.Tensor(out_features, in_features))

    self.scale_noise = scale_noise
    self.scale_base = scale_base
    self.scale_spline = scale_spline
    self.enable_standalone_scale_spline = enable_standalone_scale_spline
    self.base_activation = base_activation()
    self.grid_eps = grid_eps

    self.reset_parameters()

  def reset_parameters(self) :
    torch.nn.init.kaiming_uniform_(self.base_weight, a=math.sqrt(5) * self.scale_base)

    with torch.no_grad() :
      noise = ((torch.rand(self.grid_size + 1, self.in_features, self.out_features)- 1 / 2)* self.scale_noise/ self.grid_size)
      self.spline_weight.data.copy_((self.scale_spline if not self.enable_standalone_scale_spline else 1.0)* self.curve2coeff(self.grid.T[self.spline_order : -self.spline_order],noise,))

      if self.enable_standalone_scale_spline :
        torch.nn.init.kaiming_uniform_(self.spline_scaler, a=math.sqrt(5) * self.scale_spline)

  def b_splines(self, x : torch.Tensor) :
    assert x.dim() == 2 and x.size(1) == self.in_features

    grid : torch.Tensor = (self.grid)
    x = x.unsqueeze(-1)
    bases = ((x >= grid[:, :-1]) & (x < grid[:, 1:])).to(x.dtype)

    for k in range(1, self.spline_order + 1) :
      bases = ((x - grid[:, : -(k + 1)])/ (grid[:, k:-1] - grid[:, : -(k + 1)])* bases[:, :, :-1]) + ((grid[:, k + 1 :] - x)/ (grid[:, k + 1 :] - grid[:, 1:(-k)])* bases[:, :, 1:])

    assert bases.size() == (x.size(0), self.in_features, self.grid_size + self.spline_order,)
    return bases.contiguous()

  def curve2coeff(self, x : torch.Tensor, y : torch.Tensor) :
    assert x.dim() == 2 and x.size(1) == self.in_features
    assert y.size() == (x.size(0), self.in_features, self.out_features)

    A = self.b_splines(x).transpose(0, 1)
    B = y.transpose(0, 1)
    solution = torch.linalg.lstsq(A, B).solution
    result = solution.permute(2, 0, 1)

    assert result.size() == (self.out_features, self.in_features, self.grid_size + self.spline_order,)
    return result.contiguous()

    @property
    def scaled_spline_weight(self):
      return self.spline_weight * (self.spline_scaler.unsqueeze(-1) if self.enable_standalone_scale_spline else 1.0)

  def forward(self, x : torch.Tensor) :
    assert x.size(-1) == self.in_features
    original_shape = x.shape
    x = x.view(-1, self.in_features)

    base_output = F.linear(self.base_activation(x), self.base_weight)
    spline_output = F.linear(self.b_splines(x).view(x.size(0), -1), self.scaled_spline_weight.view(self.out_features, -1),)
    output = base_output + spline_output

    output = output.view(*original_shape[:-1], self.out_features)
    return output

  def update_grid(self, x : torch.Tensor, margin=0.01) :
    assert x.dim() == 2 and x.size(1) == self.in_features
    batch = x.size(0)

    splines = self.b_splines(x)
    splines = splines.permute(1, 0, 2)

    orig_coeff = self.scaled_spline_weight
    orig_coeff = orig_coeff.permute(1, 2, 0)

    unreduced_spline_output = torch.bmm(splines, orig_coeff)
    unreduced_spline_output = unreduced_spline_output.permute(1, 0, 2)

    x_sorted = torch.sort(x, dim=0)[0]
    grid_adaptive = x_sorted[torch.linspace(0, batch - 1, self.grid_size + 1, dtype=torch.int64, device=x.device)]

    uniform_step = (x_sorted[-1] - x_sorted[0] + 2 * margin) / self.grid_size
    grid_uniform = (torch.arange(self.grid_size + 1, dtype=torch.float32, device=x.device).unsqueeze(1) * uniform_step + x_sorted[0] - margin)

    grid = self.grid_eps * grid_uniform + (1 - self.grid_eps) * grid_adaptive
    grid = torch.concatenate([grid[:1] - uniform_step * torch.arange(self.spline_order, 0, -1, device=x.device).unsqueeze(1), grid, grid[-1:] + uniform_step * torch.arange(1, self.spline_order + 1, device=x.device).unsqueeze(1),], dim=0,)

    self.grid.copy_(grid.T)
    self.spline_weight.data.copy_(self.curve2coeff(x, unreduced_spline_output))

  def regularization_loss(self, regularize_activation=1.0, regularize_entropy=1.0) :
    l1_fake = self.spline_weight.abs().mean(-1)
    regularization_loss_activation = l1_fake.sum()
    p = l1_fake / regularization_loss_activation
    regularization_loss_entropy = -torch.sum(p * p.log())

    return (regularize_activation * regularization_loss_activation + regularize_entropy * regularization_loss_entropy)

class KAN(torch.nn.Module):
  def __init__(
      self,
      layers_hidden,
      grid_size=5,
      spline_order=3,
      scale_noise=0.1,
      scale_base=1.0,
      scale_spline=1.0,
      base_activation=torch.nn.SiLU,
      grid_eps=0.02,
      grid_range=[-1, 1],
  ) :
    super(KAN, self).__init__()
    self.grid_size = grid_size
    self.spline_order = spline_order

    self.layers = torch.nn.ModuleList()
    for in_features, out_features in zip(layers_hidden, layers_hidden[1:]) :
      self.layers.append(KANLayer(
          in_features,
          out_features,
          grid_size=grid_size,
          spline_order=spline_order,
          scale_noise=scale_noise,
          scale_base=scale_base,
          scale_spline=scale_spline,
          base_activation=base_activation,
          grid_eps=grid_eps,
          grid_range=grid_range, ))

  def forward(self, x: torch.Tensor, update_grid=False) :
    for layer in self.layers :
      if update_grid :
        layer.update_grid(x)
        x = layer(x)
    return x

  def regularization_loss(self, regularize_activation=1.0, regularize_entropy=1.0):
    return sum(layer.regularization_loss(regularize_activation, regularize_entropy) for layer in self.layers)

In [8]:
X0 = df.iloc[:, 0].values.reshape(-1, 1)
X1 = df.iloc[:, 1].values.reshape(-1, 1)
X2 = df.iloc[:, 2].values.reshape(-1, 1)
X3 = df.iloc[:, 3].values.reshape(-1, 1)
X4 = df.iloc[:, 4].values.reshape(-1, 1)
X5 = df.iloc[:, 5].values.reshape(-1, 1)
X6 = df.iloc[:, 6].values.reshape(-1, 1)
X7 = df.iloc[:, 7].values.reshape(-1, 1)
X8 = df.iloc[:, 8].values.reshape(-1, 1)
X9 = df.iloc[:, 9].values.reshape(-1, 1)
X10 = df.iloc[:, 10].values.reshape(-1, 1)

y = df['normality'].values

X0 = TargetEncoder(target_type="binary").fit_transform(X0, y)
X1 = TargetEncoder(target_type="binary").fit_transform(X1, y)
X2 = TargetEncoder(target_type="binary").fit_transform(X2, y)
X3 = TargetEncoder(target_type="binary").fit_transform(X3, y)
X4 = TargetEncoder(target_type="binary").fit_transform(X4, y)
X5 = TargetEncoder(target_type="binary").fit_transform(X5, y)
X6 = TargetEncoder(target_type="binary").fit_transform(X6, y)
X7 = TargetEncoder(target_type="binary").fit_transform(X7, y)
X8 = TargetEncoder(target_type="binary").fit_transform(X8, y)
X9 = TargetEncoder(target_type="binary").fit_transform(X9, y)

X10 = StandardScaler().fit_transform(X10.reshape(-1, 1))

X = np.concatenate((X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10), axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=41)

X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)

y_train = torch.tensor(y_train, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.long)

In [9]:
print("Training Data Shape : ", X_train.shape)
print("Testing Data Shape : ", X_test.shape)
print("Training Label Shape : ", y_train.shape)
print("Testing Label Shape : ", y_test.shape)

Training Data Shape :  torch.Size([286352, 11])
Testing Data Shape :  torch.Size([71589, 11])
Training Label Shape :  torch.Size([286352])
Testing Label Shape :  torch.Size([71589])


In [10]:
trainLoader = DataLoader(torch.utils.data.TensorDataset(X_train, y_train), batch_size=64, shuffle=True)
valLoader = DataLoader(torch.utils.data.TensorDataset(X_test, y_test), batch_size=64, shuffle=False)

In [11]:
def count_parameters(model):
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    return total_params, trainable_params

In [12]:
class EarlyStopping:
    def __init__(self, patience=5, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss):
        if self.best_loss is None:
            self.best_loss = val_loss
        elif val_loss > self.best_loss - self.min_delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_loss = val_loss
            self.counter = 0

***Efficient -KAN***

In [14]:
model = KAN([X_train.shape[1], 100, len(set(y_train))])
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.8)
criterion = torch.nn.CrossEntropyLoss()

In [16]:
for name, param in model.named_parameters():
    if not param.requires_grad:
        print(f"Parameter {name} does not require grad!")
    else: print(f"Parameter {name} requires grad!")

Parameter layers.0.base_weight requires grad!
Parameter layers.0.spline_weight requires grad!
Parameter layers.0.spline_scaler requires grad!
Parameter layers.1.base_weight requires grad!
Parameter layers.1.spline_weight requires grad!
Parameter layers.1.spline_scaler requires grad!


In [17]:
# Train
epochs = 15

for epoch in range(epochs):
  model.train()
  epoch_loss = 0
  epoch_accuracy = 0
  with tqdm(trainLoader, unit="batch") as pbar:
        for i, (feats, labels) in enumerate(pbar):
            feats = feats.to(device).requires_grad_(True)
            labels = labels.to(device)
            #if not feats.requires_grad : print("Inputs do not require gradients!")

            optimizer.zero_grad()
            output = model(feats)
            #if output.grad_fn is None : print("Output does not have a grad_fn!")

            loss = criterion(output, labels)
            #if loss.grad_fn is None : print("Loss does not have a grad_fn!")

            loss.backward()
            optimizer.step()

            accuracy = (output.argmax(dim=1) == labels.to(device)).float().mean()
            epoch_loss += loss.item()
            epoch_accuracy += accuracy.item()
            pbar.set_postfix(loss=loss.item(), accuracy=accuracy.item(), lr=optimizer.param_groups[0]['lr'])

  scheduler.step()
  print(f"Epoch {epoch+1}/{epochs} - Loss: {epoch_loss/len(trainLoader):.4f} - Accuracy: {epoch_accuracy/len(trainLoader):.4f}")

model.eval()

val_loss = 0
val_accuracy = 0
with torch.no_grad():
    for feats, labels in valLoader:
        feats, labels = feats.to(device), labels.to(device)
        output = model(feats)
        loss = criterion(output, labels)
        accuracy = (output.argmax(dim=1) == labels).float().mean()
        val_loss += loss.item()
        val_accuracy += accuracy.item()

print(f"Validation - Loss: {val_loss/len(valLoader):.4f} - Accuracy: {val_accuracy/len(valLoader):.4f}")

100%|██████████| 4475/4475 [00:18<00:00, 243.30batch/s, accuracy=0, loss=2.38, lr=0.001]


Epoch 1/15 - Loss: 2.4296 - Accuracy: 0.0253


100%|██████████| 4475/4475 [00:19<00:00, 232.04batch/s, accuracy=0, loss=2.41, lr=0.0008]


Epoch 2/15 - Loss: 2.4296 - Accuracy: 0.0253


 93%|█████████▎| 4157/4475 [00:18<00:01, 225.65batch/s, accuracy=0.0156, loss=2.39, lr=0.00064]


KeyboardInterrupt: 

In [None]:
parameter_kan = count_parameters(model)
print(f"Total Parameters : {parameter_kan[0]}")
print(f"Trainable Parameters : {parameter_kan[1]}")

Total Parameters : 286363000
Trainable Parameters : 286363000


***Faster-KAN***

In [15]:
train_flag = True

model_ = fkan.FasterKAN([X_train.shape[1], 100, len(set(y_train))], grid_min=-1.2, grid_max=0.2, num_grids=5, exponent=2, inv_denominator=0.5, train_grid=train_flag, train_inv_denominator=train_flag).to(device)
model_.to(device)

optimizer_ = torch.optim.AdamW(model_.parameters(), lr=1e-3, weight_decay=1e-5)
scheduler_ = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.6, patience=1, verbose=True)
criterion_ = torch.nn.CrossEntropyLoss()
earlyStopping = EarlyStopping(patience=7, min_delta=0.001)

In [16]:
epochs_ = 100
val_loss = 0.0

for epoch in range(epochs_):
  model_.train()
  with tqdm(trainLoader) as pbar:
    for i, (feats, labels) in enumerate(pbar):
      feats = feats.to(device).requires_grad_(True)
      labels = labels.to(device)

      optimizer_.zero_grad()

      output = model_(feats)

      loss = criterion_(output, labels)
      loss.backward()

      torch.nn.utils.clip_grad_norm_(model_.parameters(), max_norm=1.0)

      optimizer_.step()

      accuracy = (output.argmax(dim=1) == labels.to(device)).float().mean()
      pbar.set_postfix(loss=loss.item(), accuracy=accuracy.item(), lr=optimizer_.param_groups[0]['lr'])

  model_.eval()
  with torch.no_grad():
    val_correct = 0
    for feats, labels in valLoader:
        feats = feats.to(device)
        output = model_(feats)
        val_loss += criterion_(output, labels.to(device)).item()
        preds = output.argmax(dim=1)
        val_correct += (preds == labels.to(device)).sum().item()


  val_loss /= len(valLoader.dataset)
  val_accuracy = val_correct / len(valLoader.dataset)

  scheduler_.step(val_loss)

  print(f"Epoch {epoch + 1}, Val Loss: {val_loss}, Val Accuracy: {val_accuracy}")
  print(f"Current Learning Rate: {optimizer.param_groups[0]['lr']}")

  earlyStopping(val_loss)
  if earlyStopping.early_stop:
    print("Triggering Early Stop!")
    break

100%|██████████| 4475/4475 [05:46<00:00, 12.92it/s, accuracy=1, loss=0.00233, lr=0.001]


Epoch 1, Val Loss: 0.0003442798067289987, Val Accuracy: 0.9914372319769797
Current Learning Rate: 0.001


100%|██████████| 4475/4475 [05:43<00:00, 13.01it/s, accuracy=1, loss=2.5e-5, lr=0.001]


Epoch 2, Val Loss: 0.00025768938037364655, Val Accuracy: 0.9931553730321697
Current Learning Rate: 0.001


100%|██████████| 4475/4475 [05:44<00:00, 12.97it/s, accuracy=1, loss=1.6e-5, lr=0.001]


Epoch 3, Val Loss: 0.00025329322583038, Val Accuracy: 0.9926105966000364
Current Learning Rate: 0.001


100%|██████████| 4475/4475 [05:42<00:00, 13.07it/s, accuracy=1, loss=0.00098, lr=0.001]


Epoch 4, Val Loss: 0.000269417303026877, Val Accuracy: 0.9932950592968194
Current Learning Rate: 0.001


100%|██████████| 4475/4475 [05:42<00:00, 13.07it/s, accuracy=1, loss=0.0191, lr=0.001]


Epoch 5, Val Loss: 0.00036488787682472306, Val Accuracy: 0.9901660869686684
Current Learning Rate: 0.0006


100%|██████████| 4475/4475 [05:42<00:00, 13.08it/s, accuracy=1, loss=0.000997, lr=0.001]


Epoch 6, Val Loss: 0.0003810813055150816, Val Accuracy: 0.983726550168322
Current Learning Rate: 0.0006


100%|██████████| 4475/4475 [05:43<00:00, 13.03it/s, accuracy=0.938, loss=0.372, lr=0.001]


Epoch 7, Val Loss: 0.00037272968605219436, Val Accuracy: 0.9899844948246239
Current Learning Rate: 0.00035999999999999997


100%|██████████| 4475/4475 [05:43<00:00, 13.02it/s, accuracy=0.938, loss=0.0666, lr=0.001]


Epoch 8, Val Loss: 0.000431507375765532, Val Accuracy: 0.9896911536688597
Current Learning Rate: 0.00035999999999999997
Triggering Early Stop!
