# KAN

https://arxiv.org/html/2404.19756v1#abstract

In [49]:
import numpy as np
import scipy.interpolate as interp
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch.nn.functional as F
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Layer
from tensorflow.keras import Model

# Torch

In [62]:
class TorchSplineFunction(nn.Module):
    def __init__(self, knots, coeffs):
        super(TorchSplineFunction, self).__init__()
        self.knots = nn.Parameter(torch.tensor(knots, dtype=torch.float32))
        self.coeffs = nn.Parameter(torch.tensor(coeffs, dtype=torch.float32))

    def forward(self, x):
        x = torch.clamp(x, min=self.knots.min(), max=self.knots.max())
        distances = (x.unsqueeze(-1) - self.knots.unsqueeze(0)).abs()
        weights = 1.0 / (distances + 1e-8)
        weights = weights / weights.sum(dim=-1, keepdim=True)
        return (weights * self.coeffs).sum(dim=-1)

class TorchKANLayer(nn.Module):
    def __init__(self, input_dim, output_dim, num_knots):
        super(TorchKANLayer, self).__init__()
        self.splines = nn.ModuleList([TorchSplineFunction(self.initialize_knots(num_knots), 
                                                     self.initialize_coeffs(num_knots)) 
                                      for _ in range(input_dim * output_dim)])
        self.input_dim = input_dim
        self.output_dim = output_dim

    def initialize_knots(self, num_knots):
        return np.linspace(0, 1, num_knots)

    def initialize_coeffs(self, num_knots):
        return np.random.randn(num_knots)

    def forward(self, x):
        batch_size = x.size(0)
        outputs = []
        for i in range(self.output_dim):
            output = sum(self.splines[i * self.input_dim + j](x[:, j]) for j in range(self.input_dim))
            outputs.append(output)
        return torch.stack(outputs, dim=1).view(batch_size, self.output_dim)

class TorchKAN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, num_knots):
        super(TorchKAN, self).__init__()
        layers = [TorchKANLayer(input_dim, hidden_dim, num_knots)]
        for _ in range(num_layers - 2):
            layers.append(TorchKANLayer(hidden_dim, hidden_dim, num_knots))
        layers.append(TorchKANLayer(hidden_dim, output_dim, num_knots))
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)

In [67]:
def torch_train_model(model, train_loader, criterion, optimizer, num_epochs):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            
            # Calculate accuracy
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
        
        accuracy = 100 * correct / total
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader)}, Accuracy: {accuracy}%")

def torch_evaluate_model(model, test_loader, criterion):
    model.eval()
    total_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            total_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    
    accuracy = 100 * correct / total
    print(f"Test Loss: {total_loss/len(test_loader)}, Accuracy: {accuracy}%")

In [64]:
# Hyperparameters
input_dim = 4
hidden_dim = 20
output_dim = 3
num_layers = 5
num_knots = 20
num_epochs = 50
learning_rate = 0.001


iris = load_iris()
X, y = iris.data, iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)  # Note: No unsqueeze for CrossEntropyLoss
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)  # Note: No unsqueeze for CrossEntropyLoss

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

model = TorchKAN(input_dim, hidden_dim, output_dim, num_layers, num_knots)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train and Evaluate
torch_train_model(model, train_loader, criterion, optimizer, num_epochs)
torch_evaluate_model(model, test_loader, criterion)

Epoch [1/50], Loss: 2.86202871799469
Epoch [2/50], Loss: 3.001336485147476
Epoch [3/50], Loss: 3.301159769296646
Epoch [4/50], Loss: 2.6966452300548553
Epoch [5/50], Loss: 3.1048040091991425
Epoch [6/50], Loss: 2.866089105606079
Epoch [7/50], Loss: 2.9012086987495422
Epoch [8/50], Loss: 2.6421312391757965
Epoch [9/50], Loss: 2.3339592814445496
Epoch [10/50], Loss: 2.4590850472450256
Epoch [11/50], Loss: 2.377817839384079
Epoch [12/50], Loss: 2.1627655178308487
Epoch [13/50], Loss: 1.9259559214115143
Epoch [14/50], Loss: 2.2630555480718613
Epoch [15/50], Loss: 2.1690630316734314
Epoch [16/50], Loss: 1.9556254893541336
Epoch [17/50], Loss: 2.002310633659363
Epoch [18/50], Loss: 1.8260233774781227
Epoch [19/50], Loss: 1.8134599179029465
Epoch [20/50], Loss: 1.6560426205396652
Epoch [21/50], Loss: 2.0070666521787643
Epoch [22/50], Loss: 1.7113595008850098
Epoch [23/50], Loss: 1.9828859716653824
Epoch [24/50], Loss: 1.9659209996461868
Epoch [25/50], Loss: 2.195171818137169
Epoch [26/50], Lo

# TensorFlow

In [60]:
class TensorFlowSplineFunction(Layer):
    def __init__(self, knots, coeffs):
        super(TensorFlowSplineFunction, self).__init__()
        self.knots = tf.Variable(knots, trainable=True, dtype=tf.float32)
        self.coeffs = tf.Variable(coeffs, trainable=True, dtype=tf.float32)

    def call(self, x):
        x = tf.clip_by_value(x, tf.reduce_min(self.knots), tf.reduce_max(self.knots))
        distances = tf.abs(x[:, tf.newaxis] - self.knots[tf.newaxis, :])
        weights = 1.0 / (distances + 1e-8)
        weights = weights / tf.reduce_sum(weights, axis=-1, keepdims=True)
        return tf.reduce_sum(weights * self.coeffs, axis=-1)

class TensorFlowKANLayer(Layer):
    def __init__(self, input_dim, output_dim, num_knots):
        super(TensorFlowKANLayer, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.splines = [
            TensorFlowSplineFunction(
                tf.linspace(0.0, 1.0, num_knots),
                tf.random.normal((num_knots,))
            )
            for _ in range(input_dim * output_dim)
        ]

    def call(self, x):
        outputs = []
        for i in range(self.output_dim):
            output = sum(self.splines[i * self.input_dim + j](x[:, j])
                         for j in range(self.input_dim))
            outputs.append(output)
        return tf.stack(outputs, axis=1)

class TensorFlowKAN(Model):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, num_knots):
        super(TensorFlowKAN, self).__init__()
        self.layers_list = [TensorFlowKANLayer(input_dim, hidden_dim, num_knots)]
        for _ in range(num_layers - 2):
            self.layers_list.append(TensorFlowKANLayer(hidden_dim, hidden_dim, num_knots))
        self.layers_list.append(TensorFlowKANLayer(hidden_dim, output_dim, num_knots))

    def call(self, x):
        for layer in self.layers_list:
            x = layer(x)
        return x

In [66]:
# Hyperparameters
input_dim = 4
hidden_dim = 20
output_dim = 3
num_layers = 3
num_knots = 5
num_epochs = 500
learning_rate = 0.05

iris = load_iris()
X, y = iris.data, iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(16).shuffle(buffer_size=100)
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(16)

model = TensorFlowKAN(input_dim, hidden_dim, output_dim, num_layers, num_knots)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(train_dataset, epochs=num_epochs)

test_loss, test_accuracy = model.evaluate(test_dataset)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

# Multi Layer Perceptrons

In [56]:
class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu(out)
        out = self.fc3(out)
        return out

In [69]:
# Hyperparameters
input_dim = 4
hidden_dim = 10
output_dim = 3
num_epochs = 500
learning_rate = 0.01

iris = load_iris()
X, y = iris.data, iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

mlp_model = MLP(input_dim, hidden_dim, output_dim)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(mlp_model.parameters(), lr=learning_rate)

# Train and Evaluate Functions
def mlp_train_model(model, train_loader, criterion, optimizer, num_epochs):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader)}")

def mlp_evaluate_model(model, test_loader, criterion):
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        correct = 0
        total = 0
        for inputs, targets in test_loader:
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            total_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    accuracy = 100 * correct / total
    print(f"Test Loss: {total_loss/len(test_loader)}, Accuracy: {accuracy}%")

mlp_train_model(mlp_model, train_loader, criterion, optimizer, num_epochs)
mlp_evaluate_model(mlp_model, test_loader, criterion)

Epoch [1/500], Loss: 0.9599684700369835
Epoch [2/500], Loss: 0.7031642124056816
Epoch [3/500], Loss: 0.5391526818275452
Epoch [4/500], Loss: 0.4470079317688942
Epoch [5/500], Loss: 0.39584067836403847
Epoch [6/500], Loss: 0.3200357463210821
Epoch [7/500], Loss: 0.2531310822814703
Epoch [8/500], Loss: 0.18112275283783674
Epoch [9/500], Loss: 0.13655362324789166
Epoch [10/500], Loss: 0.1065359462518245
Epoch [11/500], Loss: 0.08960304176434875
Epoch [12/500], Loss: 0.0821549731772393
Epoch [13/500], Loss: 0.06753220810787752
Epoch [14/500], Loss: 0.09448243724182248
Epoch [15/500], Loss: 0.07362987729720771
Epoch [16/500], Loss: 0.0812568892724812
Epoch [17/500], Loss: 0.060002128477208316
Epoch [18/500], Loss: 0.05889482802012935
Epoch [19/500], Loss: 0.054784952313639224
Epoch [20/500], Loss: 0.05500718264374882
Epoch [21/500], Loss: 0.0723117520683445
Epoch [22/500], Loss: 0.051754513406194746
Epoch [23/500], Loss: 0.050828117033233866
Epoch [24/500], Loss: 0.06649793830001727
Epoch [