In [1]:
import torch
from torch import nn
import torch.nn.functional as F
# from torchsummary import summary
from ptflops import get_model_complexity_info

from kan import KAN
from efficient_kan import KAN as EffKAN
from kan_convolutional import KANLinear
from kan_convolutional.KANConv import KAN_Convolutional_Layer
from torchkan.KANvolver import KANvolver as kanv

# Perceptrons

## MLP

In [5]:
class MLP(nn.Module):
    def __init__(self, hidden):
        super(MLP, self).__init__()
        # self.fc1 = nn.Linear(1 , 2)
        self.fc2 = nn.Linear(28 * 28, 784)
        self.fc3 = nn.Linear(784, hidden)
        self.fc4 = nn.Linear(hidden, 10)
    def forward(self, x):
        # x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        x = self.fc4(x)
        return x
    

class MLPBig(nn.Module):
    def __init__(self, hidden):
        super(MLPBig, self).__init__()
        self.fc1 = nn.Linear(28*28, 28*28)
        self.fc2 = nn.Linear(28*28, 285)
        self.fc3 = nn.Linear(285, hidden)
        self.fc4 = nn.Linear(hidden, 10)

    def forward(self, x):
        x = x.view(-1, 28*28)
        # x = (x / 0.5 - 1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        x = self.fc4(x)
        x = F.log_softmax(x, dim=1)
        return x

In [6]:
model = MLP(256)
with torch.cuda.device(0):
    flops, params = get_model_complexity_info(model, (784,), as_strings=True)
    print(f"FLOPs: {flops}")
    print(f"Params: {params}")

MLP(
  818.97 k, 100.000% Params, 818.97 KMac, 100.000% MACs, 
  (fc2): Linear(615.44 k, 75.148% Params, 615.44 KMac, 75.148% MACs, in_features=784, out_features=784, bias=True)
  (fc3): Linear(200.96 k, 24.538% Params, 200.96 KMac, 24.538% MACs, in_features=784, out_features=256, bias=True)
  (fc4): Linear(2.57 k, 0.314% Params, 2.57 KMac, 0.314% MACs, in_features=256, out_features=10, bias=True)
)
FLOPs: 818.97 KMac
Params: 818.97 k


In [37]:
model = MLPBig(256)
with torch.cuda.device(0):
    flops, params = get_model_complexity_info(model, (784,), as_strings=True)
    print(f"FLOPs: {flops}")
    print(f"Params: {params}")

MLPBig(
  914.95 k, 100.000% Params, 914.95 KMac, 100.000% MACs, 
  (fc1): Linear(615.44 k, 67.265% Params, 615.44 KMac, 67.265% MACs, in_features=784, out_features=784, bias=True)
  (fc2): Linear(223.72 k, 24.452% Params, 223.72 KMac, 24.452% MACs, in_features=784, out_features=285, bias=True)
  (fc3): Linear(73.22 k, 8.002% Params, 73.22 KMac, 8.002% MACs, in_features=285, out_features=256, bias=True)
  (fc4): Linear(2.57 k, 0.281% Params, 2.57 KMac, 0.281% MACs, in_features=256, out_features=10, bias=True)
)
FLOPs: 914.95 KMac
Params: 914.95 k


## EfficientKAN

In [6]:
model = EffKAN([784, 128, 10], grid_size=5, spline_order=3, sp_trainable=False, sb_trainable=False)

with torch.cuda.device(0):
    flops, params = get_model_complexity_info(model, (784,), as_strings=True)
    print(f"FLOPs: {flops}")
    print(f"Params: {params}")

KAN(
  0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
  (layers): ModuleList(
    (0-1): 2 x KANLinear(
      0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
      (base_activation): SiLU(0, 0.000% Params, 0.0 Mac, 0.000% MACs, )
    )
  )
)
FLOPs: 912 Mac
Params: 813.06 k


In [7]:
model = EffKAN([784, 128, 10], grid_size=5, spline_order=3, sp_trainable=True, sb_trainable=False)

with torch.cuda.device(0):
    flops, params = get_model_complexity_info(model, (784,), as_strings=True)
    print(f"FLOPs: {flops}")
    print(f"Params: {params}")

KAN(
  0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
  (layers): ModuleList(
    (0-1): 2 x KANLinear(
      0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
      (base_activation): SiLU(0, 0.000% Params, 0.0 Mac, 0.000% MACs, )
    )
  )
)
FLOPs: 912 Mac
Params: 914.69 k


In [3]:
# KAN
class Efficient_KAN(nn.Module):
    def __init__(self, grid, device='cuda', output_size=10):
        super(Efficient_KAN, self).__init__()
        # self.layers = [input_size, 103, output_size] if dataset == datasets.MNIST \
        self.layers = [28*28, 128, output_size]
        self.model = EffKAN(self.layers, grid_size=grid, sb_trainable=True, sp_trainable=True).to(device)

    def forward(self, x):
        x = x.view(-1, self.layers[0])
        x = self.model(x)
        x = F.log_softmax(x, dim=1)
        return x
    
# KAN
class Efficient_KAN_Fix(nn.Module):
    def __init__(self, grid, device='cuda', output_size=10):
        super(Efficient_KAN_Fix, self).__init__()
        # self.layers = [input_size, 103, output_size] if dataset == datasets.MNIST \
        self.layers = [28*28, 128, output_size]
        self.model = EffKAN(self.layers, grid_size=grid, sb_trainable=False, sp_trainable=True).to(device)

    def forward(self, x):
        x = x.view(-1, self.layers[0])
        x = self.model(x)
        x = F.log_softmax(x, dim=1)
        return x

with torch.cuda.device(0):
    flops, params = get_model_complexity_info(Efficient_KAN_Fix(grid=5), (784,), as_strings=True)
    print(f"FLOPs: {flops}")
    print(f"Params: {params}")

Efficient_KAN_Fix(
  0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
  (model): KAN(
    0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
    (layers): ModuleList(
      (0-1): 2 x KANLinear(
        0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
        (base_activation): SiLU(0, 0.000% Params, 0.0 Mac, 0.000% MACs, )
      )
    )
  )
)
FLOPs: 912 Mac
Params: 914.69 k


## KAN

In [8]:
model = KAN([784, 128, 10], grid=5, k=3, symbolic_enabled=False, sp_trainable=False, sb_trainable=False, bias_trainable=False)

with torch.cuda.device(0):
    flops, params = get_model_complexity_info(model, (784,), valid=False, as_strings=True)
    print(f"FLOPs: {flops}")
    print(f"Params: {params}")

KAN(
  0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
  (biases): ModuleList(
    (0): Linear(0, 0.000% Params, 0.0 Mac, 0.000% MACs, in_features=128, out_features=1, bias=False)
    (1): Linear(0, 0.000% Params, 0.0 Mac, 0.000% MACs, in_features=10, out_features=1, bias=False)
  )
  (act_fun): ModuleList(
    (0-1): 2 x KANLayer(
      0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
      (base_fun): SiLU(0, 0.000% Params, 0.0 Mac, 0.000% MACs, )
    )
  )
  (base_fun): SiLU(0, 0.000% Params, 0.0 Mac, 0.000% MACs, )
)
FLOPs: 101.63 KMac
Params: 813.06 k


  self.acts_scale_std.append(torch.std(postacts, dim=0))


In [9]:
model = KAN([784, 128, 10], grid=5, k=3, symbolic_enabled=False, sp_trainable=True, sb_trainable=False, bias_trainable=False)

with torch.cuda.device(0):
    flops, params = get_model_complexity_info(model, (784,), valid=False, as_strings=True)
    print(f"FLOPs: {flops}")
    print(f"Params: {params}")

KAN(
  0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
  (biases): ModuleList(
    (0): Linear(0, 0.000% Params, 0.0 Mac, 0.000% MACs, in_features=128, out_features=1, bias=False)
    (1): Linear(0, 0.000% Params, 0.0 Mac, 0.000% MACs, in_features=10, out_features=1, bias=False)
  )
  (act_fun): ModuleList(
    (0-1): 2 x KANLayer(
      0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
      (base_fun): SiLU(0, 0.000% Params, 0.0 Mac, 0.000% MACs, )
    )
  )
  (base_fun): SiLU(0, 0.000% Params, 0.0 Mac, 0.000% MACs, )
)
FLOPs: 101.63 KMac
Params: 914.69 k


  self.acts_scale_std.append(torch.std(postacts, dim=0))


In [3]:
class Py_KAN_Fix(nn.Module):
    def __init__(self, grid, device='cuda'):
        super(Py_KAN_Fix, self).__init__()
        # self.layers = [input_size, 73, 10] if dataset == datasets.MNIST \
        self.layers = [28*28, 128, 10]
        self.model = KAN(self.layers, grid=grid, device=device, sb_trainable=False,
                           sp_trainable=True, bias_trainable=False, symbolic_enabled=False).to(device)

    def forward(self, x):
        x = x.view(-1, self.layers[0])
        x = self.model(x)
        x = F.log_softmax(x, dim=1)
        return x
    
class Py_KAN(nn.Module):
    def __init__(self, grid, device='cuda'):
        super(Py_KAN, self).__init__()
        # self.layers = [input_size, 73, 10] if dataset == datasets.MNIST \
        self.layers = [28*28, 128, 10]
        self.model = KAN(self.layers, grid=grid, device=device, sb_trainable=True,
                           sp_trainable=True, bias_trainable=True, symbolic_enabled=False).to(device)

    # def train(self, epochs=5, on_epoch_end=None, lr=1, train_loader=None):
    #     self.model.train(train_loader, test_loader, lr=lr, epochs=epochs, device=device,
    #                      metrics=MulticlassAccuracy(num_classes=10).to(device), opt="Adam",
    #                      loss_fn=nn.NLLLoss())

    def forward(self, x):
        x = x.view(-1, self.layers[0])
        x = self.model(x)
        x = F.log_softmax(x, dim=1)
        return x
    
with torch.cuda.device(0):
    flops, params = get_model_complexity_info(Py_KAN_Fix(grid=5), (784,), valid=False, as_strings=True)
    print(f"FLOPs: {flops}")
    print(f"Params: {params}")

Py_KAN_Fix(
  0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
  (model): KAN(
    0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
    (biases): ModuleList(
      (0): Linear(0, 0.000% Params, 0.0 Mac, 0.000% MACs, in_features=128, out_features=1, bias=False)
      (1): Linear(0, 0.000% Params, 0.0 Mac, 0.000% MACs, in_features=10, out_features=1, bias=False)
    )
    (act_fun): ModuleList(
      (0-1): 2 x KANLayer(
        0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
        (base_fun): SiLU(0, 0.000% Params, 0.0 Mac, 0.000% MACs, )
      )
    )
    (base_fun): SiLU(0, 0.000% Params, 0.0 Mac, 0.000% MACs, )
  )
)
FLOPs: 101.63 KMac
Params: 914.69 k


## Prova EffKAN vs KAN

In [3]:
model = KAN([50, 2], grid=5, k=3, symbolic_enabled=False, sp_trainable=False, sb_trainable=False, bias_trainable=False)

with torch.cuda.device(0):
    flops, params = get_model_complexity_info(model, (50,), eval=False, as_strings=True)
    print(f"FLOPs: {flops}")
    print(f"Params: {params}")

KAN(
  0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
  (biases): ModuleList(
    (0): Linear(0, 0.000% Params, 0.0 Mac, 0.000% MACs, in_features=2, out_features=1, bias=False)
  )
  (act_fun): ModuleList(
    (0): KANLayer(
      0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
      (base_fun): SiLU(0, 0.000% Params, 0.0 Mac, 0.000% MACs, )
    )
  )
  (base_fun): SiLU(0, 0.000% Params, 0.0 Mac, 0.000% MACs, )
)
FLOPs: 100 Mac
Params: 800


In [18]:
model = EffKAN([50, 2], grid_size=5, spline_order=3, sp_trainable=False, sb_trainable=False)

with torch.cuda.device(0):
    flops, params = get_model_complexity_info(model, (50,), as_strings=True)
    print(f"FLOPs: {flops}")
    print(f"Params: {params}")

KAN(
  0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
  (layers): ModuleList(
    (0): KANLinear(
      0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
      (base_activation): SiLU(0, 0.000% Params, 0.0 Mac, 0.000% MACs, )
    )
  )
)
FLOPs: 50 Mac
Params: 800


# Convolutionals

## Provo CKAN vs CONV

In [30]:
# KAN_Convolutional_Layer + MLP
class CKAN(nn.Module):
    def __init__(self, device='cpu', wb_train=False, ws_train=False, ss_train=False):
        super().__init__()
        self.conv1 = KAN_Convolutional_Layer(
            n_convs=5,
            kernel_size=(3, 3),
            grid_size=3,
            device=device,
            base_w_trainable=wb_train,
            spline_s_trainable=ss_train,
            spline_w_trainable=ws_train
        )
        self.conv2 = KAN_Convolutional_Layer(
            n_convs=5,
            kernel_size=(3, 3),
            grid_size=3,
            device=device,
            base_w_trainable=wb_train,
            spline_s_trainable=ss_train,
            spline_w_trainable=ws_train
        )
        # self.bn1 = nn.BatchNorm2d(5)
        self.flat = nn.Flatten()

        self.linear1 = nn.Linear(14400, 1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.flat(x)
        x = self.linear1(x)
        # x = self.bn1(x)
        return x


# Conv2d + KAN
class CONV(nn.Module):
    def __init__(self, wb_train=True, ws_train=True, ss_train=True):
        super(CONV, self).__init__()
        # Convolutional layer, assuming an input with 1 channel (grayscale image)
        # and producing 16 output channels, with a kernel size of 3x3
        self.conv1 = nn.Conv2d(1, 5, kernel_size=3, padding=1)

    def forward(self, x):
        x = self.conv1(x)

        return x

In [31]:
ckan = CKAN()
kan = CONV()

for model in [ckan, kan]:
    with torch.cuda.device(0):
        flops, params = get_model_complexity_info(model, (1, 28, 28,), as_strings=True)
        print(f"FLOPs: {flops}")
        print(f"Params: {params}")

CKAN(
  14.4 k, 96.386% Params, 14.4 KMac, 8.256% MACs, 
  (conv1): KAN_Convolutional_Layer(
    0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
    (convs): ModuleList(
      (0-4): 5 x KAN_Convolution(
        0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
        (conv): KANLinear(
          0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
          (base_activation): SiLU(0, 0.000% Params, 0.0 Mac, 0.000% MACs, )
        )
      )
    )
  )
  (conv2): KAN_Convolutional_Layer(
    0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
    (convs): ModuleList(
      (0-4): 5 x KAN_Convolution(
        0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
        (conv): KANLinear(
          0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
          (base_activation): SiLU(0, 0.000% Params, 0.0 Mac, 0.000% MACs, )
        )
      )
    )
  )
  (flat): Flatten(0, 0.000% Params, 0.0 Mac, 0.000% MACs, start_dim=1, end_dim=-1)
  (linear1): Linear(14.4 k, 96.386% Params, 14.4 KMac, 8.256% MACs, in_features=14400, out_features=1, bias=True)
)
FL

## CKAN_BN

In [175]:
# KAN_Convolutional_Layer + MLP
class CKAN_BN(nn.Module):
    def __init__(self, device='cpu', wb_train=True, ws_train=True, ss_train=True):
        super().__init__()
        self.conv1 = KAN_Convolutional_Layer(
            n_convs=5,
            kernel_size=(3, 3),
            device=device,
            base_w_trainable=wb_train,
            spline_s_trainable=ss_train,
            spline_w_trainable=ws_train
        )
        self.bn1 = nn.BatchNorm2d(5)

        self.conv2 = KAN_Convolutional_Layer(
            n_convs=5,
            kernel_size=(3, 3),
            device=device,
            base_w_trainable=wb_train,
            spline_s_trainable=ss_train,
            spline_w_trainable=ws_train
        )
        self.bn2 = nn.BatchNorm2d(25)

        self.pool1 = nn.MaxPool2d(
            kernel_size=(2, 2)
        )

        self.flat = nn.Flatten()

        self.linear1 = nn.Linear(625, 256)
        self.linear2 = nn.Linear(256, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.pool1(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = self.pool1(x)
        
        x = self.flat(x)
        x = self.linear1(x)
        x = self.linear2(x)
        x = F.log_softmax(x, dim=1)
        return x

In [182]:
with torch.cuda.device(0):
    flops, params = get_model_complexity_info(CKAN_BN(wb_train=False, ss_train=False, ws_train=False), (1, 28, 28), eval=False, as_strings=True)
    print(f"FLOPs: {flops}")
    print(f"Params: {params}")

CKAN_BN(
  162.89 k, 99.560% Params, 182.04 KMac, 73.973% MACs, 
  (conv1): KAN_Convolutional_Layer(
    0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
    (convs): ModuleList(
      (0-4): 5 x KAN_Convolution(
        0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
        (conv): KANLinear(
          0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
          (base_activation): SiLU(0, 0.000% Params, 0.0 Mac, 0.000% MACs, )
        )
      )
    )
  )
  (bn1): BatchNorm2d(10, 0.006% Params, 6.76 KMac, 2.747% MACs, 5, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): KAN_Convolutional_Layer(
    0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
    (convs): ModuleList(
      (0-4): 5 x KAN_Convolution(
        0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
        (conv): KANLinear(
          0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
          (base_activation): SiLU(0, 0.000% Params, 0.0 Mac, 0.000% MACs, )
        )
      )
    )
  )
  (bn2): BatchNorm2d(50, 0.031% Params, 6.05 KMac, 2.458% MACs

## KANC_MLP

In [97]:
# KAN_Convolutional_Layer + MLP (without Batch Norm)
class KANC_MLP(nn.Module):
    def __init__(self, device='cpu', wb_train=True, ws_train=True, ss_train=True):
        super().__init__()
        self.conv1 = KAN_Convolutional_Layer(
            n_convs=5,
            kernel_size=(3, 3),
            device=device,
            base_w_trainable=wb_train,
            spline_s_trainable=ss_train,
            spline_w_trainable=ws_train
        )

        self.conv2 = KAN_Convolutional_Layer(
            n_convs=5,
            kernel_size=(3, 3),
            device=device,
            base_w_trainable=wb_train,
            spline_s_trainable=ss_train,
            spline_w_trainable=ws_train
        )

        self.pool1 = nn.MaxPool2d(
            kernel_size=(2, 2)
        )

        self.flat = nn.Flatten()

        self.linear1 = nn.Linear(625, 256)
        self.linear2 = nn.Linear(256, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool1(x)

        x = F.relu(self.conv2(x))
        x = self.pool1(x)
        
        x = self.flat(x)
        x = self.linear1(x)
        x = self.linear2(x)
        x = F.log_softmax(x, dim=1)
        return x

In [98]:
with torch.cuda.device(0):
    flops, params = get_model_complexity_info(KANC_MLP(wb_train=False, ss_train=False, ws_train=False), (1, 28, 28), eval=False, as_strings=True)
    print(f"FLOPs: {flops}")
    print(f"Params: {params}")

KANC_MLP(
  162.83 k, 99.560% Params, 169.23 KMac, 70.605% MACs, 
  (conv1): KAN_Convolutional_Layer(
    0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
    (convs): ModuleList(
      (0-4): 5 x KAN_Convolution(
        0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
        (conv): KANLinear(
          0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
          (base_activation): SiLU(0, 0.000% Params, 0.0 Mac, 0.000% MACs, )
        )
      )
    )
  )
  (conv2): KAN_Convolutional_Layer(
    0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
    (convs): ModuleList(
      (0-4): 5 x KAN_Convolution(
        0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
        (conv): KANLinear(
          0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
          (base_activation): SiLU(0, 0.000% Params, 0.0 Mac, 0.000% MACs, )
        )
      )
    )
  )
  (pool1): MaxPool2d(0, 0.000% Params, 6.41 KMac, 2.672% MACs, kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (flat): Flatten(0, 0.000% Params, 0.0 Mac, 0.000% MA

## NormalConvsKAN

In [179]:
# Conv2d + KAN
class NormalConvsKAN(nn.Module):
    def __init__(self, wb_train=True, ws_train=True, ss_train=True):
        super(NormalConvsKAN, self).__init__()
        # Convolutional layer, assuming an input with 1 channel (grayscale image)
        # and producing 16 output channels, with a kernel size of 3x3
        self.conv1 = nn.Conv2d(1, 5, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(5)

        self.conv2 = nn.Conv2d(5, 5, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(5)
        # self.conv3 = nn.Conv2d(5, 10, kernel_size=3, padding=1)

        # Max pooling layer
        self.maxpool = nn.MaxPool2d(kernel_size=2)

        # Flatten layer
        self.flatten = nn.Flatten()

        # KAN layer
        self.kan1 = KANLinear(
            980, # 245, # 90
            20,
            grid_size=5,
            spline_order=3,
            scale_noise=0.01,
            scale_base=1,
            scale_spline=1,
            base_activation=nn.SiLU,
            grid_eps=0.02,
            grid_range=[0, 1],
            base_w_trainable=wb_train,
            spline_w_trainable=ws_train,
            spline_s_trainable=ss_train)
        self.kan2 = KANLinear(
            20, # 245, # 90
            10,
            grid_size=5,
            spline_order=3,
            scale_noise=0.01,
            scale_base=1,
            scale_spline=1,
            base_activation=nn.SiLU,
            grid_eps=0.02,
            grid_range=[0, 1],
            base_w_trainable=wb_train,
            spline_w_trainable=ws_train,
            spline_s_trainable=ss_train)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.bn1(x)
        x = F.relu(self.conv2(x))
        x = self.bn2(x)
        x = self.maxpool(x)
        # x = F.relu(self.conv3(x))
        # x = self.maxpool(x)
        x = self.flatten(x)
        print(x.shape)
        x = self.kan1(x)
        x = self.kan2(x)
        x = F.log_softmax(x, dim=1)

        return x

In [180]:
with torch.cuda.device(0):
    flops, params = get_model_complexity_info(NormalConvsKAN(wb_train=False, ss_train=False, ws_train=False), (1, 28, 28), eval=False, as_strings=True)
    print(f"FLOPs: {flops}")
    print(f"Params: {params}")

torch.Size([1, 980])
NormalConvsKAN(
  300, 0.189% Params, 239.12 KMac, 94.934% MACs, 
  (conv1): Conv2d(50, 0.032% Params, 39.2 KMac, 15.563% MACs, 1, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(10, 0.006% Params, 7.84 KMac, 3.113% MACs, 5, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(230, 0.145% Params, 180.32 KMac, 71.590% MACs, 5, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(10, 0.006% Params, 7.84 KMac, 3.113% MACs, 5, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (maxpool): MaxPool2d(0, 0.000% Params, 3.92 KMac, 1.556% MACs, kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (flatten): Flatten(0, 0.000% Params, 0.0 Mac, 0.000% MACs, start_dim=1, end_dim=-1)
  (kan1): KANLinear(
    0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
    (base_activation): SiLU(0, 0.000% Params, 0.0 Mac, 0.000% MACs, )
  )
  (kan2): KANLinear(
    0, 0.000% Params, 0.0 Mac, 0.

## ConvNet

In [133]:
# Conv2d + MLP + (Dropout)
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()

        # self.layers = [1, 64 * 7 * 7, 256, 10]
        # self.layers = [1, 980, 256, 10]
        self.layers = [1, 980, 161, 10]

        self.conv1 = nn.Conv2d(self.layers[0], 5, kernel_size=3, padding='same')
        self.conv2 = nn.Conv2d(5, 5, kernel_size=3, padding='same')

        # self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding='same')
        # self.conv4 = nn.Conv2d(64, 64, kernel_size=3, padding='same')

        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.25)
        self.dropout3 = nn.Dropout(0.5)

        self.fc1 = nn.Linear(self.layers[1], self.layers[2])
        self.fc2 = nn.Linear(self.layers[2], self.layers[3])

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.maxpool(x)
        #x = self.dropout1(x)

        # x = F.relu(self.conv3(x))
        # x = F.relu(self.conv4(x))
        # x = self.maxpool(x)
        # #x = self.dropout2(x)

        x = torch.flatten(x, 1)
        print(x.shape)
        x = F.relu(self.fc1(x))
        #x = self.dropout3(x)
        x = self.fc2(x)

        x = F.log_softmax(x, dim=1)
        return x

In [134]:
with torch.cuda.device(0):
    flops, params = get_model_complexity_info(ConvNet(), (1, 28, 28), eval=False, as_strings=True)
    print(f"FLOPs: {flops}")
    print(f"Params: {params}")

torch.Size([1, 980])
ConvNet(
  159.84 k, 100.000% Params, 383.0 KMac, 96.981% MACs, 
  (conv1): Conv2d(50, 0.031% Params, 39.2 KMac, 9.926% MACs, 1, 5, kernel_size=(3, 3), stride=(1, 1), padding=same)
  (conv2): Conv2d(230, 0.144% Params, 180.32 KMac, 45.660% MACs, 5, 5, kernel_size=(3, 3), stride=(1, 1), padding=same)
  (maxpool): MaxPool2d(0, 0.000% Params, 3.92 KMac, 0.993% MACs, kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout1): Dropout(0, 0.000% Params, 0.0 Mac, 0.000% MACs, p=0.25, inplace=False)
  (dropout2): Dropout(0, 0.000% Params, 0.0 Mac, 0.000% MACs, p=0.25, inplace=False)
  (dropout3): Dropout(0, 0.000% Params, 0.0 Mac, 0.000% MACs, p=0.5, inplace=False)
  (fc1): Linear(157.94 k, 98.811% Params, 157.94 KMac, 39.993% MACs, in_features=980, out_features=161, bias=True)
  (fc2): Linear(1.62 k, 1.014% Params, 1.62 KMac, 0.410% MACs, in_features=161, out_features=10, bias=True)
)
FLOPs: 394.92 KMac
Params: 159.84 k


## KKAN_Convolutional_Network

In [159]:
# KAN_Convolutional_Layer + KAN
class KKAN_Convolutional_Network(nn.Module):
    def __init__(self, device='cpu', wb_train=True, ws_train=True, ss_train=True):
        super().__init__()
        self.conv1 = KAN_Convolutional_Layer(
            n_convs=5,
            kernel_size=(3, 3),
            device=device,
            base_w_trainable=wb_train,
            spline_s_trainable=ss_train,
            spline_w_trainable=ws_train
        )

        self.conv2 = KAN_Convolutional_Layer(
            n_convs=5,
            kernel_size=(3, 3),
            device=device,
            base_w_trainable=wb_train,
            spline_s_trainable=ss_train,
            spline_w_trainable=ws_train
        )

        self.pool1 = nn.MaxPool2d(
            kernel_size=(2, 2)
        )

        self.flat = nn.Flatten()

        self.kan1 = KANLinear(
            625,
            31,
            grid_size=5,
            spline_order=3,
            scale_noise=0.01,
            scale_base=1,
            scale_spline=1,
            base_activation=nn.SiLU,
            grid_eps=0.02,
            grid_range=[0, 1],
            base_w_trainable=wb_train,
            spline_w_trainable=ws_train,
            spline_s_trainable=ss_train
        )
        self.kan2 = KANLinear(
            31,
            10,
            grid_size=5,
            spline_order=3,
            scale_noise=0.01,
            scale_base=1,
            scale_spline=1,
            base_activation=nn.SiLU,
            grid_eps=0.02,
            grid_range=[0, 1],
            base_w_trainable=wb_train,
            spline_w_trainable=ws_train,
            spline_s_trainable=ss_train
        )

    def forward(self, x):
        x = self.conv1(x)

        x = self.pool1(x)

        x = self.conv2(x)
        x = self.pool1(x)
        x = self.flat(x)

        x = self.kan1(x)
        x = self.kan2(x)
        x = F.log_softmax(x, dim=1)

        return x

In [160]:
with torch.cuda.device(0):
    flops, params = get_model_complexity_info(KKAN_Convolutional_Network(wb_train=False, ss_train=False, ws_train=False), (1, 28, 28), eval=False, as_strings=True)
    print(f"FLOPs: {flops}")
    print(f"Params: {params}")

KKAN_Convolutional_Network(
  0, 0.000% Params, 6.41 KMac, 9.007% MACs, 
  (conv1): KAN_Convolutional_Layer(
    0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
    (convs): ModuleList(
      (0-4): 5 x KAN_Convolution(
        0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
        (conv): KANLinear(
          0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
          (base_activation): SiLU(0, 0.000% Params, 0.0 Mac, 0.000% MACs, )
        )
      )
    )
  )
  (conv2): KAN_Convolutional_Layer(
    0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
    (convs): ModuleList(
      (0-4): 5 x KAN_Convolution(
        0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
        (conv): KANLinear(
          0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
          (base_activation): SiLU(0, 0.000% Params, 0.0 Mac, 0.000% MACs, )
        )
      )
    )
  )
  (pool1): MaxPool2d(0, 0.000% Params, 6.41 KMac, 9.007% MACs, kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (flat): Flatten(0, 0.000% Params, 0.0 Mac, 0.

In [164]:
# KAN_Convolutional_Layer + KAN
class KANvNet(nn.Module):
    def __init__(self, device='cpu', wb_train=True, ws_train=True, ss_train=True):
        super().__init__()
        self.conv1 = KAN_Convolutional_Layer(
            n_convs=5,
            kernel_size=(3, 3),
            device=device,
            base_w_trainable=wb_train,
            spline_s_trainable=ss_train,
            spline_w_trainable=ws_train
        )

        # self.conv2 = KAN_Convolutional_Layer(
        #     n_convs=5,
        #     kernel_size=(3, 3),
        #     device=device,
        #     base_w_trainable=wb_train,
        #     spline_s_trainable=ss_train,
        #     spline_w_trainable=ws_train
        # )

        # self.pool1 = nn.MaxPool2d(
        #     kernel_size=(2, 2)
        # )

        # self.flat = nn.Flatten()

        # self.kan1 = KANLinear(
        #     625,
        #     31,
        #     grid_size=5,
        #     spline_order=3,
        #     scale_noise=0.01,
        #     scale_base=1,
        #     scale_spline=1,
        #     base_activation=nn.SiLU,
        #     grid_eps=0.02,
        #     grid_range=[0, 1],
        #     base_w_trainable=wb_train,
        #     spline_w_trainable=ws_train,
        #     spline_s_trainable=ss_train
        # )
        # self.kan2 = KANLinear(
        #     31,
        #     10,
        #     grid_size=5,
        #     spline_order=3,
        #     scale_noise=0.01,
        #     scale_base=1,
        #     scale_spline=1,
        #     base_activation=nn.SiLU,
        #     grid_eps=0.02,
        #     grid_range=[0, 1],
        #     base_w_trainable=wb_train,
        #     spline_w_trainable=ws_train,
        #     spline_s_trainable=ss_train
        # )

    def forward(self, x):
        x = self.conv1(x)

        # x = self.pool1(x)

        # x = self.conv2(x)
        # x = self.pool1(x)
        # x = self.flat(x)

        # x = self.kan1(x)
        # x = self.kan2(x)
        # x = F.log_softmax(x, dim=1)

        return x

In [170]:
with torch.cuda.device(0):
    flops, params = get_model_complexity_info(KANvNet(wb_train=False, ss_train=False, ws_train=True), (1, 28, 28), eval=False, as_strings=True)
    print(f"FLOPs: {flops}")
    print(f"Params: {params}")

KANvNet(
  0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
  (conv1): KAN_Convolutional_Layer(
    0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
    (convs): ModuleList(
      (0-4): 5 x KAN_Convolution(
        0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
        (conv): KANLinear(
          0, 0.000% Params, 0.0 Mac, 0.000% MACs, 
          (base_activation): SiLU(0, 0.000% Params, 0.0 Mac, 0.000% MACs, )
        )
      )
    )
  )
)
FLOPs: 30.42 KMac
Params: 360
