In [1]:
import torch
from torchsummary import summary
from models.KANLinear_EdgeFace import EdgeFace_KANLinear

# Constructor arguments for the KANLinear-based EdgeFace model, collected in
# one mapping so the configuration can be read (and tweaked) at a glance.
edgeface_kwargs = {
    # Input channels and size of the model's output vector.
    "in_chans": 3,
    "num_classes": 512,
    # Per-stage settings: one entry per stage (four stages).
    "depths": [3, 3, 9, 3],
    "dims": [48, 96, 160, 304],
    "global_block": [0, 0, 0, 3],
    "global_block_type": ["None", "None", "None", "SDTA"],
    "kernel_sizes": [7, 7, 7, 7],
    "heads": [8, 8, 8, 8],
    "use_pos_embd_xca": [False, False, False, False],
    "d2_scales": [2, 3, 4, 5],
    # Scalar settings.
    "use_pos_embd_global": False,
    "drop_path_rate": 0.1,
    "layer_scale_init_value": 1e-6,
    "head_init_scale": 1.0,
    "expan_ratio": 4,
    # Any additional keyword arguments go here.
    "classifier_dropout": 0.5,
}

# Build the network from the configuration above.
model = EdgeFace_KANLinear(**edgeface_kwargs)

# Show the module hierarchy.
print(model)



EdgeFace_KANLinear(
  (downsample_layers): ModuleList(
    (0): Sequential(
      (0): Conv2d(3, 48, kernel_size=(4, 4), stride=(4, 4))
      (1): LayerNorm()
    )
    (1): Sequential(
      (0): LayerNorm()
      (1): Conv2d(48, 96, kernel_size=(2, 2), stride=(2, 2))
    )
    (2): Sequential(
      (0): LayerNorm()
      (1): Conv2d(96, 160, kernel_size=(2, 2), stride=(2, 2))
    )
    (3): Sequential(
      (0): LayerNorm()
      (1): Conv2d(160, 304, kernel_size=(2, 2), stride=(2, 2))
    )
  )
  (stages): ModuleList(
    (0): Sequential(
      (0): ConvEncoder_KANLinear(
        (dwconv): Conv2d(48, 48, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=48)
        (norm): LayerNorm()
        (pwconv1): LoRaLin_KAN(
          (linear1): KANLinear(
            (base_activation): SiLU()
          )
          (linear2): KANLinear(
            (base_activation): SiLU()
          )
        )
        (act): GELU(approximate='none')
        (pwconv2): LoRaLin_KAN(
          (line

In [2]:
# Smoke-test the forward pass with a single random input of shape
# (batch_size, 3, 112, 112); batch size is 1.
input_tensor = torch.randn(1, 3, 112, 112)

# The model was constructed with non-zero drop_path_rate and
# classifier_dropout, which presumably inject randomness while the module is
# in training mode. Switch to eval mode so the check runs the inference path,
# and disable autograd because no gradients are needed for a shape check
# (avoids building and holding the computation graph).
model.eval()
with torch.no_grad():
    output_tensor = model(input_tensor)

# Print the output tensor shape
print(f"Output tensor shape: {output_tensor.shape}")

Output tensor shape: torch.Size([1, 512])


In [3]:
# Layer-by-layer summary of the model for a (3, 112, 112) input, run on CPU.
# Use "cuda" if a GPU is available.
# NOTE(review): the captured table lists 0 params for the KANLinear rows;
# torchsummary appears to count only `weight`/`bias` attributes of each
# module -- confirm. Rely on the explicit counts below for the real totals.
summary(model, (3, 112, 112), device="cpu")

# Count parameters directly from the module, walking the parameter list once.
param_counts = [(p.numel(), p.requires_grad) for p in model.parameters()]
total_params = sum(count for count, _ in param_counts)
trainable_params = sum(count for count, needs_grad in param_counts if needs_grad)

print(f"Total parameters: {total_params}")
# Previously computed but never shown; report it alongside the total.
print(f"Trainable parameters: {trainable_params}")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 48, 28, 28]           2,352
         LayerNorm-2           [-1, 48, 28, 28]              96
            Conv2d-3           [-1, 48, 28, 28]           2,400
         LayerNorm-4           [-1, 28, 28, 48]              96
              SiLU-5                   [-1, 48]               0
         KANLinear-6           [-1, 28, 28, 24]               0
              SiLU-7                   [-1, 24]               0
         KANLinear-8          [-1, 28, 28, 192]               0
       LoRaLin_KAN-9          [-1, 28, 28, 192]               0
             GELU-10          [-1, 28, 28, 192]               0
             SiLU-11                  [-1, 192]               0
        KANLinear-12           [-1, 28, 28, 24]               0
             SiLU-13                   [-1, 24]               0
        KANLinear-14           [-1, 28,

In [1]:
import torch
from torchsummary import summary
from models.KANConv_EdgeFace import EdgeFace_KANConv

# Constructor arguments for the KAN-convolution EdgeFace model, collected in
# one mapping so the configuration can be read (and tweaked) at a glance.
edgeface_kwargs = {
    # Input channels and size of the model's output vector.
    "in_chans": 3,
    "num_classes": 512,
    # Per-stage settings: one entry per stage (four stages).
    "depths": [3, 3, 9, 3],
    "dims": [48, 96, 160, 304],
    "global_block": [0, 0, 0, 3],
    "global_block_type": ["None", "None", "None", "SDTA"],
    # Kernel sizes are given as (height, width)-style pairs for this variant.
    "kernel_sizes": [(7, 7), (7, 7), (7, 7), (7, 7)],
    "heads": [8, 8, 8, 8],
    "use_pos_embd_xca": [False, False, False, False],
    "d2_scales": [2, 3, 4, 5],
    # Scalar settings.
    "use_pos_embd_global": False,
    "drop_path_rate": 0.1,
    "layer_scale_init_value": 1e-6,
    "head_init_scale": 1.0,
    "expan_ratio": 4,
    # Any additional keyword arguments go here.
    "classifier_dropout": 0.5,
}

# Build the network from the configuration above.
model = EdgeFace_KANConv(**edgeface_kwargs)

# Show the module hierarchy.
print(model)



EdgeFace_KANConv(
  (downsample_layers): ModuleList(
    (0): Sequential(
      (0): KAN_Convolutional_Layer(
        (convs): ModuleList(
          (0-143): 144 x KAN_Convolution(
            (conv): KANLinear(
              (base_activation): SiLU()
            )
          )
        )
      )
      (1): LayerNorm()
    )
    (1): Sequential(
      (0): LayerNorm()
      (1): KAN_Convolutional_Layer(
        (convs): ModuleList(
          (0-4607): 4608 x KAN_Convolution(
            (conv): KANLinear(
              (base_activation): SiLU()
            )
          )
        )
      )
    )
    (2): Sequential(
      (0): LayerNorm()
      (1): KAN_Convolutional_Layer(
        (convs): ModuleList(
          (0-15359): 15360 x KAN_Convolution(
            (conv): KANLinear(
              (base_activation): SiLU()
            )
          )
        )
      )
    )
    (3): Sequential(
      (0): LayerNorm()
      (1): KAN_Convolutional_Layer(
        (convs): ModuleList(
          (0-486

In [None]:
# Smoke-test the forward pass with a single random input of shape
# (batch_size, 3, 112, 112); batch size is 1.
input_tensor = torch.randn(1, 3, 112, 112)

# The model was constructed with non-zero drop_path_rate and
# classifier_dropout, which presumably inject randomness while the module is
# in training mode. Switch to eval mode so the check runs the inference path,
# and disable autograd because no gradients are needed for a shape check
# (avoids building and holding the computation graph).
model.eval()
with torch.no_grad():
    output_tensor = model(input_tensor)

# Print the output tensor shape
print(f"Output tensor shape: {output_tensor.shape}")