In [1]:
# Essential imports for deep learning and visualization
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms

# Data science and visualization libraries
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
import warnings
# Silence every Python warning for the remainder of the session
warnings.filterwarnings(action='ignore')

# Fix the NumPy and PyTorch seeds so repeated runs draw the same numbers
np.random.seed(42)
torch.manual_seed(42)

# Matplotlib defaults: stock style, larger canvas, larger font
plt.style.use('default')
plt.rcParams.update({'figure.figsize': (12, 8), 'font.size': 12})

# Pick CUDA when PyTorch can see a GPU, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"🚀 Using device: {device}")

# `device` was derived from torch.cuda.is_available(), so its type tells us
# which branch to report
if device.type == 'cuda':
    print(f"   GPU: {torch.cuda.get_device_name(0)}")
    # total_memory is in bytes; 1024**3 converts it to GiB
    print(f"   Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
else:
    print("   Running on CPU")

# Record the library versions this run used
print("\n📦 Library Versions:")
print(f"   PyTorch: {torch.__version__}")
print(f"   NumPy: {np.__version__}")
print(f"   Matplotlib: {plt.matplotlib.__version__}")

print("\n✅ Environment setup complete!")

🚀 Using device: cpu
   Running on CPU

📦 Library Versions:
   PyTorch: 2.9.0+cpu
   NumPy: 2.3.4
   Matplotlib: 3.10.7

✅ Environment setup complete!


In [15]:
class ScaledTanH(nn.Module):
    """Scaled hyperbolic tangent activation: ``f(a) = A * tanh(S * a)``.

    Args:
        A: Output amplitude multiplying the tanh result.
        S: Slope multiplying the input before tanh is applied.
    """

    def __init__(self, A=1.7159, S=2/3):
        super().__init__()
        # Plain Python numbers, not parameters: the activation has nothing to train
        self.A, self.S = A, S

    def forward(self, X):
        """Apply the activation element-wise to ``X`` and return the result."""
        squashed = torch.tanh(self.S * X)
        return self.A * squashed

In [35]:
class C3(nn.Module):
    """Partially connected convolution driven by a connection table.

    Output map ``j`` is produced by its own single-output ``nn.Conv2d`` that
    only sees the input maps ``i`` for which ``connection_table[i][j]`` is
    non-zero. The per-map results are concatenated along the channel axis.

    Args:
        connection_table: Array-like or tensor of shape ``(in_maps, out_maps)``;
            a non-zero entry at ``[i, j]`` connects input map ``i`` to output
            map ``j``.
        in_maps: Number of input feature maps (rows of the table).
        out_maps: Number of output feature maps (columns of the table).
        kernel_size: Kernel size passed to every per-map ``nn.Conv2d``.

    Raises:
        ValueError: If the table shape is not ``(in_maps, out_maps)`` or an
            output map has no connected input map.
    """

    def __init__(self, connection_table, in_maps =6, out_maps = 16, kernel_size = 5):
        super().__init__()

        # as_tensor accepts lists and tensors alike; calling torch.tensor() on
        # an existing tensor emits a copy-construct warning. clone() keeps the
        # module's table independent of the caller's object.
        table = torch.as_tensor(connection_table, dtype=torch.uint8).detach().clone()
        if tuple(table.shape) != (in_maps, out_maps):
            raise ValueError(
                f"connection_table must have shape ({in_maps}, {out_maps}), "
                f"got {tuple(table.shape)}"
            )

        # Buffers follow .to(device)/.cuda() together with the weights.
        # persistent=False keeps them out of state_dict, so checkpoint keys
        # stay exactly "convs.*".
        self.register_buffer("connection_table", table, persistent=False)

        self.convs = nn.ModuleList()
        for j in range(out_maps):
            # Indices of the input maps wired to output map j, computed once
            # here instead of rebuilding a boolean mask on every forward pass.
            connected = torch.nonzero(table[:, j]).flatten()
            if connected.numel() == 0:
                raise ValueError(f"output map {j} has no connected input maps")
            self.register_buffer(f"_inputs_{j}", connected, persistent=False)
            self.convs.append(
                nn.Conv2d(connected.numel(), 1, kernel_size=kernel_size, stride=1, padding=0)
            )

    def forward(self, x):
        """Convolve each output map over its connected inputs.

        ``x`` is indexed along dimension 1 (the feature-map axis); the result
        has one channel per entry in ``self.convs``.
        """
        outputs = [
            conv(x.index_select(1, getattr(self, f"_inputs_{j}")))
            for j, conv in enumerate(self.convs)
        ]
        # One single-channel result per output map, stacked on the channel axis
        return torch.cat(outputs, dim=1)

In [36]:
class RBFOutput(nn.Module):
    """
    Computes squared Euclidean distance from the input activations
    to fixed prototype code vectors (one per class).

    Args:
        num_classes: Number of code vectors; only used to build the default
            codebook.
        in_features: Length of each code vector; only used to build the
            default codebook.
        codebook: Optional 2-D tensor or array-like of shape
            ``(classes, features)``. When omitted, a random ±1 codebook of
            shape ``(num_classes, in_features)`` is drawn.

    Raises:
        ValueError: If a supplied ``codebook`` is not 2-D.
    """

    def __init__(self, num_classes=10, in_features=84, codebook=None):
        super().__init__()
        if codebook is None:
            # Default: random ±1 code vectors
            codebook = torch.sign(torch.randn(num_classes, in_features))
        else:
            # Accept lists / arrays as well as tensors; register_buffer itself
            # only takes tensors.
            codebook = torch.as_tensor(codebook)
            if not codebook.is_floating_point():
                codebook = codebook.to(torch.get_default_dtype())
            if codebook.dim() != 2:
                raise ValueError(
                    f"codebook must be 2-D (classes, features), got shape {tuple(codebook.shape)}"
                )
        self.register_buffer("codebook", codebook)  # non-trainable

    def forward(self, x):
        """Return squared distances from ``x`` to every code vector.

        ``x`` has shape ``(..., features)``; the result has shape
        ``(..., classes)``.
        """
        # Sum the squared differences directly. Going through cdist(...) ** 2
        # takes a square root and then undoes it, which loses precision.
        diff = x.unsqueeze(-2) - self.codebook
        return diff.pow(2).sum(dim=-1)

In [37]:
class LeNet5(nn.Module):
    """LeNet-5 style network: C1 -> S2 -> C3 -> S4 -> C5 -> F6 -> RBF output.

    The shared ``ScaledTanH`` activation is applied after every stage except
    the final ``RBFOutput`` layer, whose result ``forward`` returns.
    """

    def __init__(self):
        super().__init__()

        # A single activation module, reused after each layer
        self.activation = ScaledTanH()

        # C1: 1 grayscale channel -> 6 feature maps, 5x5 kernels
        self.C1 = nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=0)

        # S2: 2x2 average pooling followed by a learnable per-map scale and shift
        self.S2_pool = nn.AvgPool2d(kernel_size=2, stride=2)
        self.S2_weight = nn.Parameter(torch.ones(6))
        self.S2_bias = nn.Parameter(torch.zeros(6))

        # C3 wiring: rows are the 6 input maps, columns the 16 output maps;
        # a 1 connects that input map to that output map
        wiring_rows = [
            [1,0,0,0,1,1,1,0,0,1,1,1,1,0,1,1],
            [1,1,0,0,0,1,1,1,0,0,1,1,1,1,0,1],
            [1,1,1,0,0,0,1,1,1,0,0,1,0,1,1,1],
            [0,1,1,1,0,0,1,1,1,1,0,0,1,0,1,1],
            [0,0,1,1,1,0,0,1,1,1,1,0,1,1,0,1],
            [0,0,0,1,1,1,0,0,1,1,1,1,0,1,1,1],
        ]
        self.C3_connections = torch.tensor(wiring_rows, dtype=torch.uint8)

        # C3: partially connected convolution built from the table above
        self.C3 = C3(self.C3_connections)

        # S4: same pooling scheme as S2, over 16 maps
        self.S4_pool = nn.AvgPool2d(kernel_size=2, stride=2)
        self.S4_weight = nn.Parameter(torch.ones(16))
        self.S4_bias = nn.Parameter(torch.zeros(16))

        # C5: 16 maps -> 120 maps, 5x5 kernels
        self.C5 = nn.Conv2d(16, 120, kernel_size=5, stride=1, padding=0)

        # F6: fully connected 120 -> 84
        self.F6 = nn.Linear(120, 84)

        # Output: distances to 10 class code vectors of length 84
        self.output = RBFOutput(num_classes=10, in_features=84)

    def _subsample(self, X, pool, weight, bias):
        """Pool ``X``, then apply the per-channel ``weight`` and ``bias``."""
        pooled = pool(X)
        # view(1, -1, 1, 1) broadcasts the per-channel vectors over batch and space
        return weight.view(1, -1, 1, 1) * pooled + bias.view(1, -1, 1, 1)

    def forward(self, X):
        """Run ``X`` through all stages and return the ``RBFOutput`` result."""
        act = self.activation

        X = act(self.C1(X))                                                       # C1
        X = act(self._subsample(X, self.S2_pool, self.S2_weight, self.S2_bias))   # S2
        X = act(self.C3(X))                                                       # C3
        X = act(self._subsample(X, self.S4_pool, self.S4_weight, self.S4_bias))   # S4

        # C5, then flatten everything but the batch axis for the linear layer
        X = act(self.C5(X))
        batch_size = X.size(0)
        X = X.view(batch_size, -1)

        X = act(self.F6(X))                                                       # F6

        return self.output(X)

    def info(self):
        """Return parameter counts and an estimated size as formatted strings.

        The size estimate assumes 4 bytes (float32) per parameter and counts
        parameters only.
        """
        counts = [(p.numel(), p.requires_grad) for p in self.parameters()]
        total_params = sum(n for n, _ in counts)
        trainable_params = sum(n for n, trainable in counts if trainable)
        return {
            'Total Parameters': f'{total_params:,}',
            'Trainable Parameters': f'{trainable_params:,}',
            'Model Size (MB)': f'{total_params * 4 / (1024**2):.2f}',  # Assuming float32
        }

In [38]:
# Build the network, move it to the selected device, and show its layers
model = LeNet5()
model = model.to(device)
print('Model Architecture')
print(model)

Model Architecture
LeNet5(
  (activation): ScaledTanH()
  (C1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (S2_pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (C3): C3(
    (convs): ModuleList(
      (0-5): 6 x Conv2d(3, 1, kernel_size=(5, 5), stride=(1, 1))
      (6-14): 9 x Conv2d(4, 1, kernel_size=(5, 5), stride=(1, 1))
      (15): Conv2d(6, 1, kernel_size=(5, 5), stride=(1, 1))
    )
  )
  (S4_pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (C5): Conv2d(16, 120, kernel_size=(5, 5), stride=(1, 1))
  (F6): Linear(in_features=120, out_features=84, bias=True)
  (output): RBFOutput()
)


In [39]:
# Print the parameter statistics reported by LeNet5.info(), one per line
model_info = model.info()
print("\n Model Statistics")
for key, value in model_info.items():
    print("    " + f"{key}: {value}")


 Model Statistics
    Total Parameters: 60,000
    Trainable Parameters: 60,000
    Model Size (MB): 0.23


In [40]:
# Smoke-test the forward pass on random input and convert the RBF distances
# into class probabilities.
model = LeNet5().to(device)
model.eval()

with torch.no_grad():
    # LeNet expects 32×32 grayscale inputs
    sample_input = torch.randn(32, 1, 32, 32).to(device)
    output = model(sample_input)

    print(f"   Input shape: {sample_input.shape}")
    print(f"   Output shape: {output.shape}")
    print(f"   Output range: [{output.min():.3f}, {output.max():.3f}]")

    # Output is RBF distance (smaller = closer to class center)
    # exp(-distance) is shown for reference only: for distances this large it
    # sits at the bottom of the float32 range and underflows to exactly 0 for
    # slightly larger ones, so it must not be used as a normaliser.
    similarities = torch.exp(-output)  # exp(-distance)
    # softmax(-d) is mathematically exp(-d) / sum(exp(-d)), but it subtracts
    # the row maximum first, so it cannot turn into 0/0 = NaN on underflow.
    probs = torch.softmax(-output, dim=1)

    print(f"   Similarity range: [{similarities.min():.3f}, {similarities.max():.3f}]")
    print(f"   Probability range: [{probs.min():.3f}, {probs.max():.3f}]")
    print(f"   Probability sum per sample: {probs.sum(dim=1)[0]:.3f} (should be ~1.0)")

    # Predicted class = highest probability = smallest distance
    preds = probs.argmax(dim=1)
    print(f"   Predicted classes: {preds[:10].cpu().numpy()}")


   Input shape: torch.Size([32, 1, 32, 32])
   Output shape: torch.Size([32, 10])
   Output range: [81.361, 90.957]
   Similarity range: [0.000, 0.000]
   Probability range: [0.000, 0.706]
   Probability sum per sample: 1.000 (should be ~1.0)
   Predicted classes: [0 5 0 2 6 2 5 6 2 2]
