In [None]:
import os
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Select the compute device: a CUDA GPU when PyTorch can see one, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Training on device: {device}")

class Neuralnetwork(nn.Module):
    """Convolutional classifier wired as a branching graph of named areas.

    The layer names (V1, MT, V4, ...) appear to follow primate visual-cortex
    areas. Each area is a single ``nn.Conv2d`` followed by ReLU; several areas
    consume the channel-wise concatenation of earlier areas' outputs. The last
    area (``AIT``) emits ``output_dim`` channels which are globally
    average-pooled into one score per class (no softmax is applied).

    Args:
        input_channels: Number of channels in the input image.
        hidden_dim: Base width. Each area has ``int(hidden_dim * multiplier)``
            output channels.
        output_dim: Number of output scores (classes).
        kernel_size: Kernel size shared by every convolution.
        padding_size: Padding shared by every convolution. Outputs of
            different depths are concatenated in ``forward``, so the
            kernel/padding pair must preserve the spatial size
            (e.g. ``kernel_size=3, padding_size=1``).
    """

    def __init__(self, input_channels, hidden_dim, output_dim, kernel_size=3, padding_size=1):
        super().__init__()
        # Store kernel and padding as instance variables for clarity
        self.kernel_size = kernel_size
        self.padding_size = padding_size

        # Per-area width multipliers (same values as in the original network)
        multipliers = {
            "V1": 10,
            "ThickStripe": 1,
            "MT": 1,
            "VIP": 0.4,
            "MST": 0.5,
            "Interstripe": 5,
            "ThinStripe": 1,
            "LIP": 1,
            "V4": 4,
            "PIT": 2.5,
            "CIT": 3.5,
            "SevenA": 3.5,
        }
        # Resolve every area's integer channel count exactly once. Inputs of
        # concatenating layers are then the SUM of these integers, which is
        # what torch.cat actually produces. Truncating the summed multipliers
        # instead (int(h * (a + b))) can differ from int(h*a) + int(h*b) and
        # made forward() fail for some hidden_dim values (e.g. 5).
        width = {area: int(hidden_dim * factor) for area, factor in multipliers.items()}

        def conv(in_channels, out_channels):
            # All areas share the same kernel size and padding.
            return nn.Conv2d(in_channels, out_channels,
                             kernel_size=self.kernel_size, padding=self.padding_size)

        # NOTE: registration order is kept as in the original so parameter
        # ordering (and seeded initialisation) is unchanged.
        self.V1 = conv(input_channels, width["V1"])
        self.ThickStripe = conv(width["V1"], width["ThickStripe"])
        # MT takes concatenated outputs of V1 and ThickStripe
        self.MT = conv(width["V1"] + width["ThickStripe"], width["MT"])
        self.VIP = conv(width["MT"], width["VIP"])
        # MST takes concatenated outputs of MT and VIP
        self.MST = conv(width["MT"] + width["VIP"], width["MST"])
        # Both Interstripe and ThinStripe take V1 as input
        self.Interstripe = conv(width["V1"], width["Interstripe"])
        self.ThinStripe = conv(width["V1"], width["ThinStripe"])
        # LIP takes MST as input
        self.LIP = conv(width["MST"], width["LIP"])
        # V4 takes the concatenation of MT, Interstripe, and ThinStripe
        self.V4 = conv(width["MT"] + width["Interstripe"] + width["ThinStripe"], width["V4"])
        # PIT takes V4, MST, and LIP
        self.PIT = conv(width["V4"] + width["MST"] + width["LIP"], width["PIT"])
        # CIT takes PIT and V4
        self.CIT = conv(width["PIT"] + width["V4"], width["CIT"])
        # SevenA takes MST and LIP
        self.SevenA = conv(width["MST"] + width["LIP"], width["SevenA"])
        # AIT takes CIT and SevenA (final layer before pooling)
        self.AIT = conv(width["CIT"] + width["SevenA"], output_dim)

        # Global average pooling to collapse spatial dimensions before classification
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.relu = nn.ReLU()

    def forward(self, x):
        """Compute class scores for a batch of images.

        Args:
            x: Tensor of shape (batch, channels, height, width).

        Returns:
            Tensor of shape (batch, output_dim) holding unnormalised scores.
        """
        V1 = self.relu(self.V1(x))
        ThickStripe = self.relu(self.ThickStripe(V1))
        MT = self.relu(self.MT(torch.cat((V1, ThickStripe), dim=1)))
        VIP = self.relu(self.VIP(MT))
        MST = self.relu(self.MST(torch.cat((MT, VIP), dim=1)))
        Interstripe = self.relu(self.Interstripe(V1))
        ThinStripe = self.relu(self.ThinStripe(V1))
        LIP = self.relu(self.LIP(MST))
        V4 = self.relu(self.V4(torch.cat((MT, Interstripe, ThinStripe), dim=1)))
        PIT = self.relu(self.PIT(torch.cat((V4, MST, LIP), dim=1)))
        CIT = self.relu(self.CIT(torch.cat((PIT, V4), dim=1)))
        SevenA = self.relu(self.SevenA(torch.cat((MST, LIP), dim=1)))
        # No ReLU on the final area: its output is used directly as scores.
        AIT = self.AIT(torch.cat((CIT, SevenA), dim=1))
        # Global average pooling: output shape (batch, output_dim, 1, 1)
        out = self.pool(AIT)
        # Flatten to (batch, output_dim)
        out = out.view(out.size(0), -1)
        return out

Training on device: cpu


100%|██████████| 9.91M/9.91M [00:00<00:00, 16.5MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 476kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 4.49MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 8.36MB/s]



Test set: Average loss: 0.3273, Accuracy: 8973/10000 (90%)


Test set: Average loss: 0.1232, Accuracy: 9627/10000 (96%)


Test set: Average loss: 0.1051, Accuracy: 9677/10000 (97%)


Test set: Average loss: 0.0841, Accuracy: 9738/10000 (97%)


Test set: Average loss: 0.0756, Accuracy: 9774/10000 (98%)

Model saved as our_conv_neural_network_mnist.pth


In [None]:
# The training and testing functions are largely unchanged. The input no longer
# needs to be flattened, because the convolutional model consumes it with its
# spatial dimensions intact.
def train(args, model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``.

    Args:
        args: Object exposing ``log_interval`` (log every N-th batch) and
            ``dry_run`` (stop right after the first logged batch).
        model: Module to train; switched to training mode here.
        device: Device the batches are moved to before the forward pass.
        train_loader: Iterable of ``(data, target)`` batches; must support
            ``len()`` and expose ``.dataset`` for the progress message.
        optimizer: Optimizer stepped once per batch.
        epoch: Epoch number, used only in the progress message.
    """
    model.train()
    loss_fn = nn.CrossEntropyLoss()
    for step, batch in enumerate(train_loader):
        inputs, labels = (tensor.to(device) for tensor in batch)

        # Standard step: clear stale gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(inputs)  # Inputs keep their image shape; no flattening
        batch_loss = loss_fn(logits, labels)
        batch_loss.backward()
        optimizer.step()

        # Everything below only concerns batches that get logged.
        if step % args.log_interval != 0:
            continue

        samples_seen = step * len(inputs)
        percent_done = 100. * step / len(train_loader)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, samples_seen, len(train_loader.dataset),
            percent_done, batch_loss.item()))
        # The dry-run check sits on the logging path, so it fires on the
        # first logged batch (batch 0).
        if args.dry_run:
            break

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    criterion = nn.CrossEntropyLoss(reduction='sum')
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()  # Sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

def main():
    """Train the convolutional model on MNIST and save its weights.

    Downloads MNIST into ``./data`` if needed, trains for ``num_epochs``
    epochs on the module-level ``device``, evaluates on the test split after
    every epoch, and writes the final ``state_dict`` to
    ``our_conv_neural_network_mnist.pth`` in the working directory.
    """
    # Hyperparameters
    batch_size = 64
    learning_rate = 0.001
    num_epochs = 5
    input_channels = 1  # MNIST images are grayscale
    hidden_dim = 8      # Adjust based on your compute resources
    output_dim = 10     # 10 classes for MNIST
    checkpoint_path = "our_conv_neural_network_mnist.pth"

    # Define transformation and normalization for MNIST
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])

    # Prepare the MNIST dataset
    train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
    test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Initialize the model with the variable kernel and padding sizes.
    # The class defined in this file is `Neuralnetwork`; the previously used
    # name `OurConvNeuralnetwork` is not defined here and raised NameError.
    model = Neuralnetwork(input_channels, hidden_dim, output_dim, kernel_size=3, padding_size=1).to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Minimal stand-in for command-line arguments read by train()
    class Args:
        log_interval = 100
        dry_run = False
    args = Args()

    for epoch in range(1, num_epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader)

    torch.save(model.state_dict(), checkpoint_path)
    print(f"Model saved as {checkpoint_path}")

# Run the training pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()