In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from tqdm.notebook import tqdm
import numpy as np

## Basic CNN

In [3]:
rand = torch.rand(4, 3, 128, 128)

conv = nn.Conv2d(in_channels=3, out_channels=1, kernel_size=3, stride=1, padding=0)

conv(rand).shape


torch.Size([4, 1, 126, 126])

In [None]:
rand = torch.rand(3, 128, 128)

conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=7, stride=2, padding=1)
conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
output = conv2(conv1(rand))

print("Stacked Convolution Shape:", output.shape)

Stacked Convolution Shape: torch.Size([64, 62, 62])


## Custom Convolutional Neural Network

In [9]:
class MyConv2d(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        super(MyConv2d, self).__init__()
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.in_channels = in_channels
        self.out_channels = out_channels

        self.linear = nn.Linear(in_channels * kernel_size * kernel_size, out_channels)

    def forward(self, x):
        batch_size, channels, height, width = x.size()
        assert channels == self.in_channels

        # Calculate output dimensions once
        out_height = (height - self.kernel_size + 2 * self.padding) // self.stride + 1
        out_width = (width - self.kernel_size + 2 * self.padding) // self.stride + 1

        # Use unfold to extract patches
        patched = nn.functional.unfold(x, kernel_size=self.kernel_size, stride=self.stride, padding=self.padding)
        _, kernel_param_size, num_patches = patched.shape

        # Reshape for linear operation more efficiently
        patched = patched.transpose(1, 2).reshape(-1, self.in_channels * self.kernel_size * self.kernel_size)

        # Apply linear transformation
        convolved = self.linear(patched)

        # Reshape back to proper output format
        convolved = convolved.reshape(batch_size, num_patches, -1)
        convolved = convolved.transpose(1, 2).reshape(batch_size, self.out_channels, out_height, out_width)
        return convolved


rand = torch.randn(4, 3, 128, 128)
conv = MyConv2d(3, 64, 7)
conv(rand).shape

torch.Size([4, 64, 122, 122])

## Pooling

In [10]:
rand = torch.randn(4, 3, 128, 128)

avg_pool = nn.AvgPool2d(kernel_size=3)
avg_pool_2 = nn.AvgPool2d(kernel_size=3, stride=2)

out = avg_pool(rand)
out_2 = avg_pool_2(rand)

print(out.shape, out_2.shape)

torch.Size([4, 3, 42, 42]) torch.Size([4, 3, 63, 63])


In [11]:
adative_pooling = torch.nn.AdaptiveAvgPool2d((64, 64))
print(adative_pooling(rand).shape)

torch.Size([4, 3, 64, 64])


## Alexnet

In [13]:
class AlexNet(nn.Module):
    def __init__(self, num_classes=2, dropout=0.5):
        super(AlexNet, self).__init__()

        self.num_classes = num_classes
        self.dropout = dropout

        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(64),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(192),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(384),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(256),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(256),
        )

        self.avgpool = nn.AdaptiveAvgPool2d((2, 2))
        self.classifier = nn.Sequential(
            nn.Dropout(self.dropout),
            nn.Linear(256 * 2 * 2, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(self.dropout),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, self.num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x


model = AlexNet()

In [15]:
DEVICE = "mps:0"
path_to_data = "./catsanddogs/PetImages/"

normalizer = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

train_transform = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalizer,
    ]
)

dataset = ImageFolder(path_to_data, transform=train_transform)

train_samples, test_samples = int(0.9 * len(dataset)), len(dataset) - int(0.9 * len(dataset))
train_dataset, val_dataset = torch.utils.data.random_split(dataset, lengths=[train_samples, test_samples])

In [16]:
model = model.to(DEVICE)

EPOCHS = 5
BATCH_SIZE = 128
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [17]:
def train(model, epochs, optimizer, loss_fn, train_loader, val_loader):
    log_training = {
        "epoch": [],
        "training_loss": [],
        "validation_loss": [],
        "training_acc": [],
        "validation_acc": [],
    }

    for epoch in range(epochs):
        print(f"Epoch {epoch + 1}/{epochs}")

        train_losses, train_accuracies = [], []
        val_losses, val_accuracies = [], []

        model.train()
        for image, label in tqdm(train_loader, desc="Training"):
            image, label = image.to(DEVICE), label.to(DEVICE)

            output = model(image)
            loss = loss_fn(output, label)
            train_losses.append(loss.item())

            # Compute accuracy
            predictions = torch.argmax(output, axis=-1)
            accuracy = (predictions == label).float().mean()
            train_accuracies.append(accuracy.item())

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        model.eval()
        for image, label in tqdm(val_loader, desc="Validation"):
            image, label = image.to(DEVICE), label.to(DEVICE)

            with torch.no_grad():
                output = model(image)
                loss = loss_fn(output, label)
                val_losses.append(loss.item())

                # Compute accuracy
                predictions = torch.argmax(output, axis=-1)
                accuracy = (predictions == label).float().mean()
                val_accuracies.append(accuracy.item())

        training_loss_mean, training_acc_mean = np.mean(train_losses), np.mean(train_accuracies)
        valid_loss_mean, valid_acc_mean = np.mean(val_losses), np.mean(val_accuracies)

        log_training["epoch"].append(epoch)
        log_training["training_loss"].append(training_loss_mean)
        log_training["training_acc"].append(training_acc_mean)
        log_training["validation_loss"].append(valid_loss_mean)
        log_training["validation_acc"].append(valid_acc_mean)

        print("Training Loss:", training_loss_mean)
        print("Training Acc:", training_acc_mean)
        print("Validation Loss:", valid_loss_mean)
        print("Validation Acc:", valid_acc_mean)

    return log_training, model

In [18]:
random_init_log, model = train(model, EPOCHS, optimizer, loss_fn, train_loader, val_loader)

Epoch 1/5


Training:   0%|          | 0/176 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]



Training Loss: 0.6230423291298476
Training Acc: 0.6901651641184633
Validation Loss: 0.49977565109729766
Validation Acc: 0.7576056987047195
Epoch 2/5


Training:   0%|          | 0/176 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Training Loss: 0.4218318970365958
Training Acc: 0.8063652308827097
Validation Loss: 0.35902311056852343
Validation Acc: 0.8410845577716828
Epoch 3/5


Training:   0%|          | 0/176 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Training Loss: 0.3441081251949072
Training Acc: 0.84731200709939
Validation Loss: 0.3253169983625412
Validation Acc: 0.8520220577716827
Epoch 4/5


Training:   0%|          | 0/176 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Training Loss: 0.30165519438345323
Training Acc: 0.8697457675906745
Validation Loss: 0.2680665396153927
Validation Acc: 0.8786305159330368
Epoch 5/5


Training:   0%|          | 0/176 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Training Loss: 0.2594657489521937
Training Acc: 0.889051447876475
Validation Loss: 0.2322740450501442
Validation Acc: 0.8994944840669632
