In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

In [2]:
class StackedShiftedConvolution(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, bias=True, mode='add'):
        super(StackedShiftedConvolution, self).__init__()
        self.stride = stride
        self.mode = mode
        self.convs = nn.ModuleList([nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation, bias=bias) for _ in range(stride)])
        
        if mode == 'concat':
            assert out_channels % stride == 0, "Out channels must be divisible by stride"
            self.out_channels = out_channels /  stride
        elif mode == 'add':
            self.out_channels = out_channels
        else:
            raise ValueError("Mode must be either 'add' or 'concatenate'")

    def forward(self, x):
        batch_size, _, height, width = x.size()
        outputs = []
        
        for i in range(self.stride):
            # Shift the input tensor
            shifted_x = x[:, :, :, i:]
            # Apply convolution
            output = self.convs[i](shifted_x)
            # Adjust output size to match the maximum output size
            max_output_size = (height - i + self.stride - 1) // self.stride
            if output.size(2) < max_output_size:
                output = F.pad(output, (0, 0, 0, max_output_size - output.size(2)))
            if output.size(3) < max_output_size:
                output = F.pad(output, (0, max_output_size - output.size(3), 0, 0))
            outputs.append(output)
        
        if self.mode == 'add':
            output = sum(outputs)
        elif self.mode == 'concatenate':
            output = torch.cat(outputs, dim=1)

        return output

In [3]:
# Transformations for the CIFAR-100 dataset
transform = transforms.Compose(
    [transforms.RandomHorizontalFlip(),
     transforms.RandomCrop(32, padding=4),
     transforms.ToTensor(),
     transforms.Normalize((0.5071, 0.4865, 0.4409), (0.2673, 0.2564, 0.2762))])

# CIFAR-100 dataset
train_dataset = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)

# Data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=100, shuffle=False, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
class ShiftedCNN(nn.Module):
    def __init__(self, num_classes=100):
        super(ShiftedCNN, self).__init__()
        self.layer1 = StackedShiftedConvolution(3, 64, kernel_size=3, stride=2, padding=1, mode='concat')
        self.layer2 = StackedShiftedConvolution(64, 128, kernel_size=3, stride=2, padding=1, mode='concat')
        self.layer3 = StackedShiftedConvolution(128, 256, kernel_size=3, stride=2, padding=1, mode='concat')
        self.layer4 = StackedShiftedConvolution(256, 512, kernel_size=3, stride=2, padding=1, mode='concat')
        self.fc1 = nn.Linear(512 * 2 * 2, 1024)
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        x = self.layer1(x)
        x = F.relu(x)
        x = self.layer2(x)
        x = F.relu(x)
        x = self.layer3(x)
        x = F.relu(x)
        x = self.layer4(x)
        x = F.relu(x)
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        return x


In [15]:
class ShiftedCNN2(nn.Module):
    def __init__(self, num_classes=100):
        super(ShiftedCNN2, self).__init__()
        self.layer1 = StackedShiftedConvolution(3, 64, kernel_size=3, stride=2, padding=1, mode='add')
        self.layer2 = StackedShiftedConvolution(64, 128, kernel_size=3, stride=2, padding=1, mode='add')
        self.layer3 = StackedShiftedConvolution(128, 256, kernel_size=3, stride=2, padding=1, mode='add')
        self.layer4 = StackedShiftedConvolution(256, 512, kernel_size=3, stride=2, padding=1, mode='add')
        self.fc1 = nn.Linear(512 * 2 * 2, 1024)
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        x = self.layer1(x)
        x = F.relu(x)
        x = self.layer2(x)
        x = F.relu(x)
        x = self.layer3(x)
        x = F.relu(x)
        x = self.layer4(x)
        x = F.relu(x)
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        return x


In [5]:
class StandardCNN1(nn.Module):
    def __init__(self, num_classes=100):
        super(StandardCNN1, self).__init__()
        self.layer1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.layer2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.layer3 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.layer4 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(512 * 32 * 32, 1024)
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        x = self.layer1(x)
        x = F.relu(x)
        x = self.layer2(x)
        x = F.relu(x)
        x = self.layer3(x)
        x = F.relu(x)
        x = self.layer4(x)
        x = F.relu(x)
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        return x


In [6]:
class StandardCNN2(nn.Module):
    def __init__(self, num_classes=100):
        super(StandardCNN2, self).__init__()
        self.layer1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1)
        self.layer2 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1)
        self.layer3 = nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)
        self.layer4 = nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1)
        self.fc1 = nn.Linear(512 * 2 * 2, 1024)
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        x = self.layer1(x)
        x = F.relu(x)
        x = self.layer2(x)
        x = F.relu(x)
        x = self.layer3(x)
        x = F.relu(x)
        x = self.layer4(x)
        x = F.relu(x)
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        return x


In [7]:
class StandardCNN3(nn.Module):
    def __init__(self, num_classes=100):
        super(StandardCNN3, self).__init__()
        self.layer1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.layer2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.layer3 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.layer4 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(512 * 2 * 2, 1024)
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        x = self.layer1(x)
        x = F.relu(x)
        x = self.pool(x)
        x = self.layer2(x)
        x = F.relu(x)
        x = self.pool(x)
        x = self.layer3(x)
        x = F.relu(x)
        x = self.pool(x)
        x = self.layer4(x)
        x = F.relu(x)
        x = self.pool(x)
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        return x

In [8]:
import torch.optim as optim

def train(model, device, train_loader, optimizer, criterion, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)}] Loss: {loss.item():.6f}')

def test(model, device, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    print(f'\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({accuracy:.2f}%)\n')
    return accuracy

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Define loss function and optimizers
criterion = nn.CrossEntropyLoss()



num_epochs = 20

In [9]:
%%time
torch.manual_seed(42)
model_shifted = ShiftedCNN().to(device)
print(sum(p.numel() for p in model_shifted.parameters()))
optimizer_shifted = optim.Adam(model_shifted.parameters(), lr=0.001)
for epoch in range(1, num_epochs + 1):
    print(f"Epoch {epoch} - Shifted CNN")
    train(model_shifted, device, train_loader, optimizer_shifted, criterion, epoch)
    test(model_shifted, device, test_loader, criterion)

5302628
Epoch 1 - Shifted CNN
Train Epoch: 1 [0/50000] Loss: 4.611930
Train Epoch: 1 [6400/50000] Loss: 4.191510
Train Epoch: 1 [12800/50000] Loss: 4.125809
Train Epoch: 1 [19200/50000] Loss: 4.105628
Train Epoch: 1 [25600/50000] Loss: 3.767456
Train Epoch: 1 [32000/50000] Loss: 3.806023
Train Epoch: 1 [38400/50000] Loss: 3.851313
Train Epoch: 1 [44800/50000] Loss: 3.494675

Test set: Average loss: 0.0361, Accuracy: 1341/10000 (13.41%)

Epoch 2 - Shifted CNN
Train Epoch: 2 [0/50000] Loss: 3.212019
Train Epoch: 2 [6400/50000] Loss: 3.358641
Train Epoch: 2 [12800/50000] Loss: 3.455882
Train Epoch: 2 [19200/50000] Loss: 3.343467
Train Epoch: 2 [25600/50000] Loss: 3.285715
Train Epoch: 2 [32000/50000] Loss: 3.251871
Train Epoch: 2 [38400/50000] Loss: 3.519493
Train Epoch: 2 [44800/50000] Loss: 3.409282

Test set: Average loss: 0.0328, Accuracy: 2045/10000 (20.45%)

Epoch 3 - Shifted CNN
Train Epoch: 3 [0/50000] Loss: 2.988768
Train Epoch: 3 [6400/50000] Loss: 3.184377
Train Epoch: 3 [12800

In [16]:
%%time
torch.manual_seed(42)
model_shifted2 = ShiftedCNN2().to(device)
print(sum(p.numel() for p in model_shifted2.parameters()))
optimizer_shifted2 = optim.Adam(model_shifted2.parameters(), lr=0.001)
for epoch in range(1, num_epochs + 1):
    print(f"Epoch {epoch} - Shifted CNN")
    train(model_shifted2, device, train_loader, optimizer_shifted2, criterion, epoch)
    test(model_shifted2, device, test_loader, criterion)

5302628
Epoch 1 - Shifted CNN
Train Epoch: 1 [0/50000] Loss: 4.608929
Train Epoch: 1 [6400/50000] Loss: 4.297118
Train Epoch: 1 [12800/50000] Loss: 4.065460
Train Epoch: 1 [19200/50000] Loss: 4.018024
Train Epoch: 1 [25600/50000] Loss: 3.716012
Train Epoch: 1 [32000/50000] Loss: 3.833528
Train Epoch: 1 [38400/50000] Loss: 3.760399
Train Epoch: 1 [44800/50000] Loss: 3.436708

Test set: Average loss: 0.0358, Accuracy: 1370/10000 (13.70%)

Epoch 2 - Shifted CNN
Train Epoch: 2 [0/50000] Loss: 3.158731
Train Epoch: 2 [6400/50000] Loss: 3.385376
Train Epoch: 2 [12800/50000] Loss: 3.669129
Train Epoch: 2 [19200/50000] Loss: 3.290378
Train Epoch: 2 [25600/50000] Loss: 3.264608
Train Epoch: 2 [32000/50000] Loss: 3.269560
Train Epoch: 2 [38400/50000] Loss: 3.494141
Train Epoch: 2 [44800/50000] Loss: 3.276348

Test set: Average loss: 0.0327, Accuracy: 1994/10000 (19.94%)

Epoch 3 - Shifted CNN
Train Epoch: 3 [0/50000] Loss: 2.917730
Train Epoch: 3 [6400/50000] Loss: 3.119437
Train Epoch: 3 [12800

In [10]:
%%time 
torch.manual_seed(42)
model_standard1 = StandardCNN1().to(device)
print(sum(p.numel() for p in model_standard1.parameters()))
optimizer_standard1 = optim.Adam(model_standard1.parameters(), lr=0.001) 
for epoch in range(1, num_epochs + 1):    
    print(f"Epoch {epoch} - Standard CNN")
    train(model_standard1, device, train_loader, optimizer_standard1, criterion, epoch)
    test(model_standard1, device, test_loader, criterion)

538525412
Epoch 1 - Standard CNN
Train Epoch: 1 [0/50000] Loss: 4.605332
Train Epoch: 1 [6400/50000] Loss: 4.505864
Train Epoch: 1 [12800/50000] Loss: 4.272562
Train Epoch: 1 [19200/50000] Loss: 4.202658
Train Epoch: 1 [25600/50000] Loss: 3.805302
Train Epoch: 1 [32000/50000] Loss: 4.182688
Train Epoch: 1 [38400/50000] Loss: 4.126405
Train Epoch: 1 [44800/50000] Loss: 4.162068

Test set: Average loss: 0.0380, Accuracy: 1085/10000 (10.85%)

Epoch 2 - Standard CNN
Train Epoch: 2 [0/50000] Loss: 3.874862
Train Epoch: 2 [6400/50000] Loss: 3.697786
Train Epoch: 2 [12800/50000] Loss: 3.466908
Train Epoch: 2 [19200/50000] Loss: 3.644466
Train Epoch: 2 [25600/50000] Loss: 3.554774
Train Epoch: 2 [32000/50000] Loss: 3.494505
Train Epoch: 2 [38400/50000] Loss: 3.435015
Train Epoch: 2 [44800/50000] Loss: 3.304770

Test set: Average loss: 0.0336, Accuracy: 1914/10000 (19.14%)

Epoch 3 - Standard CNN
Train Epoch: 3 [0/50000] Loss: 3.371648
Train Epoch: 3 [6400/50000] Loss: 3.294289
Train Epoch: 3 [

In [11]:
%%time 
torch.manual_seed(42)
model_standard2 = StandardCNN2().to(device)
print(sum(p.numel() for p in model_standard2.parameters()))
optimizer_standard2 = optim.Adam(model_standard2.parameters(), lr=0.001) 
for epoch in range(1, num_epochs + 1):    
    print(f"Epoch {epoch} - Standard CNN")
    train(model_standard2, device, train_loader, optimizer_standard2, criterion, epoch)
    test(model_standard2, device, test_loader, criterion)

3751652
Epoch 1 - Standard CNN
Train Epoch: 1 [0/50000] Loss: 4.607858
Train Epoch: 1 [6400/50000] Loss: 4.177043
Train Epoch: 1 [12800/50000] Loss: 4.210123
Train Epoch: 1 [19200/50000] Loss: 4.048520
Train Epoch: 1 [25600/50000] Loss: 3.880157
Train Epoch: 1 [32000/50000] Loss: 4.041887
Train Epoch: 1 [38400/50000] Loss: 3.614506
Train Epoch: 1 [44800/50000] Loss: 3.691150

Test set: Average loss: 0.0355, Accuracy: 1522/10000 (15.22%)

Epoch 2 - Standard CNN
Train Epoch: 2 [0/50000] Loss: 3.498392
Train Epoch: 2 [6400/50000] Loss: 3.433415
Train Epoch: 2 [12800/50000] Loss: 3.584297
Train Epoch: 2 [19200/50000] Loss: 3.373950
Train Epoch: 2 [25600/50000] Loss: 3.451601
Train Epoch: 2 [32000/50000] Loss: 3.196486
Train Epoch: 2 [38400/50000] Loss: 2.961136
Train Epoch: 2 [44800/50000] Loss: 3.101888

Test set: Average loss: 0.0320, Accuracy: 2181/10000 (21.81%)

Epoch 3 - Standard CNN
Train Epoch: 3 [0/50000] Loss: 3.091313
Train Epoch: 3 [6400/50000] Loss: 2.946694
Train Epoch: 3 [12

In [12]:
%%time 
torch.manual_seed(42)
model_standard3 = StandardCNN3().to(device)
print(sum(p.numel() for p in model_standard3.parameters()))
optimizer_standard3 = optim.Adam(model_standard3.parameters(), lr=0.001) 
for epoch in range(1, num_epochs + 1):    
    print(f"Epoch {epoch} - Standard CNN")
    train(model_standard3, device, train_loader, optimizer_standard3, criterion, epoch)
    test(model_standard3, device, test_loader, criterion)

3751652
Epoch 1 - Standard CNN
Train Epoch: 1 [0/50000] Loss: 4.608216
Train Epoch: 1 [6400/50000] Loss: 4.278872
Train Epoch: 1 [12800/50000] Loss: 4.196471
Train Epoch: 1 [19200/50000] Loss: 3.966461
Train Epoch: 1 [25600/50000] Loss: 3.859695
Train Epoch: 1 [32000/50000] Loss: 3.924137
Train Epoch: 1 [38400/50000] Loss: 3.576105
Train Epoch: 1 [44800/50000] Loss: 3.638292

Test set: Average loss: 0.0343, Accuracy: 1714/10000 (17.14%)

Epoch 2 - Standard CNN
Train Epoch: 2 [0/50000] Loss: 3.354309
Train Epoch: 2 [6400/50000] Loss: 3.363702
Train Epoch: 2 [12800/50000] Loss: 3.494507
Train Epoch: 2 [19200/50000] Loss: 3.099123
Train Epoch: 2 [25600/50000] Loss: 3.321603
Train Epoch: 2 [32000/50000] Loss: 3.043731
Train Epoch: 2 [38400/50000] Loss: 2.870404
Train Epoch: 2 [44800/50000] Loss: 2.888300

Test set: Average loss: 0.0302, Accuracy: 2530/10000 (25.30%)

Epoch 3 - Standard CNN
Train Epoch: 3 [0/50000] Loss: 3.016620
Train Epoch: 3 [6400/50000] Loss: 2.676237
Train Epoch: 3 [12