In [1]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn
import torch
import time

datapath = '../data-unversions/p1ch7/'
cifar10 = datasets.CIFAR10(root= datapath, train=True, download = True, transform=transforms.ToTensor())

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Files already downloaded and verified


Pre-processing

In [2]:
imgs = torch.stack([img_t for img_t, _ in cifar10], dim=3)
mean = imgs.view(3, -1).mean(dim=1)
std = imgs.view(3, -1).std(dim=1)

normalize = transforms.Compose([transforms.ToTensor(),transforms.Normalize(mean,std)])

train_data = datasets.CIFAR10(root= datapath, train=True, download = True, transform=normalize)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)

valid_data = datasets.CIFAR10(root= datapath, train=False, download = True, transform=normalize)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=64, shuffle=True)

Files already downloaded and verified
Files already downloaded and verified


ResNet Training and Validation

In [3]:
class Block(nn.Module):
    def __init__(self, inputs, outputs, stride=1):
        super(Block, self).__init__()
        self.conv1 = nn.Conv2d(inputs, outputs, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(outputs)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(outputs, outputs, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(outputs)
        self.downsample = nn.Sequential()
        if stride != 1 or inputs != outputs:
            self.downsample = nn.Sequential(
                nn.Conv2d(inputs, outputs, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(outputs),
            )

    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += self.downsample(identity)
        out = self.relu(out)
        return out

class ResNet10(nn.Module):
    def __init__(self, block, layers):
        super(ResNet10, self).__init__()
        self.inputs = 16
        self.conv1 = nn.Conv2d(3, self.inputs, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(self.inputs)
        self.relu = nn.ReLU(inplace=True)
        self.blocks = self._make_layers(block, layers)
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(self.inputs, 10)

    def _make_layers(self, block, layers):
        layers_list = []
        for layer in layers:
            layers_list.append(block(self.inputs, self.inputs, stride=1))
        return nn.Sequential(*layers_list)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.blocks(x)
        x = self.avg_pool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

model = ResNet10(Block, [2, 2, 2]).to(device)  
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Training loop
epochs = 300
total_start_time = time.time()
for epoch in range(epochs):
    start_time = time.time()
    model.train()
    losses = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        losses += loss.item()
        
    end_time = time.time()
    training_time = end_time - start_time
    
    if epoch % 10 == 0:
        print(f'Epoch {epoch+1}/{epochs}, Loss: {losses/len(train_loader)}, Training Time: {training_time:.2f} seconds')

total_end_time = time.time()
total_training_time = total_end_time - total_start_time

# Validation
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in valid_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

valid_accuracy = correct / total
print(f'Validation Accuracy: {valid_accuracy * 100:.2f}%')

Epoch 1/300, Loss: 1.6481623974297663, Training Time: 11.69 seconds
Epoch 11/300, Loss: 0.8687803607310176, Training Time: 12.35 seconds
Epoch 21/300, Loss: 0.7566721458797869, Training Time: 11.75 seconds
Epoch 31/300, Loss: 0.695978500692131, Training Time: 11.86 seconds
Epoch 41/300, Loss: 0.6510343938837271, Training Time: 11.80 seconds
Epoch 51/300, Loss: 0.6135639289151067, Training Time: 11.79 seconds
Epoch 61/300, Loss: 0.5938663480379393, Training Time: 11.83 seconds
Epoch 71/300, Loss: 0.5699063487674879, Training Time: 11.85 seconds
Epoch 81/300, Loss: 0.55032846737472, Training Time: 11.75 seconds
Epoch 91/300, Loss: 0.5343578697546668, Training Time: 11.73 seconds
Epoch 101/300, Loss: 0.5197022117845848, Training Time: 11.71 seconds
Epoch 111/300, Loss: 0.5062748726905154, Training Time: 11.75 seconds
Epoch 121/300, Loss: 0.49242198169993623, Training Time: 11.77 seconds
Epoch 131/300, Loss: 0.48161202880656323, Training Time: 11.73 seconds
Epoch 141/300, Loss: 0.470085437

Weight Decay ResNet

In [4]:
class Block(nn.Module):
    def __init__(self, inputs, outputs, stride=1):
        super(Block, self).__init__()
        self.conv1 = nn.Conv2d(inputs, outputs, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(outputs)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(outputs, outputs, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(outputs)
        self.downsample = nn.Sequential()
        if stride != 1 or inputs != outputs:
            self.downsample = nn.Sequential(
                nn.Conv2d(inputs, outputs, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(outputs),
            )

    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += self.downsample(identity)
        out = self.relu(out)
        return out

class ResNet10(nn.Module):
    def __init__(self, block, layers):
        super(ResNet10, self).__init__()
        self.inputs = 16
        self.conv1 = nn.Conv2d(3, self.inputs, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(self.inputs)
        self.relu = nn.ReLU(inplace=True)
        self.blocks = self._make_layers(block, layers)
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(self.inputs, 10)

    def _make_layers(self, block, layers):
        layers_list = []
        for layer in layers:
            layers_list.append(block(self.inputs, self.inputs, stride=1))
        return nn.Sequential(*layers_list)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.blocks(x)
        x = self.avg_pool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

model = ResNet10(Block, [2, 2, 2]).to(device)  
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.001)

# Training loop
epochs = 300
total_start_time = time.time()
for epoch in range(epochs):
    start_time = time.time()
    model.train()
    losses = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        losses += loss.item()
        
    end_time = time.time()
    training_time = end_time - start_time
    
    if epoch % 10 == 0:
        print(f'Epoch {epoch+1}/{epochs}, Loss: {losses/len(train_loader)}, Training Time: {training_time:.2f} seconds')

total_end_time = time.time()
total_training_time = total_end_time - total_start_time

# Validation
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in valid_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

valid_accuracy = correct / total

print(f'Weight Decay Training Time: {total_training_time:.2f} seconds')
print(f'Weight Decay Final Training Loss: {losses / len(train_loader)}')
print(f'Weight Decay Final Validation Accuracy: {valid_accuracy * 100:.2f}%')

Epoch 1/300, Loss: 1.6594918667507903, Training Time: 11.84 seconds
Epoch 11/300, Loss: 0.9126898744679472, Training Time: 12.45 seconds
Epoch 21/300, Loss: 0.8407893780704654, Training Time: 11.46 seconds
Epoch 31/300, Loss: 0.8043648933663088, Training Time: 11.50 seconds
Epoch 41/300, Loss: 0.7768550750315951, Training Time: 11.53 seconds
Epoch 51/300, Loss: 0.76492087866949, Training Time: 11.52 seconds
Epoch 61/300, Loss: 0.7502386549397198, Training Time: 11.53 seconds
Epoch 71/300, Loss: 0.7431146243344182, Training Time: 11.47 seconds
Epoch 81/300, Loss: 0.7434568150955088, Training Time: 11.48 seconds
Epoch 91/300, Loss: 0.7335436174555507, Training Time: 11.52 seconds
Epoch 101/300, Loss: 0.728727473246167, Training Time: 11.55 seconds
Epoch 111/300, Loss: 0.7261305977697567, Training Time: 11.51 seconds
Epoch 121/300, Loss: 0.7175085701982079, Training Time: 11.48 seconds
Epoch 131/300, Loss: 0.7181389949968099, Training Time: 11.47 seconds
Epoch 141/300, Loss: 0.70667345604

Dropout with p=0.3 ResNet

In [5]:
class Block(nn.Module):
    def __init__(self, inputs, outputs, stride=1, dropout_prob=0.3):
        super(Block, self).__init__()
        self.conv1 = nn.Conv2d(inputs, outputs, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(outputs)
        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(p=dropout_prob)
        self.conv2 = nn.Conv2d(outputs, outputs, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(outputs)
        self.downsample = nn.Sequential()
        if stride != 1 or inputs != outputs:
            self.downsample = nn.Sequential(
                nn.Conv2d(inputs, outputs, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(outputs),
            )

    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += self.downsample(identity)
        out = self.relu(out)
        return out

# Initialize the model with Dropout
model = ResNet10(Block, [2, 2, 2]).to(device)  
criterion = nn.CrossEntropyLoss()
optimizer_dropout = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Training loop
epochs = 300
total_start_time = time.time()
for epoch in range(epochs):
    start_time = time.time()
    model.train()
    losses = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        losses += loss.item()
        
    end_time = time.time()
    training_time = end_time - start_time
    
    if epoch % 10 == 0:
        print(f'Epoch {epoch+1}/{epochs}, Loss: {losses/len(train_loader)}, Training Time: {training_time:.2f} seconds')

total_end_time = time.time()
total_training_time = total_end_time - total_start_time

# Validation
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in valid_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

valid_accuracy = correct / total

print(f'Dropout Training Time: {total_training_time:.2f} seconds')
print(f'Dropout Final Training Loss: {losses / len(train_loader)}')
print(f'Dropout Final Validation Accuracy: {valid_accuracy * 100:.2f}%')

Epoch 1/300, Loss: 2.599541852236404, Training Time: 11.56 seconds
Epoch 11/300, Loss: 2.599963320795532, Training Time: 11.97 seconds
Epoch 21/300, Loss: 2.599567667602578, Training Time: 14.24 seconds
Epoch 31/300, Loss: 2.599937772506948, Training Time: 14.24 seconds
Epoch 41/300, Loss: 2.5994041225184565, Training Time: 14.25 seconds
Epoch 51/300, Loss: 2.599738048775422, Training Time: 14.24 seconds
Epoch 61/300, Loss: 2.600074477207935, Training Time: 14.26 seconds
Epoch 71/300, Loss: 2.5998129808079558, Training Time: 14.10 seconds
Epoch 81/300, Loss: 2.5995759262758145, Training Time: 14.21 seconds
Epoch 91/300, Loss: 2.600000722938791, Training Time: 14.23 seconds
Epoch 101/300, Loss: 2.5996569949952537, Training Time: 14.24 seconds
Epoch 111/300, Loss: 2.5995613775594766, Training Time: 14.25 seconds
Epoch 121/300, Loss: 2.5996320329968583, Training Time: 14.26 seconds
Epoch 131/300, Loss: 2.5998469557603605, Training Time: 14.25 seconds
Epoch 141/300, Loss: 2.599879713009690

Batch Normalization ResNet

In [6]:
class Block(nn.Module):
    def __init__(self, inputs, outputs, stride=1):
        super(Block, self).__init__()
        self.conv1 = nn.Conv2d(inputs, outputs, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(outputs)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(outputs, outputs, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(outputs)
        self.downsample = nn.Sequential()
        if stride != 1 or inputs != outputs:
            self.downsample = nn.Sequential(
                nn.Conv2d(inputs, outputs, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(outputs),
            )

    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += self.downsample(identity)
        out = self.relu(out)
        return out

# Initialize the model with Batch Normalization
model = ResNet10(Block, [2, 2, 2]).to(device)  
criterion = nn.CrossEntropyLoss()
optimizer= optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Training loop
epochs = 300
total_start_time = time.time()
for epoch in range(epochs):
    start_time = time.time()
    model.train()
    losses = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        losses += loss.item()
        
    end_time = time.time()
    training_time = end_time - start_time
    
    if epoch % 10 == 0:
        print(f'Epoch {epoch+1}/{epochs}, Loss: {losses/len(train_loader)}, Training Time: {training_time:.2f} seconds')

total_end_time = time.time()
total_training_time = total_end_time - total_start_time

# Validation
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in valid_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

valid_accuracy = correct / total

print(f'Batch Normalization Training Time: {total_training_time:.2f} seconds')
print(f'Batch Normalization Final Training Loss: {losses / len(train_loader)}')
print(f'Batch Normalization Final Validation Accuracy: {valid_accuracy * 100:.2f}%')

Epoch 1/300, Loss: 1.6468335275759782, Training Time: 19.18 seconds
Epoch 11/300, Loss: 0.8638069114035658, Training Time: 19.01 seconds
Epoch 21/300, Loss: 0.7471922308282779, Training Time: 19.11 seconds
Epoch 31/300, Loss: 0.679054518146893, Training Time: 19.07 seconds
Epoch 41/300, Loss: 0.6385786998683535, Training Time: 19.11 seconds
Epoch 51/300, Loss: 0.6039083092413899, Training Time: 18.90 seconds
Epoch 61/300, Loss: 0.5771825313187011, Training Time: 19.13 seconds
Epoch 71/300, Loss: 0.5634580402423048, Training Time: 19.14 seconds
Epoch 81/300, Loss: 0.5423234517083448, Training Time: 19.00 seconds
Epoch 91/300, Loss: 0.527406616931986, Training Time: 19.21 seconds
Epoch 101/300, Loss: 0.5046544743849494, Training Time: 19.14 seconds
Epoch 111/300, Loss: 0.497510135211908, Training Time: 19.08 seconds
Epoch 121/300, Loss: 0.48644770363636336, Training Time: 19.10 seconds
Epoch 131/300, Loss: 0.4772624688227768, Training Time: 18.98 seconds
Epoch 141/300, Loss: 0.4678489052