# Training CNNs

In [None]:
## Imports
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [None]:
# Dataloaders
# Preprocessing applied to every image: convert to a tensor, then normalize
# each of the three channels with mean 0.5 and std 0.5, i.e. (x - 0.5) / 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

# torchvision has no `datasets.CIFAR` class; CIFAR10 is used here because the
# three-channel normalization above matches its RGB images.
# NOTE(review): CIFAR100 is the other candidate - confirm against the output
# size of `Net`.
# NOTE(review): root='/date' is an absolute path and looks like a typo for
# './data'; it is kept unchanged here - confirm the intended location.
trainset = torchvision.datasets.CIFAR10(root='/date', train=True,
                                        download=True, transform=transform)
# DataLoader lives in torch.utils.data, not torchvision.utils.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='/date', train=False,
                                       download=True, transform=transform)
# The test loader must wrap `testset`; wrapping `trainset` would make every
# "test" metric a training-set metric.
testloader = torch.utils.data.DataLoader(testset, batch_size=128,
                                         shuffle=False, num_workers=2)

In [None]:
# Optimizer

# Loss function: cross-entropy between the network outputs and the labels.
criterion = nn.CrossEntropyLoss()

# Model to train (`Net` is defined outside this cell).
net = Net()

# Adam over all of the model's parameters with a learning rate of 3e-4.
optimizer = optim.Adam(params=net.parameters(), lr=3e-4)

In [None]:
# Training epochs - Simple
# Ten passes over `trainloader`; each batch performs one optimizer update.

for epoch in range(10):
    for i, (inputs, labels) in enumerate(trainloader):
        # Clear gradients left over from the previous batch.
        optimizer.zero_grad()

        # Forward pass, then cross-entropy loss against the labels.
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # Backpropagate and apply the weight update.
        loss.backward()
        optimizer.step()

print("Finished Training")

In [None]:
# Evaluate results
# Runs the network over `testloader`, collecting the raw outputs of every
# batch in `predictions` and printing the overall accuracy.

correct, total = 0, 0
predictions = []
net.eval()

# Inference only: without no_grad() every forward pass records an autograd
# graph, and storing `outputs` in `predictions` would keep all of those graphs
# alive for the whole evaluation.
with torch.no_grad():
    for inputs, labels in testloader:
        outputs = net(inputs)
        # Index of the largest output along dim 1 is the predicted class.
        _, predicted = torch.max(outputs, 1)
        predictions.append(outputs)
        total += labels.size(0)  # number of samples in this batch
        correct += (predicted == labels).sum().item()
print("The testing set accuracy of the network is %d %%" % (100*correct/total))

In [None]:
# Training epochs - With Logging
# Trains for ten epochs and records, once per epoch:
#   train_losses  - mean training loss over the samples seen in the epoch
#   test_losses   - mean loss over the samples in `testloader`
#   train_correct - number of correctly classified training samples
#   test_correct  - number of correctly classified test samples
# All recorded values are plain Python numbers (no tensors are retained).

train_losses = []
test_losses = []
train_correct = []
test_correct = []
# Size of the training set, used only for the progress display.
train_total = len(trainloader.dataset)
for epoch in range(10):
    trn_corr = 0
    tst_corr = 0
    trn_seen = 0          # training samples processed so far this epoch
    trn_loss_sum = 0.0    # sum of per-sample training loss this epoch

    # An earlier cell may have left the model in eval mode; switch back so
    # mode-dependent layers (if the model has any) behave as in training.
    net.train()
    for i, (inputs, labels) in enumerate(trainloader):
        # Zero the parameter gradients
        optimizer.zero_grad()
        # Forward + backward + optimize (a single forward pass serves both
        # the loss and the accuracy tally)
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Tally the number of correct predictions
        batch_size = labels.size(0)
        predicted = torch.max(outputs.detach(), 1)[1]
        trn_corr += (predicted == labels).sum().item()
        trn_seen += batch_size
        # criterion returns the batch mean; weight it by the batch size so a
        # smaller final batch does not skew the epoch average.
        trn_loss_sum += loss.item() * batch_size

        # Print interim results
        if i % 100 == 0:
            print(f'epoch: {epoch:2}  batch: {i:4} [{trn_seen:6}/{train_total}]  '
                  f'loss: {loss.item():10.8f}  '
                  f'accuracy: {trn_corr*100/trn_seen:7.3f}%')

    train_losses.append(trn_loss_sum / max(trn_seen, 1))
    train_correct.append(trn_corr)

    # Run the testing batches after each epoch.
    net.eval()
    tst_seen = 0
    tst_loss_sum = 0.0
    with torch.no_grad():
        for X_test, y_test in testloader:

            # Apply the model
            y_val = net(X_test)

            # Tally the number of correct predictions
            predicted = torch.max(y_val, 1)[1]
            tst_corr += (predicted == y_test).sum().item()

            # Accumulate the loss over every test batch, not just the last.
            tst_seen += y_test.size(0)
            tst_loss_sum += criterion(y_val, y_test).item() * y_test.size(0)

    test_losses.append(tst_loss_sum / max(tst_seen, 1))
    test_correct.append(tst_corr)
print("Finished Training")

## Sidenote - How to work out the input dimensions of the Linear layers at the end. Pass a single sample through the convolution and pooling layers one step at a time and print the shape after each step:

In [None]:
# Define layers
# Two 3x3, stride-1 convolutions: 1 -> 6 channels and 6 -> 16 channels.
conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=1)
conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, stride=1)

# Grab the first record of `train_data` (an MNIST sample, per the original
# note) by stopping the iteration immediately.
for i, (X_train, y_train) in enumerate(train_data):
    break

# Reshape the sample to a batch of one single-channel 28x28 image.
x = X_train.view(1, 1, 28, 28)
print("Shape 1: ", x.shape)

# First convolution followed by a ReLU activation.
x = conv1(x)
x = F.relu(x)
print("Shape 2: ", x.shape)

# First pooling layer: 2x2 max pooling with stride 2.
x = F.max_pool2d(x, kernel_size=2, stride=2)
print("Pool shape: ", x.shape)