### Model 3

In [2]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import torchvision
import torchvision.transforms as transforms
from torchvision.utils import make_grid
from sklearn import metrics
import os 

# Use seaborn's "whitegrid" style as the default look for the plots in this notebook.
sns.set_style("whitegrid")

def accuracy(target, pred):
    """Return scikit-learn's accuracy score of `pred` against `target`.

    Both arguments are tensors; they are detached, moved to the CPU and
    converted to NumPy arrays before being handed to scikit-learn.
    """
    y_true = target.detach().cpu().numpy()
    y_pred = pred.detach().cpu().numpy()
    return metrics.accuracy_score(y_true, y_pred)

def compute_confusion_matrix(target, pred, normalize=None):
    """Return scikit-learn's confusion matrix of `pred` against `target`.

    Both tensors are detached, moved to the CPU and converted to NumPy arrays.
    `normalize` is forwarded unchanged to `metrics.confusion_matrix`.
    """
    y_true = target.detach().cpu().numpy()
    y_pred = pred.detach().cpu().numpy()
    return metrics.confusion_matrix(y_true, y_pred, normalize=normalize)

def show_image(img):
    """Display an image tensor with matplotlib.

    The tensor is detached and moved to the CPU, the (0.5, 0.5) normalization
    is undone, and the first axis is moved last (C x H x W -> H x W x C)
    before plotting on an 8 x 8 inch figure without axes.
    """
    # Undo the normalization (x - 0.5) / 0.5 and put the channel axis last.
    pixels = (img.detach().cpu() / 2 + 0.5).permute((1, 2, 0)).numpy()
    with sns.axes_style("white"):
        plt.figure(figsize=(8, 8))
        plt.imshow(pixels)
        plt.axis('off')
        plt.show()

In [3]:
# torchvision's CIFAR-10 dataset yields PIL images. `ToTensor` converts them to
# float tensors in the range [0, 1]; `Normalize` then rescales them to [-1, 1].
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # subtract 0.5 and divide by 0.5
    ]
)

batch_size = 64  # both for training and testing

# Load datasets
train_set = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, drop_last=False)
# Keep the final partial batch: evaluation should cover every test sample.
# With drop_last=True the last 10000 % 64 = 16 test images would be silently skipped.
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, drop_last=False)

# Map from class index to class name.
classes = {index: name for name, index in train_set.class_to_idx.items()}

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [4]:
def _describe_split(title, dataset, loader):
    """Print size, first-batch shape and label set of one split; return that first batch."""
    print(title)
    print("Number of points:", len(dataset))
    first_batch = next(iter(loader))
    print("Batch dimension (B x C x H x W):", first_batch[0].shape)
    labels = set(dataset.targets)
    print(f"Number of distinct labels: {len(labels)} (unique labels: {labels})")
    return first_batch


# `x, y` end up holding the first test batch, as before.
x, y = _describe_split("Training data", train_set, train_loader)
x, y = _describe_split("\nTest data", test_set, test_loader)

# Number of classes the model has to predict.
n_classes = len(set(test_set.targets))

Training data
Number of points: 50000
Batch dimension (B x C x H x W): torch.Size([64, 3, 32, 32])
Number of distinct labels: 10 (unique labels: {0, 1, 2, 3, 4, 5, 6, 7, 8, 9})

Test data
Number of points: 10000
Batch dimension (B x C x H x W): torch.Size([64, 3, 32, 32])
Number of distinct labels: 10 (unique labels: {0, 1, 2, 3, 4, 5, 6, 7, 8, 9})


In [17]:
# Architecture configuration.
#
# Every convolution below uses stride 1, so its output side length is
#     dim_out = dim_in - kernel + 1 + 2 * padding
# and every max pool divides the side length by the pool size.

# Original image
image_dim = 32
chanels_in = 3

# Kernel 1
chanels_out1 = 32
kernel1 = 3
padd1 = 1
image_dim1 = image_dim - kernel1 + 1 + 2*padd1

# Kernel 2
chanels_out2 = 64
kernel2 = 3
padd2 = 1
image_dim2 = image_dim1 - kernel2 + 1 + 2*padd2

# Max pool: 1st
max_pool1 = 2
image_dim2_p = image_dim2//max_pool1

# Kernel 3
chanels_out3 = 128
kernel3 = 3
padd3 = 1
image_dim3 = image_dim2_p - kernel3 + 1 + 2*padd3

# Kernel 4
chanels_out4 = 128
kernel4 = 3
padd4 = 1
image_dim4 = image_dim3 - kernel4 + 1 + 2*padd4

# Max pool: 2nd
max_pool2 = 2
image_dim4_p = image_dim4//max_pool2

# Kernel 5
chanels_out5 = 256
kernel5 = 3
padd5 = 1
image_dim5 = image_dim4_p - kernel5 + 1 + 2*padd5

# Kernel 6
chanels_out6 = 256
kernel6 = 3
padd6 = 1
image_dim6 = image_dim5 - kernel6 + 1 + 2*padd6

# Max pool: 3rd
max_pool3 = 2
image_dim6p = image_dim6//max_pool3


# Number of inputs to the dense network: channels x height x width of the last
# pooled feature map (256 * 4 * 4 = 4096 with the values above). Deriving it
# keeps it in sync when a kernel, padding, pool or channel count is changed.
n_features = chanels_out6 * image_dim6p**2
# Hidden layers
hidden_units = [1024, 512, 256]

Which type of kernel is it — Gaussian? Can we change the type?
What exactly are the output channels, and why are they different if we use the same kernel?
Can we use different kernels to filter the image and then combine the results in a flattened vector?

In [18]:
class PrintSize(nn.Module):
    """Pass-through module for `nn.Sequential` that reports the tensor size once.

    The first call to `forward` on an instance prints the input's size; every
    later call on that instance is silent. The input is always returned as-is.
    """

    # Class-level default; the first forward pass shadows it on the instance.
    first = True

    def forward(self, x):
        if not self.first:
            return x
        print(f"Size: {x.size()}")
        self.first = False
        return x

class Model2(nn.Module):
    """Convolutional classifier: six 3x3-style conv layers followed by a dense head.

    Layer sizes are read from the module-level configuration variables
    (`chanels_in`, `chanels_out1..6`, `kernel1..6`, `padd1..6`,
    `max_pool1..3`, `n_features` and `hidden_units`), which must be defined
    before the model is instantiated.

    The model returns raw logits. No softmax layer is applied because
    `nn.CrossEntropyLoss` expects unnormalized scores and applies
    log-softmax internally; call `.softmax(1)` on the output when
    probabilities are needed.

    Args:
        n_classes: Number of output classes (size of the logit vector).
    """

    def __init__(self, n_classes):
        super().__init__()
        self.num_classes = n_classes
        activation_relu = nn.ReLU

        self.conv_layer = nn.Sequential(

            # CONVOLUTION PART
            # Conv2d arguments below: (in_channels, out_channels, kernel_size), padding=...

            # Conv 1st kernel + batch norm + ReLU
            nn.Conv2d(chanels_in, chanels_out1, (kernel1, kernel1), padding=padd1),
            nn.BatchNorm2d(chanels_out1),
            activation_relu(),

            # Conv 2nd kernel + ReLU + max pool
            nn.Conv2d(chanels_out1, chanels_out2, (kernel2, kernel2), padding=padd2),
            activation_relu(),
            nn.MaxPool2d((max_pool1, max_pool1), stride=2),

            # Conv 3rd kernel + batch norm + ReLU
            nn.Conv2d(chanels_out2, chanels_out3, (kernel3, kernel3), padding=padd3),
            nn.BatchNorm2d(chanels_out3),
            activation_relu(),

            # Conv 4th kernel + ReLU + max pool + dropout
            nn.Conv2d(chanels_out3, chanels_out4, (kernel4, kernel4), padding=padd4),
            activation_relu(),
            nn.MaxPool2d((max_pool2, max_pool2), stride=2),
            nn.Dropout2d(p=0.05),

            # Conv 5th kernel + batch norm + ReLU
            nn.Conv2d(chanels_out4, chanels_out5, (kernel5, kernel5), padding=padd5),
            nn.BatchNorm2d(chanels_out5),
            activation_relu(),

            # Conv 6th kernel + ReLU + max pool
            nn.Conv2d(chanels_out5, chanels_out6, (kernel6, kernel6), padding=padd6),
            activation_relu(),
            nn.MaxPool2d((max_pool3, max_pool3), stride=2),

            # Flatten to (batch, n_features) for the dense layers
            nn.Flatten(),
        )

        self.dense_layer = nn.Sequential(

            # Input layer
            nn.Dropout(p=0.1),
            nn.Linear(n_features, hidden_units[0]),  # (in_features, out_features)
            activation_relu(),

            # Hidden layer 1
            nn.Linear(hidden_units[0], hidden_units[1]),
            activation_relu(),
            nn.Dropout(p=0.1),

            # Hidden layer 2
            nn.Linear(hidden_units[1], hidden_units[2]),
            activation_relu(),
            nn.Dropout(p=0.1),

            # Output layer: raw logits, one per class (no softmax, see class docstring)
            nn.Linear(hidden_units[2], self.num_classes),
        )

    def forward(self, x):
        """Return the class logits for a batch of images.

        Args:
            x: Input batch; the first conv layer expects `chanels_in` channels.

        Returns:
            Tensor with one row of `num_classes` logits per input image.
        """
        # Convolutional feature extractor (ends with a flatten).
        x = self.conv_layer(x)

        # Dense classification head.
        x = self.dense_layer(x)

        return x


model2 = Model2(n_classes)
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook also runs on machines without CUDA instead of failing on `.to(device)`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model2.to(device)
print(model2)

Model2(
  (conv_layer): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
    (9): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (10): ReLU()
    (11): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Dropout2d(p=0.05, inplace=False)
    (13): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (15): ReLU()
    (16): 

In [19]:
# Plain SGD over all model parameters with a fixed learning rate.
optimizer = optim.SGD(params=model2.parameters(), lr=0.001)
# Cross-entropy on the raw logits returned by the model.
loss_fn = nn.CrossEntropyLoss()

In [20]:
# Test the forward pass with dummy data.
# Run it in eval mode under no_grad so the random batch neither updates the
# BatchNorm running statistics nor goes through dropout, then restore the
# mode the model was in before the check.
was_training = model2.training
model2.eval()
with torch.no_grad():
    out = model2(torch.randn(2, 3, 32, 32, device=device))
model2.train(was_training)
print("Output shape:", out.size())
print(f"Output logits:\n{out.detach().cpu().numpy()}")
print(f"Output probabilities:\n{out.softmax(1).detach().cpu().numpy()}")

Output shape: torch.Size([2, 10])
Output logits:
[[-0.01234096  0.02201333 -0.00179604  0.02955786 -0.00875029 -0.04393495
  -0.00055776 -0.07772359  0.06876911 -0.0855868 ]
 [ 0.00725805  0.02275951  0.04069508  0.03896881 -0.01002445 -0.0467821
   0.00708807 -0.07434189  0.06938757 -0.06958433]]
Output probabilities:
[[0.0997679  0.10325492 0.10082552 0.10403688 0.10012679 0.09666512
  0.10095045 0.0934535  0.10819732 0.09272154]
 [0.10076876 0.10234299 0.10419513 0.10401542 0.09904218 0.09546772
  0.10075162 0.09287257 0.10722805 0.09331548]]


In [21]:
# NOTE: this assignment does not change the batch size used below; the data
# loaders were already built with their own batch size. It is kept only so
# that any later cell reading `batch_size` sees the same value as before.
batch_size = 60
num_epochs = 30
validation_every_steps = 500

step = 0
model2.train()

# One entry per validation point (every `validation_every_steps` optimizer steps).
train_accuracies = []
valid_accuracies = []

for epoch in range(num_epochs):

    # Per-batch training accuracies since the last validation point (or epoch start).
    train_accuracies_batches = []

    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)

        # Forward pass.
        output = model2(inputs)

        # Compute loss.
        loss = loss_fn(output, targets)

        # Clean up gradients from the model.
        optimizer.zero_grad()

        # Compute gradients based on the loss from the current batch (backpropagation).
        loss.backward()

        # Take one optimizer step using the gradients computed in the previous step.
        optimizer.step()

        # Increment step counter
        step += 1

        # Compute accuracy: the predicted class is the index of the largest logit.
        predictions = output.max(1)[1]
        train_accuracies_batches.append(accuracy(targets, predictions))

        if step % validation_every_steps == 0:

            # Append average training accuracy to list.
            train_accuracies.append(np.mean(train_accuracies_batches))

            train_accuracies_batches = []

            # Compute accuracy on the validation (test) set. Separate variable
            # names keep the training batch above from being overwritten.
            valid_accuracies_batches = []
            n_valid_samples = 0
            model2.eval()
            with torch.no_grad():
                for valid_inputs, valid_targets in test_loader:
                    valid_inputs = valid_inputs.to(device)
                    valid_targets = valid_targets.to(device)
                    valid_predictions = model2(valid_inputs).max(1)[1]

                    # Weight each batch accuracy by its size: batches may differ in length.
                    valid_accuracies_batches.append(
                        accuracy(valid_targets, valid_predictions) * len(valid_inputs)
                    )
                    n_valid_samples += len(valid_inputs)
            model2.train()

            # Normalize by the number of samples actually evaluated, so the
            # result is correct whether or not the loader drops its last
            # partial batch. max(..., 1) avoids a division by zero on an empty loader.
            valid_accuracies.append(np.sum(valid_accuracies_batches) / max(n_valid_samples, 1))

            print(f"Step {step:<5}   training accuracy: {train_accuracies[-1]}")
            print(f"             test accuracy: {valid_accuracies[-1]}")

print("Finished training.")

Step 500     training accuracy: 0.10546875
             test accuracy: 0.1064
Step 1000    training accuracy: 0.12793864678899083
             test accuracy: 0.1475
Step 1500    training accuracy: 0.1451875
             test accuracy: 0.1761
Step 2000    training accuracy: 0.17140911697247707
             test accuracy: 0.2119
Step 2500    training accuracy: 0.2234172077922078
             test accuracy: 0.2601
Step 3000    training accuracy: 0.23690625
             test accuracy: 0.2675
Step 3500    training accuracy: 0.2599546370967742
             test accuracy: 0.2988
Step 4000    training accuracy: 0.2857638888888889
             test accuracy: 0.2822
Step 4500    training accuracy: 0.29003125
             test accuracy: 0.3109
Step 5000    training accuracy: 0.3176237824675325
             test accuracy: 0.3443
Step 5500    training accuracy: 0.35877403846153844
             test accuracy: 0.3697
Step 6000    training accuracy: 0.3493125
             test accuracy: 0.3908
Step 65

In [10]:
# List the name of every CUDA device PyTorch can see (an empty list means no GPU).
print(
    "Available CUDA devices:",
    list(map(torch.cuda.get_device_name, range(torch.cuda.device_count()))),
)

Available CUDA devices: ['Tesla T4']
