# Model 2 CIFAR 10  Database 

In [15]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import torchvision
import torchvision.transforms as transforms
from torchvision.utils import make_grid
from sklearn import metrics
import os 

sns.set_style("whitegrid")

def accuracy(target, pred):
    return metrics.accuracy_score(target.detach().cpu().numpy(), pred.detach().cpu().numpy())

def compute_confusion_matrix(target, pred, normalize=None):
    return metrics.confusion_matrix(
        target.detach().cpu().numpy(), 
        pred.detach().cpu().numpy(),
        normalize=normalize
    )

def show_image(img):
    img = img.detach().cpu()
    img = img / 2 + 0.5   # unnormalize
    with sns.axes_style("white"):
        plt.figure(figsize=(8, 8))
        plt.imshow(img.permute((1, 2, 0)).numpy())
        plt.axis('off')
        plt.show()

### Is the "transforms.Normalize" the normalization of the data ?

In [16]:
# The output of torchvision datasets are PIL images in the range [0, 1]. 
# We transform them to PyTorch tensors and rescale them to be in the range [-1, 1].
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # subtract 0.5 and divide by 0.5
    ]
)

batch_size = 64  # both for training and testing

# Load datasets
train_set = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, drop_last=False)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, drop_last=True)

# Map from class index to class name.
classes = {index: name for name, index in train_set.class_to_idx.items()}

Files already downloaded and verified
Files already downloaded and verified


In [17]:
print("Training data")
print("Number of points:", len(train_set))
x, y = next(iter(train_loader))
print("Batch dimension (B x C x H x W):", x.shape)
print(f"Number of distinct labels: {len(set(train_set.targets))} (unique labels: {set(train_set.targets)})")

print("\nTest data")
print("Number of points:", len(test_set))
x, y = next(iter(test_loader))
print("Batch dimension (B x C x H x W):", x.shape)
print(f"Number of distinct labels: {len(set(test_set.targets))} (unique labels: {set(test_set.targets)})")

n_classes = len(set(test_set.targets))

Training data
Number of points: 50000
Batch dimension (B x C x H x W): torch.Size([64, 3, 32, 32])
Number of distinct labels: 10 (unique labels: {0, 1, 2, 3, 4, 5, 6, 7, 8, 9})

Test data
Number of points: 10000
Batch dimension (B x C x H x W): torch.Size([64, 3, 32, 32])
Number of distinct labels: 10 (unique labels: {0, 1, 2, 3, 4, 5, 6, 7, 8, 9})


## Try without shrinking the image, apply Kernels with the adecuate padding 

In [18]:
# Original Image 
image_dim = 32
chanels_in = 3

# After applying 1st Kernel
chanels_out1 = 32
kernel1 = 9
padd1 = 4
image_dim1 = image_dim - kernel1 + 1 + 2*padd1

# Max pool: 1st
max_pool1 = 2
image_dim1_p = image_dim1//max_pool1

# After applying 2nd Kernel
chanels_out2 = 64
kernel2 = 7
padd2 = 3
image_dim2 = image_dim1_p - kernel2 + 1 + 2*padd2

# Max pool: 2nd
max_pool2 = 2
image_dim2_p =  image_dim2//max_pool2

# After applying 3th Kernel
chanels_out3 = 128
kernel3 = 5
padd3 = 2
image_dim3 = image_dim2_p - kernel3 + 1 + 2*padd3

# kernel 4
chanels_out4 = 256
kernel4 = 3 
padd4 = 1
image_dim4 = image_dim3 - kernel4 + 1 + 2*padd4 

# kernel 5
chanels_out5 = 256
kernel5 = 3 
padd5 = 1
image_dim5 = image_dim3 - kernel4 + 1 + 2*padd4 

# Max pool: 3th
max_pool3 = 2
image_dim5_p = image_dim4//max_pool2

# Calculate the number of neurons 
n_features = chanels_out4*(image_dim5_p)**2  # n_features it's the input in for the neral network
hidden_units = [800, 400]
final_neurons = 200

image_dim1, image_dim1_p, image_dim2, image_dim2_p, image_dim3, image_dim4, image_dim5_p, n_features



(32, 16, 16, 8, 8, 8, 4, 4096)

In [19]:
class PrintSize(nn.Module):
    """Utility module to print current shape of a Tensor in Sequential, only at the first pass."""
    
    first = True
    
    def forward(self, x):
        if self.first:
            print(f"Size: {x.size()}")
            self.first = False
        return x

class Model2(nn.Module):

    def __init__(self, n_classes):
        super().__init__()
        self.num_classes = n_classes
        activation_relu = nn.ReLU
        activation_elu = nn.ELU
        activation_sig = nn.Sigmoid
        activation_sof = nn.Softmax

        self.net = nn.Sequential(

            # CONVOLUTION PART
            
            # Conv 1st Kernel + ReLU 
            nn.Conv2d(chanels_in, chanels_out1, (kernel1, kernel1), padding=padd1),  # (num_chanels, out_chanels, kernel_size, stride, padding)
            activation_relu(),
            # Max Pooling + Conv 2nd Kernel + Relu
            nn.MaxPool2d((max_pool1,max_pool1)),
            nn.Conv2d(chanels_out1, chanels_out2, (kernel2, kernel2), padding=padd2),  # (num_chanels, out_chanels, kernel_size, stride, padding)
            activation_relu(),
            # Max Pooling + Conv 3th Kernel + Relu
            nn.MaxPool2d((max_pool2, max_pool2)),
            nn.Conv2d(chanels_out2, chanels_out3, (kernel3, kernel3), padding=padd3),  # (num_chanels, out_chanels, kernel_size, stride, padding)
            activation_relu(),
            # Conv 4th Kenrel + Relu + Dropout
            nn.Conv2d(chanels_out3, chanels_out4, (kernel4, kernel4), padding=padd4),
            activation_relu(),
            nn.Dropout(p=0.2, inplace=False),
            # Conv 5th Kenrel + Relu
            nn.Conv2d(chanels_out4, chanels_out5, (kernel5,kernel5), padding=padd5),
            activation_relu(),
            # Max Pool and Flatten 
            nn.MaxPool2d((max_pool3, max_pool3)),  # Specify the size of the Kernel of the max pooling operation 
            nn.Flatten(),  # from (1, channels, height, width) to (1, channels * height * width)
          
            # DENSE NEURAL NETWORK

            # Input layer  
            nn.Linear(n_features, hidden_units[0]),  # (in_features, out_features)
            activation_relu(),
            # Hidden layer: 1
            nn.Linear(hidden_units[0], hidden_units[1]),
            activation_relu(),
            nn.Dropout(p=0.2, inplace=False),
            # # Hidden layer: 2
            nn.Linear(hidden_units[1], final_neurons),
            activation_relu(),
            nn.Dropout(p=0.3, inplace=False),
            # Output Layer
            nn.Linear(final_neurons, self.num_classes),
                # activation_sof(dim=1)   # Why we don't have to put the softmax? it's already defined ? 
        )

    def forward(self, x):
        return self.net(x)


model2 = Model2(n_classes)
device = torch.device('cpu')  # use cuda or cpu
model2.to(device)
print(model2)

Model2(
  (net): Sequential(
    (0): Conv2d(3, 32, kernel_size=(9, 9), stride=(1, 1), padding=(4, 4))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (7): ReLU()
    (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Dropout(p=0.2, inplace=False)
    (11): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU()
    (13): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (14): Flatten(start_dim=1, end_dim=-1)
    (15): Linear(in_features=4096, out_features=800, bias=True)
    (16): ReLU()
    (17): Linear(in_features=800, out_features=400, bias=True)


In [20]:
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model2.parameters(), lr=1e-3)

In [21]:
# Test the forward pass with dummy data
out = model2(torch.randn(2, 3, 32, 32, device=device))
print("Output shape:", out.size())
print(f"Output logits:\n{out.detach().cpu().numpy()}")
print(f"Output probabilities:\n{out.softmax(1).detach().cpu().numpy()}")

Output shape: torch.Size([2, 10])
Output logits:
[[ 0.01902679 -0.03527096  0.02573777  0.05851943  0.06461272  0.04608821
  -0.04030903  0.02836415  0.02209499 -0.05110846]
 [ 0.02148146 -0.05292613  0.03364832  0.05280799  0.06132663  0.04973081
  -0.0392937   0.0365147   0.02143052 -0.04793116]]
Output probabilities:
[[0.1004485  0.0951398  0.10112488 0.10449485 0.10513351 0.10320389
  0.09466168 0.10139082 0.10075717 0.09364489]
 [0.10069752 0.09347682 0.10193018 0.10390195 0.10479084 0.10358272
  0.09475987 0.10222276 0.10069239 0.09394491]]


In [22]:
batch_size = 64
num_epochs = 10
validation_every_steps = 500

step = 0
model2.train()

train_accuracies = []
valid_accuracies = []
        
for epoch in range(num_epochs):
    
    train_accuracies_batches = []
    
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        
        # Forward pass, compute gradients, perform one training step.
        
        # Forward pass.
        output = model2(inputs)
        
        # Compute loss.
        loss = loss_fn(output, targets)
        
        # Clean up gradients from the model.
        optimizer.zero_grad()
        
        # Compute gradients based on the loss from the current batch (backpropagation).
        loss.backward()
        
        # Take one optimizer step using the gradients computed in the previous step.
        optimizer.step()
        
        # Increment step counter
        step += 1
        
        # Compute accuracy.
        predictions = output.max(1)[1]
        train_accuracies_batches.append(accuracy(targets, predictions))
        
        if step % validation_every_steps == 0:
            
            # Append average training accuracy to list.
            train_accuracies.append(np.mean(train_accuracies_batches))
            
            train_accuracies_batches = []
        
            # Compute accuracies on validation set.
            valid_accuracies_batches = []
            with torch.no_grad():
                model2.eval()
                for inputs, targets in test_loader:
                    inputs, targets = inputs.to(device), targets.to(device)
                    output = model2(inputs)
                    loss = loss_fn(output, targets)

                    predictions = output.max(1)[1]

                    # Multiply by len(x) because the final batch of DataLoader may be smaller (drop_last=False).
                    valid_accuracies_batches.append(accuracy(targets, predictions) * len(inputs))

                model2.train()
                
            # Append average validation accuracy to list.
            valid_accuracies.append(np.sum(valid_accuracies_batches) / len(test_set))
     
            print(f"Step {step:<5}   training accuracy: {train_accuracies[-1]}")
            print(f"             test accuracy: {valid_accuracies[-1]}")

print("Finished training.")

Step 500     training accuracy: 0.2351875
             test accuracy: 0.3344
Step 1000    training accuracy: 0.4038130733944954
             test accuracy: 0.3989
Step 1500    training accuracy: 0.46309375
             test accuracy: 0.496
Step 2000    training accuracy: 0.5132955848623854
             test accuracy: 0.5019
Step 2500    training accuracy: 0.554890422077922
             test accuracy: 0.5388
Step 3000    training accuracy: 0.5671875
             test accuracy: 0.573
Step 3500    training accuracy: 0.599588373655914
             test accuracy: 0.6004
Step 4000    training accuracy: 0.6368055555555555
             test accuracy: 0.6052
Step 4500    training accuracy: 0.63075
             test accuracy: 0.6013
Step 5000    training accuracy: 0.6592938311688312
             test accuracy: 0.619
Step 5500    training accuracy: 0.6941105769230769
             test accuracy: 0.6384
Step 6000    training accuracy: 0.6811875
             test accuracy: 0.6308
Step 6500    traini