# **CNN** *PyTorch*

In [13]:
import torch
import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torchvision.transforms import v2
from torch import optim
from torch import nn
from torch.utils.data import DataLoader
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torchmetrics
import torchvision

### CNN

In [14]:
class CNN(nn.Module):
    """Small convolutional classifier: two conv + max-pool stages and a linear head.

    Both convolutions use 3x3 kernels with stride 1 and padding 1, so they keep
    the spatial size; each 2x2/stride-2 max-pool halves it. The linear layer is
    sized for 16 * 7 * 7 features, which matches 28x28 inputs (28 -> 14 -> 7).

    Parameters:
        in_channels: int
            Number of channels of the input images (1 for grayscale, 3 for color).
        num_classes: int
            Number of output scores per sample. Defaults to 26 (letters A-Z).
    """

    def __init__(self, in_channels, num_classes=26):
        super().__init__()

        # Settings shared by both convolutions: 3x3 kernel moved one pixel at a
        # time, padded by one pixel so height and width are preserved.
        same_size_conv = dict(kernel_size=3, stride=1, padding=1)

        # First convolution: in_channels -> 8 feature maps.
        self.conv1 = nn.Conv2d(in_channels, 8, **same_size_conv)

        # Max pooling over 2x2 blocks with a step of 2; reused after each convolution.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Second convolution: its input channels match conv1's 8 output channels.
        self.conv2 = nn.Conv2d(8, 16, **same_size_conv)

        # Fully connected head: 16 channels * 7 * 7 positions -> num_classes scores.
        self.fc1 = nn.Linear(16 * 7 * 7, num_classes)

    def forward(self, x):
        """Return the raw class scores for a batch of images ``x``."""
        x = self.pool(F.relu(self.conv1(x)))  # conv -> ReLU -> pool (stage 1)
        x = self.pool(F.relu(self.conv2(x)))  # conv -> ReLU -> pool (stage 2)
        flat = x.reshape(x.shape[0], -1)      # keep the batch dimension, flatten the rest
        return self.fc1(flat)

### GPU OR CPU

In [15]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

### Hyperparameters

In [26]:
num_classes = 26       # Letters A-Z
input_size = 28 * 28   # 784 pixels per 28x28 image (not directly used by the CNN)
batch_size = 64        # Number of images processed per batch (earlier note: 312)
learning_rate = 1e-3   # Step size handed to the optimizer
num_epochs = 10        # Number of full passes over the training data (earlier note: 200)

### Load data

In [None]:
# Preprocessing pipeline applied to every image of both the training and the test split.
transform = v2.Compose([
    v2.ToImage(),                              # Convert the loaded image to a tensor image.
    v2.ToDtype(torch.uint8, scale=True),       # Make sure pixels are uint8 before resizing (dtype change, not a resize).
    v2.Resize(size=(28, 28), antialias=True),  # Resize to 28x28, the size the CNN's linear layer is built for.
    v2.ToDtype(torch.float32, scale=True),     # Convert to float32 and rescale the values for the network.
    # NOTE(review): EMNIST images are presumably single-channel; the 3-element
    # mean/std appears to broadcast them to the 3 channels the model is built
    # with (in_channels=3). Confirm before changing either side.
    v2.Normalize(mean=[0, 0, 0], std=[1, 1, 1]),
    # With p=1 the flip is applied to every image, i.e. it is a fixed mirroring
    # step rather than a random augmentation, and it affects the test split too.
    v2.RandomHorizontalFlip(p=1),
])

# EMNIST "letters" labels are shifted down by one via target_transform so they
# index the model's num_classes outputs starting at 0.
train_dataset = datasets.EMNIST(
    root='emnist-letters-train',
    split='letters',
    train=True,
    download=True,
    transform=transform,
    target_transform=lambda y: y - 1,
)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = datasets.EMNIST(
    root='emnist-letters-test',
    split='letters',
    train=False,
    download=True,
    transform=transform,
    target_transform=lambda y: y - 1,
)
# Evaluation only sums correct predictions, so the test order does not need shuffling.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [28]:
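# Unused alternative pipeline written with the torchvision.transforms (v1) API, kept for reference:
# transforms.Compose([transforms.Resize((28,28)), transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])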

### Initialize network

In [29]:
# Build the network for 3-channel inputs, then place its parameters on the chosen device.
model = CNN(in_channels=3, num_classes=num_classes)
model = model.to(device)

### Loss and optimizer

In [30]:
# Cross-entropy between the model's raw class scores and the integer labels.
criterion = nn.CrossEntropyLoss()

# Adam updates every parameter of the model with the configured learning rate.
trainable_parameters = model.parameters()
optimizer = optim.Adam(trainable_parameters, lr=learning_rate)

### Train network

In [31]:
# One epoch = one full pass over train_loader; tqdm renders a progress bar per epoch.
for epoch in range(num_epochs):
    print(f"Epoch [{epoch + 1}/{num_epochs}]")
    for batch_index, (data, targets) in enumerate(tqdm(train_loader)):
        # Put the batch on the same device as the model.
        data, targets = data.to(device), targets.to(device)

        # Clear gradients left over from the previous step before computing new ones.
        optimizer.zero_grad()

        # Forward pass and loss for this batch.
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward pass, then a parameter update.
        loss.backward()
        optimizer.step()

Epoch [1/10]


100%|██████████| 1950/1950 [00:34<00:00, 57.24it/s]


Epoch [2/10]


100%|██████████| 1950/1950 [00:33<00:00, 58.22it/s]


Epoch [3/10]


100%|██████████| 1950/1950 [00:33<00:00, 58.57it/s]


Epoch [4/10]


100%|██████████| 1950/1950 [00:33<00:00, 58.48it/s]


Epoch [5/10]


100%|██████████| 1950/1950 [00:33<00:00, 58.99it/s]


Epoch [6/10]


100%|██████████| 1950/1950 [00:33<00:00, 58.77it/s]


Epoch [7/10]


100%|██████████| 1950/1950 [00:33<00:00, 58.97it/s]


Epoch [8/10]


100%|██████████| 1950/1950 [00:33<00:00, 58.81it/s]


Epoch [9/10]


100%|██████████| 1950/1950 [00:32<00:00, 59.29it/s]


Epoch [10/10]


100%|██████████| 1950/1950 [00:33<00:00, 58.61it/s]


### Accuracy

In [32]:
def check_accuracy(loader, model):
    """
    Print the classification accuracy of the model on the given dataset loader.

    The model is switched to evaluation mode for the duration of the check and
    put back into whichever mode (train/eval) it was in before the call. Batches
    are moved to the device the model's parameters live on.

    Parameters:
        loader: DataLoader
            Yields ``(inputs, labels)`` batches. If ``loader.dataset`` has a
            ``train`` attribute it decides whether the header says "training"
            or "test" data; otherwise a generic header is printed.
        model: nn.Module
            The neural network model; must return one score per class along
            dimension 1.

    Returns:
        None. The result is printed. An empty loader prints a notice instead of
        dividing by zero.
    """
    # Not every dataset exposes `.train` (e.g. TensorDataset, Subset), so do not assume it.
    is_train_split = getattr(loader.dataset, "train", None)
    if is_train_split is None:
        print("Checking accuracy on data")
    elif is_train_split:
        print("Checking accuracy on training data")
    else:
        print("Checking accuracy on test data")

    # Follow the model's own device instead of relying on a notebook-level global.
    first_param = next(model.parameters(), None)
    eval_device = first_param.device if first_param is not None else torch.device("cpu")

    num_correct = 0
    num_samples = 0
    was_training = model.training  # remembered so the caller's mode is restored afterwards
    model.eval()  # Set the model to evaluation mode

    try:
        with torch.no_grad():  # Disable gradient calculation
            for x, y in loader:
                x = x.to(eval_device)
                y = y.to(eval_device)

                # Forward pass: compute the model output
                scores = model(x)
                _, predictions = scores.max(1)  # Index of the highest class score per sample
                num_correct += (predictions == y).sum()  # Count correct predictions
                num_samples += predictions.size(0)  # Count total samples
    finally:
        # Restore the previous mode even if evaluation raised.
        model.train(was_training)

    if num_samples == 0:
        print("No samples to evaluate")
        return

    # Calculate accuracy (num_correct is a 0-dim tensor after the loop; make it a plain int)
    num_correct = int(num_correct)
    accuracy = num_correct / num_samples * 100
    print(f"Got {num_correct}/{num_samples} with accuracy {accuracy:.2f}%")

# Final accuracy check: report on the training split first, then on the test split.
for loader in (train_loader, test_loader):
    check_accuracy(loader, model)

Checking accuracy on training data
Got 116214/124800 with accuracy 93.12%
Checking accuracy on test data
Got 19007/20800 with accuracy 91.38%
