# **CNN** *PyTorch*

***
### *Imports...*

In [109]:
import torch
import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torchvision.transforms import v2
from torch import optim
from torch import nn
from torch.utils.data import DataLoader
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torchmetrics
import torchvision
from torchsummary import summary

***
### *CNN...*

In [120]:

class CNN(nn.Module):
    """Small two-stage convolutional classifier.

    Architecture: conv(3x3, 8) -> ReLU -> max-pool(2) -> conv(3x3, 16) -> ReLU
    -> max-pool(2) -> flatten -> linear(num_classes).

    Args:
        in_channels: Number of channels of the input images
            (1 for grayscale, 3 for color).
        num_classes: Number of classes to predict (26 for the letters A-Z).
        input_size: ``(height, width)`` of the input images. The default
            ``(28, 28)`` yields the ``16 * 7 * 7`` classifier input that was
            previously hard-coded.

    Raises:
        ValueError: If ``input_size`` is so small that nothing is left after
            the two 2x2 poolings.
    """

    def __init__(self, in_channels, num_classes=26, input_size=(28, 28)):
        super().__init__()

        height, width = input_size
        # Both convolutions keep the spatial size (3x3 kernel, stride 1,
        # padding 1); each 2x2/stride-2 pooling halves it, rounding down.
        pooled_height = height // 2 // 2
        pooled_width = width // 2 // 2
        if pooled_height < 1 or pooled_width < 1:
            raise ValueError(
                f"input_size {tuple(input_size)} is too small: "
                "each side must be at least 4 pixels"
            )

        # First convolutional layer.
        self.conv1 = nn.Conv2d(
            in_channels=in_channels,
            out_channels=8,  # number of learned filters
            kernel_size=3,
            stride=1,        # the filter moves one pixel at a time
            padding=1)       # keeps the output the same size as the input

        # Pooling layer, shared by both stages: 2x2 windows, 2 pixels per step.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Second convolutional layer; in_channels matches conv1's out_channels.
        self.conv2 = nn.Conv2d(
            in_channels=8,
            out_channels=16,
            kernel_size=3,
            stride=1,
            padding=1)

        # Fully connected layer: flattened feature maps -> class scores.
        self.fc1 = nn.Linear(16 * pooled_height * pooled_width, num_classes)

    def forward(self, x):
        """Return raw class scores of shape ``(batch, num_classes)``."""
        x = F.relu(self.conv1(x))  # first convolution + ReLU
        x = self.pool(x)           # halve the spatial size
        x = F.relu(self.conv2(x))  # second convolution + ReLU
        x = self.pool(x)           # halve the spatial size again
        x = torch.flatten(x, 1)    # keep the batch dimension, flatten the rest
        x = self.fc1(x)            # class scores (no softmax applied)
        return x

# Throwaway instance used only to inspect the architecture (3 input channels, 26 classes).
cnn = CNN(in_channels=3, num_classes=26)

In [90]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [121]:
# torchsummary defaults to device="cuda" and then builds its probe input on the
# GPU whenever CUDA is available. `cnn` was created on the CPU, so pin the
# summary to the CPU to keep the model and the probe input on the same device.
summary(cnn, input_size=(3, 28, 28), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 28, 28]             224
         MaxPool2d-2            [-1, 8, 14, 14]               0
            Conv2d-3           [-1, 16, 14, 14]           1,168
         MaxPool2d-4             [-1, 16, 7, 7]               0
            Linear-5                   [-1, 26]          20,410
Total params: 21,802
Trainable params: 21,802
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.09
Params size (MB): 0.08
Estimated Total Size (MB): 0.18
----------------------------------------------------------------


***
### *Hyperparameters...*

In [126]:
num_classes = 26        # one class per letter A-Z
input_size = 28 * 28    # pixels in a 28x28 image (not directly used by the CNN)
batch_size = 100        # number of images processed per batch
learning_rate = 1e-3    # optimizer step size
num_epochs = 20         # number of full passes over the training data

***
### *Load data...*

In [102]:
# Preprocessing pipeline applied to every image, for training and test data alike.
transform = v2.Compose([
    v2.ToImage(),                              # convert the input to a tensor image
    v2.ToDtype(torch.uint8, scale=True),       # make sure pixels are uint8 before resizing
    v2.Resize(size=(28, 28), antialias=True),  # resize to 28x28
    v2.ToDtype(torch.float32, scale=True),     # convert to float32 (Normalize needs float input)
    # NOTE(review): mean 0 / std 1 leaves pixel values unchanged. EMNIST images
    # are presumably single-channel, so the 3-element mean/std likely broadcasts
    # them to the 3 channels the model is built for - confirm before changing.
    v2.Normalize(mean=[0, 0, 0], std=[1, 1, 1]),
    v2.RandomHorizontalFlip(p=1),              # p=1: every image is flipped, so this is deterministic
])


def zero_based_label(y):
    """Shift a label down by one (presumably EMNIST letters are labelled 1..26 - confirm).

    A named function instead of a lambda so both datasets share one definition
    and the datasets stay picklable for multi-worker data loading.
    """
    return y - 1


train_dataset = datasets.EMNIST(
    root='emnist-letters-train',
    split='letters',
    train=True,
    download=True,
    transform=transform,
    target_transform=zero_based_label,
)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = datasets.EMNIST(
    root='emnist-letters-test',
    split='letters',
    train=False,
    download=True,
    transform=transform,
    target_transform=zero_based_label,
)
# Evaluation does not depend on sample order, so the test data is not shuffled;
# this keeps evaluation runs reproducible.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [98]:
# Number of samples in the training and test splits, one per line.
print(len(train_dataset), len(test_dataset), sep="\n")

124800
20800


***
### *Initialize network with loss and optimizer...*

In [103]:
# Build the classifier for 3-channel inputs, then move its parameters to the selected device.
model = CNN(in_channels=3, num_classes=num_classes)
model = model.to(device)

In [104]:
# Cross-entropy loss on the raw class scores, minimized with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

### Train network

In [125]:
# One pass over train_loader per epoch; reports the mean batch loss and the
# accuracy measured on the batches as they were trained on.
for epoch in range(num_epochs):
    print(f"Epoch [{epoch + 1}/{num_epochs}]")

    # Per-epoch accumulators.
    running_loss, correct, total = 0.0, 0, 0

    for batch_index, (data, targets) in enumerate(tqdm(train_loader)):
        data, targets = data.to(device), targets.to(device)

        # Forward pass and loss.
        scores = model(data)
        loss = criterion(scores, targets)

        # Clear old gradients, backpropagate, update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Statistics: the predicted class is the index of the largest score per row.
        preds = scores.max(1).indices
        running_loss += loss.item()
        correct += preds.eq(targets).sum().item()
        total += targets.size(0)

    mean_loss = running_loss / len(train_loader)
    accuracy_pct = 100 * correct / total
    print(
        f"Epoch Loss: {mean_loss:.4f}, "
        f"Epoch Accuracy: {accuracy_pct:.2f} %\n"
    )

Epoch [1/10]


100%|██████████| 1950/1950 [00:36<00:00, 53.84it/s]


Epoch Loss: 0.1926, Epoch Accuracy: 93.40 %

Epoch [2/10]


100%|██████████| 1950/1950 [00:37<00:00, 52.36it/s]


Epoch Loss: 0.1884, Epoch Accuracy: 93.53 %

Epoch [3/10]


100%|██████████| 1950/1950 [00:36<00:00, 53.77it/s]


Epoch Loss: 0.1848, Epoch Accuracy: 93.64 %

Epoch [4/10]


100%|██████████| 1950/1950 [00:35<00:00, 54.47it/s]


Epoch Loss: 0.1813, Epoch Accuracy: 93.82 %

Epoch [5/10]


 67%|██████▋   | 1307/1950 [00:24<00:12, 52.53it/s]


KeyboardInterrupt: 

### Accuracy

In [96]:
def check_accuracy(loader, model):
    """Measure and print the classification accuracy of ``model`` on ``loader``.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches. If it exposes
            ``loader.dataset.train``, that flag only selects the printed
            heading (training vs. test data).
        model: ``nn.Module`` returning one row of class scores per input.

    Returns:
        The accuracy in percent as a float (``0.0`` when ``loader`` yields no
        samples).

    The model is switched to evaluation mode for the measurement and restored
    to whatever mode it was in before the call, even if evaluation fails.
    """
    # The `train` flag is optional: wrapped or custom datasets may not have it.
    is_train = getattr(getattr(loader, "dataset", None), "train", None)
    if is_train is None:
        print("Checking accuracy")
    elif is_train:
        print("Checking accuracy on training data")
    else:
        print("Checking accuracy on test data")

    # Evaluate on the device the model lives on instead of relying on a
    # notebook-level global; a parameter-free model falls back to the CPU.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    num_correct = 0
    num_samples = 0
    was_training = model.training
    model.eval()  # evaluation mode

    try:
        with torch.no_grad():  # no gradients needed for evaluation
            for x, y in loader:
                x = x.to(model_device)
                y = y.to(model_device)

                # Predicted class = index of the largest score in each row.
                scores = model(x)
                _, predictions = scores.max(1)
                # .item() keeps the counters plain Python ints.
                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        model.train(was_training)  # restore the caller's mode

    # Guard against an empty loader instead of dividing by zero.
    accuracy = num_correct / num_samples * 100 if num_samples else 0.0
    print(f"Got {num_correct}/{num_samples} with accuracy {accuracy:.2f}%")
    return accuracy

# Report the final accuracy, first on the training set and then on the test set.
for loader in (train_loader, test_loader):
    check_accuracy(loader, model)

Checking accuracy on training data
Got 116566/124800 with accuracy 93.40%
Checking accuracy on test data
Got 19031/20800 with accuracy 91.50%
