# Convolutional Neural Network

Create a CNN that can identify family members once it has been trained

In [45]:
import torch
from torch.utils.data import Dataset
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
import numpy as np
import torch.nn as nn
import matplotlib.pyplot as plt
from pathlib import Path
from PIL import Image
import sys
import os

sys.path.append(os.path.relpath("../"))
from helper_functions import *

In [279]:
# Hyperparameters for the custom CNN model that identifies images.
# The model expects data with dimensions [batch_size, color channel, pic_height, pic_width].
KERNEL_SIZE = 3  # conv2d kernel size
POOL_KERNEL = 2  # maxpool2d kernel size
STRIDE = 1  # conv2d stride used in the size calculation
PADDING = 2  # conv2d padding

# Size check for a 28-pixel input, 2 conv2d layers per stage and 2 maxpool2d stages.
# NOTE(review): layer_dim_calc is not defined in this cell — presumably it comes from the
# helper_functions star import or from a cell that was run earlier; confirm.
layer_dim_calc(28, 2, 2, KERNEL_SIZE, POOL_KERNEL, STRIDE, PADDING)

class CNN_Model(nn.Module):
    """Two-stage convolutional classifier.

    Each stage is Conv2d -> ReLU -> Conv2d -> ReLU -> MaxPool2d. The output of
    the second stage is flattened and mapped to ``output`` values by a single
    Linear layer. Input is expected as
    [batch_size, color channel, pic_height, pic_width] with square pictures.

    Args:
        input: Number of channels of the incoming pictures.
        hidden_units: Number of channels produced by every conv layer.
        output: Number of values produced per picture (one per class).
        input_dim: Side length, in pixels, of the pictures the model will be
            fed. It is only used to size the Linear layer. Defaults to 28.

    The layer hyperparameters come from the module-level constants
    KERNEL_SIZE, POOL_KERNEL, STRIDE and PADDING.
    """

    def __init__(self, input, hidden_units, output, input_dim=28):
        super().__init__()
        self.conv1 = self._conv_stage(input, hidden_units)
        self.conv2 = self._conv_stage(hidden_units, hidden_units)

        # The Linear layer must match the picture size that is left after the
        # conv2d and maxpool2d layers: 2 conv layers per stage, 2 stages.
        feature_side = layer_dim_calc(input_dim,
                                      2,
                                      2,
                                      KERNEL_SIZE,
                                      POOL_KERNEL,
                                      STRIDE,
                                      PADDING)
        self.flatten = nn.Sequential(
            nn.Flatten(),
            nn.Linear(hidden_units * feature_side ** 2, output),
        )

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one Conv2d -> ReLU -> Conv2d -> ReLU -> MaxPool2d stage."""
        # stride is passed explicitly so the layers always use the same value
        # that the size calculation for the Linear layer is given.
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels,
                      out_channels=out_channels,
                      kernel_size=KERNEL_SIZE,
                      stride=STRIDE,
                      padding=PADDING),
            nn.ReLU(),
            nn.Conv2d(in_channels=out_channels,
                      out_channels=out_channels,
                      kernel_size=KERNEL_SIZE,
                      stride=STRIDE,
                      padding=PADDING),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=POOL_KERNEL),
        )

    def forward(self, x):
        """Run ``x`` through both conv stages and the flatten + Linear head."""
        x = self.conv1(x)
        x = self.conv2(x)
        return self.flatten(x)
    
    
# Custom dataset class that inherits from torch.utils.data.Dataset
class FamilyData(Dataset):
    """Image dataset read from a folder that holds one sub-folder per class.

    Args:
        root_dir: Folder whose sub-folders are the classes.
        transform: Optional callable applied to every loaded PIL image.

    Attributes are documented inline in ``__init__``.
    """

    # File extensions (lower case) that are treated as images.
    IMAGE_EXTENSIONS = ('.jpg', '.png')

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        # Only sub-folders are classes. Sorting makes the order reproducible,
        # because os.listdir returns entries in arbitrary order.
        self.classes = sorted(
            entry for entry in os.listdir(root_dir)
            if os.path.isdir(os.path.join(root_dir, entry))
        )
        # Data format (image_path, class_name)
        self.image_paths = []

        for class_name in self.classes:
            class_folder = os.path.join(root_dir, class_name)
            for img_name in sorted(os.listdir(class_folder)):
                # Compare in lower case so files such as "photo.JPG" are kept.
                if img_name.lower().endswith(self.IMAGE_EXTENSIONS):
                    img_path = os.path.join(class_folder, img_name)
                    self.image_paths.append((img_path, class_name))

    def __len__(self):
        """Return the number of image files that were found."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, class_name)`` for the sample at position ``idx``.

        Without a transform the image is an RGB PIL image. With a transform
        the transformed image is additionally permuted with ``(1, 2, 0)``.
        """
        img_path, label = self.image_paths[idx]
        # convert() loads the pixel data, so the file can be closed right away.
        with Image.open(img_path) as source:
            image = source.convert('RGB')
        if self.transform:
            image = self.transform(image)
            # rearrange dimensions to be compatible with plt.imshow()
            # NOTE(review): this assumes the transform returns a 3-dim tensor
            # and moves the first axis last — a model that expects
            # channel-first data would need it permuted back; confirm usage.
            image = image.permute(1, 2, 0)
        return image, label

Resulting dimension prior to flatten layer is 10


In [191]:
def train_model(model: nn.Module, dataset_loader: torch.utils.data.DataLoader, lr=0.1):
    """Train ``model`` for one pass over ``dataset_loader``.

    Uses cross-entropy loss and plain SGD, and prints the loss of the current
    batch roughly five times per pass.

    Args:
        model: Network to train in place.
        dataset_loader: Iterable of ``(data, label)`` batches that also
            supports ``len()``.
        lr: Learning rate handed to the SGD optimizer.
    """
    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    # max(1, ...) keeps the modulo below valid when there are fewer than 5 batches.
    report_every = max(1, len(dataset_loader) // 5)

    model.train()
    # Train on the batch the loop yields, so every batch is visited exactly once.
    for batch_idx, (data, label) in enumerate(dataset_loader):
        logits = model(data)
        loss = loss_fn(logits, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (batch_idx + 1) % report_every == 0:
            print(f"Ran {batch_idx+1} loops | loss: {loss.item()}")

def test_model(model: nn.Module, dataset_loader: torch.utils.data.DataLoader):
    """Evaluate ``model`` on ``dataset_loader`` and print loss and accuracy.

    Both metrics are averaged over samples, so a smaller final batch does not
    get the same weight as a full batch.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        dataset_loader: Iterable of ``(data, labels)`` batches.

    Raises:
        ValueError: If ``dataset_loader`` yields no samples.
    """
    loss_fn = torch.nn.CrossEntropyLoss()
    loss_sum, correct, seen = 0.0, 0, 0

    model.eval()
    with torch.inference_mode():
        for data, labels in dataset_loader:
            logits = model(data)
            batch_size = labels.numel()
            # CrossEntropyLoss returns the batch mean; scale it back to a sum.
            loss_sum += loss_fn(logits, labels).item() * batch_size
            correct += (logits.argmax(1) == labels.reshape(-1)).sum().item()
            seen += batch_size

    if seen == 0:
        raise ValueError("dataset_loader yielded no samples to evaluate")

    test_loss = loss_sum / seen
    test_accur = 100 * correct / seen
    print(f"Test loss: {test_loss:.5f} | Test accuracy: {test_accur:.2f}%\n")

            # if accuracy * 100 > 98:
            #     FOLDER_NAME = "../state_dict"
            #     MODEL_NAME = "Four_Classes_Multiclassfication_Model.pt"
            #     torch.save(model.state_dict(), f"{FOLDER_NAME}/{MODEL_NAME}")

In [150]:
# Load image datasets
# FamilyData treats every sub-folder of root_folder as one class.
root_folder = Path("../data/Family_PP")

# Set the transformation of the image data to be 224 x 224 resolution and tensors
transform = transforms.Compose([
    transforms.Resize((224, 224)),  
    transforms.ToTensor(),  
])

# FamilyData applies the transform to each image when it is indexed.
dataset = FamilyData(root_folder, transform=transform)

# Check the images in dataset
# for idx, image_data in enumerate(dataset):
#     image, label = image_data 
#     plt.figure()
#     plt.title(f"{label}")
#     plt.axis(False)
#     plt.imshow(image)

In [69]:
# Load the FashionMNIST train and test splits as tensors.
# download=False: the data must already be present under the root folder.
train_data = datasets.FashionMNIST(root='.././data', train=True, download=False, transform=ToTensor())
test_Data = datasets.FashionMNIST(root='.././data', train=False, download=False, transform=ToTensor())

# Preview a 4 x 4 grid of random training images
# NOTE(review): class_names is not defined in the visible code — confirm before re-enabling.
# fig = plt.figure(figsize=(9, 9))
# rows, col = 4, 4
# for i in range(1, (rows*col + 1)):
#     random_idx = torch.randint(0, len(train_data), size=[1]).item()
#     img, label = train_data[random_idx]
#     fig.add_subplot(rows, col, i)
#     plt.imshow(img.squeeze(), cmap="gray")
#     plt.title(class_names[label])
#     plt.axis(False)

# Number of samples per split; the results are not stored or printed.
len(train_data)
len(test_Data)

# Number of batches when the training split is served in batches of 32
length = DataLoader(train_data, batch_size=32)
len(length)

1875

In [280]:
# Load the FashionMNIST train and test splits as tensors.
# download=False: the data must already be present under the root folder.
train_data = datasets.FashionMNIST(root='.././data', train=True, download=False, transform=ToTensor())
test_Data = datasets.FashionMNIST(root='.././data', train=False, download=False, transform=ToTensor())

# Serve both splits in shuffled batches of 32
dataset_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_Data, batch_size=32, shuffle=True)

COLOR_CHANNELS = 1  # channel count handed to CNN_Model as `input`
CLASSES = len(train_data.classes)  # one model output per dataset class
LOOPS = 1000  # NOTE(review): not referenced anywhere in the visible code

# Build the model, run one training pass, then evaluate on the test split
model = CNN_Model(input=COLOR_CHANNELS, hidden_units=10, output=CLASSES)
train_model(model, dataset_loader, lr=0.1)
test_model(model, test_loader)

Resulting dimension prior to flatten layer is 10
Ran 375 loops | loss: 0.40972375869750977
Ran 750 loops | loss: 0.5317894816398621
Ran 1125 loops | loss: 0.44580069184303284
Ran 1500 loops | loss: 0.6202183365821838
Ran 1875 loops | loss: 0.2446848452091217
Test loss: 0.40258 | Test accuracy: 85.15%



In [54]:
# Pull a single batch from the training loader to inspect it
data, label = next(iter(dataset_loader))
# print(torch.softmax(model(data[0].unsqueeze(0)).squeeze(), 0).argmax(0))
# print(label[0])
# print(model(data).shape)

# Shape of the first sample in the batch
print(data[0].shape)

torch.Size([1, 28, 28])


In [78]:
# Shape probe: send one random 3-channel 28 x 28 picture through a single
# unpadded 3 x 3 convolution and print the shape before and after.
images = torch.randn(32, 3, 28, 28)
test_image = images[0].unsqueeze(dim=0)  # put the batch axis back after indexing
print(test_image.shape)

conv2d = nn.Conv2d(3, 10, kernel_size=3, stride=1, padding=0)

print(conv2d(test_image).shape)


torch.Size([1, 3, 28, 28])
torch.Size([1, 10, 26, 26])


In [253]:
def conv2d_calc(input_dim: int, kernel, stride, padding):
    """Return the side length a conv2d layer produces for one picture side.

    The value is ``(input_dim + 2 * padding - kernel) / stride + 1``. It is
    computed with true division, so the result is a float and is not rounded
    down when the stride does not divide evenly.
    """
    padded_span = input_dim + 2 * padding - kernel
    return padded_span / stride + 1

# Calculate the resulting dim after filtering through conv2d and maxpool2d
# for epoch in range(0, 2, 1):
#     for epoch in range(0, 2, 1):
#         dim = conv2d_calc(dim, KERNEL_SIZE, STRIDE, PADDING)
#     dim /= POOL_KERNEL

def layer_dim_calc(input_dim, conv2d_count, maxpool_count, conv2d_kernel, maxpool_kernel, stride, padding):
    """Find the resulting photo dimension for a CNN built from conv2d and maxpool2d layers.

    The model is treated as ``maxpool_count`` stages. Each stage applies
    ``conv2d_count`` conv2d layers (sized by ``conv2d_calc``) and then divides
    the side length by ``maxpool_kernel``.

    Prints the result, or a warning when the computed size is not a whole
    number, and returns the size truncated to an int.
    """
    size = input_dim
    for _stage in range(maxpool_count):
        for _layer in range(conv2d_count):
            size = conv2d_calc(size, conv2d_kernel, stride, padding)
        size = size / maxpool_kernel

    whole = int(size)
    if whole == size:
        print(f"Resulting dimension prior to flatten layer is {whole}")
    else:
        print(f"WARNING: Resulting dimension of {size} prior to flatten layer is not an integer. This may impact model performance.")
    return whole


# Size check for a 28-pixel input with 2 conv2d layers per stage and 2 maxpool2d stages,
# using whatever values KERNEL_SIZE, POOL_KERNEL, STRIDE and PADDING hold when the cell runs.
layer_dim_calc(28, 2, 2, KERNEL_SIZE, POOL_KERNEL, STRIDE, PADDING)



8