In [1]:
import torch 
import torch.nn as nn 
import numpy as np 
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torchvision.utils import make_grid
import matplotlib.pyplot as plt
import torch.nn.functional as F
from tqdm import tqdm
import os 
from torchinfo import summary
from torchvision.io import read_image
from torch.utils.data import random_split
import time 
import random # Random Search 
from sklearn.metrics import accuracy_score
from torch.optim import Adam

In [2]:
path_to_data = "/run/media/magnusjsc/T7/Classification-and-3D-reconstruction-of-archaeological-artifacts_DATA/DIME images"

def load_data_from_directory(directory_path, label, limit = 10000):
    data = []
    labels = []
    count = 0 

    for filename in os.listdir(directory_path):
        if filename.endswith(".jpg"):
            image_path = os.path.join(directory_path, filename)
            image = read_image(image_path)

            # Check if image is RGB
            if image.shape[0] == 3: 
                data.append(image)
                labels.append(label)
                count += 1 

            if count >= limit: 
                break; 

    return data, labels # Tuple 

# Load the data 
data, labels = load_data_from_directory(
    path_to_data,
    label = 0, # CHANGE
    limit = 25 
)

In [4]:
imgTrans = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.ToTensor(),
        transforms.Resize(size = (224,224))
    ]
)

training = 0.7
validation = 0.15
testing = 0.15 

# TODO: MISSING LABELS
train_set, val_set, test_set = random_split(data, [training, validation, testing]) # Partition the dataset

batch_size = 50

train_loader = DataLoader(train_set, batch_size = batch_size, shuffle = True)
val_loader = DataLoader(val_set, batch_size = batch_size, shuffle = False)
test_loader = DataLoader(test_set, batch_size = batch_size, shuffle = False) 

print(f'Length of training set: {len(train_set)}, validation set: {len(val_set)}, testing set: {len(test_set)}')

n_channels = 3
n_classes = 1 # TODO: Calculate number of distinct classes

Length of training set: 18, validation set: 4, testing set: 3


In [42]:
# Classifier  
class CNN(nn.Module):
    def __init__(
        self, 
        num_layers: int,
        hyperparams: dict 
    ):
        super(CNN, self).__init__()

        self.num_layers = num_layers
        self.layers = nn.ModuleList() 
        final_channel = 0
        final_kernel = 0
        final_padding = 0
        final_stride = 0

        for i in range(num_layers):
            in_channels = n_channels if i == 0 else hyperparams['out_channels'][i - 1] 
            out_channels = hyperparams['out_channels'][i]
            kernel_size = hyperparams['kernel_size'][i]
            stride = hyperparams['stride'][i]
            padding = hyperparams['padding'][i]
            self.layers.append(
                nn.Conv2d(
                    in_channels = in_channels,
                    out_channels = out_channels,
                    kernel_size = kernel_size,
                    padding = padding
                )
            )
            self.layers.append(nn.ReLU())
            self.layers.append(
                nn.MaxPool2d(
                    kernel_size = 2,
                    stride = 2
                )
            )
            final_channel = out_channels
            final_kernel = kernel_size
            final_stride = stride
            final_padding = padding

        # TODO: FIX THE CALCULATION OF THE IMAGE SIZE FROM 224x224 to THE LAST OUTPUT FROM THE CONV LAYER - 224->28 
        
        # FC input size - Last i 
        input_size = ((28 - final_kernel + 2 * final_padding) // final_stride) + 1
        fc_input_size = final_channel * (input_size ** 2)
        
        self.fc = nn.Linear(fc_input_size, n_classes)

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        print(f'Shape of x {x.shape}')
        x = x.view(x.size(0), -1) # Flatten
        print(f'Size of x before fc {x.shape}')
        x = self.fc(x)
        return x 

criterion = nn.BCEWithLogitsLoss() 

hyperparams_test = {
    'learning_rate': 0.1,
    'batch_size': 64,
    'num_epochs': 30,
    'num_layers': 3,
    'out_channels': [32, 16, 12],
    'kernel_size': [3, 3, 2],
    'padding': [0, 1, 0],
    'stride': [3, 2, 1]
    # TODO: Add more! 
}

cnn_model = CNN(
    num_layers = hyperparams_test['num_layers'],
    hyperparams = hyperparams_test
)

optimizer = Adam(cnn_model.parameters(), lr = hyperparams_test['learning_rate'])

summary(
    cnn_model,
    input_size = (hyperparams_test['batch_size'], 3, 224, 224), 
    verbose = 2, 
    col_names = [
        "input_size", 
        "num_params", 
        "output_size", 
        "mult_adds", 
        "trainable"
    ],
    mode = 'train'
)

'''
Note: A kernel with size 3x3 and a stride of 1 preserves the spatial dimension of the image.
'''

Shape of x torch.Size([64, 12, 27, 27])
Size of x before fc torch.Size([64, 8748])
Layer (type:depth-idx)                   Input Shape               Param #                   Output Shape              Mult-Adds                 Trainable
CNN                                      [64, 3, 224, 224]         --                        [64, 1]                   --                        True
├─ModuleList: 1-1                        --                        --                        --                        --                        True
│    └─0.weight                                                    ├─864
│    └─0.bias                                                      ├─32
│    └─3.weight                                                    ├─4,608
│    └─3.bias                                                      ├─16
│    └─6.weight                                                    ├─768
│    └─6.bias                                                      └─12
│    └─Conv2d: 2-1       

'\nNote: A kernel with size 3x3 and a stride of 1 preserves the spatial dimension of the image.\n'

In [None]:
# Training, validation and testing loop 

# TODO !!! 

In [22]:
# Random Search for hyperparameter search
hyperparams = {
    'learning_rate': [0.001, 0.01, 0.1],
    'batch_size': [16, 32, 64],
    'num_epochs': [30, 90, 120],
    'num_layers': [2, 3, 4, 5],
    'layer_out': [32, 64, 128],
    'kernel_size': [2, 3, 4, 5],
    'padding': [0, 1, 2]
    # TODO: Add more! 
}

trails = 10 

best_accuracy = 0
best_hyperparams = None

# TODO: Finish random search function