In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# %cd /content/drive/Shareddrives/
%cd /content/drive/'MyDrive'/'Colab Notebooks'/CNN

/content/drive/MyDrive/Colab Notebooks/CNN


In [3]:
from dataset.translate import *

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from torch.utils.data import DataLoader, Dataset
import albumentations as A
from albumentations.pytorch import ToTensorV2
from PIL import Image
import torch.nn as nn
import torch.nn.functional as F
import seaborn as sns
from sklearn.metrics import classification_report, accuracy_score, recall_score, precision_score, f1_score
import wandb

In [4]:
def translate_label(dirname: str, mapping: dict[str, str]) -> str:
    translated = mapping.get(dirname, dirname)
    return translated

In [5]:
df = pd.DataFrame()
data = []

for (dirpath, dirnames, filenames) in os.walk("./dataset/raw-img"):
    if filenames:
        label = os.path.basename(dirpath)
        translated_label = translate_label(label, translate_it_to_en)
        for filename in filenames:
            filepath = os.path.join(dirpath, filename)
            data.append({'label': translated_label, 'filepath': filepath})

df = pd.DataFrame(data)

In [6]:
labelEncoding = {label: str(idx) for idx, label in enumerate(df['label'].unique())}
df['label_encoded'] = df['label'].map(labelEncoding)
df.groupby('label_encoded').first().head(df['label'].nunique())

Unnamed: 0_level_0,label,filepath
label_encoded,Unnamed: 1_level_1,Unnamed: 2_level_1
0,dog,./dataset/raw-img/cane/OIP-ktq--iOSYwDnhvh3Qpg...
1,chicken,./dataset/raw-img/gallina/OIP-dj1Z2GKfp-KaEq1n...
2,cat,./dataset/raw-img/gatto/781.jpeg
3,butterfly,./dataset/raw-img/farfalla/OIP-ygBcWExGUYB08aj...
4,elephant,./dataset/raw-img/elefante/OIP-WvKqBqznWavK7uo...
5,horse,./dataset/raw-img/cavallo/OIP-RSi2mAr5BzIm3vYs...
6,cow,./dataset/raw-img/mucca/OIP-BoPr3fx0wl_d_PcKSv...
7,squirrel,./dataset/raw-img/scoiattolo/OIP-h8d-0mPCtOnF_...
8,spider,./dataset/raw-img/ragno/OIP-wuk6UjoTI_SVXP-2BT...
9,sheep,./dataset/raw-img/pecora/OIP-Vup7sNO42kb5BftOc...


In [7]:
def split_data(df: pd.DataFrame, test_size: float, val_size: float, random_state: int):
    train_val_df, test_df = train_test_split(
        df,
        test_size=test_size,
        stratify=df['label_encoded'],
        random_state=random_state
    )

    val_relative_size = val_size / (1 - test_size)

    train_df, val_df = train_test_split(
        train_val_df,
        test_size=val_relative_size,
        stratify=train_val_df['label_encoded'],
        random_state=random_state
    )

    train_x = train_df['filepath']
    train_y = train_df['label_encoded']

    val_x = val_df['filepath']
    val_y = val_df['label_encoded']

    test_x = test_df['filepath']
    test_y = test_df['label_encoded']

    return train_x, train_y, val_x, val_y, test_x, test_y

In [8]:
train_x, train_y, val_x, val_y, test_x, test_y = split_data(df, test_size=0.2, val_size=0.1, random_state=24)

In [9]:
from image_dataset import ImageDataset
tr = transforms.Compose([
    transforms.Resize(size=(128, 128)),
    transforms.ToTensor(),
])

train_dataset = ImageDataset(val_x, val_y, transform=tr)
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=2)

test_dataset = ImageDataset(test_x, test_y, transform=tr)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False, num_workers=2)

In [10]:
device = torch.device(device="cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [11]:
class baseCNN(nn.Module):
    def __init__(self, num_classes, conv_dropouts=None, linear_dropout=0.2, filters=None):
        super().__init__()

        # Default architecture
        if filters is None:
            filters = [32, 128, 256, 512]
        if conv_dropouts is None:
            conv_dropouts = [0.2, 0.2, 0.2, 0.25]

        self.model = nn.Sequential(
            self.conv_block(3, filters[0], conv_dropouts[0]),
            self.conv_block(filters[0], filters[1], conv_dropouts[1]),
            self.conv_block(filters[1], filters[2], conv_dropouts[2]),
            self.conv_block(filters[2], filters[3], conv_dropouts[3]),

            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(filters[3], 128),
            nn.ReLU(),
            nn.Dropout(linear_dropout),
            nn.Linear(128, num_classes)
        )

    def conv_block(self, in_chanel, out_chanel, drop):
        return nn.Sequential(
            nn.Conv2d(in_chanel, out_chanel, 3, padding=1),
            nn.BatchNorm2d(out_chanel),
            nn.ReLU(),

            nn.Conv2d(out_chanel, out_chanel, 3, padding=1),
            nn.BatchNorm2d(out_chanel),
            nn.ReLU(),

            nn.MaxPool2d(3),  # Note: using 3 instead of 2
            nn.Dropout(drop)
        )

    def forward(self, x):
        return self.model(x)

In [12]:
def train(model: nn.Module, optimizer: torch.optim.Optimizer, criterion: nn.Module, train_loader: DataLoader, num_epochs=8):
    loss_vals=  []

    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0

        epoch_loss= []
        for _, (data, targets) in enumerate(train_loader):
            data = data.to(device=device)
            targets = targets.to(device=device)

            scores = model(data)
            loss = criterion(scores, targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = scores.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

        epoch_loss = running_loss / len(train_loader)
        epoch_acc = 100. * correct / total
        loss_vals.append(epoch_loss)

        print(epoch, "Current Loss:", loss , "Acc:" , epoch_acc )
    return loss_vals

def evaluate(loader, model):
    """
        @returns: (all_preds, all_targets)
    """
    all_preds = []
    all_targets = []
    running_loss = 0.0
    correct = 0
    total = 0

    model.eval()
    criterion = nn.CrossEntropyLoss()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)

            scores = model(x)
            loss = criterion(scores, y)
            _, pred = scores.max(1)

            all_preds.extend(pred.cpu().numpy())
            all_targets.extend(y.cpu().numpy())

            running_loss += loss.item()
            total += y.size(0)
            correct += pred.eq(y).sum().item()

    accuracy = 100. * correct / total
    avg_loss = running_loss / len(loader)

    return all_preds, all_targets, accuracy, avg_loss

In [13]:
def train_with_config(config, train_loader, val_loader, num_classes=10):

    model = baseCNN(
        num_classes=num_classes,
        conv_dropouts=config.get('conv_dropouts', [0.2, 0.2, 0.2, 0.25]),
        linear_dropout=config.get('linear_dropout', 0.2)
    ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])
    criterion = nn.CrossEntropyLoss()

    train_losses = train(model, optimizer, criterion, train_loader,
                         num_epochs=config.get('epochs', 5))

    _, _, val_accuracy, _ = evaluate(val_loader, model)

    return val_accuracy

In [14]:
configs = [
    # {'lr': 0.001, 'conv_dropouts': [0.2, 0.2, 0.2, 0.25], 'linear_dropout': 0.2},
    {'lr': 0.0005, 'conv_dropouts': [0.2, 0.2, 0.2, 0.25], 'linear_dropout': 0.2},

    # {'lr': 0.001, 'conv_dropouts': [0.1, 0.1, 0.15, 0.2], 'linear_dropout': 0.1},
    {'lr': 0.001, 'conv_dropouts': [0.3, 0.3, 0.35, 0.4], 'linear_dropout': 0.3},

    {'lr': 0.001, 'filters': [32, 64, 128, 256]},
]

In [15]:
results = {}
best_acc = 0
best_config = None

for i, config in enumerate(configs):
    print("-----------------------------------------------------")
    print(f"Testing config {i}: {config}")

    val_accuracy = train_with_config(config, train_loader, test_loader)
    results[str(config)] = val_accuracy

    if val_accuracy > best_acc:
        best_acc = val_accuracy
        best_config = config

    print(f"Val Accuracy: {val_accuracy:.4f}")
    print("-----------------------------------------------------")

print(f"Best config: {best_config}")
print(f"Best accuracy: {best_acc:.4f}")

-----------------------------------------------------
Testing config 0: {'lr': 0.0005, 'conv_dropouts': [0.2, 0.2, 0.2, 0.25], 'linear_dropout': 0.2}
0 Current Loss: tensor(1.9908, device='cuda:0', grad_fn=<NllLossBackward0>) Acc: 24.63712757830405
1 Current Loss: tensor(1.7748, device='cuda:0', grad_fn=<NllLossBackward0>) Acc: 35.82887700534759
2 Current Loss: tensor(1.4678, device='cuda:0', grad_fn=<NllLossBackward0>) Acc: 42.32238349885409
3 Current Loss: tensor(1.4690, device='cuda:0', grad_fn=<NllLossBackward0>) Acc: 47.36440030557678
4 Current Loss: tensor(1.3574, device='cuda:0', grad_fn=<NllLossBackward0>) Acc: 54.08708938120703
Val Accuracy: 45.7219
-----------------------------------------------------
-----------------------------------------------------
Testing config 1: {'lr': 0.001, 'conv_dropouts': [0.3, 0.3, 0.35, 0.4], 'linear_dropout': 0.3}
0 Current Loss: tensor(2.2039, device='cuda:0', grad_fn=<NllLossBackward0>) Acc: 21.73414820473644
1 Current Loss: tensor(2.0185, 