### **Importing Libraries**

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, ConcatDataset, TensorDataset
from torch.utils.data import Dataset
import torchvision.models as models
from torchvision import transforms
import torchvision
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import torchvision.transforms as T
from PIL import ImageFilter, ImageEnhance
from PIL import Image
import math
import random
from timm.data.mixup import Mixup
import torchvision.transforms.functional as TF


import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


### **Dataset**

#### **Helper Functions**

In [2]:
transform_base = T.Compose([
    T.Resize((256, 256)),
    T.RandomHorizontalFlip(),
    T.RandomRotation(degrees=15),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ,
    T.RandomErasing(p=0.25, scale=(0.02, 0.1), ratio=(0.3, 3.3)),
])

transform_color = T.Compose([
    T.Resize((256, 256)),
    T.ColorJitter(brightness=0.4, contrast=0.3, saturation=0.3, hue=0.1),
    T.GaussianBlur(kernel_size=3, sigma=(0.1, 1.0)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    T.RandomErasing(p=0.25, scale=(0.02, 0.1), ratio=(0.3, 3.3)),
])

transform_affine = T.Compose([
    T.Resize((288, 288)),
    T.RandomResizedCrop(256, scale=(0.8, 1.0), ratio=(0.9, 1.1)),
    T.RandomAffine(degrees=0, translate=(0.2, 0.2), scale=(0.85, 1.15), shear=10),
    T.RandomPerspective(distortion_scale=0.2, p=0.5),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    T.RandomErasing(p=0.25, scale=(0.02, 0.1), ratio=(0.3, 3.3)),
])

transform_val = T.Compose([
    T.Resize((256, 256)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

#### **Dataset Class**

In [3]:
class RegionDataset(Dataset):
    def __init__(self, image_dir, labels_df, transform=None):
        self.image_dir = image_dir
        self.labels_df = labels_df
        self.transform = transform

    def __len__(self):
        return len(self.labels_df)

    def __getitem__(self, idx):
        row = self.labels_df.iloc[idx]
        img_path = os.path.join(self.image_dir, row['filename'])
        image = Image.open(img_path).convert("RGB")

        if self.transform:
            image = self.transform(image)
            region = int(row['Region_ID'])
            region = region - 1

        # Convert to tensor with proper dtype (long) for classification tasks
        region_tensor = torch.tensor(region, dtype=torch.long)
        
        return image, region_tensor

In [4]:
def create_extended_dataset(image_dir, labels_df):
    # Original dataset
    original_dataset = RegionDataset(
        image_dir=image_dir,
        labels_df=labels_df,
        transform=transform_base
    )
    
    # Color jitter augmented dataset
    color_dataset = RegionDataset(
        image_dir=image_dir,
        labels_df=labels_df,
        transform=transform_color
    )
    
    # # Affine transform augmented dataset
    affine_dataset = RegionDataset(
        image_dir=image_dir,
        labels_df=labels_df,
        transform=transform_affine
    )
    
    extended_dataset = ConcatDataset([original_dataset, color_dataset, affine_dataset])
    
    return extended_dataset

#### **Training**

In [5]:
image_dir_train = "Dataset/Train/images_train"
labels_path_train = "Dataset/Train/labels_train.csv"

labels_df = pd.read_csv(labels_path_train)

train_dataset = create_extended_dataset(image_dir_train, labels_df)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

#### **Validation**

In [6]:
images_dir_val = "Dataset/Val/images_val"
labels_path_val = "Dataset/Val/labels_val.csv"
labels_df_val = pd.read_csv(labels_path_val)

val_dataset = RegionDataset(images_dir_val, labels_df_val, transform_val)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

### **Model**

#### **Model Implementation**

In [7]:
class ResNetRegionClassifier(nn.Module):
    def __init__(self, num_regions=8, pretrained=True, dropout_rate=0.3):
        super().__init__()
        base_model = models.resnet50(weights='DEFAULT' if pretrained else None)
        num_features = base_model.fc.in_features

        self.features = nn.Sequential(*list(base_model.children())[:-1])
        
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(num_features, 1024),
            nn.BatchNorm1d(1024), 
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(1024, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            
            nn.Linear(512, num_regions)
        )

    def forward(self, x):
        x = self.features(x)
        logits = self.classifier(x)
        return logits

### **Training**

In [8]:
mixup_fn = Mixup(
    mixup_alpha=0.8, cutmix_alpha=1.0, cutmix_minmax=None,
    prob=0.8, switch_prob=0.3, mode='batch',
    label_smoothing=0.1, num_classes=15
)

def train_classification_model(model, train_loader, val_loader, optimizer, num_epochs, device):
    model.to(device)
    best_val_loss = float('inf')

    loss_fn = nn.CrossEntropyLoss()
    
    for epoch in range(num_epochs):

        model.train()
        train_loss = 0.0
        correct_train = 0
        total_train = 0
        pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Train]")

        for images, targets in pbar:
            images, targets = mixup_fn(images, targets)
            images = images.to(device)
            targets = targets.to(device) 

            logits = model(images)

            loss = loss_fn(logits, targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if len(targets.shape) == 1:
                _, preds = torch.max(logits, dim=1)
                correct_train = (preds == targets).sum().item()
                total_train = targets.size(0)
            else:
                _, preds = torch.max(logits, dim=1)
                _, target_labels = torch.max(targets, dim=1) 
                correct_train = (preds == target_labels).sum().item()
                total_train = targets.size(0)
            
            train_loss += loss.item() * images.size(0)
            pbar.set_postfix(loss=loss.item(), acc=f"{100*correct_train/total_train:.2f}%")

        avg_train_loss = train_loss / len(train_loader.dataset)
        train_accuracy = 100 * correct_train / total_train

        model.eval()
        val_loss = 0.0
        correct_val = 0
        total_val = 0
        
        with torch.no_grad():
            pbar = tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Val]")
            for images, targets in pbar:
                images = images.to(device)
                targets = targets.to(device)

                logits = model(images)
                loss = loss_fn(logits, targets)

                _, predicted = torch.max(logits.data, 1)
                total_val += targets.size(0)
                correct_val += (predicted == targets).sum().item()
                
                val_loss += loss.item() * images.size(0)
                pbar.set_postfix(loss=loss.item(), acc=f"{100*correct_val/total_val:.2f}%")

        avg_val_loss = val_loss / len(val_loader.dataset)
        val_accuracy = 100 * correct_val / total_val
        
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save(model.state_dict(), 'best_model.pth')

        print(f"Epoch {epoch+1}: Train Loss = {avg_train_loss:.4f}, Train Acc = {train_accuracy:.2f}%, \
                Val Loss = {avg_val_loss:.4f}, Val Acc = {val_accuracy:.2f}%")

In [9]:
def evaluate_classification_model(model, test_loader, device, num_regions=15):
    model.eval()
    all_predictions = []
    all_ground_truths = []
    correct = 0
    total = 0
    
    confusion_matrix = torch.zeros(num_regions, num_regions)
    
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            
            _, predicted = torch.max(outputs.data, 1)
            
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            all_predictions.extend(predicted.cpu().numpy())
            all_ground_truths.extend(labels.cpu().numpy())
            
            for t, p in zip(labels.view(-1), predicted.view(-1)):
                confusion_matrix[t.long(), p.long()] += 1
    
    accuracy = 100 * correct / total
    
    per_class_accuracy = confusion_matrix.diag() / confusion_matrix.sum(1)
    per_class_accuracy = per_class_accuracy.cpu().numpy()
    
    adjacent_correct = 0
    for i in range(len(all_predictions)):
        pred = all_predictions[i]
        true = all_ground_truths[i]

        if pred == true or (pred == (true + 1) % num_regions) or (pred == (true - 1) % num_regions):
            adjacent_correct += 1
    
    adjacent_accuracy = 100 * adjacent_correct / total
    
    results = {
        'accuracy': accuracy,
        'adjacent_accuracy': adjacent_accuracy,
        'per_class_accuracy': per_class_accuracy,
        'predictions': all_predictions,
        'ground_truths': all_ground_truths
    }
    
    return results

In [10]:
model = ResNetRegionClassifier(num_regions=15, pretrained=True, dropout_rate=0.1)
optimizer = optim.Adam(model.parameters(), lr=0.0001)
num_epochs = 10
train_classification_model(model, train_loader, val_loader, optimizer, num_epochs, device)


Epoch 1/10 [Train]: 100%|██████████| 1227/1227 [06:42<00:00,  3.05it/s, acc=60.00%, loss=2.34]  
Epoch 1/10 [Val]: 100%|██████████| 24/24 [00:02<00:00,  8.07it/s, acc=84.82%, loss=0.475]


Epoch 1: Train Loss = 1.9096, Train Acc = 60.00%,                 Val Loss = 0.6378, Val Acc = 84.82%


Epoch 2/10 [Train]: 100%|██████████| 1227/1227 [06:49<00:00,  3.00it/s, acc=90.00%, loss=1.04]  
Epoch 2/10 [Val]: 100%|██████████| 24/24 [00:02<00:00,  8.09it/s, acc=89.70%, loss=0.559]


Epoch 2: Train Loss = 1.5354, Train Acc = 90.00%,                 Val Loss = 0.4895, Val Acc = 89.70%


Epoch 3/10 [Train]: 100%|██████████| 1227/1227 [06:54<00:00,  2.96it/s, acc=90.00%, loss=0.755] 
Epoch 3/10 [Val]: 100%|██████████| 24/24 [00:02<00:00,  8.00it/s, acc=91.33%, loss=0.491]


Epoch 3: Train Loss = 1.3959, Train Acc = 90.00%,                 Val Loss = 0.4805, Val Acc = 91.33%


Epoch 4/10 [Train]: 100%|██████████| 1227/1227 [06:49<00:00,  3.00it/s, acc=100.00%, loss=0.63] 
Epoch 4/10 [Val]: 100%|██████████| 24/24 [00:02<00:00,  8.07it/s, acc=92.41%, loss=0.292]


Epoch 4: Train Loss = 1.3617, Train Acc = 100.00%,                 Val Loss = 0.4213, Val Acc = 92.41%


Epoch 5/10 [Train]: 100%|██████████| 1227/1227 [06:48<00:00,  3.00it/s, acc=90.00%, loss=1.74]  
Epoch 5/10 [Val]: 100%|██████████| 24/24 [00:02<00:00,  8.08it/s, acc=92.95%, loss=0.259]


Epoch 5: Train Loss = 1.2938, Train Acc = 90.00%,                 Val Loss = 0.4149, Val Acc = 92.95%


Epoch 6/10 [Train]: 100%|██████████| 1227/1227 [06:48<00:00,  3.00it/s, acc=100.00%, loss=0.615]
Epoch 6/10 [Val]: 100%|██████████| 24/24 [00:02<00:00,  8.10it/s, acc=93.50%, loss=0.183]


Epoch 6: Train Loss = 1.2722, Train Acc = 100.00%,                 Val Loss = 0.3939, Val Acc = 93.50%


Epoch 7/10 [Train]: 100%|██████████| 1227/1227 [06:48<00:00,  3.00it/s, acc=40.00%, loss=2.18]  
Epoch 7/10 [Val]: 100%|██████████| 24/24 [00:02<00:00,  8.13it/s, acc=93.50%, loss=0.124]


Epoch 7: Train Loss = 1.2595, Train Acc = 40.00%,                 Val Loss = 0.3880, Val Acc = 93.50%


Epoch 8/10 [Train]: 100%|██████████| 1227/1227 [06:44<00:00,  3.03it/s, acc=80.00%, loss=1.89]  
Epoch 8/10 [Val]: 100%|██████████| 24/24 [00:02<00:00,  8.16it/s, acc=94.85%, loss=0.178]


Epoch 8: Train Loss = 1.2452, Train Acc = 80.00%,                 Val Loss = 0.3863, Val Acc = 94.85%


Epoch 9/10 [Train]: 100%|██████████| 1227/1227 [06:44<00:00,  3.03it/s, acc=100.00%, loss=0.778]
Epoch 9/10 [Val]: 100%|██████████| 24/24 [00:02<00:00,  8.13it/s, acc=93.22%, loss=0.135]


Epoch 9: Train Loss = 1.2426, Train Acc = 100.00%,                 Val Loss = 0.4160, Val Acc = 93.22%


Epoch 10/10 [Train]: 100%|██████████| 1227/1227 [06:49<00:00,  2.99it/s, acc=90.00%, loss=1.59]  
Epoch 10/10 [Val]: 100%|██████████| 24/24 [00:02<00:00,  8.05it/s, acc=94.58%, loss=0.276]

Epoch 10: Train Loss = 1.2214, Train Acc = 90.00%,                 Val Loss = 0.4010, Val Acc = 94.58%





In [11]:
# Evaluate the model on the validation set
results = evaluate_classification_model(model, val_loader, device)
print(f"Validation Accuracy: {results['accuracy']:.2f}%")
print(f"Adjacent Accuracy: {results['adjacent_accuracy']:.2f}%")
print(f"Per-Class Accuracy: {results['per_class_accuracy']}")


Validation Accuracy: 94.58%
Adjacent Accuracy: 98.64%
Per-Class Accuracy: [1.         0.85714287 0.962963   1.         1.         0.9259259
 0.9583333  1.         0.6666667  0.969697   0.9583333  0.8888889
 0.9583333  1.         0.9444444 ]


In [12]:
# Load the best model for evaluation

model_ev = ResNetRegionClassifier(num_regions=15, pretrained=True, dropout_rate=0.1)
model_ev.to(device)
model_ev.load_state_dict(torch.load('best_model.pth'))

# Evaluate the model on the validation set
results = evaluate_classification_model(model_ev, val_loader, device)
print(f"Validation Accuracy: {results['accuracy']:.2f}%")
print(f"Adjacent Accuracy: {results['adjacent_accuracy']:.2f}%")
print(f"Per-Class Accuracy: {results['per_class_accuracy']}")

Validation Accuracy: 94.85%
Adjacent Accuracy: 98.10%
Per-Class Accuracy: [0.95238096 0.95238096 0.962963   0.962963   1.         0.962963
 0.9166667  0.962963   0.73333335 0.93939394 0.9583333  0.9259259
 0.9583333  1.         0.9722222 ]
