# 1. Imports & Environment Setup

This section imports all required libraries and defines the computation device (CPU / GPU).

In [None]:
import os
import copy

import torch
import torch.nn as nn      # (Linear, ReLU, Conv etc.)
import torch.optim as optim  # optimizer (Adam, SGD etc.)

from torch.utils.data import DataLoader, random_split  # Dividing the dataset into train/val/test and batching them.
from torchvision import transforms                     
from torchvision.datasets import ImageFolder           # For the folder where I combined the datasets I pulled from Kaggle
from torchvision.models import efficientnet_b3, EfficientNet_B3_Weights  

from torchvision.datasets.folder import default_loader # ImageFolder's default loader

import numpy as np             
import matplotlib.pyplot as plt 
import seaborn as sns        
from sklearn.metrics import confusion_matrix, classification_report  




# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 
print("Device used:", device)

# 2. Safe Dataset Loader

A custom ImageFolder class is implemented to safely skip corrupted or incomplete image files.

In [None]:
class SafeImageFolder(ImageFolder):
    """ImageFolder variant that skips samples whose image file cannot be loaded.

    When loading the requested sample raises ``FileNotFoundError`` or
    ``OSError``, a warning is printed and the next sample (wrapping around at
    the end of the sample list) is tried instead. The returned item may
    therefore belong to a different index than the one requested.
    """

    def __getitem__(self, index):
        """Return ``(sample, target)`` for the first loadable sample at or after ``index``.

        Args:
            index: Position in ``self.samples`` to start from.

        Returns:
            The (optionally transformed) image and its (optionally
            transformed) class id.

        Raises:
            RuntimeError: If no sample in the dataset can be loaded.
        """
        n_samples = len(self.samples)

        # Visit every sample at most once so that a dataset in which all files
        # are unreadable fails loudly instead of looping forever.
        # max(..., 1) keeps the plain IndexError for an empty sample list.
        for _ in range(max(n_samples, 1)):
            path, target = self.samples[index]   # (file path, class id)
            try:
                sample = self.loader(path)       # load image
            except (FileNotFoundError, OSError):
                print(f"[WARNING] Corrupted or incomplete files were skipped: {path}")
                index = (index + 1) % n_samples
                continue

            if self.transform is not None:
                sample = self.transform(sample)
            if self.target_transform is not None:
                target = self.target_transform(target)

            return sample, target

        raise RuntimeError(
            f"None of the {n_samples} samples under {self.root!r} could be loaded."
        )

# 3. Dataset Paths and Hyperparameters

## Offline Data Augmentation

Offline augmentation was performed separately to increase the dataset size
from ~18k to ~75k images.  
This notebook focuses on training and evaluation only.

In [None]:

# Root folder handed to SafeImageFolder when the dataset is built.
data_dir = "data/architectural-styles-dataset"  
batch_size    = 16      # mini-batch size used by all three DataLoaders
num_epochs    = 12      # number of passes over the training split
learning_rate = 1e-4    # learning rate given to the Adam optimizer
val_ratio     = 0.15    # 15% of the data is for validation.
test_ratio    = 0.15    # 15% of the data is for testing, the rest is for training.

# 4. Image Transformations

ImageNet normalization and resizing are applied for EfficientNet-B3.

In [None]:
# ImageNet normalization statistics (one value per channel)
imagenet_mean = [0.485, 0.456, 0.406]  
imagenet_std  = [0.229, 0.224, 0.225]

# Training pipeline: resize to 224x224, convert to a tensor, normalize.
# No random augmentation is applied here, so the pipeline is deterministic.
train_transform = transforms.Compose([
    transforms.Resize((224, 224)), 
    transforms.ToTensor(),  
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
])


# Validation / test pipeline: currently the same three steps as the training pipeline.
val_test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
])

# 5. Dataset Splitting and DataLoaders

The dataset is split into training, validation, and test sets.

In [None]:

# DATASET and DATALOADER (train / validation / test)
#
# The subsets returned by random_split all wrap the *same* underlying dataset
# object, so assigning `val_dataset.dataset.transform` would also replace the
# transform seen by the training split. To keep the pipelines independent, the
# evaluation splits are built on their own dataset instance.
full_dataset = SafeImageFolder(root=data_dir, transform=train_transform)
eval_dataset = SafeImageFolder(root=data_dir, transform=val_test_transform)

class_names = full_dataset.classes
num_classes = len(class_names)

print("Class number:", num_classes)
print("Classes:", class_names)

# Total number of examples
n_total = len(full_dataset)

# Integer split sizes; the training split absorbs the rounding remainder.
n_val   = int(val_ratio * n_total)
n_test  = int(test_ratio * n_total)
n_train = n_total - n_val - n_test

print(f"Total: {n_total}, Train: {n_train}, Val: {n_val}, Test: {n_test}")

# Use a fixed seed so the same split is reproduced on every run
generator = torch.Generator().manual_seed(42)

# Draw the three disjoint index sets in a single call.
train_dataset, val_split, test_split = random_split(
    full_dataset,
    [n_train, n_val, n_test],
    generator=generator
)

# Re-point the validation / test indices at the dataset that carries the
# evaluation transform; both datasets scan the same root, so indices line up.
val_dataset  = torch.utils.data.Subset(eval_dataset, val_split.indices)
test_dataset = torch.utils.data.Subset(eval_dataset, test_split.indices)


train_loader = DataLoader(train_dataset, batch_size=batch_size,
                          shuffle=True, num_workers=0)   # shuffle=True : reshuffle the training data every epoch
val_loader   = DataLoader(val_dataset, batch_size=batch_size,
                          shuffle=False, num_workers=0)
test_loader  = DataLoader(test_dataset, batch_size=batch_size,
                          shuffle=False, num_workers=0)

print("DataLoaders are ready")

# 6. Model Definition – EfficientNet-B3 (Transfer Learning)

In [None]:
# Creating the EFFICIENTNET-B3 model (transfer learning)
def create_efficientnet_b3(num_classes: int) -> nn.Module:
    """Build an EfficientNet-B3 with pretrained weights and a new output layer.

    Args:
        num_classes: Number of outputs of the replacement classification layer.

    Returns:
        The model with ``classifier[1]`` swapped for a fresh ``nn.Linear``
        that maps the existing feature width to ``num_classes``.
    """
    net = efficientnet_b3(weights=EfficientNet_B3_Weights.DEFAULT)

    # Reuse the input width of the original head for the new linear layer.
    old_head = net.classifier[1]
    net.classifier[1] = nn.Linear(old_head.in_features, num_classes)

    return net


# Build the network with one output per dataset class and move it to the selected device.
model = create_efficientnet_b3(num_classes).to(device)
print("The EfficientNet-B3 model was created and deployed to the device.")

# 7. Loss Function and Optimizer

In [None]:
# Cross-entropy loss on the class scores produced by the model.
criterion = nn.CrossEntropyLoss()
# Adam is given every parameter of the model, so the whole network is optimized.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# 8. Training and Evaluation Functions

In [None]:
def train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one optimization pass over ``loader``.

    Args:
        model: Network to train; it is switched to training mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        ``(epoch_loss, epoch_acc)``: the size-weighted mean of the batch
        losses and the fraction of correctly classified samples.
    """
    model.train()

    loss_sum, n_correct, n_seen = 0.0, 0, 0

    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        logits = model(images)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by its size so a smaller last batch does not
        # skew the epoch average (assumes the criterion returns a batch mean).
        loss_sum += batch_loss.item() * images.size(0)
        n_correct += (logits.argmax(dim=1) == labels).sum().item()
        n_seen += labels.size(0)

    return loss_sum / n_seen, n_correct / n_seen


def eval_one_epoch(model, loader, criterion, device):
    """Measure loss and accuracy of ``model`` over ``loader`` without training.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        ``(epoch_loss, epoch_acc)``: the size-weighted mean of the batch
        losses and the fraction of correctly classified samples.
    """
    model.eval()

    loss_sum, n_correct, n_seen = 0.0, 0, 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)

            logits = model(images)
            batch_loss = criterion(logits, labels)

            loss_sum += batch_loss.item() * images.size(0)
            n_correct += (logits.argmax(dim=1) == labels).sum().item()
            n_seen += labels.size(0)

    return loss_sum / n_seen, n_correct / n_seen

# 9. Model Training Loop

In [None]:
# Track the lowest validation loss seen so far and a snapshot of the weights
# that produced it (initially the weights the model has before training).
best_val_loss = float("inf") 
best_model_wts = copy.deepcopy(model.state_dict())  


# Per-epoch history, appended to once per epoch.
train_losses = []
val_losses   = []
train_accs   = []
val_accs     = []

for epoch in range(1, num_epochs + 1):
    print(f"\nEpoch {epoch}/{num_epochs} ")

    # One optimization pass over the training split, then a validation pass.
    train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc     = eval_one_epoch(model, val_loader, criterion, device)

    print(f"Train - Loss: {train_loss:.4f}, Acc: {train_acc:.4f}")
    print(f"Val   - Loss: {val_loss:.4f}, Acc: {val_acc:.4f}")


    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_accs.append(train_acc)
    val_accs.append(val_acc)

    # Model selection is based on validation loss, not accuracy.
    # deepcopy is needed because state_dict() references the live parameters.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_model_wts = copy.deepcopy(model.state_dict())
        print(">> The best EfficientNet-B3 model has been updated")


# Restore the best snapshot so everything below uses it rather than the last epoch.
model.load_state_dict(best_model_wts)


os.makedirs("models", exist_ok=True)

# Only the state dict (weights) is written, and only once, after training has finished.
model_path = os.path.join("models", "efficientnet_b3_arch_styles_best.pth")
torch.save(model.state_dict(), model_path)
print(f"\nThe best EfficientNet-B3 model was recorded: {model_path}")

# 10. Test Performance Evaluation

In [None]:
# Evaluate the current model weights on the held-out test split.
test_loss, test_acc = eval_one_epoch(model, test_loader, criterion, device)
print("\nTEST RESULT (EfficientNet-B3) ")
print(f"Test Loss: {test_loss:.4f}")
# Accuracy is printed both as a fraction and as a percentage.
print(f"Test Accuracy: {test_acc:.4f}  (approximately %{test_acc*100:.2f})")

# 11. Training Curves (Loss & Accuracy)

In [None]:
os.makedirs("plots", exist_ok=True)

# Derive the x-axis from the recorded history rather than from `num_epochs`:
# if training was interrupted, or `num_epochs` was edited afterwards, the two
# lengths differ and plt.plot would raise a shape-mismatch error.
epochs_range = range(1, len(train_losses) + 1)

plt.figure(figsize=(12, 4))

# Left panel: loss per epoch
plt.subplot(1, 2, 1)
plt.plot(epochs_range, train_losses, marker='o', label='Train Loss')
plt.plot(epochs_range, val_losses, marker='o', label='Val Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Train / Val Loss')
plt.legend()
plt.grid(True)

# Right panel: accuracy per epoch
plt.subplot(1, 2, 2)
plt.plot(epochs_range, train_accs, marker='o', label='Train Acc')
plt.plot(epochs_range, val_accs, marker='o', label='Val Acc')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Train / Val Accuracy')
plt.legend()
plt.grid(True)

plt.tight_layout()
# Save before show(): the figure may be cleared once it has been displayed.
plt.savefig(os.path.join("plots", "loss_accuracy_curves.png"), dpi=300)
plt.show()

# 12. Confusion Matrix, Classification Report and Error Analysis

In [None]:
# Collect true labels and predictions for the whole test split.
model.eval()
all_labels = []
all_preds  = []

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        _, preds = torch.max(outputs, dim=1)

        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(preds.cpu().numpy())

all_labels = np.array(all_labels)
all_preds  = np.array(all_preds)

# Pass the full label set explicitly. Otherwise a class that happens to be
# absent from both the test labels and the predictions shrinks the matrix,
# which misaligns the tick labels, makes classification_report reject
# `target_names`, and breaks the range(num_classes) indexing below.
label_ids = np.arange(num_classes)

cm = confusion_matrix(all_labels, all_preds, labels=label_ids)

plt.figure(figsize=(12, 10))
sns.heatmap(cm,
            annot=False,
            cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix - EfficientNet-B3")
plt.xticks(rotation=90)
plt.yticks(rotation=0)
plt.tight_layout()
plt.savefig(os.path.join("plots", "confusion_matrix.png"), dpi=300)
plt.show()

# Classification report (precision / recall / F1)
# zero_division=0 keeps the default value for undefined metrics but without the warning.
report_str = classification_report(
    all_labels,
    all_preds,
    labels=label_ids,
    target_names=class_names,
    zero_division=0,
)
print("\nCLASSIFICATION REPORT")
print(report_str)

# To find the most commonly confused classes (error analysis)
# Rows are normalized by the number of true samples of that class; a row with
# no samples is divided by 1 instead of 0 so it stays all-zero rather than NaN.
row_totals = cm.sum(axis=1, keepdims=True)
cm_norm = cm.astype(float) / np.maximum(row_totals, 1)
mis_list = []
for i in range(num_classes):
    for j in range(num_classes):
        # Off-diagonal, non-empty cells are the misclassifications.
        if i != j and cm[i, j] > 0:
            mis_list.append((cm_norm[i, j], cm[i, j], i, j))

# Highest confusion ratio first (ties broken by count, then class indices).
mis_list.sort(reverse=True)

print("\nTop 10 Most Confused Classes")
error_lines = []
for frac, count, i, j in mis_list[:10]:
    line = (f"Actual: {class_names[i]:25s}  -> Predicted: {class_names[j]:25s} "
            f"Piece: {count:4d}  ratio: {frac*100:5.2f}%")
    print(line)
    error_lines.append(line)

# Write the report and the confusion list to a text file
analysis_path = os.path.join("plots", "error_analysis.txt")
with open(analysis_path, "w", encoding="utf-8") as f:
    f.write("CLASSIFICATION REPORT \n")
    f.write(report_str)
    f.write("\n\nTop 10 Most Confused Classes\n")
    for line in error_lines:
        f.write(line + "\n")

print(f"\nError analysis recorded: {analysis_path}")

# 13. Grad-CAM Visualization

Grad-CAM is used to visualize the regions that the model focuses on during prediction.

In [None]:
# Grad-CAM is computed on the output of the last module of the feature extractor.
target_layer = model.features[-1]


# Filled in by the hooks below whenever target_layer runs forward / backward.
gradcam_activations = None
gradcam_gradients   = None

def forward_hook(module, input, output):
    """Store the target layer's output, detached from the autograd graph."""
    global gradcam_activations
    gradcam_activations = output.detach()

def backward_hook(module, grad_input, grad_output):
    """Store the detached gradient with respect to the target layer's output."""
    global gradcam_gradients
    gradcam_gradients = grad_output[0].detach()



# Keep the handles so the hooks can be removed again once Grad-CAM is done.
forward_handle  = target_layer.register_forward_hook(forward_hook)
backward_handle = target_layer.register_full_backward_hook(backward_hook)

def denormalize(img_tensor):
    """Undo the ImageNet normalization and return an image ready for plotting.

    Args:
        img_tensor: Normalized image tensor with the 3 channels first.

    Returns:
        NumPy array with the channel axis moved last and values clipped to [0, 1].
    """
    dev = img_tensor.device
    # Shape (3, 1, 1) so the per-channel statistics broadcast over the spatial axes.
    channel_std = torch.tensor(imagenet_std).view(3, 1, 1).to(dev)
    channel_mean = torch.tensor(imagenet_mean).view(3, 1, 1).to(dev)

    restored = (img_tensor * channel_std + channel_mean).clamp(0, 1)
    return restored.permute(1, 2, 0).cpu().numpy()

def generate_gradcam(model, image_tensor, class_idx=None):
    """Compute a Grad-CAM heat map for a single image.

    Relies on module-level hooks that store the hooked layer's output and its
    gradient in the globals ``gradcam_activations`` and ``gradcam_gradients``.

    Args:
        model: Network to explain; it is switched to evaluation mode.
        image_tensor: One image without a batch dimension. A batch dimension
            is added and the tensor is moved to the global ``device``.
        class_idx: Index of the class score to explain. When ``None`` the
            class with the highest score is used.

    Returns:
        ``(cam, class_idx)`` where ``cam`` is a 2-D NumPy array with values in
        [0, 1] at the spatial resolution of the hooked layer's output, and
        ``class_idx`` is the class that was explained.

    Raises:
        RuntimeError: If the hooks did not record activations and gradients
            during this call (for example because they were already removed).
    """
    global gradcam_activations, gradcam_gradients

    # Drop whatever an earlier call captured. Otherwise, if the hooks are no
    # longer registered, a heat map for a *previous* image would be returned.
    gradcam_activations = None
    gradcam_gradients = None

    model.eval()
    image_tensor = image_tensor.unsqueeze(0).to(device)

    output = model(image_tensor)

    if class_idx is None:
        class_idx = output.argmax(dim=1).item()

    score = output[0, class_idx]
    model.zero_grad()
    # The graph is used for this single backward pass only, so it is not retained.
    score.backward()

    if gradcam_activations is None or gradcam_gradients is None:
        raise RuntimeError(
            "Grad-CAM hooks did not fire; register the forward and backward "
            "hooks on the target layer before calling generate_gradcam()."
        )

    activations = gradcam_activations
    gradients = gradcam_gradients

    # Channel weights: gradient averaged over the two spatial axes.
    weights = gradients.mean(dim=(2, 3), keepdim=True)
    # Weighted sum over channels; ReLU keeps only positive evidence.
    cam = torch.relu((weights * activations).sum(dim=1, keepdim=True))

    # Drop the batch and channel axes, then rescale to [0, 1].
    cam = cam.squeeze(0).squeeze(0)
    cam = cam - cam.min()
    peak = cam.max()
    if peak > 0:
        cam = cam / peak

    return cam.cpu().numpy(), class_idx

def show_gradcam_on_image(img_tensor, cam, true_label=None, pred_label=None, save_path=None):
    """Plot an image with its Grad-CAM heat map overlaid.

    Args:
        img_tensor: Normalized image tensor; it is de-normalized for display.
        cam: 2-D heat map. It may have a lower resolution than the image and
            is stretched to cover the whole image.
        true_label: Optional ground-truth class name added to the title.
        pred_label: Optional predicted class name added to the title.
        save_path: When given, the figure is also written to this path.
    """
    img = denormalize(img_tensor)
    height, width = img.shape[:2]

    plt.figure(figsize=(5, 5))
    plt.imshow(img)
    # Give the heat map the same extent as the image. Without it, the second
    # imshow resets the axes limits to the heat map's own pixel grid, so only
    # the top-left corner of the photo would stay visible.
    plt.imshow(
        cam,
        cmap='jet',
        alpha=0.4,
        extent=(-0.5, width - 0.5, height - 0.5, -0.5),
        interpolation='bilinear',
    )

    title = "Grad-CAM"
    if true_label is not None:
        title += f"\nActual: {true_label}"
    if pred_label is not None:
        title += f"\nPredicted: {pred_label}"
    plt.title(title)
    plt.axis('off')

    # Save before show(): the figure may be cleared once it has been displayed.
    if save_path is not None:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.show()


# Grad-CAM for some examples from the test set
try:
    sample_batch = next(iter(test_loader))
    sample_images, sample_labels = sample_batch

    # Show at most four images from the first test batch.
    num_show = min(4, sample_images.size(0))

    for i in range(num_show):
        img_tensor = sample_images[i]
        true_idx   = sample_labels[i].item()

        # With class_idx left as None, generate_gradcam explains the predicted
        # class and returns its index, so no separate prediction pass is needed.
        cam, pred_idx = generate_gradcam(model, img_tensor)

        gradcam_path = os.path.join("plots", f"gradcam_example_{i}.png")
        show_gradcam_on_image(
            img_tensor,
            cam,
            true_label=class_names[true_idx],
            pred_label=class_names[pred_idx],
            save_path=gradcam_path
        )

    print("\nGrad-CAM images were saved to the 'plots' folder.")
finally:
    # Always detach the hooks, even when a step above fails, so they do not
    # stay attached to the model.
    forward_handle.remove()
    backward_handle.remove()