# Initial Part

## Mount google-drive & Load dataset

In [15]:
from google.colab import drive
# Mount Google Drive inside the Colab VM at /content/drive; every dataset path
# below is built on top of this mount point.
drive.mount('/content/drive')

# Name of the classroom folder. When the folder is reached through a share it
# appears nested inside a folder of the same name, hence the two candidates.
_COURSE_DIR = "ChandernagoreCollegeSemVI-Addon 2025 Sem VI Computer Science Honours"

#direct path to drive
DIRECT_PATH = "/content/drive/MyDrive/Classroom/" + _COURSE_DIR

#shared path to drive (same folder name repeated one level deeper)
SHARED_PATH = DIRECT_PATH + "/" + _COURSE_DIR

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [16]:
import os

# Use the first candidate root that exists on the mounted drive: the shared
# layout is preferred, the direct layout is the fallback, otherwise None.
root_path = next(
    (candidate for candidate in (SHARED_PATH, DIRECT_PATH) if os.path.exists(candidate)),
    None,
)

# DATA_PATH stays empty when no root was found; later cells build on it.
DATA_PATH = ''
if not root_path:
    print("Neither shared nor direct path exists. Please check the paths.")
else:
    print(f"Using path: {root_path}")
    DATA_PATH = root_path + "/Proj2_Heritage_places_in_CGR_classify"
    # List the project folder so the reader can confirm the expected contents.
    for filename in os.listdir(DATA_PATH):
        print(filename)


Using path: /content/drive/MyDrive/Classroom/ChandernagoreCollegeSemVI-Addon 2025 Sem VI Computer Science Honours/ChandernagoreCollegeSemVI-Addon 2025 Sem VI Computer Science Honours
Data
old_experiments
expr2_effnet.ipynb
expr1_resnet18.ipynb
training_images
validate_images
data.csv
maximize_dataset_make_csv_data.ipynb


## Setup Device & Random State

In [17]:
import torch
import random
import numpy as np

# Define device (CPU or GPU)
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(DEVICE)

# define the random seed
SEED = 2025

# NOTE: hash randomisation of the running interpreter is fixed at start-up, so
# this only influences Python processes launched after this point.
os.environ['PYTHONHASHSEED'] = str(SEED)

# Seed every RNG the notebook relies on, once each.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    # manual_seed_all covers every visible GPU, including the current one.
    torch.cuda.manual_seed_all(SEED)
    # Trade cuDNN auto-tuning for repeatable convolution algorithms.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

cpu


## Define data transformations

In [18]:
import torchvision.transforms as transforms

# Per-channel normalisation statistics (presumably the usual ImageNet values,
# TODO confirm) and the side length images are resized to before cropping.
mean = [0.485, 0.456, 0.406]
standev = [0.229, 0.224, 0.225]
px = 256


def _tensor_tail():
    """Return fresh ToTensor + Normalize steps shared by both pipelines."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=standev),
    ]


# Training pipeline: resize, then random crop / rotation / affine jitter as
# augmentation, then conversion to a normalised tensor.
_train_augmentations = [
    transforms.Resize((px, px)),
    transforms.RandomCrop((224, 224)),
    transforms.RandomRotation(15, fill=10), # Increased rotation degrees
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1)),
    # transforms.RandomVerticalFlip(), # Uncomment if vertical flips are appropriate
    # transforms.RandomHorizontalFlip(),
]
train_transform = transforms.Compose(_train_augmentations + _tensor_tail())

# Evaluation pipeline: no randomness, just resize + centre crop to the same
# 224x224 input size used in training.
_test_preprocessing = [
    transforms.Resize((px, px)),
    transforms.CenterCrop((224, 224)),
]
test_transform = transforms.Compose(_test_preprocessing + _tensor_tail())

print(train_transform)

Compose(
    Resize(size=(256, 256), interpolation=bilinear, max_size=None, antialias=True)
    RandomCrop(size=(224, 224), padding=None)
    RandomRotation(degrees=[-15.0, 15.0], interpolation=nearest, expand=False, fill=10)
    RandomAffine(degrees=[0.0, 0.0], translate=(0.1, 0.1), scale=(0.9, 1.1))
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)


## Define data path

In [19]:
# Image folders inside the project directory: one for training, one held out
# for validation.
train_path, test_path = (
    DATA_PATH + '/training_images',
    DATA_PATH + '/validate_images',
)

## Load dataset with Torch

In [20]:
from torchvision.datasets import ImageFolder

# Build the datasets from the folder paths defined in the "Define data path"
# cell so the locations are declared in exactly one place.
train_dataset = ImageFolder(root=train_path, transform=train_transform)
test_dataset = ImageFolder(root=test_path, transform=test_transform)

# Class names as reported by the training dataset.
CLASSES  = train_dataset.classes
print(CLASSES)

print(train_dataset)
print(test_dataset)

['charch', 'clock_tower', 'jora_ghat', 'mondir', 'musium', 'patalbari']
Dataset ImageFolder
    Number of datapoints: 309
    Root location: /content/drive/MyDrive/Classroom/ChandernagoreCollegeSemVI-Addon 2025 Sem VI Computer Science Honours/ChandernagoreCollegeSemVI-Addon 2025 Sem VI Computer Science Honours/Proj2_Heritage_places_in_CGR_classify/training_images
    StandardTransform
Transform: Compose(
               Resize(size=(256, 256), interpolation=bilinear, max_size=None, antialias=True)
               RandomCrop(size=(224, 224), padding=None)
               RandomRotation(degrees=[-15.0, 15.0], interpolation=nearest, expand=False, fill=10)
               RandomAffine(degrees=[0.0, 0.0], translate=(0.1, 0.1), scale=(0.9, 1.1))
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )
Dataset ImageFolder
    Number of datapoints: 15
    Root location: /content/drive/MyDrive/Classroom/ChandernagoreCollegeSemVI-Addon 20

In [21]:
# Show the class-name -> integer-label mapping and how many classes it holds.
class_index_map = train_dataset.class_to_idx
print(class_index_map)
print(len(class_index_map))

{'charch': 0, 'clock_tower': 1, 'jora_ghat': 2, 'mondir': 3, 'musium': 4, 'patalbari': 5}
6


## Class definition

In [22]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from tqdm import tqdm

# Define the BuildingClassifier class
class BuildingClassifier:
    """
    Bundles a model with its optimizer, loss function and train/test
    DataLoaders, and provides training, evaluation and single-image inference.
    """
    # NOTE: the default arguments below are evaluated once, when this class
    # statement runs, so they capture the notebook globals as they are then.
    def __init__(self, model, optimizer, batch_size, loss_fn=nn.CrossEntropyLoss(),
    train_dataset=train_dataset, test_dataset=test_dataset,
    categories=train_dataset.classes, device=DEVICE):
        """
        Initializes the BuildingClassifier.
        Args:
            model (torch.nn.Module): The neural network model to use for classification.
            optimizer (torch.optim.Optimizer): Optimizer already constructed over the
                model's parameters; its first param group supplies the logged learning rate.
            batch_size (int): The batch size for both data loaders.
            loss_fn (callable): Loss applied to (model output, labels).
            train_dataset (torch.utils.data.Dataset): The training dataset.
            test_dataset (torch.utils.data.Dataset): The testing dataset.
            categories (list): Class names, indexed by predicted class id.
            device (torch.device): The device (CPU/GPU) to train the model on.
        """
        self.model = model.to(device)  # Move model to the specified device (CPU/GPU)
        self.loss_fn = loss_fn
        self.optimizer = optimizer
        self.batch_size = batch_size
        self.train_dataset = train_dataset
        self.test_dataset = test_dataset
        self.categories = categories
        self.device = device
        # Only read for logging; the optimizer remains the source of truth.
        self.learning_rate = self.optimizer.param_groups[0]['lr']

        # Shuffle only the training data; keep evaluation order fixed.
        self.train_loader = DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=2)
        self.test_loader = DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False, num_workers=2)


    ############################################################################
    # Train the model for a specified number of epochs
    def train(self, n_epochs=10, output_logging=True):
        """
        Trains the model and evaluates it on the test loader after every epoch.
        Args:
            n_epochs (int): Number of passes over the training loader.
            output_logging (bool): Print per-epoch losses and accuracy when True.
        Returns:
            tuple: (training losses, validation losses, accuracies), one entry per epoch.
        """
        loss_graph_list, accy_list, valid_loss_list = [], [], []
        print(f"Starting training with Learning Rate: {self.learning_rate}, Batch Size: {self.batch_size}, Epochs: {n_epochs}, Device: {self.device}")

        for epoch in tqdm(range(n_epochs)):
            self.model.train()  # evaluate() switches to eval mode, so re-enable every epoch
            training_loss = []  # per-batch losses of the current epoch

            for i_image, i_label in self.train_loader:
                i_image = i_image.to(self.device)
                i_label = i_label.to(self.device)

                self.optimizer.zero_grad()  # clear gradients left from the previous batch
                output = self.model(i_image)

                loss = self.loss_fn(output, i_label)
                loss.backward()
                self.optimizer.step()  # apply the gradients; without this the weights never change

                training_loss.append(loss.item())

            if output_logging:
                print(f"Training loss at iteration--{epoch+1} is:- {np.mean(training_loss)}")
            loss_graph_list.append(np.mean(training_loss))

            accy, current_valid_loss = self.evaluate(output_logging=output_logging)
            valid_loss_list.append(current_valid_loss)
            accy_list.append(accy)

        print("Training done.....................")
        return loss_graph_list, valid_loss_list, accy_list


    ############################################################################
    # Evaluate the model on the test set
    def evaluate(self, output_logging=True, disp_confusion_matrix=False):
        """
        Runs the model over the test loader without gradient tracking.
        Args:
            output_logging (bool): Print the loss and accuracy summary when True.
            disp_confusion_matrix (bool): Plot a confusion matrix when True.
        Returns:
            tuple: (accuracy in percent, mean of the per-batch losses). With an
            empty test loader the accuracy is 0.0 and the loss is nan.
        """
        self.model.eval()
        pred_correctly = 0
        total = 0
        actual_labels = []
        predicted_labels = []
        validation_loss = []  # per-batch losses

        with torch.no_grad():
            for i_image, i_label in self.test_loader:
                i_image = i_image.to(self.device)
                i_label = i_label.to(self.device)

                total += i_label.size(0)
                output = self.model(i_image)

                loss = self.loss_fn(output, i_label)
                validation_loss.append(loss.item())  # record it so the average below is meaningful

                predicted = output.argmax(dim=1)  # index of the highest score per sample
                pred_correctly += (predicted == i_label).sum().item()

                actual_labels.extend(i_label.cpu().tolist())
                predicted_labels.extend(predicted.cpu().tolist())

        # Guard against an empty loader instead of dividing by zero.
        accuracy = 100.0 * pred_correctly / total if total else 0.0
        average_validation_loss = np.mean(validation_loss) if validation_loss else float('nan')

        if output_logging:
            print(f' --Validation loss:- {average_validation_loss}', end='')
            print(f' --Testing....got-({pred_correctly}/{total})-correctly-->>accuracy=({accuracy:.2f}%)')

        # Display confusion matrix if requested
        if disp_confusion_matrix:
            display_labels_list = [str(label) for label in self.categories]
            # Pass every class id explicitly so the matrix keeps one row/column
            # per category even when a class never occurs in this evaluation.
            class_ids = list(range(len(display_labels_list)))
            ConfusionMatrixDisplay(
                confusion_matrix=confusion_matrix(actual_labels, predicted_labels, labels=class_ids),
                display_labels=display_labels_list
            ).plot()
            plt.xticks(rotation=90)
            plt.show()

        return accuracy, average_validation_loss


    ############################################################################
    # Classify a single image
    def classify_image(self, img_path, img_transform):
        """
        Displays a single image and prints its class probabilities and predicted class.
        Args:
            img_path (str): Path fragment concatenated directly onto the global DATA_PATH.
            img_transform (callable): The image transform to apply before inference.
        """
        self.model.eval()
        # Load eagerly and close the file; force 3 channels so the
        # 3-channel normalisation used in this notebook applies.
        with Image.open(DATA_PATH + img_path) as opened_img:
            input_img = opened_img.convert('RGB')

        plt.figure(figsize=(10, 10),dpi=30)
        plt.imshow(input_img)
        plt.axis('off')
        plt.show()

        img_tensor = img_transform(input_img).unsqueeze(0).to(self.device)  # add the batch dimension
        with torch.no_grad():
            output = self.model(img_tensor)
            # output[0] is the 1-D score vector of the only sample, so the
            # class dimension is dim 0.
            probabilities1 = torch.nn.functional.softmax(output[0], dim=0)
            print(probabilities1)
            predicted_class_index = probabilities1.argmax(dim=0)
            predicted_class = self.categories[predicted_class_index.item()]
            print(f"Predicted class: {predicted_class}")

## Plotting Training Results

In [23]:
import matplotlib.pyplot as plt

def ploting(ax, arr, color,lname, ylabel='loss', xlabel='epoch'):
    """
    Draw one series on `ax` as a line with a marker at every point.

    Args:
        ax: Axes-like object to draw on.
        arr: Sequence of values, one per epoch.
        color: Colour used for the line, markers, y-label and y tick labels.
        lname: Legend label attached to the line.
        ylabel: Text for the y axis.
        xlabel: Text for the x axis.
    """
    # Axis decoration does not depend on the data, so do it first.
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel, color=color)
    ax.tick_params(axis='y', labelcolor=color)

    # Line carries the legend label; the scatter just marks each epoch.
    epoch_positions = range(len(arr))
    ax.plot(arr, color=color, label=lname)
    ax.scatter(epoch_positions, arr, color=color)


def plot_classifier_results(train_loss, valid_loss, accy, classifier_name):
    """
    Plot training loss, validation loss and accuracy per epoch in one figure.

    Both losses share the left y axis; accuracy uses a twin axis on the right.

    Args:
        train_loss: Per-epoch training losses.
        valid_loss: Per-epoch validation losses.
        accy: Per-epoch accuracies (percent).
        classifier_name: Text appended to legend entries and the title.
    """
    fig, loss_ax = plt.subplots(figsize=(10, 4), dpi=90)
    accuracy_ax = loss_ax.twinx()

    ploting(loss_ax, train_loss, color='red', lname=f'Training Loss ({classifier_name})')
    ploting(loss_ax, valid_loss, color='blue', lname=f'Validation Loss ({classifier_name})')
    ploting(accuracy_ax, accy, color='green', lname=f'Test Accuracy (%) ({classifier_name})', ylabel='Accuracy (%)')

    # Gather the legend entries of both axes so a single legend lists all series.
    handles, labels = [], []
    for axis in (loss_ax, accuracy_ax):
        axis_handles, axis_labels = axis.get_legend_handles_labels()
        handles = handles + axis_handles
        labels = labels + axis_labels
    plt.legend(handles, labels, loc='lower left')

    plt.title(f'Training Loss, Validation Loss, and Test Accuracy per Epoch ({classifier_name})')
    plt.tight_layout()
    plt.show()

## Modify the last layer





In [24]:
def modify_last_layer(model, layer_type, pretrained, categories=train_dataset.classes):
    """
    Replace `model.fc` with a new head sized for `categories` and initialise it.

    Args:
        model: Model whose final layer is exposed as `model.fc` with an
            `in_features` attribute.
        layer_type (str): 'linear' for a single Linear layer, or 'sequential'
            for Linear -> ReLU -> Dropout -> Linear with a 256-unit hidden layer.
        pretrained (bool): Stored on the model as `_is_pretrained_backbone`.
            When True only the new head is re-initialised; otherwise every
            Linear layer in the model is.
        categories: Class names; only their count is used.

    Returns:
        The same model object, modified in place.

    Raises:
        ValueError: If `layer_type` is not 'linear' or 'sequential'.
    """
    # Reject unknown head types before touching the model; otherwise the
    # original head (and its output size) would silently stay in place.
    if layer_type not in ('linear', 'sequential'):
        raise ValueError(
            f"layer_type must be 'linear' or 'sequential', got {layer_type!r}"
        )

    model._is_pretrained_backbone = pretrained
    # Assuming the last layer is named 'fc' as in many torchvision models
    last_layer_in = model.fc.in_features
    no_of_classes = len(categories)
    new_mid_layer_out = 256 # You can adjust this based on your needs

    if layer_type == 'linear':
        print(f'convert from {last_layer_in} to-> {no_of_classes} output')
        model.fc = nn.Linear(last_layer_in, no_of_classes)
    else:
        print(f'convert from {last_layer_in} to-> {new_mid_layer_out} to-> {no_of_classes} output')
        # No Softmax at the end: the head returns raw scores.
        model.fc = nn.Sequential(
            torch.nn.Linear(in_features=last_layer_in, out_features=new_mid_layer_out),
            torch.nn.ReLU(),
            torch.nn.Dropout(p=0.2),
            torch.nn.Linear(in_features=new_mid_layer_out,
                            out_features=no_of_classes,),
        )

    if pretrained:
        model = fc_weight_bias_init(model)
    else:
        model = all_weight_bias_init(model)
    return model

In [25]:
import torch.nn.init as init
def _init_linear_layers(modules):
    """Re-initialise, in place, every nn.Linear found in the iterable `modules`."""
    for module in modules:
        if not isinstance(module, nn.Linear):
            continue
        init.kaiming_normal_(
            module.weight, a=0, mode='fan_out', nonlinearity='relu',
        )
        if module.bias is not None:
            # A Linear weight has shape (out_features, in_features), so its
            # fan_out is out_features. Draw the bias uniformly from
            # [-bound, bound]; normal_(t, -bound, bound) would instead mean
            # "mean=-bound, std=bound".
            bound = 1 / module.out_features ** 0.5
            init.uniform_(module.bias, -bound, bound)


def fc_weight_bias_init(model):
    """
    Re-initialise only the Linear layers inside `model.fc`.

    Args:
        model: Model exposing its head as `model.fc`.

    Returns:
        The same model object.
    """
    _init_linear_layers(model.fc.modules())
    return model

def all_weight_bias_init(model):
    """
    Re-initialise every Linear layer anywhere in `model`; other layer types
    are left untouched.

    Args:
        model (torch.nn.Module): Model to initialise.

    Returns:
        The same model object.
    """
    _init_linear_layers(model.modules())
    return model

In [26]:
# Returns a model to an untrained state so a new experiment does not start
# from the weights learned in the previous one.
def reset_model(model):
    """
    Reset `model` in place according to its `_is_pretrained_backbone` flag.

    * Flag True: only the Linear layers inside `model.fc` are re-initialised,
      so the backbone weights are kept.
    * Flag False: every submodule that provides `reset_parameters()` is reset
      to its layer default, then the custom Linear initialisation is applied
      on top.

    Args:
        model (torch.nn.Module): Model carrying the `_is_pretrained_backbone`
            attribute (set by modify_last_layer).
    """
    if model._is_pretrained_backbone:
        # Call once on the whole model: Module.apply would invoke the function
        # on every submodule, and most of those have no `.fc` attribute.
        fc_weight_bias_init(model)
        print('reset last')
    else:
        # The custom init only touches Linear layers, so first let every
        # other layer restore its own defaults.
        for module in model.modules():
            reset = getattr(module, 'reset_parameters', None)
            if callable(reset):
                reset()
        all_weight_bias_init(model)
        print('reset all')

# Main part

## Create instances and classify

### Model 1 ResNet18 non pretrained

In [None]:
# Model 1: ResNet18 created without pretrained weights (weights=None), with
# its final layer replaced by a single linear head.
import torchvision.models as models
model1 = models.resnet18(weights=None)
model1 = modify_last_layer(model1, layer_type='linear', pretrained=False)


#### Classifier 1

In [None]:
# Run 1: Adam with lr=0.001 and mini-batches of 16.
optimizer1 = optim.Adam(model1.parameters(), lr=0.001)
classifier1 = BuildingClassifier(model=model1, optimizer=optimizer1, batch_size=16)

# change output_logging=True to print output of each iteration
history1 = classifier1.train(n_epochs=15, output_logging=False)
train_loss1, valid_loss1, accy1 = history1

Starting training with Learning Rate: 0.001, Batch Size: 16, Epochs: 15, Device: cuda


 47%|████▋     | 7/15 [01:24<01:37, 12.19s/it]

In [None]:
# Plot the per-epoch curves of run 1, then show its confusion matrix.
plot_classifier_results(
    train_loss=train_loss1,
    valid_loss=valid_loss1,
    accy=accy1,
    classifier_name='Classifier 1 (ResNet18)',
)
classifier1.evaluate(disp_confusion_matrix=True)

#### Classifier 2

In [None]:
# Instance 2 with different parameters
reset_model(model1) # NEEDED for clearing pre run data
classifier2 = BuildingClassifier(
    model=model1,
    optimizer=optim.Adam(model1.parameters(), lr=0.01),
    batch_size=32,
)

# change output_logging=True to print output of each iteration
train_loss2, valid_loss2, accy2 = classifier2.train(n_epochs=10, output_logging=False)

In [None]:
# plot results
plot_classifier_results(train_loss2, valid_loss2, accy2, 'Classifier 2 (ResNet18)')
classifier2.evaluate(disp_confusion_matrix=True)