# Multi-Label Image Classification on MLRS Net Dataset
Dataset link: <https://www.kaggle.com/datasets/vigneshwar472/mlrs-net>

In [1]:
# Colab-only: mount Google Drive at /content/drive; the dataset paths used
# later in this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install torchmetrics

Collecting torchmetrics
  Downloading torchmetrics-1.8.2-py3-none-any.whl.metadata (22 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.15.3-py3-none-any.whl.metadata (5.5 kB)
Downloading torchmetrics-1.8.2-py3-none-any.whl (983 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m983.2/983.2 kB[0m [31m22.5 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hDownloading lightning_utilities-0.15.3-py3-none-any.whl (31 kB)
Installing collected packages: lightning-utilities, torchmetrics
Successfully installed lightning-utilities-0.15.3 torchmetrics-1.8.2


In [3]:
import torch
import torchvision
import torchmetrics
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset

from sklearn.preprocessing import MultiLabelBinarizer

import os
import ast
import pandas as pd
from PIL import Image
import seaborn as sns
import matplotlib.pyplot as plt

In [4]:
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [5]:
# Locations of the three dataset splits on the mounted Drive.
_dataset_root = "/content/drive/MyDrive/mlrs_dataset"
train_dir = f"{_dataset_root}/train"
val_dir = f"{_dataset_root}/validation"
test_dir = f"{_dataset_root}/test"

# Dataset Class

In [6]:
class MLRSDataset(Dataset):
    """Multi-label image dataset backed by an image folder and a label CSV.

    The CSV must contain an ``image_id`` column (file name without extension;
    images are looked up as ``<images_dir>/<image_id>.jpg``) and a ``labels``
    column whose cells are Python literals (parsed with ``ast.literal_eval``)
    listing the labels of each image.

    Args:
        images_dir: Directory containing the ``.jpg`` files.
        csv_path: Path of the label CSV.
        image_transforms: Optional callable applied to the RGB ``PIL.Image``.
        classes: Optional ordered list of class names. When omitted, the sorted
            set of labels found in this CSV is used. Pass the training split's
            ``classes`` to other splits so that column ``i`` of the target
            vector means the same class everywhere.

    Each item is ``(image, target)`` where ``target`` is a float32 multi-hot
    tensor with one entry per class.
    """

    def __init__(self, images_dir, csv_path, image_transforms=None, classes=None):
        self.images_dir = images_dir
        self.image_transforms = image_transforms

        # Load and parse the CSV
        self.df = pd.read_csv(csv_path)
        self.df["labels"] = self.df["labels"].apply(ast.literal_eval)

        if classes is None:
            self.classes = sorted({label for sublist in self.df["labels"] for label in sublist})
        else:
            self.classes = classes

        self.mlb = MultiLabelBinarizer(classes=self.classes)
        self.mlb.fit(self.df["labels"])

        # Encode every row once up front instead of re-running the binarizer
        # for each __getitem__ call; row i is the multi-hot target of sample i.
        self.label_matrix = self.mlb.transform(self.df["labels"]).astype("float32")

        self.image_paths = [os.path.join(self.images_dir, f"{name}.jpg") for name in self.df["image_id"]]

    @property
    def image_transorms(self):
        """Misspelled alias of ``image_transforms`` kept for existing callers."""
        return self.image_transforms

    @image_transorms.setter
    def image_transorms(self, value):
        self.image_transforms = value

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        image_path = self.image_paths[index]

        # Close the file handle deterministically; convert() returns an
        # independent in-memory copy, so nothing is lost when the file closes.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")

        if self.image_transforms:
            image = self.image_transforms(image)

        # Copy the row so the returned tensor never aliases the cached matrix.
        # Indexing a row (rather than squeezing) keeps the shape (num_classes,)
        # even when there is only a single class.
        binary_tensor = torch.from_numpy(self.label_matrix[index].copy())

        return image, binary_tensor

# Helper Functions

In [7]:
def _find_split_contents(split_dir):
    """Return ``(images_dir, labels_file)`` for one split directory.

    Entries are classified by type (directory vs. regular file) instead of by
    their position in ``os.listdir``, whose ordering is arbitrary. Hidden
    entries (names starting with ``.``) are ignored.

    Raises:
        FileNotFoundError: if the split does not contain exactly one visible
            folder and exactly one visible regular file.
    """
    visible = sorted(name for name in os.listdir(split_dir) if not name.startswith("."))
    folders = [name for name in visible if os.path.isdir(os.path.join(split_dir, name))]
    files = [name for name in visible if os.path.isfile(os.path.join(split_dir, name))]

    if len(folders) != 1 or len(files) != 1:
        raise FileNotFoundError(
            f"Expected exactly one image folder and one label file in {split_dir!r}, "
            f"found folders={folders} and files={files}"
        )

    return os.path.join(split_dir, folders[0]), os.path.join(split_dir, files[0])


def get_loaders(train_dir, val_dir, test_dir, batch_size=128, num_workers=2):
    """Build the train / validation / test ``DataLoader`` objects.

    Each split directory is expected to hold one label file and one image
    folder. Training images are resized to 224x224 and randomly flipped;
    validation and test images are only resized. All splits are normalised
    with the same per-channel mean and standard deviation.

    Args:
        train_dir, val_dir, test_dir: Split directories.
        batch_size: Batch size used by all three loaders.
        num_workers: Worker processes per loader.

    Returns:
        ``(train_loader, val_loader, test_loader)``; only the training loader
        shuffles.
    """
    normalize = transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225])

    train_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.ToTensor(),
        normalize,
    ])

    val_test_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize,
    ])

    train_images, train_labels = _find_split_contents(train_dir)
    train_dataset = MLRSDataset(train_images, train_labels, train_transforms)

    # Reuse the training class list so that target column i refers to the same
    # class in every split, even if a split is missing some labels.
    shared_classes = train_dataset.classes

    val_images, val_labels = _find_split_contents(val_dir)
    val_dataset = MLRSDataset(val_images, val_labels, val_test_transforms, classes=shared_classes)

    test_images, test_labels = _find_split_contents(test_dir)
    test_dataset = MLRSDataset(test_images, test_labels, val_test_transforms, classes=shared_classes)

    # Pinned host memory only helps host-to-GPU copies; skip it on CPU-only runs.
    loader_kwargs = {
        "batch_size": batch_size,
        "num_workers": num_workers,
        "pin_memory": torch.cuda.is_available(),
    }

    train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
    val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
    test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

    return train_loader, val_loader, test_loader

In [16]:
def get_metrics(device, num_classes=60):
    """Create a fresh metric collection for multi-label evaluation.

    The collection holds macro-averaged multilabel accuracy, precision and
    recall under the keys ``"accuracy"``, ``"precision"`` and ``"recall"``.

    Args:
        device: Device the collection is moved to.
        num_classes: Number of labels passed to each metric as ``num_labels``.

    Returns:
        The ``torchmetrics.MetricCollection`` placed on ``device``.
    """
    shared_kwargs = {"num_labels": num_classes, "average": "macro"}
    multilabel = torchmetrics.classification

    collection = torchmetrics.MetricCollection({
        "accuracy": multilabel.MultilabelAccuracy(**shared_kwargs),
        "precision": multilabel.MultilabelPrecision(**shared_kwargs),
        "recall": multilabel.MultilabelRecall(**shared_kwargs),
    })

    return collection.to(device)

In [18]:
def training_setup(model):
    """Create the loss, optimizer and learning-rate scheduler for ``model``.

    Returns:
        A ``(criterion, optimizer, scheduler)`` tuple:
        ``BCEWithLogitsLoss``, Adam over ``model.parameters()`` with a learning
        rate of 1e-3, and a ``ReduceLROnPlateau`` scheduler in ``"min"`` mode
        that multiplies the learning rate by 0.1 after a patience of 3.
    """
    loss_fn = nn.BCEWithLogitsLoss()
    adam = optim.Adam(model.parameters(), lr=1e-3)
    plateau_kwargs = dict(mode="min", factor=0.1, patience=3)
    plateau_scheduler = optim.lr_scheduler.ReduceLROnPlateau(adam, **plateau_kwargs)
    return loss_fn, adam, plateau_scheduler

In [10]:
def train(model, dataloader, criterion, optimizer, metrics, device):
    """Run one training pass over ``dataloader``.

    Puts the model in training mode, resets ``metrics``, and for every batch
    performs a forward pass, backpropagation and an optimizer step, feeding the
    raw model outputs and targets to ``metrics.update``.

    Returns:
        The mean of the per-batch loss values (sum of batch losses divided by
        the number of batches).
    """
    model.train()
    metrics.reset()

    batch_losses = []
    for inputs, targets in dataloader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        predictions = model(inputs)
        batch_loss = criterion(predictions, targets)
        batch_loss.backward()
        optimizer.step()

        batch_losses.append(batch_loss.item())
        metrics.update(predictions, targets)

    return sum(batch_losses) / len(dataloader)

In [11]:
def validate(model, dataloader, criterion, metrics, device):
    """Evaluate ``model`` on ``dataloader`` without updating its weights.

    Puts the model in eval mode, resets ``metrics``, and for every batch runs a
    forward pass under ``torch.no_grad()``, feeding the raw model outputs and
    targets to ``metrics.update``.

    Returns:
        The mean of the per-batch loss values (sum of batch losses divided by
        the number of batches).
    """
    model.eval()
    metrics.reset()

    epoch_loss = 0.0

    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            # Accumulate with "+="; a bare "epoch_loss + ..." expression throws
            # the sum away and makes this function always return 0.0, which in
            # turn feeds a constant to any scheduler stepped on this value.
            epoch_loss += loss.item()

            metrics.update(outputs, labels)

    epoch_loss /= len(dataloader)
    return epoch_loss

In [12]:
def plot_learning_curves(train_losses, val_losses, train_accs, val_accs):
    """Show loss and accuracy curves side by side.

    The left panel plots training vs. validation loss and the right panel
    training vs. validation accuracy, both against the epoch number starting
    at 1. The number of epochs is taken from ``len(train_losses)``.
    """
    epochs = range(1, len(train_losses) + 1)

    # (training series, validation series, training legend, validation legend,
    #  panel title, y-axis label) for each panel, left to right.
    panels = [
        (train_losses, val_losses, 'Training Loss', 'Validation Loss',
         'Training and Validation Loss', 'Loss'),
        (train_accs, val_accs, 'Training Acc', 'Validation Acc',
         'Training and Validation Accuracy', 'Accuracy'),
    ]

    plt.figure(figsize=(12, 5))

    for position, panel in enumerate(panels, start=1):
        train_series, val_series, train_legend, val_legend, panel_title, y_label = panel

        plt.subplot(1, 2, position)
        plt.plot(epochs, train_series, 'b-', label=train_legend)
        plt.plot(epochs, val_series, 'r-', label=val_legend)
        plt.title(panel_title)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend()
        plt.grid(True)

    plt.tight_layout()
    plt.show()

In [13]:
def predict_plot_single_image(model, image_path, true_labels, transforms, classes, device):
    """Predict the labels of one image and display it with the result.

    The image is opened as RGB, passed through ``transforms`` (a callable, not
    the torchvision module of the same name), batched and run through
    ``model`` in eval mode. Classes whose sigmoid probability exceeds 0.5 are
    reported as predictions. The title shows the actual and predicted labels,
    in green when the two label sets are equal and in red otherwise.
    """
    model.eval()

    picture = Image.open(image_path).convert("RGB")
    batch = transforms(picture).unsqueeze(0).to(device)

    with torch.no_grad():
        scores = torch.sigmoid(model(batch)).squeeze()

    above_threshold = scores > 0.5
    hit_positions = above_threshold.nonzero(as_tuple=True)[0].cpu().numpy()
    predicted_names = [classes[position] for position in hit_positions]

    if set(true_labels) == set(predicted_names):
        title_color = 'green'
    else:
        title_color = 'red'
    caption = f"Actual: {true_labels}\nPredicted: {predicted_names}"

    plt.figure(figsize=(8, 8))
    plt.imshow(picture)
    plt.axis('off')
    plt.title(caption, color=title_color, fontsize=12, pad=20)
    plt.show()

# Baseline Model

In [None]:
class BaselineCNN(nn.Module):
    """Small three-stage convolutional baseline for multi-label classification.

    Three Conv-BatchNorm-ReLU stages (64, 128 and 256 channels; the first two
    followed by 2x2 max pooling) feed a global average pool, so any input
    spatial size large enough for the pooling layers is accepted. A single
    linear layer maps the pooled 256-dimensional feature vector to
    ``num_classes`` raw logits (no sigmoid is applied here).

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=60):
        super().__init__()

        self.feature_extractor = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),

            # NOTE(review): kernel_size=2 with padding=1 grows the feature map
            # by one pixel per side dimension, unlike the 3x3 layers above;
            # harmless before global pooling, but confirm it is intended.
            nn.Conv2d(128, 256, kernel_size=2, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),

            nn.AdaptiveAvgPool2d((1,1))
        )

        # The pooled feature map is (N, 256, 1, 1), so the classifier input
        # width must equal the last conv layer's 256 output channels.
        self.classifier = nn.Linear(256, num_classes)

    def forward(self, x):
        """Return logits of shape ``(N, num_classes)`` for an image batch ``x``."""
        x = self.feature_extractor(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

## Training

In [19]:
# Model to train, placed on the selected device.
baseline_model = BaselineCNN().to(device)

# Data loaders for the three splits.
train_loader, val_loader, test_loader = get_loaders(train_dir, val_dir, test_dir)

# One independent metric collection per split so their states never mix.
train_metrics, val_metrics, test_metrics = (get_metrics(device) for _ in range(3))

# Loss, optimizer and LR scheduler bound to the baseline model.
criterion, optimizer, scheduler = training_setup(baseline_model)

# Number of passes over the training set.
EPOCHS = 50

In [20]:
# Per-epoch loss and accuracy, appended to by the training loop.
train_history = dict(train_loss=[], train_acc=[])
val_history = dict(val_loss=[], val_acc=[])

In [None]:
# Main training loop: one train pass and one validation pass per epoch.
# The names assigned here (train_loss, val_loss, train_results, val_results)
# stay in the notebook namespace and are read by later cells.
print("Starting Training...")

for epoch in range(EPOCHS):
    print(f"Epoch {epoch+1} Started...\n")

    # Each call resets its metric collection before iterating the loader.
    train_loss = train(baseline_model, train_loader, criterion, optimizer, train_metrics, device)
    val_loss = validate(baseline_model, val_loader, criterion, val_metrics, device)

    # Aggregate the metric state accumulated during the passes above.
    train_results = train_metrics.compute()
    val_results = val_metrics.compute()

    # Record this epoch's loss and accuracy for the learning-curve plot.
    train_history["train_loss"].append(train_loss)
    train_history["train_acc"].append(train_results["accuracy"].item())
    
    val_history["val_loss"].append(val_loss)
    val_history["val_acc"].append(val_results["accuracy"].item())

    # The scheduler is stepped with the validation loss of this epoch.
    if scheduler:
        scheduler.step(val_loss)

    # Read the LR after the scheduler step, so the log shows the value that
    # the next epoch will use.
    current_lr = optimizer.param_groups[0]['lr']
    
    print(f"Epoch: {epoch+1}/{EPOCHS} | Current LR: {current_lr} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")

print("\nTraining Complete!")

Starting Training...
Epoch 1 Started...



In [None]:
# Plot the per-epoch loss and accuracy histories recorded during training.
plot_learning_curves(train_history["train_loss"], val_history["val_loss"], train_history["train_acc"], val_history["val_acc"])

In [None]:
# Report the training metrics as left in train_results by the training loop.
for metric, value in train_results.items():
    print(f"Train {metric.capitalize()}: {value.item():.4f}")

In [None]:
# Report the validation metrics as left in val_results by the training loop.
for metric, value in val_results.items():
    print(f"Val {metric.capitalize()}: {value.item():.4f}")

## Evaluation

In [None]:
# Evaluate the trained model on the held-out test split.
# validate() requires the device as its fifth argument; omitting it raises
# a TypeError before any batch is processed.
test_loss = validate(baseline_model, test_loader, criterion, test_metrics, device)
test_results = test_metrics.compute()
print(f"Test Loss: {test_loss:.4f}")
for metric, value in test_results.items():
    print(f"Test {metric.capitalize()}: {value.item():.4f}")