# Multi-Label Image Classification on MLRS Net Dataset
Dataset link: <https://www.kaggle.com/datasets/vigneshwar472/mlrs-net>

In [None]:
# Colab-only setup: mount the user's Google Drive at /content/drive so files
# stored there can be read through ordinary file-system paths.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install torchmetrics

In [None]:
import torch
import torchvision
import torchmetrics
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset

from sklearn.preprocessing import MultiLabelBinarizer

import os
import ast
import pandas as pd
from PIL import Image
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Run on the GPU whenever CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
# Paths
# Root folders of the train / validation / test splits on the mounted Drive.
# get_loaders() reads a label file and an image folder from inside each one.
train_dir = "/content/drive/MyDrive/mlrs_dataset/train"
val_dir = "/content/drive/MyDrive/mlrs_dataset/validation"
test_dir = "/content/drive/MyDrive/mlrs_dataset/test"

# Dataset Class

In [None]:
class MLRSDataset(Dataset):
    """Multi-label image dataset backed by a folder of JPEGs and a label CSV.

    The CSV must provide an ``image_id`` column (file name without the
    ``.jpg`` extension) and a ``labels`` column whose cells are the text of a
    Python list of label names, e.g. ``"['airport', 'road']"``.

    Args:
        images_dir: Directory containing the ``<image_id>.jpg`` files.
        csv_path: Path of the label CSV described above.
        image_transforms: Optional callable applied to every loaded PIL image.
        classes: Optional fixed list of class names. Pass the training
            dataset's ``classes`` when building other splits so that every
            split yields label vectors with the same length and column order.
            When omitted, the sorted set of labels found in the CSV is used.

    Each item is ``(image, target)`` where ``target`` is a float tensor of
    length ``len(classes)`` holding 1.0 for every label of the image.
    """

    def __init__(self, images_dir, csv_path, image_transforms=None, classes=None):
        self.images_dir = images_dir
        self.image_transforms = image_transforms

        # Load the CSV and turn the stringified label lists into real lists.
        # ast.literal_eval only accepts literals, so no code is executed.
        self.df = pd.read_csv(csv_path)
        self.df["labels"] = self.df["labels"].apply(ast.literal_eval)

        if classes is None:
            self.classes = sorted(
                {label for labels in self.df["labels"] for label in labels}
            )
        else:
            self.classes = classes

        self.mlb = MultiLabelBinarizer(classes=self.classes)
        self.mlb.fit(self.df["labels"])

        # Encode all rows once instead of calling the binarizer per sample.
        # The result always has shape (num_samples, num_classes), so a single
        # row keeps its 1-D shape even when there is only one class.
        encoded = self.mlb.transform(self.df["labels"])
        self.targets = torch.from_numpy(encoded).float()

        self.image_paths = [
            os.path.join(self.images_dir, f"{name}.jpg")
            for name in self.df["image_id"]
        ]

    @property
    def image_transorms(self):
        """Backward-compatible alias for the formerly misspelled attribute."""
        return self.image_transforms

    @image_transorms.setter
    def image_transorms(self, value):
        self.image_transforms = value

    def __len__(self):
        """Return the number of rows (images) listed in the CSV."""
        return len(self.df)

    def __getitem__(self, index):
        """Return the transformed RGB image and its multi-hot label tensor."""
        # The context manager closes the underlying file as soon as the pixel
        # data has been copied into the converted image.
        with Image.open(self.image_paths[index]) as raw_image:
            image = raw_image.convert("RGB")

        # Compare against None explicitly: a transform object may define its
        # own truthiness (e.g. an empty container) and must still be applied.
        if self.image_transforms is not None:
            image = self.image_transforms(image)

        # Clone so callers receive an independent tensor, not a view into the
        # shared pre-computed label matrix.
        return image, self.targets[index].clone()

# Helper Functions

In [None]:
def _locate_split_files(split_dir):
    """Return ``(images_dir, labels_csv)`` found directly inside ``split_dir``."""
    # os.listdir returns entries in arbitrary order, so identify them by type
    # rather than by position. Hidden entries (e.g. notebook checkpoint
    # folders) are ignored.
    entries = sorted(
        entry for entry in os.listdir(split_dir) if not entry.startswith(".")
    )
    paths = [os.path.join(split_dir, entry) for entry in entries]

    image_dirs = [path for path in paths if os.path.isdir(path)]
    label_files = [path for path in paths if os.path.isfile(path)]
    # Prefer files with a .csv extension when other regular files are present.
    csv_files = [path for path in label_files if path.lower().endswith(".csv")]
    if csv_files:
        label_files = csv_files

    if len(image_dirs) != 1 or len(label_files) != 1:
        raise FileNotFoundError(
            f"Expected exactly one image folder and one label file in "
            f"{split_dir!r}, found folders={image_dirs} files={label_files}"
        )
    return image_dirs[0], label_files[0]


def get_loaders(train_dir, val_dir, test_dir, batch_size=128, num_workers=2):
    """Build the train, validation and test ``DataLoader`` objects.

    Each split directory must contain one label CSV and one sub-directory
    holding the images.

    Args:
        train_dir: Directory of the training split.
        val_dir: Directory of the validation split.
        test_dir: Directory of the test split.
        batch_size: Number of samples per batch for all three loaders.
        num_workers: Number of worker processes used by each loader.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``. Only the training
        loader shuffles and applies random flips.

    Raises:
        FileNotFoundError: If a split directory does not contain exactly one
            image folder and one label file.
    """
    # Channel statistics conventionally used with ImageNet-pretrained models.
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
    )

    train_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.ToTensor(),
        normalize,
    ])

    val_test_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize,
    ])

    train_images, train_labels = _locate_split_files(train_dir)
    train_dataset = MLRSDataset(train_images, train_labels, train_transforms)

    # Reuse the training class list so that every split encodes labels into
    # vectors of identical length and column order, even when a split happens
    # to miss some class.
    shared_classes = train_dataset.classes

    val_images, val_labels = _locate_split_files(val_dir)
    val_dataset = MLRSDataset(
        val_images, val_labels, val_test_transforms, classes=shared_classes
    )

    test_images, test_labels = _locate_split_files(test_dir)
    test_dataset = MLRSDataset(
        test_images, test_labels, val_test_transforms, classes=shared_classes
    )

    loader_options = dict(
        batch_size=batch_size, num_workers=num_workers, pin_memory=True
    )
    train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
    val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)
    test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

    return train_loader, val_loader, test_loader