## Import Libraries

In [1]:
import os
import pandas as pd
import torch
import torch.nn as nn
from torchvision import transforms, datasets, models
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, precision_score, recall_score


# Load and Split Dataset

In [2]:
# Set device: prefer the GPU, but fall back to the CPU so the notebook still
# runs (slowly) on machines without a CUDA-capable device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
# Load the multilabel annotations (tab-separated file)
LABEL_FILE = 'LandUse_Multilabeled.txt'
SPLIT_SEED = 42  # shared by both splits so the partition is reproducible

data = pd.read_csv(LABEL_FILE, sep='\t')
X = data.iloc[:, 0]   # Image names (first column)
y = data.iloc[:, 1:]  # Labels (all remaining columns)

# Split into train, validation, and test sets: 70 % train, then the held-out
# 30 % is halved into test and validation.
# No stratification is applied: attempting it produced a message that some
# classes occur only once.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=SPLIT_SEED
)
X_test, X_val, y_test, y_val = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=SPLIT_SEED
)


In [5]:
# Every column except the first (image name) is a label. Reading the column
# count from the frame's shape avoids materialising row 0 and also works when
# the frame has no rows.
num_labels = data.shape[1] - 1

In [6]:
# Report how many samples ended up in each split
for split_name, split_items in (("Train", X_train), ("Validation", X_val), ("Test", X_test)):
    print(f"{split_name} size: ", len(split_items))

Train size:  1470
Validation size:  315
Test size:  315


# Define Dataset Class

In [7]:
class UCMercedDataset(Dataset):
    """Multi-label image dataset reading ``.tif`` files from per-class sub-folders.

    Images are expected at ``<image_dir>/<subfolder>/<name>.tif`` where
    ``<subfolder>`` is the image name without its last two characters
    (e.g. ``'airplane22'`` lives in ``'airplane'``).

    Args:
        image_names: pandas Series of image base names without extension
            (indexed positionally via ``.iloc``).
        labels: pandas DataFrame with one row of label values per image, in
            the same order as ``image_names``.
        image_dir: Root directory that contains the per-class sub-folders.
        transform: Optional callable applied to the loaded image.
    """

    def __init__(self, image_names, labels, image_dir, transform=None):
        self.image_names = image_names
        self.labels = labels
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_names)

    def __getitem__(self, idx):
        """Load the image at position ``idx`` and return ``(image, label)``.

        Returns:
            The (optionally transformed) image and a float32 tensor holding
            the label row for that image.

        Raises:
            FileNotFoundError: If the image file does not exist on disk.
        """
        base_filename = self.image_names.iloc[idx]
        subfolder = base_filename[:-2]     # 'airplane22' -> 'airplane'

        # Honour the configured root directory instead of a hard-coded "Images".
        img_path = os.path.join(self.image_dir, subfolder, f"{base_filename}.tif")

        if not os.path.exists(img_path):
            raise FileNotFoundError(f"Image not found at: {img_path}")

        image = datasets.folder.default_loader(img_path)
        # BCEWithLogitsLoss-style losses need float targets, hence float32.
        label = torch.tensor(self.labels.iloc[idx].values, dtype=torch.float32)

        if self.transform:
            image = self.transform(image)

        return image, label


# Preprocess Images

In [8]:
# Preprocessing constants
IMAGE_SIZE = (224, 224)
NORM_MEAN = [0.485, 0.456, 0.406]  # per-channel mean (the commonly used ImageNet statistics)
NORM_STD = [0.229, 0.224, 0.225]   # per-channel std (the commonly used ImageNet statistics)
BATCH_SIZE = 32

# Resize -> tensor -> channel-wise normalisation; shared by all three splits
transform = transforms.Compose(
    [
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
    ]
)

train_dataset = UCMercedDataset(X_train, y_train, image_dir='Images', transform=transform)
val_dataset = UCMercedDataset(X_val, y_val, image_dir='Images', transform=transform)
test_dataset = UCMercedDataset(X_test, y_test, image_dir='Images', transform=transform)

# Only the training data is shuffled; validation and test keep a fixed order
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)


# Load and Adjust Pretrained Models

In [9]:
# Both backbones come from the original DINO torch.hub repository
# (facebookresearch/dino), not from the separate DINOv2 repository.
DINO_HUB_REPO = 'facebookresearch/dino:main'

# Load DINO ResNet 50
resnet50 = torch.hub.load(DINO_HUB_REPO, 'dino_resnet50')

# Load DINO ViT-S/8
vits8 = torch.hub.load(DINO_HUB_REPO, 'dino_vits8')

Using cache found in C:\Users\tiesk/.cache\torch\hub\facebookresearch_dino_main
Using cache found in C:\Users\tiesk/.cache\torch\hub\facebookresearch_dino_main


In [10]:
# Change the Dino model to multi-label classification
class DinoResNetMultiLabel(nn.Module):
    """Convolutional backbone plus a linear head for multi-label classification.

    The backbone is built from every child module of ``base_model`` except the
    last one (the final classification layer), chained in an ``nn.Sequential``.

    Args:
        base_model: Pretrained model whose last child module is dropped.
        num_labels: Number of output logits (one per label).
        feature_dim: Size of the flattened backbone output fed to the head.
            Defaults to 2048, the output feature dim of ResNet50; override it
            to reuse this wrapper with a backbone of a different width.
    """

    def __init__(self, base_model, num_labels, feature_dim=2048):
        super().__init__()
        self.backbone = nn.Sequential(*list(base_model.children())[:-1])  # remove final classification layer
        self.classifier = nn.Linear(feature_dim, num_labels)

    def forward(self, x):
        """Return raw logits (no sigmoid applied) for the input batch ``x``."""
        x = self.backbone(x)
        x = torch.flatten(x, 1)  # collapse everything after the batch dimension
        x = self.classifier(x)
        return x

In [11]:
# Change the ViT model to multi-label classification
class DinoViTMultiLabel(nn.Module):
    """Transformer backbone plus a linear head for multi-label classification.

    Args:
        base_model: Backbone module; it must expose an ``embed_dim`` attribute
            giving the size of the feature vector it returns.
        num_labels: Number of output logits (one per label).
    """

    def __init__(self, base_model, num_labels):
        super().__init__()
        feature_size = base_model.embed_dim  # embed_dim is the output feature dim of ViT-S/8
        self.backbone = base_model
        self.classifier = nn.Linear(feature_size, num_labels)

    def forward(self, x):
        """Return raw logits (no sigmoid applied) for the input batch ``x``."""
        return self.classifier(self.backbone(x))

## Prepare for Training

In [12]:
# Model setup: wrap each pretrained backbone with a fresh linear head, then
# move the wrapped model to the selected device
resnet_model = DinoResNetMultiLabel(resnet50, num_labels)
resnet_model = resnet_model.to(device)
vits_model = DinoViTMultiLabel(vits8, num_labels)
vits_model = vits_model.to(device)

# Loss and optimizers: the loss takes raw logits; each model gets its own
# Adam optimizer with the same learning rate
LEARNING_RATE = 1e-4
criterion = nn.BCEWithLogitsLoss()
resnet_optimizer = torch.optim.Adam(resnet_model.parameters(), lr=LEARNING_RATE)
vits_optimizer = torch.optim.Adam(vits_model.parameters(), lr=LEARNING_RATE)

# Define Training Loop

In [13]:
# Training function
def train_model(model, optimizer, name, num_epochs=10, loader=None):
    """Train ``model`` and report the validation F1 score after every epoch.

    Uses the notebook-level ``criterion`` and ``device``, and calls
    ``validate_model`` at the end of each epoch.

    Args:
        model: Module producing one logit per label.
        optimizer: Optimizer already bound to ``model``'s parameters.
        name: Tag printed in front of every log line.
        num_epochs: Number of passes over the training data (default 10).
        loader: Iterable of ``(images, labels)`` batches. Defaults to the
            notebook-level ``train_loader``; pass a loader with a smaller
            batch size for models that do not fit in GPU memory otherwise.
    """
    if loader is None:
        loader = train_loader

    for epoch in range(num_epochs):
        model.train()  # validate_model() switches to eval mode, so reset every epoch
        total_loss = 0.0
        num_batches = 0

        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            num_batches += 1

        # Guard against an empty loader instead of dividing by zero.
        avg_loss = total_loss / max(num_batches, 1)
        print(f"[{name}] Epoch {epoch+1}: Train Loss = {avg_loss:.4f}")

        validate_model(model, name)

# Validation function
def validate_model(model, name):
    """Print the macro-averaged F1 score of ``model`` on the validation set.

    Iterates over the notebook-level ``val_loader`` with gradients disabled,
    thresholds the sigmoid outputs at 0.5 and compares them with the labels.

    Args:
        model: Module producing one logit per label.
        name: Tag printed in front of the log line.
    """
    model.eval()
    pred_batches, target_batches = [], []

    with torch.no_grad():
        for batch_images, batch_labels in val_loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            probabilities = torch.sigmoid(model(batch_images))
            binary_preds = (probabilities > 0.5).int()

            # Collect on the CPU so scikit-learn can consume the results
            pred_batches.append(binary_preds.cpu())
            target_batches.append(batch_labels.cpu().int())

    y_pred = torch.cat(pred_batches)
    y_true = torch.cat(target_batches)

    macro_f1 = f1_score(y_true, y_pred, average="macro", zero_division=0)
    print(f"[{name}] Validation F1 score: {macro_f1:.4f}\n")

# Train and Evaluate Models

In [14]:
# Train ResNet 50 (the loss and validation F1 are printed after every epoch)
print("Training ResNet 50...")
train_model(model=resnet_model, optimizer=resnet_optimizer, name="ResNet50")

Training ResNet 50...
[ResNet50] Epoch 1: Train Loss = 0.5360
[ResNet50] Validation F1 score: 0.2895

[ResNet50] Epoch 2: Train Loss = 0.3222
[ResNet50] Validation F1 score: 0.4852

[ResNet50] Epoch 3: Train Loss = 0.2248
[ResNet50] Validation F1 score: 0.5975

[ResNet50] Epoch 4: Train Loss = 0.1740
[ResNet50] Validation F1 score: 0.7227

[ResNet50] Epoch 5: Train Loss = 0.1462
[ResNet50] Validation F1 score: 0.8683

[ResNet50] Epoch 6: Train Loss = 0.1232
[ResNet50] Validation F1 score: 0.9016

[ResNet50] Epoch 7: Train Loss = 0.1044
[ResNet50] Validation F1 score: 0.9151

[ResNet50] Epoch 8: Train Loss = 0.0921
[ResNet50] Validation F1 score: 0.9237

[ResNet50] Epoch 9: Train Loss = 0.0817
[ResNet50] Validation F1 score: 0.9303

[ResNet50] Epoch 10: Train Loss = 0.0678
[ResNet50] Validation F1 score: 0.9289

Training ViT-S/8...


OutOfMemoryError: CUDA out of memory. Tried to allocate 452.00 MiB. GPU 0 has a total capacity of 4.00 GiB of which 0 bytes is free. Of the allocated memory 9.88 GiB is allocated by PyTorch, and 752.73 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# Train ViT-S/8
# NOTE(review): a previous run of this step failed with "CUDA out of memory"
# on a 4 GiB GPU. Releasing cached-but-unused blocks left over from earlier
# work gives this run as much free memory as possible; if it still does not
# fit, the training batch size most likely has to be reduced for this model.
if torch.cuda.is_available():
    torch.cuda.empty_cache()

print("Training ViT-S/8...")
train_model(vits_model, vits_optimizer, name="ViT-S/8")

# Evaluate Models

In [None]:
def evaluate_model(model, test_loader):
    """Print macro-averaged F1, precision and recall of ``model`` on a test set.

    Args:
        model: Module producing one logit per label; it is moved to the GPU
            when one is available, otherwise to the CPU.
        test_loader: Iterable of ``(images, labels)`` batches.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    model.eval()
    all_labels = []
    all_preds = []

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = torch.sigmoid(model(images))
            preds = (outputs > 0.5).int()
            all_labels.append(labels.cpu().int())
            all_preds.append(preds.cpu())

    all_labels = torch.cat(all_labels)
    all_preds = torch.cat(all_preds)
    # zero_division=0 matches validate_model: labels that are never predicted
    # score 0 instead of triggering a warning.
    f1 = f1_score(all_labels, all_preds, average='macro', zero_division=0)
    precision = precision_score(all_labels, all_preds, average='macro', zero_division=0)
    recall = recall_score(all_labels, all_preds, average='macro', zero_division=0)
    print(f"F1 Score: {f1:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}")

# Evaluate the fine-tuned models (the wrapped classifiers, not the raw hub backbones)

# Evaluate ResNet 50
print("Evaluating ResNet 50...")
evaluate_model(resnet_model, test_loader)

# Evaluate ViT-S/8
print("Evaluating ViT-S/8...")
evaluate_model(vits_model, test_loader)