# Import Libraries

In [1]:
import os
import pandas as pd
import torch
import torch.nn as nn
from torchvision import transforms, datasets, models
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, precision_score, recall_score


# Load and Split Dataset

In [2]:
# Set device: use the GPU when CUDA is available, otherwise fall back to the CPU
# so that the later `.to(device)` calls do not fail on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# Load the multilabel data (tab-separated table):
# the first column holds the image names, every remaining column is a label.
data = pd.read_csv('LandUse_Multilabeled.txt', sep='\t')
X, y = data.iloc[:, 0], data.iloc[:, 1:]

# Split into train, validation, and test sets:
# 30% is held out first, then that held-out part is halved into test and validation.
# No stratification is applied because it raised a message that some classes only occur once.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=42
)
X_test, X_val, y_test, y_val = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)


In [4]:
# Number of label columns: every column of `data` except the first (image name) one.
num_labels = data.shape[1] - 1

In [5]:
# Report how many images ended up in each split.
print("Train size: ", X_train.shape[0])
print("Validation size: ", X_val.shape[0])
print("Test size: ", X_test.shape[0])

Train size:  1470
Validation size:  315
Test size:  315


# Define Dataset Class

In [6]:
class UCMercedDataset(Dataset):
    """Multi-label image dataset reading ``<image_dir>/<class>/<name>.tif`` files.

    Args:
        image_names: pandas Series of base file names without extension
            (e.g. ``'airplane22'``); accessed positionally via ``.iloc``.
        labels: pandas DataFrame with one row of label values per image,
            aligned positionally with ``image_names``.
        image_dir: Root directory that contains one sub-folder per class.
        transform: Optional callable applied to the loaded image.
    """

    def __init__(self, image_names, labels, image_dir, transform=None):
        self.image_names = image_names
        self.labels = labels
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_names)

    def __getitem__(self, idx):
        """Load the image at position ``idx`` and its label vector.

        Returns:
            Tuple ``(image, label)`` where ``label`` is a float32 tensor built
            from the ``idx``-th row of ``labels``.

        Raises:
            FileNotFoundError: If the expected image file does not exist.
        """
        base_filename = self.image_names.iloc[idx]
        # The class sub-folder is the file name without its last two characters,
        # e.g. 'airplane22' -> 'airplane' (assumes a two-character numeric suffix).
        subfolder = base_filename[:-2]

        # Build the path from the configured root instead of a hard-coded "Images",
        # so the `image_dir` constructor argument is actually honoured.
        img_path = os.path.join(self.image_dir, subfolder, f"{base_filename}.tif")

        if not os.path.exists(img_path):
            raise FileNotFoundError(f"Image not found at: {img_path}")

        image = datasets.folder.default_loader(img_path)
        label = torch.tensor(self.labels.iloc[idx].values, dtype=torch.float32)

        if self.transform:
            image = self.transform(image)

        return image, label


# Preprocess Images

In [7]:
# Preprocessing shared by all three splits: resize to 224x224, convert to a
# tensor, then normalise per channel (mean/std are the usual ImageNet statistics).
transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# One dataset per split, all reading from the 'Images' root.
train_dataset = UCMercedDataset(image_names=X_train, labels=y_train, image_dir='Images', transform=transform)
val_dataset = UCMercedDataset(image_names=X_val, labels=y_val, image_dir='Images', transform=transform)
test_dataset = UCMercedDataset(image_names=X_test, labels=y_test, image_dir='Images', transform=transform)

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=16, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=16, shuffle=False)


# Load and Adjust Pretrained Models

In [8]:
# Both backbones are fetched from the `facebookresearch/dino` hub repository,
# i.e. the original DINO release (not DINOv2).

# Load DINO ResNet 50
resnet50 = torch.hub.load(repo_or_dir='facebookresearch/dino:main', model='dino_resnet50')

# Load DINO ViT-S/8
vits8 = torch.hub.load(repo_or_dir='facebookresearch/dino:main', model='dino_vits8')

Using cache found in C:\Users\tiesk/.cache\torch\hub\facebookresearch_dino_main
Using cache found in C:\Users\tiesk/.cache\torch\hub\facebookresearch_dino_main


In [9]:
# Adapt the DINO ResNet model to multi-label classification
class DinoResNetMultiLabel(nn.Module):
    """ResNet backbone followed by a linear multi-label head.

    Args:
        base_model: Module whose child layers, minus the last one, form the backbone.
        num_labels: Number of output logits (one per label).
    """

    def __init__(self, base_model, num_labels):
        super().__init__()
        # Keep every child layer except the last one (the final classification layer).
        child_layers = list(base_model.children())
        self.backbone = nn.Sequential(*child_layers[:-1])
        # 2048 is the output feature dimension of ResNet50.
        self.classifier = nn.Linear(in_features=2048, out_features=num_labels)

    def forward(self, x):
        """Return raw logits, one row per input image and one column per label."""
        features = torch.flatten(self.backbone(x), start_dim=1)
        return self.classifier(features)

In [10]:
# Adapt the ViT model to multi-label classification
class DinoViTMultiLabel(nn.Module):
    """ViT backbone followed by a linear multi-label head.

    Args:
        base_model: Backbone module exposing an ``embed_dim`` attribute; its
            output is fed directly into the linear head.
        num_labels: Number of output logits (one per label).
    """

    def __init__(self, base_model, num_labels):
        super().__init__()
        # embed_dim is the output feature dimension of ViT-S/8.
        feature_dim = base_model.embed_dim
        self.backbone = base_model
        self.classifier = nn.Linear(in_features=feature_dim, out_features=num_labels)

    def forward(self, x):
        """Return raw logits computed from the backbone's output features."""
        return self.classifier(self.backbone(x))

# Prepare for Training

In [11]:
# Model setup: wrap each pretrained backbone with a multi-label head and move it to `device`
resnet_model = DinoResNetMultiLabel(base_model=resnet50, num_labels=num_labels).to(device)
vits_model = DinoViTMultiLabel(base_model=vits8, num_labels=num_labels).to(device)

# Loss and optimizers: BCE-with-logits loss shared by both models, one Adam optimizer each
criterion = nn.BCEWithLogitsLoss()
resnet_optimizer = torch.optim.Adam(params=resnet_model.parameters(), lr=1e-4)
vits_optimizer = torch.optim.Adam(params=vits_model.parameters(), lr=1e-4)

# Define Training Loop

In [12]:
# Training function
def train_model(model, optimizer, name, num_epochs=20):
    """Train ``model`` on ``train_loader`` and validate it after every epoch.

    Relies on the notebook-level globals ``train_loader``, ``criterion`` and
    ``device``, and calls ``validate_model`` at the end of each epoch.

    Args:
        model: Network to train; called on a batch of images to produce logits.
        optimizer: Optimizer that updates ``model``'s parameters.
        name: Label used as a prefix in the printed progress lines.
        num_epochs: Number of passes over ``train_loader``. Defaults to 20,
            the value that used to be hard-coded.
    """
    for epoch in range(num_epochs):
        # Re-enable training mode every epoch: validate_model() switches to eval mode.
        model.train()
        total_loss = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        # Mean of the per-batch losses over this epoch.
        avg_loss = total_loss / len(train_loader)
        print(f"[{name}] Epoch {epoch+1}: Train Loss = {avg_loss:.4f}")

        validate_model(model, name)

# Validation function
def validate_model(model, name, threshold=0.5):
    """Compute, print and return the macro F1 score of ``model`` on ``val_loader``.

    Relies on the notebook-level globals ``val_loader`` and ``device``.
    The model is left in evaluation mode when the function returns.

    Args:
        model: Network to evaluate; called on a batch of images to produce logits.
        name: Label used as a prefix in the printed result line.
        threshold: Sigmoid probabilities strictly greater than this value are
            predicted as 1. Defaults to 0.5, the value that used to be hard-coded.

    Returns:
        The macro-averaged F1 score computed by ``f1_score`` over the whole
        validation set.
    """
    model.eval()
    all_preds = []
    all_targets = []

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            # Convert logits to per-label probabilities, then binarise them.
            outputs = torch.sigmoid(model(images))
            preds = (outputs > threshold).int()

            all_preds.append(preds.cpu())
            all_targets.append(labels.cpu().int())

    preds = torch.cat(all_preds)
    targets = torch.cat(all_targets)

    f1 = f1_score(targets, preds, average="macro", zero_division=0)
    print(f"[{name}] Validation F1 score: {f1:.4f}\n")
    # Returned so callers can track the score (e.g. for model selection).
    return f1

# Train and Evaluate Models

In [13]:
# Train ResNet 50 (fine-tune the DINO ResNet-50 multi-label model)
print("Training ResNet 50...")
train_model(model=resnet_model, optimizer=resnet_optimizer, name="ResNet50")

Training ResNet 50...
[ResNet50] Epoch 1: Train Loss = 0.4551
[ResNet50] Validation F1 score: 0.3232

[ResNet50] Epoch 2: Train Loss = 0.2482
[ResNet50] Validation F1 score: 0.5990

[ResNet50] Epoch 3: Train Loss = 0.1794
[ResNet50] Validation F1 score: 0.7923

[ResNet50] Epoch 4: Train Loss = 0.1469
[ResNet50] Validation F1 score: 0.8876

[ResNet50] Epoch 5: Train Loss = 0.1292
[ResNet50] Validation F1 score: 0.9119

[ResNet50] Epoch 6: Train Loss = 0.1050
[ResNet50] Validation F1 score: 0.9289

[ResNet50] Epoch 7: Train Loss = 0.0922
[ResNet50] Validation F1 score: 0.9353

[ResNet50] Epoch 8: Train Loss = 0.0810
[ResNet50] Validation F1 score: 0.9339

[ResNet50] Epoch 9: Train Loss = 0.0717
[ResNet50] Validation F1 score: 0.9384

[ResNet50] Epoch 10: Train Loss = 0.0632
[ResNet50] Validation F1 score: 0.9426

[ResNet50] Epoch 11: Train Loss = 0.0524
[ResNet50] Validation F1 score: 0.9378

[ResNet50] Epoch 12: Train Loss = 0.0450
[ResNet50] Validation F1 score: 0.9360

[ResNet50] Epoc

In [14]:
# Train ViT-S/8 (fine-tune the DINO ViT-S/8 multi-label model)
print("Training ViT-S/8...")
train_model(model=vits_model, optimizer=vits_optimizer, name="ViT-S/8")

Training ViT-S/8...
[ViT-S/8] Epoch 1: Train Loss = 0.1867
[ViT-S/8] Validation F1 score: 0.9240

[ViT-S/8] Epoch 2: Train Loss = 0.0687
[ViT-S/8] Validation F1 score: 0.9403



KeyboardInterrupt: 

In [None]:
# Persist the weights (state dicts only) of both models to the working directory.
torch.save(obj=resnet_model.state_dict(), f="resnet50_trained.pth")
torch.save(obj=vits_model.state_dict(), f="vit_s8_trained.pth")
