In [10]:
import torch
import pandas as pd
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
# from sklearn.metrics import precision_recall_fscore_support

from src.dataset.dataset import TrainDataset, TestDataset

## Vorbereitung des Dataloaders

### Metadaten laden und Dataloader vorbereiten

In [11]:
# Dataset and DataLoader
batch_size = 128
num_workers = 8
model_name = "ResNet-18-Eurosat-BLU"

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5, 0.5)),
])

# Load Test metadata
test_data_path = "data/SatellitePatches/PA-test/"
test_metadata_path = "data/GLC25_PA_metadata_test.csv"
test_metadata = pd.read_csv(test_metadata_path)
test_dataset = TestDataset(test_data_path, test_metadata, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

print(f"Test dataset size: {len(test_dataset)}")

# Load Training metadata
train_data_path = "data/SatellitePatches/PA-train"
train_metadata_path = "data/GLC25_PA_metadata_train.csv"
train_metadata = pd.read_csv(train_metadata_path)
train_dataset = TrainDataset(train_data_path, train_metadata, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

print(f"Train dataset size: {len(train_dataset)}")

Test dataset size: 14784
Train dataset size: 88987


## Vortrainiertes Modell Laden sowie Gerät für Berechnungen auswählen

### Gerät auswählen

In [12]:
from src.helpers import select_device

# Check if cuda is available
device = select_device()
print(f"Using device: {device}")

# Hyperparameters
learning_rate = 0.0001
num_epochs = 25
positive_weigh_factor = 1.0
num_classes = 11255 # Number of all unique classes within the PO and PA data.

Using device: mps


### Modell instanziieren und zum Gerät verschieben

In [13]:
from src.model.ResNets import ResNet18, ResNet50


model = ResNet18()
model.to(device)

optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
scheduler = CosineAnnealingLR(optimizer, T_max=25)

### Modell visualisieren

In [14]:
print(model)

ResNet18(
  (model): ResNet(
    (conv1): Conv2d(4, 64, kernel_size=(7, 7), stride=(2, 2))
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act1): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (drop_block): Identity()
        (act1): ReLU(inplace=True)
        (aa): Identity()
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act2): ReLU(inplace=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1):

### Zum Zweck der Reproduzierbarkeit Seed setzen

In [None]:
from src.helpers import set_seed

set_seed()

## Training Loop

### Erst mit PA-Daten

In [None]:
from src.helpers import train_loop

train_loop(model=model,
           train_loader=train_loader,
           optimizer=optimizer,
           device=device,
           scheduler=scheduler)

### Modell im Evaluierungsmodus speichern und testen

In [None]:
# Save the trained model
model.eval()
torch.save(model.state_dict(), f"{model_name}")

from src.helpers import test_loop

surveys, top_k_indices = test_loop(model, test_loader, device)

data_concatenated = [' '.join(map(str, row)) for row in top_k_indices]

pd.DataFrame(
    {
        'surveyId': surveys,
        'predictions': data_concatenated,
    }
).to_csv(f"csv_submissions/{model_name}.csv", index = False)

### Modell wieder in Trainingsmodus versetzen, Dataloader mit PO-Dataset instanziieren und erneut trainieren

In [None]:
model.train()
# Load Training metadata
train_data_path = "data/SatellitePatches/po/output/TIFF_64"
train_metadata_path = "data/GLC25_PO_metadata_train.csv"
train_metadata = pd.read_csv(train_metadata_path)
train_dataset = TrainDataset(train_data_path, train_metadata, transform=transform, grid_length=0.01)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

print(f"Train dataset size: {len(train_dataset)}")

### Training mit PO-Daten

In [None]:
train_loop(model=model,
           train_loader=train_loader,
           optimizer=optimizer,
           device=device,
           scheduler=scheduler,
           num_epochs=15)

Training for 25 epochs started.
Epoch 1/25, Batch 0/696, Loss: 0.7137155532836914
Epoch 1/25, Batch 348/696, Loss: 0.014051029458642006
Scheduler: {'T_max': 25, 'eta_min': 0.0, 'base_lrs': [0.0001], 'last_epoch': 1, '_step_count': 2, '_get_lr_called_within_step': False, '_last_lr': [9.96057350657239e-05]}
Epoch 2/25, Batch 0/696, Loss: 0.008304858580231667
Epoch 2/25, Batch 348/696, Loss: 0.00687360018491745
Scheduler: {'T_max': 25, 'eta_min': 0.0, 'base_lrs': [0.0001], 'last_epoch': 2, '_step_count': 3, '_get_lr_called_within_step': False, '_last_lr': [9.842915805643155e-05]}
Epoch 3/25, Batch 0/696, Loss: 0.0066201346926391125
Epoch 3/25, Batch 348/696, Loss: 0.00669951131567359
Scheduler: {'T_max': 25, 'eta_min': 0.0, 'base_lrs': [0.0001], 'last_epoch': 3, '_step_count': 4, '_get_lr_called_within_step': False, '_last_lr': [9.648882429441257e-05]}
Epoch 4/25, Batch 0/696, Loss: 0.006014692131429911
Epoch 4/25, Batch 348/696, Loss: 0.006399218924343586
Scheduler: {'T_max': 25, 'eta_mi

100%|██████████| 116/116 [01:22<00:00,  1.41it/s]


Training for 25 epochs started.
Epoch 1/25, Batch 0/30044, Loss: 0.013165660202503204
Epoch 1/25, Batch 348/30044, Loss: 0.014004808850586414
Epoch 1/25, Batch 696/30044, Loss: 0.014226202853024006
Epoch 1/25, Batch 1044/30044, Loss: 0.01219420600682497
Epoch 1/25, Batch 1392/30044, Loss: 0.014183687046170235
Epoch 1/25, Batch 1740/30044, Loss: 0.015224668197333813
Epoch 1/25, Batch 2088/30044, Loss: 0.01579141430556774
Epoch 1/25, Batch 2436/30044, Loss: 0.01270377542823553
Epoch 1/25, Batch 2784/30044, Loss: 0.013817595317959785
Epoch 1/25, Batch 3132/30044, Loss: 0.01760709285736084
Epoch 1/25, Batch 3480/30044, Loss: 0.016772344708442688
Epoch 1/25, Batch 3828/30044, Loss: 0.0129721499979496
Epoch 1/25, Batch 4176/30044, Loss: 0.016191553324460983
Epoch 1/25, Batch 4524/30044, Loss: 0.01632431522011757
Epoch 1/25, Batch 4872/30044, Loss: 0.01413026824593544
Epoch 1/25, Batch 5220/30044, Loss: 0.01404015813022852
Epoch 1/25, Batch 5568/30044, Loss: 0.013363116420805454
Epoch 1/25, B

KeyboardInterrupt: 

### 

### Modell im Evaluierungsmodus speichern und testen

In [None]:
# Save the trained model
model.eval()
torch.save(model.state_dict(), f"{model_name}-po-trained")

surveys, top_k_indices = test_loop(model, test_loader, device)

data_concatenated = [' '.join(map(str, row)) for row in top_k_indices]

pd.DataFrame(
    {
        'surveyId': surveys,
        'predictions': data_concatenated,
    }
).to_csv(f"csv_submissions/{model_name}.csv", index = False)

### Laden des PA-Datensatzes und instanziieren des Dataloaders für das Finetuning

In [None]:
# Load Training metadata
train_data_path = "data/SatellitePatches/PA-train"
train_metadata_path = "data/GLC25_PA_metadata_train.csv"
train_metadata = pd.read_csv(train_metadata_path)
train_dataset = TrainDataset(train_data_path, train_metadata, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

print(f"Train dataset size: {len(train_dataset)}")

### Finetuning mit den PA-Daten

In [None]:
train_loop(model=model,
           train_loader=train_loader,
           optimizer=optimizer,
           device=device,
           scheduler=scheduler)

### Speichern des trainierten Modells im Evaluierungmoduss und testen

In [None]:
# Save the trained model
model.eval()
torch.save(model.state_dict(), f"{model_name}-po-trained-finetuned")

from src.helpers import test_loop

surveys, top_k_indices = test_loop(model, test_loader, device)

data_concatenated = [' '.join(map(str, row)) for row in top_k_indices]

pd.DataFrame(
    {
        'surveyId': surveys,
        'predictions': data_concatenated,
    }
).to_csv(f"csv_submissions/{model_name}-po-trained-finetuned.csv", index = False)

Train dataset size: 88987
Training for 25 epochs started.
Epoch 1/25, Batch 0/696, Loss: 0.008249456062912941
Epoch 1/25, Batch 348/696, Loss: 0.006688857451081276
Scheduler: {'T_max': 25, 'eta_min': 0.0, 'base_lrs': [0.0001], 'last_epoch': 41, '_step_count': 42, '_get_lr_called_within_step': False, '_last_lr': [7.128896457825359e-05]}
Epoch 2/25, Batch 0/696, Loss: 0.006024002097547054
Epoch 2/25, Batch 348/696, Loss: 0.005029215477406979
Scheduler: {'T_max': 25, 'eta_min': 0.0, 'base_lrs': [0.0001], 'last_epoch': 42, '_step_count': 43, '_get_lr_called_within_step': False, '_last_lr': [7.679133974894983e-05]}
Epoch 3/25, Batch 0/696, Loss: 0.005333971697837114
Epoch 3/25, Batch 348/696, Loss: 0.005356017965823412
Scheduler: {'T_max': 25, 'eta_min': 0.0, 'base_lrs': [0.0001], 'last_epoch': 43, '_step_count': 44, '_get_lr_called_within_step': False, '_last_lr': [8.187119948743445e-05]}
Epoch 4/25, Batch 0/696, Loss: 0.005329010542482138
Epoch 4/25, Batch 348/696, Loss: 0.005227837711572

100%|██████████| 116/116 [01:04<00:00,  1.79it/s]


## Evaluierung der in den CSV-Dateien gespeicherten Ergebnisse erfolgt via Kaggle: https://www.kaggle.com/competitions/geolifeclef-2025/submissions