<a href="https://colab.research.google.com/github/RingoKid/EuroSAT-DeepLearning/blob/main/ViTxEuroSAT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import subprocess
import sys

# Suppress outputs for pip install
subprocess.run(["pip", "install", "configilm"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
subprocess.run(["pip", "install", "lightning"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
subprocess.run(["pip", "install", "lmdb"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

# Suppress output for git clone
subprocess.run(["git", "clone", "https://git.tu-berlin.de/rsim/reben-training-scripts.git"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

# Change directory without output
# os.chdir('/content/reben-training-scripts')
# from reben_publication.BigEarthNetv2_0_ImageClassifier import BigEarthNetv2_0_ImageClassifier
sys.path.append('/content/reben-training-scripts')


In [2]:
!pip uninstall -y tensorflow
!pip install tensorflow-cpu

Found existing installation: tensorflow 2.17.1
Uninstalling tensorflow-2.17.1:
  Successfully uninstalled tensorflow-2.17.1
Collecting tensorflow-cpu
  Downloading tensorflow_cpu-2.18.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)
Collecting tensorboard<2.19,>=2.18 (from tensorflow-cpu)
  Downloading tensorboard-2.18.0-py3-none-any.whl.metadata (1.6 kB)
Downloading tensorflow_cpu-2.18.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (230.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m230.0/230.0 MB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tensorboard-2.18.0-py3-none-any.whl (5.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m104.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tensorboard, tensorflow-cpu
  Attempting uninstall: tensorboard
    Found existing installation: tensorboard 2.17.1
    Uninstalling tensorboard-2.17.1:
      Successfull

In [3]:
import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.image import resize
from tqdm import tqdm

import json
import pickle
import torch
import torch.nn as nn
from torch.utils.data import Dataset, random_split, DataLoader
import torch.optim as optim
from torch.nn import BCEWithLogitsLoss
from sklearn.metrics import classification_report

from reben_publication.BigEarthNetv2_0_ImageClassifier import BigEarthNetv2_0_ImageClassifier
from configilm.extra.BENv2_utils import band_combi_to_mean_std, STANDARD_BANDS

### Initialize the Preprocessor

In [4]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
import sys
sys.path.append('/content/drive/My Drive/EuroSAT')


In [6]:
from configilm.extra.BENv2_utils import band_combi_to_mean_std, STANDARD_BANDS
from preprocessing import EuroSATPreprocessor, TensorflowToTorchDataset

# Get mean and std for normalization
means, stds = band_combi_to_mean_std(STANDARD_BANDS[10], interpolation="120_nearest")

# Initialize the preprocessor
preprocessor = EuroSATPreprocessor(input_size=120, band_indices=[1, 2, 3, 4, 5, 6, 7, 8, 10, 11], means=means, stds=stds)


In [7]:
# Save the dataset to a specific directory
save_path = '/content/eurosat_dataset'
dataset, info = tfds.load("eurosat/all", split='train', with_info=True, data_dir=save_path)


Downloading and preparing dataset 1.93 GiB (download: 1.93 GiB, generated: Unknown size, total: 1.93 GiB) to /content/eurosat_dataset/eurosat/all/2.0.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]

Generating splits...:   0%|          | 0/1 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/27000 [00:00<?, ? examples/s]

Shuffling /content/eurosat_dataset/eurosat/all/incomplete.R99YXX_2.0.0/eurosat-train.tfrecord*...:   0%|      …

Dataset eurosat downloaded and prepared to /content/eurosat_dataset/eurosat/all/2.0.0. Subsequent calls will reuse this data.


In [8]:
# Inspect dataset labels
for sample in dataset.take(5):  # Inspect 5 samples
    label = sample['label']
    print("Label Type:", type(label.numpy()))
    print("Label Shape:", label.shape)
    print("Label Value:", label.numpy())
    print("-" * 30)


Label Type: <class 'numpy.int64'>
Label Shape: ()
Label Value: 8
------------------------------
Label Type: <class 'numpy.int64'>
Label Shape: ()
Label Value: 4
------------------------------
Label Type: <class 'numpy.int64'>
Label Shape: ()
Label Value: 5
------------------------------
Label Type: <class 'numpy.int64'>
Label Shape: ()
Label Value: 5
------------------------------
Label Type: <class 'numpy.int64'>
Label Shape: ()
Label Value: 3
------------------------------


EuroSAT dataset is multi class

In [9]:
# Apply preprocessing using map
def preprocess_sample(sample):
    """Preprocess a single sample using the EuroSATPreprocessor."""
    return preprocessor.preprocess_sample(sample)

# Preprocess the entire dataset
preprocessed_dataset = dataset.map(preprocess_sample, num_parallel_calls=tf.data.AUTOTUNE)

# Check a preprocessed sample
for sample in preprocessed_dataset.take(1):
    print("Filename:", sample['filename'].numpy())
    print("Image Shape:", sample['sentinel2'].shape)
    print("Label:", sample['label'].numpy())

Filename: b'River_15.tif'
Image Shape: (120, 120, 10)
Label: 8


In [10]:
# Convert TensorFlow dataset to PyTorch dataset
torch_dataset = TensorflowToTorchDataset(preprocessed_dataset)

# Calculate dataset sizes for train, validation, and test splits
dataset_size = len(torch_dataset)
train_size = int(0.7 * dataset_size)
val_size = int(0.15 * dataset_size)
test_size = dataset_size - train_size - val_size  # Ensures no data is left out

# Set the random seed for reproducibility
torch.manual_seed(42)

# Split the dataset
train_dataset, val_dataset, test_dataset = random_split(torch_dataset, [train_size, val_size, test_size])

# Create DataLoaders for each split
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Check a batch from the train_loader
for images, labels in train_loader:
    print("Batch Image Shape:", images.shape)  # [batch_size, channels, height, width]
    print("Batch Label Shape:", labels.shape)  # [batch_size]
    break


Batch Image Shape: torch.Size([64, 10, 120, 120])
Batch Label Shape: torch.Size([64])


In [11]:
for _, labels in train_loader:
    print("Label Shape:", labels.shape)
    print("Sample Label:", labels[0])
    break


Label Shape: torch.Size([64])
Sample Label: tensor(1)


In [None]:
for images, labels in train_loader:
    print(f"Image Shape: {images.shape}")  # [batch_size, channels, height, width]
    print(f"Label Shape: {labels.shape}")  # [batch_size]
    print(f"Sample Labels: {labels[:5]}")
    break


Image Shape: torch.Size([64, 10, 120, 120])
Label Shape: torch.Size([64])
Sample Labels: tensor([7, 1, 8, 5, 8])


### Loading the pre trained model

In [12]:
model = BigEarthNetv2_0_ImageClassifier.from_pretrained(
  "BIFOLD-BigEarthNetv2-0/vit_base_patch8_224-s2-v0.1.1")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/861 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/343M [00:00<?, ?B/s]

In [None]:
model.config

ILMConfiguration(timm_model_name='vit_base_patch8_224', hf_model_name=None, image_size=120, channels=10, classes=19, class_names=['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18'], network_type=0, visual_features_out=512, fusion_in=512, fusion_out=512, fusion_hidden=256, v_dropout_rate=0.25, t_dropout_rate=0.25, fusion_dropout_rate=0.25, _fusion_method='torch.mul', _fusion_activation='nn.Tanh()', drop_rate=0.15, drop_path_rate=0.15, use_pooler_output=True, max_sequence_length=32, load_pretrained_timm_if_available=False, load_pretrained_hf_if_available=True, custom_fusion_method=None, custom_fusion_activation=None)

In [None]:
model

BigEarthNetv2_0_ImageClassifier(
  (model): ConfigILM(
    (vision_encoder): VisionTransformer(
      (patch_embed): PatchEmbed(
        (proj): Conv2d(10, 768, kernel_size=(8, 8), stride=(8, 8))
        (norm): Identity()
      )
      (pos_drop): Dropout(p=0.0, inplace=False)
      (patch_drop): Identity()
      (norm_pre): Identity()
      (blocks): Sequential(
        (0): Block(
          (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
          (attn): Attention(
            (qkv): Linear(in_features=768, out_features=2304, bias=True)
            (q_norm): Identity()
            (k_norm): Identity()
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Linear(in_features=768, out_features=768, bias=True)
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): 

#### Freeze N Layers

In [13]:
def unfreeze_last_n_blocks(model, num_unfreeze_blocks):
    """
    Unfreeze the last N blocks of the Vision Transformer model and the classification head.

    Args:
        model: The Vision Transformer model (BigEarthNetv2_0_ImageClassifier).
        num_unfreeze_blocks: Number of blocks to unfreeze.
    """
    # Reference to the blocks in the Vision Transformer
    vit_blocks = model.model.vision_encoder.blocks
    total_blocks = len(vit_blocks)

    # Freeze all parameters by default
    for param in model.parameters():
        param.requires_grad = False

    # Unfreeze the last N blocks
    for i in range(total_blocks - num_unfreeze_blocks, total_blocks):
        for param in vit_blocks[i].parameters():
            param.requires_grad = True

    # Unfreeze the final classification head
    for param in model.model.vision_encoder.head.parameters():
         param.requires_grad = True

    print(f"Unfroze the last {num_unfreeze_blocks} blocks and the classification head.")


In [14]:
# Number of blocks to unfreeze
num_unfreeze_blocks = 3

# Unfreeze the last N blocks
unfreeze_last_n_blocks(model, num_unfreeze_blocks)

# Check if parameters are properly frozen/unfrozen
for name, param in model.named_parameters():
    print(f"{name} - Requires Grad: {param.requires_grad}")


Unfroze the last 3 blocks and the classification head.
model.vision_encoder.cls_token - Requires Grad: False
model.vision_encoder.pos_embed - Requires Grad: False
model.vision_encoder.patch_embed.proj.weight - Requires Grad: False
model.vision_encoder.patch_embed.proj.bias - Requires Grad: False
model.vision_encoder.blocks.0.norm1.weight - Requires Grad: False
model.vision_encoder.blocks.0.norm1.bias - Requires Grad: False
model.vision_encoder.blocks.0.attn.qkv.weight - Requires Grad: False
model.vision_encoder.blocks.0.attn.qkv.bias - Requires Grad: False
model.vision_encoder.blocks.0.attn.proj.weight - Requires Grad: False
model.vision_encoder.blocks.0.attn.proj.bias - Requires Grad: False
model.vision_encoder.blocks.0.norm2.weight - Requires Grad: False
model.vision_encoder.blocks.0.norm2.bias - Requires Grad: False
model.vision_encoder.blocks.0.mlp.fc1.weight - Requires Grad: False
model.vision_encoder.blocks.0.mlp.fc1.bias - Requires Grad: False
model.vision_encoder.blocks.0.mlp.f

#### Update the Final Layer

In [None]:
# Update the final classification head for 10 classes
model.model.vision_encoder.head = nn.Linear(in_features=768, out_features=10)

# Check if the layer is updated
print(model.model.vision_encoder.head)

Linear(in_features=768, out_features=10, bias=True)


In [None]:
model

BigEarthNetv2_0_ImageClassifier(
  (model): ConfigILM(
    (vision_encoder): VisionTransformer(
      (patch_embed): PatchEmbed(
        (proj): Conv2d(10, 768, kernel_size=(8, 8), stride=(8, 8))
        (norm): Identity()
      )
      (pos_drop): Dropout(p=0.0, inplace=False)
      (patch_drop): Identity()
      (norm_pre): Identity()
      (blocks): Sequential(
        (0): Block(
          (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
          (attn): Attention(
            (qkv): Linear(in_features=768, out_features=2304, bias=True)
            (q_norm): Identity()
            (k_norm): Identity()
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Linear(in_features=768, out_features=768, bias=True)
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): 

#### Early Stopping

In [15]:
class EarlyStopping:
    def __init__(self, patience=3):
        self.patience = patience
        self.counter = 0
        self.best_loss = float('inf')
        self.early_stop = False

    def __call__(self, val_loss):
        if val_loss < self.best_loss:
            self.best_loss = val_loss
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True


In [16]:
# Set hyperparameters
num_epochs = 30
batch_size = 64
learning_rate = 1e-4
weight_decay = 1e-2
step_size = 5
gamma = 0.1
patience = 5

# Move model to GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Loss function, optimizer, and scheduler
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
#     optimizer, mode='min', factor=0.5, patience=2, min_lr=1e-6
# )
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=1e-4,  # Set a slightly higher max learning rate
    steps_per_epoch=len(train_loader),
    epochs=num_epochs,
    pct_start=0.3,  # Fraction of the cycle spent increasing the LR
    anneal_strategy='cos',  # Cosine annealing
    final_div_factor=10  # Final LR = max_lr / 10
)

# Early Stopping Initialization
early_stopper = EarlyStopping(patience=patience)

# Training loop
best_val_accuracy = 0.0
for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    # Training Phase
    for images, labels in tqdm(train_loader):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)  # Labels are already integers

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)  # Get class predictions
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    # Training Metrics
    train_loss = running_loss / len(train_loader)
    train_accuracy = 100. * correct / total
    current_lr = optimizer.param_groups[0]['lr']  # Get current learning rate
    print(f"Training Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.2f}%, LR: {current_lr:.6f}")

    # Validation Phase
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0

    with torch.no_grad():
        for val_images, val_labels in val_loader:
            val_images, val_labels = val_images.to(device), val_labels.to(device)

            val_outputs = model(val_images)
            loss = criterion(val_outputs, val_labels)
            val_loss += loss.item()

            _, val_predicted = val_outputs.max(1)
            val_total += val_labels.size(0)
            val_correct += val_predicted.eq(val_labels).sum().item()

    # Validation Metrics
    val_loss = val_loss / len(val_loader)
    val_accuracy = 100. * val_correct / val_total
    print(f"Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.2f}%")

    # Save the best model
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        torch.save(model.state_dict(), "best_eurosat_model.pth")
        print("Saved Best Model!")

    # Early Stopping Check
    early_stopper(val_loss)
    if early_stopper.early_stop:
        print("Early stopping triggered. Training stopped.")
        break

    # Step the scheduler
    scheduler.step(val_loss)

print("Training Complete.")
print(f"Best Validation Accuracy: {best_val_accuracy:.2f}%")


Epoch 1/30


100%|██████████| 296/296 [05:28<00:00,  1.11s/it]


Training Loss: 7.5373, Accuracy: 3.01%, LR: 0.000004
Validation Loss: 6.8295, Accuracy: 4.72%
Saved Best Model!
Epoch 2/30


100%|██████████| 296/296 [05:34<00:00,  1.13s/it]


Training Loss: 6.2211, Accuracy: 5.58%, LR: 0.000004
Validation Loss: 5.5946, Accuracy: 6.84%
Saved Best Model!
Epoch 3/30


100%|██████████| 296/296 [05:34<00:00,  1.13s/it]


Training Loss: 5.0838, Accuracy: 7.90%, LR: 0.000004
Validation Loss: 4.4718, Accuracy: 9.83%
Saved Best Model!
Epoch 4/30


100%|██████████| 296/296 [05:34<00:00,  1.13s/it]


Training Loss: 4.0527, Accuracy: 13.05%, LR: 0.000004
Validation Loss: 3.4751, Accuracy: 18.49%
Saved Best Model!
Epoch 5/30


100%|██████████| 296/296 [05:34<00:00,  1.13s/it]


Training Loss: 3.1726, Accuracy: 19.37%, LR: 0.000004
Validation Loss: 2.6616, Accuracy: 24.47%
Saved Best Model!
Epoch 6/30


100%|██████████| 296/296 [05:34<00:00,  1.13s/it]


Training Loss: 2.5077, Accuracy: 25.52%, LR: 0.000004
Validation Loss: 2.0890, Accuracy: 30.81%
Saved Best Model!
Epoch 7/30


100%|██████████| 296/296 [05:34<00:00,  1.13s/it]


Training Loss: 2.0769, Accuracy: 33.49%, LR: 0.000004
Validation Loss: 1.7684, Accuracy: 41.09%
Saved Best Model!
Epoch 8/30


100%|██████████| 296/296 [05:34<00:00,  1.13s/it]


Training Loss: 1.8435, Accuracy: 38.12%, LR: 0.000004
Validation Loss: 1.5891, Accuracy: 44.84%
Saved Best Model!
Epoch 9/30


100%|██████████| 296/296 [05:34<00:00,  1.13s/it]


Training Loss: 1.6934, Accuracy: 42.35%, LR: 0.000004
Validation Loss: 1.4702, Accuracy: 48.96%
Saved Best Model!
Epoch 10/30


100%|██████████| 296/296 [05:33<00:00,  1.13s/it]


Training Loss: 1.5805, Accuracy: 45.94%, LR: 0.000004
Validation Loss: 1.3784, Accuracy: 52.02%
Saved Best Model!
Epoch 11/30


100%|██████████| 296/296 [05:33<00:00,  1.13s/it]


Training Loss: 1.4954, Accuracy: 48.79%, LR: 0.000004
Validation Loss: 1.3020, Accuracy: 55.70%
Saved Best Model!
Epoch 12/30


100%|██████████| 296/296 [05:33<00:00,  1.13s/it]


Training Loss: 1.4125, Accuracy: 51.40%, LR: 0.000004
Validation Loss: 1.2358, Accuracy: 58.96%
Saved Best Model!
Epoch 13/30


100%|██████████| 296/296 [05:33<00:00,  1.13s/it]


Training Loss: 1.3452, Accuracy: 53.80%, LR: 0.000004
Validation Loss: 1.1761, Accuracy: 61.58%
Saved Best Model!
Epoch 14/30


100%|██████████| 296/296 [05:33<00:00,  1.13s/it]


Training Loss: 1.2808, Accuracy: 55.67%, LR: 0.000004
Validation Loss: 1.1223, Accuracy: 63.75%
Saved Best Model!
Epoch 15/30


100%|██████████| 296/296 [05:33<00:00,  1.13s/it]


Training Loss: 1.2295, Accuracy: 57.85%, LR: 0.000004
Validation Loss: 1.0742, Accuracy: 65.80%
Saved Best Model!
Epoch 16/30


 78%|███████▊  | 230/296 [04:21<01:14,  1.14s/it]


KeyboardInterrupt: 

In [None]:
# Test evaluation
model.eval()
test_correct = 0
test_total = 0

with torch.no_grad():
    for test_images, test_labels in test_loader:
        test_images, test_labels = test_images.to(device), test_labels.to(device)

        test_outputs = model(test_images)
        _, test_predicted = test_outputs.max(1)
        test_total += test_labels.size(0)
        test_correct += test_predicted.eq(test_labels).sum().item()

test_accuracy = 100. * test_correct / test_total
print(f"Test Accuracy: {test_accuracy:.2f}%")
