In [None]:
# Download the EuroSAT archive and extract it into the EuroSAT/ directory
# (relative to the notebook's current working directory).
!wget http://madm.dfki.de/files/sentinel/EuroSAT.zip
!unzip EuroSAT.zip -d EuroSAT/

In [None]:
from torch.utils.data import DataLoader
import torch

def collate_fn(batch):
    """Collate ``(image, label)`` samples without stacking the images.

    The images are handed back as a plain list so that a model-specific
    image processor can resize/normalise them later; only the labels are
    packed into a tensor.

    Args:
        batch: Sequence of ``(image, label)`` pairs.

    Returns:
        tuple: ``(images, labels)`` where ``images`` is a list and ``labels``
        is a tensor built from the per-sample labels.
    """
    columns = tuple(zip(*batch))
    images, labels = columns
    return [*images], torch.tensor(labels)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, SubsetRandomSampler
from torchvision import datasets
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report

from transformers import (
    AutoImageProcessor,
    AutoModelForImageClassification,
    AutoModelForPreTraining,
    AutoModel,
    VitMatteForImageMatting
)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Root folder handed to ImageFolder.
data_dir = "/content/EuroSAT/2750"

# No transform is attached here: samples stay as loaded images and are
# converted to tensors per batch by each model's own image processor.
dataset = datasets.ImageFolder(root=data_dir)
classes = dataset.classes
num_classes = len(classes)
print("Classes:", classes)

valid_size = 0.2   # fraction of the samples held out for validation
batch_size = 32
seed = 42          # fixes the train/validation split across kernel restarts

num_data = len(dataset)
indices = list(range(num_data))
# Shuffle with a dedicated seeded generator so that every run (and therefore
# every model compared in this notebook) sees the same split, without
# touching NumPy's global random state.
np.random.default_rng(seed).shuffle(indices)
split = int(np.floor(valid_size * num_data))
train_idx, valid_idx = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

# Both loaders wrap the same dataset; the samplers restrict each one to its
# own index subset.
train_loader = DataLoader(
    dataset,
    batch_size=batch_size,
    sampler=train_sampler,
    collate_fn=collate_fn
)

valid_loader = DataLoader(
    dataset,
    batch_size=batch_size,
    sampler=valid_sampler,
    collate_fn=collate_fn
)

In [None]:
def train_one_epoch(model, processor, dataloader, optimizer):
    """Run one optimisation pass over ``dataloader``.

    For every batch the raw images are converted by ``processor`` into model
    inputs, moved to the module-level ``device`` together with the labels,
    and used for one forward/backward/optimizer step.

    Args:
        model: Callable returning a mapping with ``"loss"`` and ``"logits"``
            when invoked with the processed inputs and ``labels``.
        processor: Callable turning a list of images into model inputs.
        dataloader: Iterable of ``(images, labels)`` batches.
        optimizer: Optimizer stepping the model parameters.

    Returns:
        tuple: ``(epoch_loss, epoch_acc)`` -- the sample-weighted mean loss
        and the accuracy in percent over all batches seen.
    """
    model.train()

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for images, labels in dataloader:
        labels = labels.to(device)
        inputs = processor(images, return_tensors="pt").to(device)
        n_batch = labels.size(0)

        outputs = model(**inputs, labels=labels)
        loss, logits = outputs["loss"], outputs["logits"]

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so the epoch figure is a
        # per-sample average even when the last batch is smaller.
        loss_sum += loss.item() * n_batch
        predicted = logits.max(dim=1).indices
        n_seen += n_batch
        n_correct += (predicted == labels).sum().item()

    return loss_sum / n_seen, 100.0 * n_correct / n_seen

def validate(model, processor, dataloader):
    """Measure loss and accuracy on ``dataloader`` without updating weights.

    The model is switched to evaluation mode and every batch is processed
    under ``torch.no_grad()``; inputs and labels are moved to the
    module-level ``device``.

    Args:
        model: Callable returning a mapping with ``"loss"`` and ``"logits"``.
        processor: Callable turning a list of images into model inputs.
        dataloader: Iterable of ``(images, labels)`` batches.

    Returns:
        tuple: ``(val_loss, val_acc)`` -- the sample-weighted mean loss and
        the accuracy in percent.
    """
    model.eval()

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    with torch.no_grad():
        for images, labels in dataloader:
            labels = labels.to(device)
            inputs = processor(images, return_tensors="pt").to(device)
            n_batch = labels.size(0)

            outputs = model(**inputs, labels=labels)
            loss, logits = outputs["loss"], outputs["logits"]

            # Weight by batch size so a short final batch does not skew the mean.
            loss_sum += loss.item() * n_batch
            predicted = logits.max(dim=1).indices
            n_seen += n_batch
            n_correct += (predicted == labels).sum().item()

    mean_loss = loss_sum / n_seen
    accuracy = 100.0 * n_correct / n_seen
    return mean_loss, accuracy

def evaluate_model(model, processor, dataloader):
    """Collect ground-truth labels and predicted classes over ``dataloader``.

    The model runs in evaluation mode under ``torch.no_grad()``. Only the
    processed inputs are moved to the module-level ``device``; the labels are
    never fed to the model, so they are gathered directly instead of being
    copied to the device and back.

    Args:
        model: Callable returning a mapping with ``"logits"``.
        processor: Callable turning a list of images into model inputs.
        dataloader: Iterable of ``(images, labels)`` batches.

    Returns:
        tuple: ``(all_labels, all_preds)`` -- two lists of per-sample class
        indices, aligned with each other in iteration order.
    """
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for images, labels in dataloader:
            inputs = processor(images, return_tensors="pt").to(device)
            outputs = model(**inputs)
            logits = outputs["logits"]
            _, predicted = logits.max(1)
            all_preds.extend(predicted.cpu().numpy())
            # .cpu() is a no-op for host tensors and keeps this safe if a
            # loader ever yields labels that already live on an accelerator.
            all_labels.extend(labels.cpu().numpy())
    return all_labels, all_preds

def plot_confusion_matrix(all_labels, all_preds, classes):
    """Plot the confusion matrix of ``all_preds`` against ``all_labels``.

    Args:
        all_labels: Ground-truth class indices, one per sample.
        all_preds: Predicted class indices, aligned with ``all_labels``.
        classes: Class names; position ``i`` names class index ``i``.
    """
    # Pin the label set to every known class index. Otherwise a class that
    # happens to be absent from both labels and predictions shrinks the
    # matrix and it no longer lines up with ``display_labels``.
    class_ids = list(range(len(classes)))
    cm = confusion_matrix(all_labels, all_preds, labels=class_ids)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=classes)
    disp.plot(cmap=plt.cm.Blues, xticks_rotation='vertical', values_format='d')
    plt.title("Confusion Matrix on Validation Set")
    plt.show()

def print_classification_report(all_labels, all_preds, classes):
    """Print per-class precision/recall/F1 for the given predictions.

    Args:
        all_labels: Ground-truth class indices, one per sample.
        all_preds: Predicted class indices, aligned with ``all_labels``.
        classes: Class names; position ``i`` names class index ``i``.
    """
    # Pass the full label set explicitly so ``target_names`` always matches
    # the number of reported classes, even if one class never occurs in
    # either the labels or the predictions.
    class_ids = list(range(len(classes)))
    report = classification_report(
        all_labels, all_preds, labels=class_ids, target_names=classes
    )
    print("Classification Report:\n", report)

In [None]:
def train_and_evaluate(model, processor, train_loader, valid_loader, epochs=35, lr=1e-3, wd=1e-4, t_max=10):
    """Fine-tune ``model``, restore its best weights and report on validation data.

    Trains with AdamW and a cosine-annealing learning-rate schedule, tracks
    loss/accuracy per epoch, keeps a snapshot of the weights with the highest
    validation accuracy, reloads that snapshot after the last epoch, plots the
    training history, and finally shows a confusion matrix and classification
    report for the validation loader. Class names come from the module-level
    ``classes``.

    Args:
        model: Model to train (modified in place).
        processor: Image processor passed through to the train/eval helpers.
        train_loader: Loader of training batches.
        valid_loader: Loader of validation batches.
        epochs: Number of training epochs.
        lr: Initial learning rate for AdamW.
        wd: Weight decay for AdamW.
        t_max: ``T_max`` of ``CosineAnnealingLR``. The default keeps the
            previous hard-coded value of 10. NOTE(review): with
            ``epochs > t_max`` the cosine schedule climbs back up after
            ``t_max`` epochs -- pass ``t_max=epochs`` for a single decay.

    Returns:
        float: Best validation accuracy (in percent) seen during training.
    """
    import copy  # function-scope import keeps this cell self-contained

    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=t_max)

    best_val_acc = 0.0
    train_losses, train_accuracies = [], []
    val_losses, val_accuracies = [], []

    best_model_state = None

    for epoch in range(epochs):
        train_loss, train_acc = train_one_epoch(model, processor, train_loader, optimizer)
        val_loss, val_acc = validate(model, processor, valid_loader)

        scheduler.step()

        train_losses.append(train_loss)
        train_accuracies.append(train_acc)
        val_losses.append(val_loss)
        val_accuracies.append(val_acc)

        print(f"Epoch [{epoch+1}/{epochs}]")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%\n")

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            # state_dict() hands back references to the live parameter
            # tensors, which later optimizer steps overwrite. Deep-copy so
            # the snapshot really holds the best epoch's weights.
            best_model_state = copy.deepcopy(model.state_dict())

    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    _plot_history(
        train_losses, val_losses,
        "Training and Validation Loss", "Loss",
        "Train Loss", "Validation Loss",
    )
    _plot_history(
        train_accuracies, val_accuracies,
        "Training and Validation Accuracy", "Accuracy (%)",
        "Train Accuracy", "Validation Accuracy",
    )

    # Final report uses the restored best weights.
    all_labels, all_preds = evaluate_model(model, processor, valid_loader)
    plot_confusion_matrix(all_labels, all_preds, classes)
    print_classification_report(all_labels, all_preds, classes)

    return best_val_acc


def _plot_history(train_values, val_values, title, ylabel, train_label, val_label):
    """Draw one per-epoch metric for the training and validation splits."""
    plt.figure(figsize=(10,5))
    plt.title(title)
    plt.plot(train_values, label=train_label)
    plt.plot(val_values, label=val_label)
    plt.xlabel("Epochs")
    plt.ylabel(ylabel)
    plt.legend()
    plt.show()

In [None]:
def get_deit_model(num_classes):
    """Load the pretrained DeiT-small classifier and its image processor.

    The classifier is requested with ``num_classes`` labels and
    ``ignore_mismatched_sizes=True``, then moved to the module-level
    ``device``.

    Args:
        num_classes: Number of target classes.

    Returns:
        tuple: ``(model, processor)``.
    """
    from transformers import AutoImageProcessor, AutoModelForImageClassification

    checkpoint = "facebook/deit-small-patch16-224"

    processor = AutoImageProcessor.from_pretrained(checkpoint, use_fast=True)
    model = AutoModelForImageClassification.from_pretrained(
        checkpoint,
        num_labels=num_classes,
        ignore_mismatched_sizes=True,
    )
    return model.to(device), processor


In [None]:
# Fine-tune DeiT on the shared train/validation loaders and print the best
# validation accuracy returned by train_and_evaluate.
print("=== DeiT ===")
deit_model, deit_processor = get_deit_model(num_classes)
deit_acc = train_and_evaluate(deit_model, deit_processor, train_loader, valid_loader)

print(f"DeiT Val Acc: {deit_acc:.2f}%")

In [None]:
def get_swin_model(num_classes):
    """Load the pretrained Swin-tiny classifier and its image processor.

    The classifier is requested with ``num_classes`` labels and
    ``ignore_mismatched_sizes=True``, then moved to the module-level
    ``device``.

    Args:
        num_classes: Number of target classes.

    Returns:
        tuple: ``(model, processor)``.
    """
    from transformers import AutoImageProcessor, AutoModelForImageClassification

    checkpoint = "microsoft/swin-tiny-patch4-window7-224"

    processor = AutoImageProcessor.from_pretrained(checkpoint, use_fast=True)
    model = AutoModelForImageClassification.from_pretrained(
        checkpoint,
        num_labels=num_classes,
        ignore_mismatched_sizes=True,
    )
    return model.to(device), processor

In [None]:
# Fine-tune Swin on the shared train/validation loaders and print the best
# validation accuracy, matching the summary line of the other model cells.
print("=== Swin Transformer ===")
swin_model, swin_processor = get_swin_model(num_classes)
swin_acc = train_and_evaluate(swin_model, swin_processor, train_loader, valid_loader)
print(f"Swin Val Acc: {swin_acc:.2f}%")

In [None]:
def get_mae_model(num_classes):
    """Build a classifier on top of the pretrained ViT-MAE encoder.

    Loads the ``facebook/vit-mae-large`` pre-training checkpoint, keeps only
    its ``vit`` encoder, and attaches a linear head that maps the first
    token of the last hidden state to ``num_classes`` logits. The wrapper
    returns a ``{"loss", "logits"}`` dict so it works with the same training
    helpers as the Hugging Face classification models.

    Args:
        num_classes: Number of target classes.

    Returns:
        tuple: ``(model, processor)`` with the model on the module-level
        ``device``.
    """
    from transformers import AutoImageProcessor, AutoModelForPreTraining

    checkpoint = "facebook/vit-mae-large"
    processor = AutoImageProcessor.from_pretrained(checkpoint, use_fast=True)
    # The MAE pre-training configuration masks out a large random share of
    # the patches on every forward pass. For classification the encoder must
    # see the whole image, and validation must be deterministic, so the
    # masking is disabled via the config override.
    pretrained_model = AutoModelForPreTraining.from_pretrained(checkpoint, mask_ratio=0.0)
    base_model = pretrained_model.vit

    classification_head = nn.Linear(base_model.config.hidden_size, num_classes)

    class MAEForClassification(nn.Module):
        """ViT-MAE encoder followed by a linear classification head."""

        def __init__(self, base_model, classification_head):
            super().__init__()
            self.base_model = base_model
            self.classifier = classification_head

        def forward(self, pixel_values, labels=None):
            outputs = self.base_model(pixel_values=pixel_values)
            # Use the first token of the final hidden state as the image embedding.
            pooled_output = outputs.last_hidden_state[:, 0, :]
            logits = self.classifier(pooled_output)
            loss = None
            if labels is not None:
                loss_fn = nn.CrossEntropyLoss()
                loss = loss_fn(logits, labels)
            return {"loss": loss, "logits": logits}

    model = MAEForClassification(base_model, classification_head).to(device)
    return model, processor


In [None]:
# Fine-tune the MAE-based classifier on the shared loaders and print the best
# validation accuracy, matching the summary line of the other model cells.
print("=== MAE ===")
mae_model, mae_processor = get_mae_model(num_classes)
mae_acc = train_and_evaluate(mae_model, mae_processor, train_loader, valid_loader)
print(f"MAE Val Acc: {mae_acc:.2f}%")

In [None]:
def get_pvt_model(num_classes):
    """Load the pretrained PVT-tiny classifier and its image processor.

    The classifier is requested with ``num_classes`` labels, the
    single-label problem type and ``ignore_mismatched_sizes=True``. The
    target device is resolved locally (CUDA when available, else CPU).

    Args:
        num_classes: Number of target classes.

    Returns:
        tuple: ``(model, processor)``.
    """
    import torch
    from transformers import AutoImageProcessor, PvtForImageClassification

    checkpoint = "Zetatech/pvt-tiny-224"
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    processor = AutoImageProcessor.from_pretrained(checkpoint)
    load_options = dict(
        num_labels=num_classes,
        problem_type="single_label_classification",
        ignore_mismatched_sizes=True,
    )
    model = PvtForImageClassification.from_pretrained(checkpoint, **load_options)
    return model.to(device), processor


In [None]:
# Fine-tune PVT on the shared train/validation loaders and print the best
# validation accuracy returned by train_and_evaluate.
print("=== PVT ===")
pvt_model, pvt_processor = get_pvt_model(num_classes)
pvt_acc = train_and_evaluate(pvt_model, pvt_processor, train_loader, valid_loader)
print(f"PVT Validation Accuracy: {pvt_acc:.2f}%")

In [None]:
def get_yolos_small_for_classification(num_classes):
    """Build an image classifier on top of the pretrained YOLOS-small backbone.

    Loads ``hustvl/yolos-small`` as a bare ``YolosModel`` and attaches a
    linear head that maps the backbone's pooled output to ``num_classes``
    logits. The wrapper returns a ``{"loss", "logits"}`` dict so it works
    with the same training helpers as the other models. The target device
    is resolved locally (CUDA when available, else CPU).

    Args:
        num_classes: Number of target classes.

    Returns:
        tuple: ``(model, processor)``.
    """
    import torch
    import torch.nn as nn
    from transformers import YolosModel, AutoImageProcessor

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    processor = AutoImageProcessor.from_pretrained("hustvl/yolos-small")
    base_model = YolosModel.from_pretrained("hustvl/yolos-small")

    hidden_size = base_model.config.hidden_size
    classification_head = nn.Linear(hidden_size, num_classes)

    class YOLOSSmallForClassification(nn.Module):
        """YOLOS backbone followed by a linear classification head."""

        def __init__(self, base_model, classifier):
            super().__init__()
            self.base_model = base_model
            self.classifier = classifier

        def forward(self, pixel_values, labels=None):
            outputs = self.base_model(pixel_values=pixel_values, return_dict=True)
            # Fixed: the original line ended in a stray "=", a SyntaxError
            # that prevented this whole cell from being defined.
            pooled_output = outputs.pooler_output

            logits = self.classifier(pooled_output)
            loss = None
            if labels is not None:
                loss_fn = nn.CrossEntropyLoss()
                loss = loss_fn(logits, labels)
            return {"loss": loss, "logits": logits}

    model = YOLOSSmallForClassification(base_model, classification_head).to(device)
    return model, processor


In [None]:
# Fine-tune the YOLOS-based classifier on the shared loaders and print the
# best validation accuracy returned by train_and_evaluate.
print("=== YOLOS ===")
yolos_model, yolos_processor = get_yolos_small_for_classification(num_classes=num_classes)
yolos_acc = train_and_evaluate(yolos_model, yolos_processor, train_loader, valid_loader)
print(f"YOLOS Validation Accuracy: {yolos_acc:.2f}%")