In [1]:
import os
import glob
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

from timm import create_model
from timm.data.mixup import Mixup
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from torch.cuda.amp import autocast, GradScaler

# Seed NumPy's and PyTorch's global random generators with the same value
# so that repeated runs draw the same random numbers.
np.random.seed(42)
torch.manual_seed(42)


In [2]:
# Dataset root: images are matched as <data_dir>/<class_name>/<file>.jpg.
data_dir = '/kaggle/input/eurosat10-classes/EuroSAT_RGB/'

# glob returns paths in arbitrary filesystem order. Sorting fixes the row order,
# which is what makes the seeded split below identical on every machine.
image_paths = sorted(glob.glob(os.path.join(data_dir, '*', '*.jpg')))
if not image_paths:
    # Fail early with a clear message instead of an opaque sklearn error later.
    raise FileNotFoundError(f"No .jpg images found under {data_dir!r}")

# The label of an image is the name of the directory that contains it.
labels = [os.path.basename(os.path.dirname(path)) for path in image_paths]

df = pd.DataFrame({'image_path': image_paths, 'label': labels})

# Map class names to integer ids; `le` is kept so ids can be mapped back later.
le = LabelEncoder()
df['label_enc'] = le.fit_transform(df['label'])

# 80/20 split, stratified so both parts keep the same class proportions.
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df['label_enc'],
    random_state=42
)

print(f"Total images: {len(df)}")
print(f"Training images: {len(train_df)}")
print(f"Validation images: {len(val_df)}")


Total images: 27000
Training images: 21600
Validation images: 5400


In [3]:
# Per-channel statistics passed to Normalize in both pipelines.
mean = [0.3444, 0.3809, 0.4082]
std = [0.1829, 0.1603, 0.1321]

# Common tail of both pipelines: PIL image -> tensor -> normalised tensor.
to_normalized_tensor = [
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
]

# Training: resize, then a random 224 crop plus flips and a small rotation.
train_augmentations = [
    transforms.Resize(256),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(15),
]
train_transforms = transforms.Compose(train_augmentations + to_normalized_tensor)

# Validation: deterministic resize and centre crop only, no augmentation.
val_resizing = [
    transforms.Resize(224),
    transforms.CenterCrop(224),
]
val_transforms = transforms.Compose(val_resizing + to_normalized_tensor)


In [4]:
class EuroSATDataset(Dataset):
    """Map-style dataset that loads the images listed in a DataFrame.

    Args:
        df: DataFrame with an 'image_path' column (path of the image file)
            and a 'label_enc' column (integer class id). Its index is reset
            so that samples are addressable by position 0..len-1.
        transforms: Callable applied to the RGB PIL image; whatever it
            returns is yielded as the sample's image.
    """

    def __init__(self, df, transforms):
        self.df = df.reset_index(drop=True)
        self.transforms = transforms

    def __len__(self):
        """Return the number of samples (rows of the DataFrame)."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return the ``(transformed_image, label)`` pair stored at row ``idx``."""
        # `.at` is the scalar accessor; it returns the same cell as `.loc`.
        path = self.df.at[idx, 'image_path']
        # The context manager closes the file handle deterministically;
        # convert() returns an independent, fully loaded RGB image.
        with Image.open(path) as raw_image:
            image = raw_image.convert('RGB')
        image = self.transforms(image)
        # A plain Python int collates to an int64 tensor whatever the column dtype.
        label = int(self.df.at[idx, 'label_enc'])
        return image, label

# Wrap each split with its own transform pipeline.
train_dataset = EuroSATDataset(train_df, train_transforms)
val_dataset = EuroSATDataset(val_df, val_transforms)

batch_size = 64

# Settings shared by both loaders; only the shuffling differs.
loader_kwargs = dict(batch_size=batch_size, num_workers=4, pin_memory=True)

# Shuffle the training data each epoch; keep validation order fixed.
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

In [5]:
# Options forwarded to timm: pretrained weights, a 10-way classifier,
# no dropout, and a drop-path rate of 0.1.
vit_options = dict(
    pretrained=True,
    num_classes=10,
    drop_rate=0.0,
    drop_path_rate=0.1,
)
model = create_model('vit_base_patch16_224', **vit_options)


model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

In [6]:
# Cross-entropy loss with label smoothing of 0.1; this one criterion is
# used for both the training and the validation loss.
criterion = nn.CrossEntropyLoss(
    label_smoothing=0.1,
)

In [7]:
def get_param_groups(model, base_lr, weight_decay, layer_decay=0.95):
    """Build optimizer parameter groups with layer-wise learning-rate decay.

    Every trainable parameter is assigned a depth:

    * ``0`` for the input embeddings (``cls_token``, ``pos_embed``,
      ``patch_embed`` and similar token parameters),
    * ``i + 1`` for parameters of transformer block ``i`` (``blocks.<i>.``),
    * ``num_blocks + 1`` for everything else (final norm, classifier head).

    The deepest group trains at ``base_lr``; each step towards the input
    multiplies the learning rate by ``layer_decay``. Depths are compared
    numerically, so block 10 is never ordered before block 2.

    Weight decay is disabled for biases, other 1-D parameters (e.g. norm
    scales) and any name reported by ``model.no_weight_decay()``.

    Args:
        model: Module exposing ``named_parameters()`` and, optionally,
            ``no_weight_decay()`` returning parameter names to exclude.
        base_lr: Learning rate of the deepest group (the head).
        weight_decay: Weight decay applied to the decayed groups.
        layer_decay: Per-layer multiplicative LR factor (default 0.95).

    Returns:
        A list of dicts with keys ``'params'``, ``'weight_decay'`` and
        ``'lr'``, ordered from the shallowest to the deepest layer.
    """
    if hasattr(model, 'no_weight_decay'):
        skip_decay = set(model.no_weight_decay())
    else:
        skip_decay = set()
    stem_prefixes = ('cls_token', 'pos_embed', 'patch_embed', 'dist_token', 'reg_token')

    def block_index(name):
        """Return the integer that follows a 'blocks' path component, or None."""
        parts = name.split('.')
        if 'blocks' in parts:
            pos = parts.index('blocks') + 1
            if pos < len(parts) and parts[pos].isdigit():
                return int(parts[pos])
        return None

    trainable = [(name, param) for name, param in model.named_parameters()
                 if param.requires_grad]
    block_ids = [block_index(name) for name, _ in trainable]
    num_blocks = max((b for b in block_ids if b is not None), default=-1) + 1
    top_layer = num_blocks + 1

    groups = {}
    for (name, param), block_id in zip(trainable, block_ids):
        if block_id is not None:
            layer_id = block_id + 1
        elif name.startswith(stem_prefixes):
            layer_id = 0
        else:
            # Final norm and classifier head sit on top of the last block.
            layer_id = top_layer

        decays = not (param.ndim <= 1 or name.endswith('.bias') or name in skip_decay)
        key = (layer_id, decays)
        if key not in groups:
            groups[key] = {
                'params': [],
                'weight_decay': weight_decay if decays else 0.0,
                'lr': base_lr * layer_decay ** (top_layer - layer_id),
            }
        groups[key]['params'].append(param)

    # Keys are (int, bool) tuples, so this ordering is numeric by depth.
    return [groups[key] for key in sorted(groups)]

# Hyper-parameters for the first training stage.
base_lr = 3e-5
weight_decay = 0.01

# Every group returned by get_param_groups carries its own 'lr' and
# 'weight_decay', so AdamW is built from the groups alone.
param_groups = get_param_groups(model, base_lr, weight_decay)
optimizer = AdamW(param_groups)


In [8]:
# Cosine annealing with warm restarts. T_0 is the length of the first cycle
# and T_mult=1 keeps every later cycle the same length; both are counted
# in the unit in which scheduler.step() is advanced.
scheduler = CosineAnnealingWarmRestarts(
    optimizer,
    T_0=10,
    T_mult=1,
)

In [9]:
# Batch-level Mixup/CutMix: applied to every batch (prob=1.0), picking
# CutMix instead of Mixup half of the time (switch_prob=0.5).
#
# label_smoothing is 0.0 here on purpose: the training criterion
# (nn.CrossEntropyLoss(label_smoothing=0.1)) already smooths the soft targets
# this produces, so smoothing here as well would apply it twice.
mixup_fn = Mixup(
    mixup_alpha=0.8,
    cutmix_alpha=1.0,
    cutmix_minmax=None,
    prob=1.0,
    switch_prob=0.5,
    mode='batch',
    label_smoothing=0.0,
    num_classes=10
)


In [10]:
scaler = GradScaler()

def train_one_epoch(epoch, model, dataloader, optimizer, criterion, scheduler):
    """Train ``model`` for one pass over ``dataloader`` with mixed precision.

    Each batch is moved to the GPU and passed through the module-level
    ``mixup_fn``. The forward pass and loss run under CUDA autocast, and
    gradients are scaled with the module-level ``scaler``.

    The scheduler is advanced after every batch with a *fractional epoch*
    (``epoch - 1 + batches_done / batches_per_epoch``). With
    ``CosineAnnealingWarmRestarts`` this makes ``T_0`` count epochs; a bare
    ``scheduler.step()`` per batch would restart the cycle every ``T_0``
    batches instead.

    Args:
        epoch: 1-based epoch number; used for logging and the scheduler.
        model: Network to train; must already live on the GPU.
        dataloader: Sized iterable of ``(images, labels)`` batches.
        optimizer: Optimizer updating ``model``'s parameters.
        criterion: Loss called as ``criterion(outputs, labels)`` with the
            labels returned by ``mixup_fn``.
        scheduler: LR scheduler whose ``step`` accepts a fractional epoch
            (e.g. ``CosineAnnealingWarmRestarts``).

    Returns:
        The sample-weighted mean training loss of the epoch.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.train()
    num_batches = len(dataloader)
    epoch_start = max(epoch - 1, 0)  # scheduler epochs are 0-based and non-negative
    running_loss = 0.0
    total = 0

    for step, (images, labels) in enumerate(dataloader):
        images = images.cuda(non_blocking=True)
        labels = labels.cuda(non_blocking=True)

        images, labels = mixup_fn(images, labels)

        optimizer.zero_grad()
        # torch.autocast(device_type='cuda') replaces the deprecated
        # torch.cuda.amp.autocast().
        with torch.autocast(device_type='cuda'):
            outputs = model(images)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        # Position inside the current epoch, so the cosine cycle spans epochs.
        scheduler.step(epoch_start + (step + 1) / num_batches)

        n_samples = images.size(0)
        running_loss += loss.item() * n_samples
        total += n_samples

    if total == 0:
        raise ValueError("train_one_epoch received an empty dataloader")

    epoch_loss = running_loss / total
    print(f'Epoch {epoch} - Training Loss: {epoch_loss:.4f}')
    return epoch_loss

def validate(model, dataloader, criterion):
    """Evaluate ``model`` on ``dataloader`` without tracking gradients.

    Puts the model in eval mode, moves every batch to the GPU via ``.cuda()``,
    and accumulates the sample-weighted loss and the number of correct top-1
    predictions. A one-line summary is printed.

    Returns:
        ``(mean_loss, accuracy)`` over all samples seen.
    """
    model.eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    with torch.no_grad():
        for batch_images, batch_labels in dataloader:
            batch_images = batch_images.cuda(non_blocking=True)
            batch_labels = batch_labels.cuda(non_blocking=True)

            logits = model(batch_images)
            batch_loss = criterion(logits, batch_labels)

            # Weight the batch mean by batch size to get a per-sample average.
            loss_sum += batch_loss.item() * batch_images.size(0)
            predicted = logits.argmax(dim=1)
            n_seen += batch_labels.size(0)
            n_correct += (predicted == batch_labels).sum().item()

    epoch_loss = loss_sum / n_seen
    epoch_acc = n_correct / n_seen
    print(f'Validation Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}')
    return epoch_loss, epoch_acc


  scaler = GradScaler()


In [None]:
# Move the network to the GPU before training starts.
model = model.cuda()

num_epochs = 30
best_acc = 0.0
best_ckpt_path = 'best_vit_model.pth'
separator = '-' * 30

# Stage one: train, validate, and checkpoint whenever validation accuracy
# exceeds the best value seen so far.
for epoch in range(1, num_epochs + 1):
    print(f"Epoch {epoch}/{num_epochs}")
    train_loss = train_one_epoch(epoch, model, train_loader, optimizer, criterion, scheduler)
    val_loss, val_acc = validate(model, val_loader, criterion)

    improved = val_acc > best_acc
    if improved:
        best_acc = val_acc
        torch.save(model.state_dict(), best_ckpt_path)
        print("Saved Best Model")

    print(separator)

Epoch 1/30


  with autocast():


In [None]:
# Restart from the best stage-one weights. weights_only=True restricts
# unpickling to tensors and plain containers, which is all a state_dict needs.
model.load_state_dict(torch.load('best_vit_model.pth', weights_only=True))

# Ensure every parameter is trainable before rebuilding the optimizer groups
# (get_param_groups skips parameters with requires_grad=False).
for param in model.parameters():
    param.requires_grad = True

# Second stage uses a lower base learning rate and a fresh optimizer/schedule.
base_lr = 1e-5
optimizer = AdamW(get_param_groups(model, base_lr, weight_decay))

scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=1)

# best_acc carries over from stage one, so the "save on improvement" branch
# below may never fire. Save the starting weights as the baseline so the
# fine-tuned checkpoint always exists and always holds the best model so far.
torch.save(model.state_dict(), 'best_vit_model_finetuned.pth')

for epoch in range(1, num_epochs + 1):
    print(f"Fine-tuning Epoch {epoch}/{num_epochs}")
    train_loss = train_one_epoch(epoch, model, train_loader, optimizer, criterion, scheduler)
    val_loss, val_acc = validate(model, val_loader, criterion)

    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), 'best_vit_model_finetuned.pth')
        print("Saved Fine-Tuned Best Model")

    print('-' * 30)


In [None]:
# Prefer the fine-tuned checkpoint; fall back to the stage-one checkpoint if
# no fine-tuned checkpoint was written, instead of raising FileNotFoundError.
final_ckpt_path = 'best_vit_model_finetuned.pth'
if not os.path.exists(final_ckpt_path):
    final_ckpt_path = 'best_vit_model.pth'

# weights_only=True restricts unpickling to tensors and plain containers.
model.load_state_dict(torch.load(final_ckpt_path, weights_only=True))

val_loss, val_acc = validate(model, val_loader, criterion)
print(f'Final Model Validation Accuracy: {val_acc:.4f}')
