In [1]:
import os 
import glob
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

from torchvision.transforms import RandAugment, RandomErasing

from timm import create_model
from timm.data.mixup import Mixupt
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from torch.cuda.amp import autocast, GradScaler
from torch.amp import autocast, GradScaler
from torch.optim.swa_utils import AveragedModel, SWALR

torch.manual_seed(42)
np.random.seed(42)

data_dir = '/kaggle/input/eurosat10-classes/EuroSAT_RGB/'  

image_paths = glob.glob(os.path.join(data_dir, '*', '*.jpg'))
labels = [os.path.basename(os.path.dirname(path)) for path in image_paths]

df = pd.DataFrame({'image_path': image_paths, 'label': labels})

le = LabelEncoder()
df['label_enc'] = le.fit_transform(df['label'])

train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df['label_enc'],
    random_state=42
)

print(f"Total images: {len(df)}")
print(f"Training images: {len(train_df)}")
print(f"Validation images: {len(val_df)}")

Total images: 27000
Training images: 21600
Validation images: 5400


In [10]:
mean = [0.3444, 0.3809, 0.4082]
std = [0.1829, 0.1603, 0.1321]

train_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    RandAugment(num_ops=2, magnitude=7),  
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
    RandomErasing(p=0.5, scale=(0.02, 0.2))  
])

val_transforms = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

class EuroSATDataset(Dataset):
    def __init__(self, df, transforms):
        self.df = df.reset_index(drop=True)
        self.transforms = transforms

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        image = Image.open(self.df.loc[idx, 'image_path']).convert('RGB')
        image = self.transforms(image)
        label = self.df.loc[idx, 'label_enc']
        return image, label

train_dataset = EuroSATDataset(train_df, train_transforms)
val_dataset = EuroSATDataset(val_df, val_transforms)

batch_size = 64
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
    pin_memory=True
)
val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
    pin_memory=True
)

In [11]:
model = create_model(
    'swin_base_patch4_window7_224',
    pretrained=True,
    num_classes=10,      
    drop_rate=0.0,
    drop_path_rate=0.1    
)

criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

def get_param_groups(model, base_lr, weight_decay):
    no_weight_decay = model.no_weight_decay()
    param_groups = {}
    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue
        group_name = 'layer_0'
        if 'blocks' in name:
            block_num = int(name.split('.')[1])
            group_name = f'layer_{block_num + 1}'
        elif 'cls_token' in name or 'pos_embed' in name:
            group_name = 'layer_0'
        else:
            group_name = 'layer_0'

        if group_name not in param_groups:
            param_groups[group_name] = {
                'params': [],
                'weight_decay': weight_decay,
                'lr': base_lr
            }
        param_groups[group_name]['params'].append(param)

    param_groups_list = []
    num_layers = len(param_groups)
    for i, (group_name, group) in enumerate(sorted(param_groups.items(), key=lambda x: x[0])):
        group['lr'] = base_lr * (0.95 ** (num_layers - i - 1))
        param_groups_list.append(group)

    return param_groups_list

base_lr = 3e-5
weight_decay = 0.01
optimizer = AdamW(get_param_groups(model, base_lr, weight_decay))

scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=1)

mixup_fn = Mixup(
    mixup_alpha=0.8,
    cutmix_alpha=1.0,
    prob=1.0,
    switch_prob=0.5,
    mode='batch',
    label_smoothing=0.1,
    num_classes=10
)

scaler = GradScaler()

model = model.cuda()

swa_model = AveragedModel(model)
swa_start_epoch = 25  
swa_scheduler = SWALR(optimizer, swa_lr=base_lr, anneal_epochs=5, anneal_strategy="cos")

model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]



RuntimeError: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx

In [None]:
def train_one_epoch(epoch, model, dataloader, optimizer, criterion, scheduler, mixup_fn):
    model.train()
    running_loss = 0.0
    total = 0

    for images, labels in dataloader:
        images = images.cuda(non_blocking=True)
        labels = labels.cuda(non_blocking=True)

        images, labels = mixup_fn(images, labels)

        optimizer.zero_grad()
        with autocast(device_type='cuda', dtype=torch.float16):  
            outputs = model(images)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item() * images.size(0)
        total += labels.size(0)

    epoch_loss = running_loss / total
    if epoch < swa_start_epoch:
        scheduler.step()

    print(f"[Epoch {epoch} | Train] Loss: {epoch_loss:.4f}")
    return epoch_loss


In [2]:
def validate(model, dataloader, criterion):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for images, labels in dataloader:
            images = images.cuda(non_blocking=True)
            labels = labels.cuda(non_blocking=True)

            with autocast(device_type='cuda', dtype=torch.float16):  
                outputs = model(images)
                loss = criterion(outputs, labels)

            running_loss += loss.item() * images.size(0)

            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    epoch_loss = running_loss / total
    epoch_acc = correct / total

    class_report = classification_report(all_labels, all_preds, target_names=le.classes_, digits=4)  
    print("Classification Report:")
    print(class_report)

    cm = confusion_matrix(all_labels, all_preds)

    print(f"[Validation] Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")
    return epoch_loss, epoch_acc

In [None]:
num_epochs = 30
best_acc = 0.0

for epoch in range(1, num_epochs + 1):
    print(f"Epoch {epoch}/{num_epochs}")

    train_loss = train_one_epoch(
        epoch, model, train_loader, optimizer, criterion, scheduler, mixup_fn
    )
    
    if epoch >= swa_start_epoch:
        swa_model.update_parameters(model)
        swa_scheduler.step()
    
    val_loss, val_acc = validate(model, val_loader, criterion)

    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), 'best_vit_model.pth')
        print("Saved Best Model!")

    print('-' * 40)

torch.optim.swa_utils.update_bn(train_loader, swa_model, device='cuda')

torch.save(swa_model.state_dict(), 'swa_vit_model.pth')
print("SWA Model Saved!")

In [1]:
import torch
import torch.nn as nn
from torch.cuda.amp import autocast, GradScaler
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from timm import create_model
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

model_finetune = create_model(
    'swin_base_patch4_window7_224',
    pretrained=False,
    num_classes=10
)
model_finetune.load_state_dict(torch.load('/kaggle/input/prefinetune/pytorch/default/1/best_vit_model (4).pth'))
model_finetune = model_finetune.cuda()


for name, param in model_finetune.named_parameters():
    if 'head' not in name:
        param.requires_grad = False

finetune_lr = 1e-5
finetune_weight_decay = 1e-4
finetune_optimizer = AdamW(
    filter(lambda p: p.requires_grad, model_finetune.parameters()), 
    lr=finetune_lr, 
    weight_decay=finetune_weight_decay
)
finetune_scheduler = CosineAnnealingWarmRestarts(finetune_optimizer, T_0=5, T_mult=1)

finetune_criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
finetune_scaler = GradScaler()

def finetune_one_epoch(epoch, model, dataloader, optimizer, criterion, scheduler):
    model.train()
    running_loss = 0.0
    total = 0
    
    for images, labels in dataloader:
        images = images.cuda(non_blocking=True)
        labels = labels.cuda(non_blocking=True)

        optimizer.zero_grad()
        with autocast(device_type='cuda', dtype=torch.float16):
            outputs = model(images)
            loss = criterion(outputs, labels)

        finetune_scaler.scale(loss).backward()
        nn.utils.clip_grad_norm_(model.parameters(), 5.0)
        finetune_scaler.step(optimizer)
        finetune_scaler.update()

        running_loss += loss.item() * images.size(0)
        total += labels.size(0)

    epoch_loss = running_loss / total
    scheduler.step()
    print(f"[Fine-Tune Epoch {epoch}] Loss: {epoch_loss:.4f}")
    return epoch_loss

def validate_finetune(model, dataloader, criterion):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for images, labels in dataloader:
            images = images.cuda(non_blocking=True)
            labels = labels.cuda(non_blocking=True)
            
            with autocast(device_type='cuda', dtype=torch.float16):
                outputs = model(images)
                loss = criterion(outputs, labels)

            running_loss += loss.item() * images.size(0)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    print(f"[Fine-Tune Validation] Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")

    final_report = classification_report(all_labels, all_preds, target_names=le.classes_, digits=4)
    print("\nFinal Classification Report (Fine-Tuned Model):")
    print(final_report)

    cm = confusion_matrix(all_labels, all_preds)
    plt.figure(figsize=(8,6))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                xticklabels=le.classes_, yticklabels=le.classes_)
    plt.title("Confusion Matrix (Fine-Tuned Model)")
    plt.ylabel('Actual')
    plt.xlabel('Predicted')
    plt.show()

    return epoch_loss, epoch_acc

finetune_epochs = 10
best_acc_ft = 0.0
for ep in range(1, finetune_epochs+1):
    train_loss_ft = finetune_one_epoch(
        ep, model_finetune, train_loader, finetune_optimizer, finetune_criterion, finetune_scheduler
    )
    val_loss_ft, val_acc_ft = validate_finetune(model_finetune, val_loader, finetune_criterion)
    if val_acc_ft > best_acc_ft:
        best_acc_ft = val_acc_ft
        torch.save(model_finetune.state_dict(), "best_vit_model_finetuned.pth")
        print("Saved Best Fine-Tuned Model!\n")

  model_finetune.load_state_dict(torch.load('/kaggle/input/prefinetune/pytorch/default/1/best_vit_model (4).pth'))


RuntimeError: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.