In [None]:
import wandb
wandb.login()  # Opens a browser once to authenticate
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset, ConcatDataset
from torchvision import datasets, transforms, models
from torchvision.models import resnet50
from itertools import product
import numpy as np
import os, ssl, zipfile, urllib
import matplotlib.pyplot as plt
from torch.optim.lr_scheduler import LinearLR, CosineAnnealingLR, SequentialLR
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, StratifiedShuffleSplit
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from tqdm import tqdm
import ssl
import time

In [3]:

TARGET_GPU_INDEX = 1

if torch.cuda.is_available():
    if TARGET_GPU_INDEX < torch.cuda.device_count():
        DEVICE = torch.device(f"cuda:{TARGET_GPU_INDEX}")
        print(f"Successfully set to use GPU: {TARGET_GPU_INDEX} ({torch.cuda.get_device_name(TARGET_GPU_INDEX)})")
    else:
        print(f"Error: Physical GPU {TARGET_GPU_INDEX} is not available. There are only {torch.cuda.device_count()} GPUs (0 to {torch.cuda.device_count() - 1}).")
        print("Falling back to CPU.")
        DEVICE = torch.device("CPU")
else:
    print("CUDA is not available. Falling back to CPU.")
    DEVICE = torch.device("CPU")

print(f"Final DEVICE variable is set to: {DEVICE}")
if DEVICE.type == 'cuda':
    print(f"Current PyTorch default device: {torch.cuda.current_device()}")
    torch.cuda.set_device(TARGET_GPU_INDEX)
    print(f"Current PyTorch default device (after set_device): {torch.cuda.current_device()}")


dummy_tensor = torch.randn(2, 2)
dummy_tensor_on_gpu = dummy_tensor.to(DEVICE)
print(f"Dummy tensor is on device: {dummy_tensor_on_gpu.device}")

Successfully set to use GPU: 1 (Quadro RTX 5000)
Final DEVICE variable is set to: cuda:1
Current PyTorch default device: 0
Current PyTorch default device (after set_device): 1
Dummy tensor is on device: cuda:1


In [4]:

LOCAL_OR_COLAB = "LOCAL"
SEED           = 42
NUM_EPOCHS     = 34

TRAIN_FRAC = 0.8
VAL_FRAC   = 0.1
TEST_FRAC  = 0.1

# hyperparameter grid
# BATCH_SIZES = [64, 128, 256]
BATCH_SIZES = [32]  # Using a single batch size for simplicity
LRS = [1e-4, 3e-4]

GRID        = [
    (3.75e-4, 0.5  ),
]

WEIGHT_DECAY = 0.5

BETAS=(0.9,0.98)
EPS = 1e-8

if LOCAL_OR_COLAB == "LOCAL":
    DATA_DIR = "/users/c/carvalhj/datasets/EuroSAT_RGB/"
else:
    data_root = "/content/EuroSAT_RGB"
    zip_path  = "/content/EuroSAT.zip"
    if not os.path.exists(data_root):
        ssl._create_default_https_context = ssl._create_unverified_context
        urllib.request.urlretrieve(
            "https://madm.dfki.de/files/sentinel/EuroSAT.zip", zip_path
        )
        with zipfile.ZipFile(zip_path, "r") as z:
            z.extractall("/content")
        os.rename("/content/2750", data_root)
    DATA_DIR = data_root

NUM_WORKERS = 4 


In [5]:

def compute_mean_std(dataset, batch_size):
    loader = DataLoader(dataset, batch_size, shuffle=False, num_workers=2)
    mean = 0.0
    std = 0.0
    n_samples = 0

    for data, _ in loader:
        batch_samples = data.size(0)
        data = data.view(batch_samples, data.size(1), -1)  # (B, C, H*W)
        mean += data.mean(2).sum(0)
        std += data.std(2).sum(0)
        n_samples += batch_samples

    mean /= n_samples
    std /= n_samples
    return mean.tolist(), std.tolist()

def get_data_loaders(data_dir, batch_size):

    base_tf = transforms.ToTensor()
    ds_all = datasets.ImageFolder(root=data_dir, transform=base_tf)
    labels = np.array(ds_all.targets)   # numpy array of shape (N,)
    num_classes = len(ds_all.classes)
    total_count = len(ds_all)
    print(f"Total samples in folder: {total_count}, classes: {ds_all.classes}")

    train_idx, val_idx, test_idx = get_split_indexes(labels, total_count)

    train_subset_for_stats = Subset(ds_all, train_idx)
    mean, std = compute_mean_std(train_subset_for_stats, batch_size)
    print(f"Computed mean: {mean}")
    print(f"Computed std:  {std}")

    tf_final = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std)
    ])

    #  full ImageFolder but now with normalization baked in
    ds_all_norm = datasets.ImageFolder(root=data_dir, transform=tf_final)

    train_ds = Subset(ds_all_norm, train_idx)
    val_ds   = Subset(ds_all_norm, val_idx)
    test_ds  = Subset(ds_all_norm, test_idx)

    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True,  num_workers=NUM_WORKERS, generator=torch.Generator().manual_seed(SEED))
    val_loader   = DataLoader(val_ds,   batch_size=batch_size, shuffle=False, num_workers=NUM_WORKERS, generator=torch.Generator().manual_seed(SEED))
    test_loader  = DataLoader(test_ds,  batch_size=batch_size, shuffle=False, num_workers=NUM_WORKERS, generator=torch.Generator().manual_seed(SEED))

    print(f"Train/Val/Test splits: {len(train_ds)}/{len(val_ds)}/{len(test_ds)}")

    return train_loader, val_loader, test_loader, num_classes

def get_proportion(num_classes, dataset):
    return np.bincount(np.array(dataset.dataset.targets)[dataset.indices], minlength=num_classes) / len(dataset)

def get_split_indexes(labels, total_count):
    n_train = int(np.floor(TRAIN_FRAC * total_count))
    n_temp = total_count - n_train   # this is val + test

    sss1 = StratifiedShuffleSplit(
        n_splits=1,
        train_size=n_train,
        test_size=n_temp,
        random_state=SEED
    )
    # Train and temp(val+test) indices
    train_idx, temp_idx = next(sss1.split(np.zeros(total_count), labels))

    n_val = int(np.floor(VAL_FRAC * total_count))
    n_test = total_count - n_train - n_val
    assert n_temp == n_val + n_test, "Fractions must sum to 1."

    labels_temp = labels[temp_idx]

    sss2 = StratifiedShuffleSplit(
        n_splits=1,
        train_size=n_val,
        test_size=n_test,
        random_state=SEED
    )
    val_idx_in_temp, test_idx_in_temp = next(sss2.split(np.zeros(len(temp_idx)), labels_temp))

    val_idx = temp_idx[val_idx_in_temp]
    test_idx = temp_idx[test_idx_in_temp]

    assert len(train_idx) == n_train
    assert len(val_idx) == n_val
    assert len(test_idx) == n_test

    print(f"Stratified split sizes: train={len(train_idx)}, val={len(val_idx)}, test={len(test_idx)}")
    return train_idx,val_idx,test_idx



# Logistic regresssion with Scikit-learn for comparing linear probing

In [19]:
BATCH_SIZE = BATCH_SIZES[0]
LEARNING_RATE, WEIGHT_DECAY = GRID[0]

In [None]:
def get_common_feature_extractor_model():
    """
    Initializes a ResNet50 backbone with random weights, freezes it,
    and prepares it for feature extraction.
    """
    base_model = models.resnet50(weights=None) # No pre-trained ImageNet weights
    feature_extractor = nn.Sequential(*list(base_model.children())[:-1]) # Exclude the last fc layer
    for param in feature_extractor.parameters():
        param.requires_grad = False
    feature_extractor.eval() 
    feature_extractor.to(DEVICE)
    print("Common ResNet50 feature extractor (randomly initialized and frozen) created.")
    return feature_extractor

def extract_features(dataloader, model):
    """Extracts features and labels from a DataLoader using the provided model."""
    all_features = []
    all_labels = []
    with torch.no_grad():
        for images, labels in tqdm(dataloader, desc="Extracting features"):
            images = images.to(DEVICE)
            features = model(images)
            features = features.squeeze(-1).squeeze(-1) # Flatten (batch_size, 2048, 1, 1) to (batch_size, 2048)
            all_features.append(features.cpu().numpy())
            all_labels.append(labels.cpu().numpy())
    return np.vstack(all_features), np.concatenate(all_labels)

class PyTorchLinearProbingModel(nn.Module):
    """
    A PyTorch model that uses a shared, frozen feature extractor
    and a trainable linear classification head.
    """
    def __init__(self, shared_feature_extractor, num_classes):
        super().__init__()
        self.backbone = shared_feature_extractor # Use the externally created and frozen feature extractor
        self.backbone.eval()
        for param in self.backbone.parameters():
            param.requires_grad = False

        feature_dim = 2048 
        self.linear_head = nn.Linear(feature_dim, num_classes)
        self.linear_head.to(DEVICE)

    def forward(self, x):
        with torch.no_grad(): # Ensure no gradients for the frozen backbone
            features = self.backbone(x)
            features = features.squeeze(-1).squeeze(-1)
        logits = self.linear_head(features)
        return logits

def train_epoch(model, dataloader, criterion, optimizer, epoch, total_epochs):
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    pbar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{total_epochs} (Train)")
    for inputs, labels in pbar:
        inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs.data, 1)
        total_samples += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()
        pbar.set_postfix(loss=f"{loss.item():.4f}", acc=f"{correct_predictions/total_samples:.4f}")
    return running_loss / total_samples, correct_predictions / total_samples

def evaluate_test_set_pytorch(model, dataloader, num_classes):
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for inputs, labels in tqdm(dataloader, desc="Evaluating Test Set (PyTorch)"):
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    print("\n--- Test Set Performance (PyTorch Linear Probing) ---")
    print(classification_report(all_labels, all_preds, target_names=[f'class_{i}' for i in range(num_classes)]))
    test_accuracy = accuracy_score(all_labels, all_preds)
    print(f"Test Accuracy: {test_accuracy:.4f}")
    print("Confusion Matrix:\n", confusion_matrix(all_labels, all_preds))
    return test_accuracy

if __name__ == "__main__":
    start_time_total = time.time()

    train_loader, val_loader, test_loader, num_classes = get_data_loaders(DATA_DIR, BATCH_SIZE)

    common_feature_extractor = get_common_feature_extractor_model()

    print("\n--- Running Logistic Regression (sklearn) ---")

    X_train_features, y_train_labels = extract_features(train_loader, common_feature_extractor)
    X_val_features, y_val_labels = extract_features(val_loader, common_feature_extractor)
    X_test_features, y_test_labels = extract_features(test_loader, common_feature_extractor)

    print(f"Train features shape: {X_train_features.shape}, labels shape: {y_train_labels.shape}")
    print(f"Validation features shape: {X_val_features.shape}, labels shape: {y_val_labels.shape}")
    print(f"Test features shape: {X_test_features.shape}, labels shape: {y_test_labels.shape}")

    param_grid = {
        'C': [0.1, 1.0],
        'solver': ['lbfgs'],
        'max_iter': [1000]
    }

    logistic_classifier = LogisticRegression(random_state=SEED)
    grid_search = GridSearchCV(
        logistic_classifier,
        param_grid,
        cv=3,
        scoring='accuracy',
        n_jobs=-1,
        verbose=0 # Suppress verbose output for cleaner run
    )
    grid_search.fit(X_train_features, y_train_labels)

    print(f"\nBest parameters found for Logistic Regression: {grid_search.best_params_}")
    print(f"Best cross-validation accuracy for Logistic Regression: {grid_search.best_score_:.4f}")

    best_logistic_model = grid_search.best_estimator_

    y_pred_val_lr = best_logistic_model.predict(X_val_features)
    val_accuracy_lr = accuracy_score(y_val_labels, y_pred_val_lr)
    print(f"Logistic Regression Validation Accuracy: {val_accuracy_lr:.4f}")

    y_pred_test_lr = best_logistic_model.predict(X_test_features)
    test_accuracy_lr = accuracy_score(y_test_labels, y_pred_test_lr)
    print(f"Logistic Regression Test Accuracy: {test_accuracy_lr:.4f}")

    print("\n--- Running PyTorch Linear Probing ---")

    pytorch_model = PyTorchLinearProbingModel(common_feature_extractor, num_classes=num_classes)
    optimizer = optim.Adam(pytorch_model.linear_head.parameters(), lr=LEARNING_RATE)
    criterion = nn.CrossEntropyLoss()

    combined_train_val_dataset = ConcatDataset([train_loader.dataset, val_loader.dataset])
    combined_train_val_loader = DataLoader(combined_train_val_dataset,
                                           batch_size=BATCH_SIZE,
                                           shuffle=True,
                                           num_workers=NUM_WORKERS,
                                           generator=torch.Generator().manual_seed(SEED),
                                           pin_memory=True)
    
    print(f"Starting PyTorch Linear Probing Training for {NUM_EPOCHS} epochs on combined Train+Val set.")
    for epoch in range(NUM_EPOCHS):
        train_loss, train_acc = train_epoch(pytorch_model, combined_train_val_loader, criterion, optimizer, epoch, NUM_EPOCHS)
        print(f"Epoch {epoch+1}/{NUM_EPOCHS} - Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
    
    test_accuracy_pytorch = evaluate_test_set_pytorch(pytorch_model, test_loader, num_classes)

    end_time_total = time.time()
    print(f"\nTotal script runtime: {end_time_total - start_time_total:.2f} seconds.")

    # --- Final Comparison ---
    print("\n--- Final Comparison (Randomly Initialized & Frozen ResNet50 Features) ---")
    print(f"Logistic Regression Test Accuracy: {test_accuracy_lr:.4f}")
    print(f"PyTorch Linear Probing Test Accuracy: {test_accuracy_pytorch:.4f}")
    print("Expected: Both accuracies should be low (e.g., 10-20%) as they are based on randomly initialized, uninformative features. " \
    "PyTorch might be slightly higher due to Adam optimizer and more epochs, but fundamentally limited by the features.")

Script started at: Wed Jun 11 19:40:17 2025

--- Setting up DataLoaders (Train / Val / Test) ---
Total samples in folder: 27000, classes: ['AnnualCrop', 'Forest', 'HerbaceousVegetation', 'Highway', 'Industrial', 'Pasture', 'PermanentCrop', 'Residential', 'River', 'SeaLake']
Stratified split sizes: train=21600, val=2700, test=2700


Computing Mean/Std: 100%|██████████| 675/675 [00:03<00:00, 190.96it/s]


Computed mean: [0.3441457152366638, 0.38009852170944214, 0.40766340494155884]
Computed std:  [0.09299741685390472, 0.06464488059282303, 0.054139144718647]
Train/Val/Test splits: 21600/2700/2700

--- Setting up Common Feature Extractor (Randomly Initialized & Frozen) ---
Common ResNet50 feature extractor (randomly initialized and frozen) created.

--- Running Logistic Regression (sklearn) ---


Extracting features: 100%|██████████| 675/675 [00:34<00:00, 19.59it/s]
Extracting features: 100%|██████████| 85/85 [00:04<00:00, 18.71it/s]
Extracting features: 100%|██████████| 85/85 [00:04<00:00, 18.73it/s]

Train features shape: (21600, 2048), labels shape: (21600,)
Validation features shape: (2700, 2048), labels shape: (2700,)
Test features shape: (2700, 2048), labels shape: (2700,)



STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver op


Best parameters found for Logistic Regression: {'C': 0.1, 'max_iter': 1000, 'solver': 'lbfgs'}
Best cross-validation accuracy for Logistic Regression: 0.7134
Logistic Regression Validation Accuracy: 0.7078
Logistic Regression Test Accuracy: 0.7122

--- Running PyTorch Linear Probing ---
Starting PyTorch Linear Probing Training for 10 epochs on combined Train+Val set.


Epoch 1/10 (Train): 100%|██████████| 760/760 [00:41<00:00, 18.11it/s, acc=0.3582, loss=2.2528]


Epoch 1/10 - Train Loss: 1.8052, Train Acc: 0.3582


Epoch 2/10 (Train): 100%|██████████| 760/760 [00:42<00:00, 17.97it/s, acc=0.4563, loss=1.4161]


Epoch 2/10 - Train Loss: 1.5845, Train Acc: 0.4563


Epoch 3/10 (Train): 100%|██████████| 760/760 [00:42<00:00, 17.96it/s, acc=0.4863, loss=1.4693]


Epoch 3/10 - Train Loss: 1.4871, Train Acc: 0.4863


Epoch 4/10 (Train): 100%|██████████| 760/760 [00:42<00:00, 17.90it/s, acc=0.5041, loss=1.9950]


Epoch 4/10 - Train Loss: 1.4269, Train Acc: 0.5041


Epoch 5/10 (Train): 100%|██████████| 760/760 [00:42<00:00, 17.91it/s, acc=0.5183, loss=1.3394]


Epoch 5/10 - Train Loss: 1.3808, Train Acc: 0.5183


Epoch 6/10 (Train): 100%|██████████| 760/760 [00:42<00:00, 17.96it/s, acc=0.5310, loss=1.1902]


Epoch 6/10 - Train Loss: 1.3449, Train Acc: 0.5310


Epoch 7/10 (Train): 100%|██████████| 760/760 [00:42<00:00, 17.96it/s, acc=0.5366, loss=1.1155]


Epoch 7/10 - Train Loss: 1.3283, Train Acc: 0.5366


Epoch 8/10 (Train): 100%|██████████| 760/760 [00:42<00:00, 17.91it/s, acc=0.5453, loss=0.8521]


Epoch 8/10 - Train Loss: 1.3045, Train Acc: 0.5453


Epoch 9/10 (Train): 100%|██████████| 760/760 [00:42<00:00, 17.94it/s, acc=0.5536, loss=1.3915]


Epoch 9/10 - Train Loss: 1.2826, Train Acc: 0.5536


Epoch 10/10 (Train): 100%|██████████| 760/760 [00:42<00:00, 17.95it/s, acc=0.5581, loss=1.4818]


Epoch 10/10 - Train Loss: 1.2695, Train Acc: 0.5581


Evaluating Test Set (PyTorch): 100%|██████████| 85/85 [00:04<00:00, 18.64it/s]


--- Test Set Performance (PyTorch Linear Probing) ---
              precision    recall  f1-score   support

     class_0       0.44      0.80      0.57       300
     class_1       0.54      0.91      0.67       300
     class_2       0.66      0.40      0.50       300
     class_3       0.25      0.02      0.04       250
     class_4       0.84      0.80      0.82       250
     class_5       0.64      0.73      0.69       200
     class_6       0.55      0.35      0.43       250
     class_7       0.56      0.69      0.61       300
     class_8       0.47      0.29      0.36       250
     class_9       0.75      0.72      0.73       300

    accuracy                           0.58      2700
   macro avg       0.57      0.57      0.54      2700
weighted avg       0.57      0.58      0.55      2700

Test Accuracy: 0.5811
Confusion Matrix:
 [[240   8   4   1   0   8  19   9   2   9]
 [  0 272   2   0   0  18   0   0   2   6]
 [ 72  36 121   3   5   6  18  20   9  10]
 [ 73  51   8   


