In [1]:
import torch
import torchvision
print(torch.__version__)
print(torchvision.__version__)

2.5.1
0.20.1


In [2]:
import wandb
wandb.login()  # Opens a browser once to authenticate
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset, ConcatDataset
from torchvision import datasets, transforms
from torchvision.models import resnet50
from itertools import product
import numpy as np
import random
import copy
import os, ssl, zipfile, urllib
from sklearn.model_selection import StratifiedShuffleSplit
import matplotlib.pyplot as plt
from torch.optim.lr_scheduler import LinearLR, CosineAnnealingLR, SequentialLR
from sklearn.metrics import confusion_matrix
import seaborn as sns
import torch.optim as optim
from torch.optim.lr_scheduler import LinearLR, SequentialLR, MultiStepLR
from torch.utils.data import ConcatDataset, DataLoader


[34m[1mwandb[0m: Currently logged in as: [33manaliju[0m ([33manaliju-paris[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:

LOCAL_OR_COLAB = "LOCAL"
SEED           = 42
NUM_EPOCHS     = 34
DEVICE         = torch.device("cuda" if torch.cuda.is_available() else "cpu")

TRAIN_FRAC = 0.8
VAL_FRAC   = 0.1
TEST_FRAC  = 0.1

BATCH_SIZES = [128]  
LRS = [1e-4, 3e-4]

GRID = product(
    [0.1, 0.01],    # learning rate
    [0.01, 0.0001]  # weight decay
)

TRAINING_SCHEDULES = {
    "short": {"p": [750, 1500, 2500], "w": 200, "unit": "steps"},
    "medium": {"p": [3000, 6000], "w": 500, "unit": "steps"},
    "long": {"p": [30, 60], "w": 5, "unit": "epochs"}
}


if LOCAL_OR_COLAB == "LOCAL":
    DATA_DIR = "/share/DEEPLEARNING/carvalhj/EuroSAT_RGB/"
else:
    data_root = "/content/EuroSAT_RGB"
    zip_path  = "/content/EuroSAT.zip"
    if not os.path.exists(data_root):
        ssl._create_default_https_context = ssl._create_unverified_context
        urllib.request.urlretrieve(
            "https://madm.dfki.de/files/sentinel/EuroSAT.zip", zip_path
        )
        with zipfile.ZipFile(zip_path, "r") as z:
            z.extractall("/content")
        os.rename("/content/2750", data_root)
    DATA_DIR = data_root

NUM_WORKERS = 4 

In [4]:
# Use GPU1
TARGET_GPU_INDEX = 1

if torch.cuda.is_available():
    if TARGET_GPU_INDEX < torch.cuda.device_count():
        DEVICE = torch.device(f"cuda:{TARGET_GPU_INDEX}")
        print(f"Successfully set to use GPU: {TARGET_GPU_INDEX} ({torch.cuda.get_device_name(TARGET_GPU_INDEX)})")
    else:
        print(f"Error: Physical GPU {TARGET_GPU_INDEX} is not available. There are only {torch.cuda.device_count()} GPUs (0 to {torch.cuda.device_count() - 1}).")
        print("Falling back to CPU.")
        DEVICE = torch.device("CPU")
else:
    print("CUDA is not available. Falling back to CPU.")
    DEVICE = torch.device("CPU")

print(f"Final DEVICE variable is set to: {DEVICE}")
if DEVICE.type == 'cuda':
    print(f"Current PyTorch default device: {torch.cuda.current_device()}")

    torch.cuda.set_device(TARGET_GPU_INDEX)
    print(f"Current PyTorch default device (after set_device): {torch.cuda.current_device()}")

dummy_tensor = torch.randn(2, 2)
dummy_tensor_on_gpu = dummy_tensor.to(DEVICE)
print(f"Dummy tensor is on device: {dummy_tensor_on_gpu.device}")

Successfully set to use GPU: 1 (Quadro RTX 6000)
Final DEVICE variable is set to: cuda:1
Current PyTorch default device: 0
Current PyTorch default device (after set_device): 1
Dummy tensor is on device: cuda:1


In [5]:

def compute_mean_std(dataset, batch_size):
    loader = DataLoader(dataset, batch_size, shuffle=False, num_workers=NUM_WORKERS)
    mean = 0.0
    std = 0.0
    n_samples = 0

    for data, _ in loader:
        batch_samples = data.size(0)
        data = data.view(batch_samples, data.size(1), -1)  # (B, C, H*W)
        mean += data.mean(2).sum(0)
        std += data.std(2).sum(0)
        n_samples += batch_samples

    mean /= n_samples
    std /= n_samples
    return mean.tolist(), std.tolist()

def get_split_indexes(labels, total_count):
    indices = np.arange(total_count)
    np.random.seed(SEED) # for reproducibility
    np.random.shuffle(indices)

    train_split = int(0.8 * total_count)
    val_split = int(0.9 * total_count)

    train_idx = indices[:train_split]
    val_idx = indices[train_split:val_split]
    test_idx = indices[val_split:]
    return train_idx, val_idx, test_idx

def get_data_loaders(data_dir, batch_size):

    base_tf = transforms.ToTensor()
    ds_all = datasets.ImageFolder(root=data_dir, transform=base_tf)
    labels = np.array(ds_all.targets)
    num_classes = len(ds_all.classes)
    total_count = len(ds_all)
    print(f"Total samples in folder: {total_count}, classes: {ds_all.classes}")

    train_idx, val_idx, test_idx = get_split_indexes(labels, total_count)

    train_subset_for_stats = Subset(ds_all, train_idx)
    mean, std = compute_mean_std(train_subset_for_stats, batch_size)
    print(f"Computed mean: {mean}")
    print(f"Computed std:  {std}")



    train_transform_augmented = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomCrop(224),
        transforms.RandomApply([transforms.RandomRotation(angle) for angle in [0, 90, 180, 270]], p=1.0), # Apply one of 0, 90, 180, 270 rotations
        transforms.RandomHorizontalFlip(p=0.5), # Randomly apply horizontal flip (50% chance)
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std)
    ])


    eval_transform = transforms.Compose([
        transforms.Resize(256), # Resize to 256x256
        transforms.CenterCrop(224), # Perform a central crop of 224x224
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std)
    ])

    # Create datasets with the respective transformations
    train_ds = datasets.ImageFolder(root=data_dir, transform=train_transform_augmented)
    val_ds = datasets.ImageFolder(root=data_dir, transform=eval_transform)
    test_ds = datasets.ImageFolder(root=data_dir, transform=eval_transform)

    # Apply subsets to the transformed datasets
    train_ds_subset = Subset(train_ds, train_idx)
    val_ds_subset = Subset(val_ds, val_idx)
    test_ds_subset = Subset(test_ds, test_idx)

    # Create DataLoaders
    train_loader = DataLoader(train_ds_subset, batch_size=batch_size, shuffle=True, num_workers=NUM_WORKERS, generator=torch.Generator().manual_seed(SEED))
    val_loader   = DataLoader(val_ds_subset, batch_size=batch_size, shuffle=False, num_workers=NUM_WORKERS, generator=torch.Generator().manual_seed(SEED))
    test_loader  = DataLoader(test_ds_subset, batch_size=batch_size, shuffle=False, num_workers=NUM_WORKERS, generator=torch.Generator().manual_seed(SEED))

    print(f"Train/Val/Test splits: {len(train_ds_subset)}/{len(val_ds_subset)}/{len(test_ds_subset)}")

    return train_loader, val_loader, test_loader, num_classes

In [None]:
def build_lr_scheduler(optimizer, total_training_steps, schedule_cfg, steps_per_epoch):

    warmup_iters = schedule_cfg["w"]
    milestones = [] # Points at which LR drops

    if schedule_cfg["unit"] == "steps":
        milestones = schedule_cfg["p"]
    elif schedule_cfg["unit"] == "epochs":
        # Convert epoch milestones to step milestones
        milestones = [m * steps_per_epoch for m in schedule_cfg["p"]]
        warmup_iters = schedule_cfg["w"] * steps_per_epoch # Convert warmup epochs to steps

    # Linear warm-up scheduler
    warmup_scheduler = LinearLR(optimizer, start_factor=1e-6, end_factor=1.0, total_iters=warmup_iters)

    decay_scheduler = MultiStepLR(optimizer, milestones=milestones, gamma=0.1)

    scheduler = SequentialLR(
        optimizer,
        schedulers=[warmup_scheduler, decay_scheduler],
        milestones=[warmup_iters]
    )
    return scheduler

def hyperparam_search(pretrained=True):
    best_val = -1.0
    best_cfg = None
    best_model = None

    for bs, (lr, wd), schedule_name in product(BATCH_SIZES, GRID, TRAINING_SCHEDULES.keys()):

        print(f"\n>>> Testing BS={bs}, LR={lr:.1e}, WD={wd:.1e}, Schedule={schedule_name}")

        tr_dl, val_dl, te_dl, n_cls = get_data_loaders(DATA_DIR, bs) 


        steps_per_epoch = len(tr_dl)

        schedule_cfg = TRAINING_SCHEDULES[schedule_name]

        if schedule_cfg["unit"] == "steps":

            total_steps = max(schedule_cfg["p"]) 
            NUM_EPOCHS_FOR_RUN = int(np.ceil(total_steps / steps_per_epoch)) + 1 # Add a buffer epoch
        else: # epochs
            total_epochs_from_schedule = max(schedule_cfg["p"]) + schedule_cfg["w"] # max 'p' + warmup epochs
            NUM_EPOCHS_FOR_RUN = total_epochs_from_schedule # Total epochs to run
            total_steps = NUM_EPOCHS_FOR_RUN * steps_per_epoch


        model = build_model(n_cls, pretrained=pretrained)
        model.to(DEVICE) 

        opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=wd)
        crit = nn.CrossEntropyLoss()

        sched = build_lr_scheduler(opt, total_steps, schedule_cfg, steps_per_epoch)

        wandb_run = wandb.init(
            project="eurosat-supervised-scratch-grid-search-lrsched",
            name=f"BS{bs}_LR{lr:.0e}_WD{wd:.0e}_Sched_{schedule_name}",
            config={
                "batch_size": bs,
                "learning_rate": lr,
                "weight_decay": wd,
                "schedule_name": schedule_name,
                "total_epochs_for_run": NUM_EPOCHS_FOR_RUN,
                "pretrained": pretrained,
                "optimizer": "SGD_momentum_0.9",
                "scheduler_type": "LinearWarmup_MultiStepLR",
                "warmup_steps_or_epochs": schedule_cfg["w"],
                "decay_milestones": schedule_cfg["p"],
                "decay_unit": schedule_cfg["unit"]
            }
        )

        for ep in range(NUM_EPOCHS_FOR_RUN):
            tr_loss, tr_acc = train_one_epoch(model, tr_dl, opt, crit, sched, DEVICE) 
            model.eval()
            val_loss, corr, tot = 0.0, 0, 0
            with torch.no_grad():
                for xb, yb in val_dl:
                    xb, yb = xb.to(DEVICE), yb.to(DEVICE)
                    logits = model(xb)
                    loss = crit(logits, yb)
                    val_loss += loss.item()
                    preds = logits.argmax(dim=1)
                    corr += (preds == yb).sum().item()
                    tot  += yb.size(0)
            val_loss /= len(val_dl)
            val_acc = 100.0 * corr / tot

            print(f"  Ep{ep+1}/{NUM_EPOCHS_FOR_RUN}: train_acc={tr_acc:.1f}%  train_loss={tr_loss:.4f}, "
                  f"val_acc={val_acc:.1f}%, val_loss={val_loss:.4f}")

            wandb.log({
                "epoch":       ep + 1,
                "train_loss":  tr_loss,
                "train_acc":   tr_acc,
                "val_loss":    val_loss,
                "val_acc":     val_acc,
                "learning_rate": opt.param_groups[0]['lr'] 
            })

        wandb_run.finish()

        # Only use val_acc to pick best
        if val_acc > best_val:
            best_val   = val_acc
            best_cfg   = (bs, lr, wd, schedule_name)
            best_model = copy.deepcopy(model)

    print(f"\n>>> Best config: BS={best_cfg[0]}, LR={best_cfg[1]:.1e}, WD={best_cfg[2]:.1e}, Schedule={best_cfg[3]}, val_acc={best_val:.1f}%")

    return best_cfg, best_model

def train_one_epoch(model, dataloader, optimizer, criterion, scheduler, device):
    model.train()
    total_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    for batch_idx, (inputs, labels) in enumerate(dataloader):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        scheduler.step()

        total_loss += loss.item() * inputs.size(0) # Accumulate weighted by batch size
        _, predicted = torch.max(outputs.data, 1)
        total_samples += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

    avg_loss = total_loss / total_samples
    accuracy = 100 * correct_predictions / total_samples
    return avg_loss, accuracy

In [7]:

def get_proportion(num_classes, dataset):
    return np.bincount(np.array(dataset.dataset.targets)[dataset.indices], minlength=num_classes) / len(dataset)

def get_split_indexes(labels, total_count):
    n_train = int(np.floor(TRAIN_FRAC * total_count))
    n_temp = total_count - n_train   

    sss1 = StratifiedShuffleSplit(
        n_splits=1,
        train_size=n_train,
        test_size=n_temp,
        random_state=SEED
    )
    # Train and temp(val+test) indices
    train_idx, temp_idx = next(sss1.split(np.zeros(total_count), labels))

    n_val = int(np.floor(VAL_FRAC * total_count))
    n_test = total_count - n_train - n_val
    assert n_temp == n_val + n_test, "Fractions must sum to 1."

    labels_temp = labels[temp_idx]

    sss2 = StratifiedShuffleSplit(
        n_splits=1,
        train_size=n_val,
        test_size=n_test,
        random_state=SEED
    )
    val_idx_in_temp, test_idx_in_temp = next(sss2.split(np.zeros(len(temp_idx)), labels_temp))

    val_idx = temp_idx[val_idx_in_temp]
    test_idx = temp_idx[test_idx_in_temp]

    assert len(train_idx) == n_train
    assert len(val_idx) == n_val
    assert len(test_idx) == n_test

    print(f"Stratified split sizes: train={len(train_idx)}, val={len(val_idx)}, test={len(test_idx)}")
    return train_idx,val_idx,test_idx



In [8]:
def set_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark     = False

def build_model(n_cls, pretrained=False):
    m = resnet50(weights=None if not pretrained else "DEFAULT")
    m.fc = nn.Linear(m.fc.in_features, n_cls)
    return m.to(DEVICE)

def evaluate(model, loader, num_classes):
    model.eval()

    total_correct = 0
    total_samples = 0

    correct_per_class = torch.zeros(num_classes, dtype=torch.int64)
    total_per_class   = torch.zeros(num_classes, dtype=torch.int64)

    all_labels = []
    all_preds  = []

    with torch.no_grad():
        for xb, yb in loader:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            logits = model(xb)
            preds  = logits.argmax(dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(yb.cpu().numpy())

            total_correct += (preds == yb).sum().item()
            total_samples += yb.size(0)

            for c in range(num_classes):
                # mask of samples in this batch whose true label == c
                class_mask = (yb == c)
                if class_mask.sum().item() == 0:
                    continue

                total_per_class[c] += class_mask.sum().item()

                correct_per_class[c] += ((preds == yb) & class_mask).sum().item()

    overall_acc = 100.0 * total_correct / total_samples

    acc_per_class = {}
    for c in range(num_classes):
        if total_per_class[c].item() > 0:
            acc = 100.0 * correct_per_class[c].item() / total_per_class[c].item()
        else:
            acc = 0.0
        acc_per_class[c] = acc

    return overall_acc, acc_per_class, all_labels, all_preds

def plot_confusion_matrix_from_preds(y_true, y_pred, class_names):
    cm = confusion_matrix(y_true, y_pred)
    cm_norm = cm.astype(float) / cm.sum(axis=1, keepdims=True)
    
    plt.figure(figsize=(8, 8))
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.colorbar()
    
    ticks = np.arange(len(class_names))
    plt.xticks(ticks, class_names, rotation=90)
    plt.yticks(ticks, class_names)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    
    # threshold for text color
    thresh = cm.max() / 2.0
    for i in range(len(class_names)):
        for j in range(len(class_names)):
            pct = cm_norm[i, j] * 100
            plt.text(
                j, i,
                f"{cm[i, j]}\n{pct:.1f}%",
                ha="center", va="center",
                color="white" if cm[i, j] > thresh else "black"
            )
    
    plt.tight_layout()
    plt.show()

def plot_class_acc_prop(te_dl, acc_vals, class_proportions_test):
    classes = te_dl.dataset.dataset.classes
    x = np.arange(len(classes))

    acc   = acc_vals
    prop  = class_proportions_test * 100

    fig, ax1 = plt.subplots(figsize=(12,6))
    bars = ax1.bar(x, acc, color='C0', alpha=0.7)
    ax1.set_ylabel('Accuracy (%)', color='C0')
    ax1.set_ylim(0, 100)
    ax1.tick_params(axis='y', labelcolor='C0')

    for bar in bars:
        h = bar.get_height()
        ax1.text(bar.get_x() + bar.get_width()/2, h + 1, f'{h:.1f}%', ha='center', va='bottom', color='C0')

    ax2 = ax1.twinx()
    line = ax2.plot(x, prop, color='C1', marker='o', linewidth=2)
    ax2.set_ylabel('Test Proportion (%)', color='C1')
    ax2.set_ylim(0, max(prop)*1.2)
    ax2.tick_params(axis='y', labelcolor='C1')

    for xi, yi in zip(x, prop):
        ax2.text(xi, yi + max(prop)*0.02, f'{yi:.1f}%', ha='center', va='bottom', color='C1')

    ax1.set_xticks(x)
    ax1.set_xticklabels(classes, rotation=45, ha='right')
    plt.title('Per-class Accuracy vs. Test Proportion')
    plt.tight_layout()
    plt.show()



# Perform Hyperparameter Search, Retrain on Train + Validation Set, Evaluate on Test Set

In [9]:

def make_optimizer_scheduler_reused(params, lr, wd, schedule_name, steps_per_epoch):

    opt = optim.SGD(params, lr=lr, momentum=0.9, weight_decay=wd)
    schedule_cfg = TRAINING_SCHEDULES[schedule_name]


    total_steps_for_scheduler_config = max(schedule_cfg['p']) if schedule_cfg['unit'] == 'steps' else max(schedule_cfg['p']) * steps_per_epoch

    scheduler = build_lr_scheduler(opt, total_steps_for_scheduler_config, schedule_cfg, steps_per_epoch)
    return opt, scheduler



def retrain_final_model(tr_dl, val_dl, n_cls, bs, lr, wd, schedule_name): 

    print("\n>>> Retraining final model on TRAIN+VAL combined with best hyperparameters")
    combined_ds = ConcatDataset([tr_dl.dataset, val_dl.dataset])


    combined_dl = DataLoader(combined_ds, batch_size=bs, shuffle=True, num_workers=4) 

    model = build_model(n_cls, pretrained=False)
    model.to(DEVICE)

    steps_per_epoch = len(combined_dl)
    schedule_cfg = TRAINING_SCHEDULES[schedule_name]

    if schedule_cfg["unit"] == "steps":
        total_steps_for_run = max(schedule_cfg["p"]) 
        num_epochs_for_run = int(np.ceil(total_steps_for_run / steps_per_epoch)) + 1
    else: # epochs
        num_epochs_for_run = max(schedule_cfg["p"]) + schedule_cfg["w"]


    optimizer, scheduler = make_optimizer_scheduler_reused( 
        model.parameters(), lr, wd, schedule_name, steps_per_epoch
    )
    criterion = nn.CrossEntropyLoss()

    for ep in range(num_epochs_for_run): 
        loss, acc = train_one_epoch(model, combined_dl, optimizer, criterion, scheduler, DEVICE) 
        print(f"  Ep {ep+1}/{num_epochs_for_run}: train_acc={acc:.1f}%")
    return model, combined_ds

def evaluate_and_log(final_model, te_dl, combined_ds, n_cls, bs, lr):

    final_test_acc, acc_per_class, y_true, y_pred = evaluate(final_model, te_dl, n_cls)
    plot_confusion_matrix_from_preds(y_true, y_pred, te_dl.dataset.dataset.classes)

    test_targs = np.array(te_dl.dataset.dataset.targets)[te_dl.dataset.indices]
    prop_test = np.bincount(test_targs, minlength=n_cls) / len(test_targs)

    combined_targs = np.concatenate([
        np.array(ds.dataset.targets)[ds.indices] for ds in combined_ds.datasets
    ])
    prop_trainval = np.bincount(combined_targs, minlength=n_cls) / len(combined_targs)

    acc_vals = np.array([acc_per_class[c] for c in range(n_cls)])
    weighted_acc = (acc_vals * prop_test).sum()

    print("\n>>> Final Test Accuracy:")
    print(f"  Overall:             {final_test_acc:5.1f}%")
    print(f"  Weighted class acc.: {weighted_acc:5.1f}%\n")
    hdr = f"{'Class':20s}  {'Acc':>6s}   {'Train+Val':>9s}   {'Test':>6s}"
    print(hdr); print("-"*len(hdr))
    for c, name in enumerate(te_dl.dataset.dataset.classes):
        print(f"{name:20s}  {acc_vals[c]:6.1f}%   {prop_trainval[c]*100:8.0f}%   {prop_test[c]*100:6.0f}%")

    wandb.init(
        project="eurosat-supervised-scratch-final-lrsched",
        name=f"BS{bs}_LR{lr:.0e}_final",
        config={
            "batch_size": bs, "learning_rate": lr, "epochs": NUM_EPOCHS,
            "pretrained": False, "final_retrain": True
        }
    )
    wandb.log({
        "final_test_acc":     final_test_acc,
        "weighted_class_acc": weighted_acc,
        "per_class_acc":      acc_vals
    })
    wandb.finish()

    plot_class_acc_prop(te_dl, acc_vals, prop_test)


In [10]:
# Main
set_seed(SEED)

best_cfg, _    = hyperparam_search(pretrained=False)
bs, lr, wd     = best_cfg
tr_dl, val_dl, te_dl, n_cls = get_data_loaders(DATA_DIR, bs)

# Retrain on TRAIN+VAL
final_model, combined_ds = retrain_final_model(tr_dl, val_dl, n_cls, bs, lr, wd, NUM_EPOCHS)

evaluate_and_log(final_model, te_dl, combined_ds, n_cls, bs, lr)

final_path = f"models/eurosat_supervised_final_bs{bs}_lr{lr:.0e}_epcs{NUM_EPOCHS}.pth"
torch.save(final_model.state_dict(), final_path)
print(f"Final model saved to {final_path}")



>>> Testing BS=128, LR=1.0e-01, WD=1.0e-02, Schedule=short
Total samples in folder: 27000, classes: ['AnnualCrop', 'Forest', 'HerbaceousVegetation', 'Highway', 'Industrial', 'Pasture', 'PermanentCrop', 'Residential', 'River', 'SeaLake']
Stratified split sizes: train=21600, val=2700, test=2700


Computed mean: [0.3441525995731354, 0.3800968527793884, 0.407656192779541]
Computed std:  [0.09124630689620972, 0.06498812139034271, 0.055154334753751755]
Train/Val/Test splits: 21600/2700/2700


  Ep1/16: train_acc=43.8%  train_loss=1.6966, val_acc=15.7%, val_loss=3.6202




  Ep2/16: train_acc=54.1%  train_loss=1.2968, val_acc=12.7%, val_loss=2.7988
  Ep3/16: train_acc=57.0%  train_loss=1.1932, val_acc=21.9%, val_loss=4.1103
  Ep4/16: train_acc=58.3%  train_loss=1.1581, val_acc=21.4%, val_loss=4.5286
  Ep5/16: train_acc=60.4%  train_loss=1.1229, val_acc=40.9%, val_loss=1.9583
  Ep6/16: train_acc=65.4%  train_loss=0.9754, val_acc=62.2%, val_loss=0.9772
  Ep7/16: train_acc=74.5%  train_loss=0.7195, val_acc=71.5%, val_loss=0.7592
  Ep8/16: train_acc=76.5%  train_loss=0.6662, val_acc=72.2%, val_loss=0.7984
  Ep9/16: train_acc=78.1%  train_loss=0.6280, val_acc=70.8%, val_loss=0.8392
  Ep10/16: train_acc=79.3%  train_loss=0.6079, val_acc=62.9%, val_loss=1.0469
  Ep11/16: train_acc=83.8%  train_loss=0.4910, val_acc=84.7%, val_loss=0.4467
  Ep12/16: train_acc=85.1%  train_loss=0.4571, val_acc=86.3%, val_loss=0.4231
  Ep13/16: train_acc=85.5%  train_loss=0.4421, val_acc=84.9%, val_loss=0.4422
  Ep14/16: train_acc=86.4%  train_loss=0.4238, val_acc=87.5%, val_loss=0

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


  Ep16/16: train_acc=87.3%  train_loss=0.4001, val_acc=89.2%, val_loss=0.3520


0,1
epoch,▁▁▂▂▃▃▄▄▅▅▆▆▇▇██
learning_rate,▇████▂▂▂▂▂▁▁▁▁▁▁
train_acc,▁▃▃▃▄▄▆▆▇▇▇█████
train_loss,█▆▅▅▅▄▃▂▂▂▁▁▁▁▁▁
val_acc,▁▁▂▂▄▆▆▆▆▆██████
val_loss,▆▅▇█▄▂▂▂▂▂▁▁▁▁▁▁

0,1
epoch,16.0
learning_rate,0.0001
train_acc,87.31481
train_loss,0.40008
val_acc,89.18519
val_loss,0.35203



>>> Testing BS=128, LR=1.0e-01, WD=1.0e-02, Schedule=medium
Total samples in folder: 27000, classes: ['AnnualCrop', 'Forest', 'HerbaceousVegetation', 'Highway', 'Industrial', 'Pasture', 'PermanentCrop', 'Residential', 'River', 'SeaLake']
Stratified split sizes: train=21600, val=2700, test=2700
Computed mean: [0.3441525995731354, 0.3800968527793884, 0.407656192779541]
Computed std:  [0.09124630689620972, 0.06498812139034271, 0.055154334753751755]
Train/Val/Test splits: 21600/2700/2700


  Ep1/37: train_acc=44.2%  train_loss=1.6066, val_acc=22.0%, val_loss=6.7116
  Ep2/37: train_acc=59.4%  train_loss=1.1561, val_acc=31.5%, val_loss=2.0007




  Ep3/37: train_acc=60.9%  train_loss=1.1019, val_acc=37.6%, val_loss=1.8959
  Ep4/37: train_acc=59.9%  train_loss=1.1444, val_acc=32.2%, val_loss=2.1462
  Ep5/37: train_acc=65.0%  train_loss=0.9894, val_acc=21.8%, val_loss=4.4077
  Ep6/37: train_acc=69.1%  train_loss=0.8718, val_acc=33.6%, val_loss=2.3775
  Ep7/37: train_acc=66.3%  train_loss=0.9604, val_acc=9.4%, val_loss=7.1151
  Ep8/37: train_acc=65.4%  train_loss=0.9762, val_acc=36.7%, val_loss=2.0710
  Ep9/37: train_acc=59.9%  train_loss=1.1172, val_acc=40.0%, val_loss=1.6335
  Ep10/37: train_acc=58.4%  train_loss=1.1240, val_acc=25.1%, val_loss=3.2034
  Ep11/37: train_acc=63.9%  train_loss=1.0058, val_acc=30.4%, val_loss=3.4481
  Ep12/37: train_acc=63.2%  train_loss=1.0188, val_acc=19.7%, val_loss=6.1237
  Ep13/37: train_acc=58.6%  train_loss=1.1573, val_acc=12.0%, val_loss=4.2154
  Ep14/37: train_acc=56.8%  train_loss=1.1661, val_acc=11.1%, val_loss=6.8875
  Ep15/37: train_acc=50.7%  train_loss=1.2893, val_acc=13.7%, val_loss=4

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


  Ep37/37: train_acc=84.8%  train_loss=0.4711, val_acc=61.2%, val_loss=1.2107


0,1
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
learning_rate,▃▅██████████████████▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_acc,▁▄▄▄▅▅▅▅▄▃▄▄▃▃▂▃▃▄▄▃▃▅▆▆▇▇▇▇▇▇▇▇█████
train_loss,█▅▅▅▄▃▄▄▅▅▄▄▅▅▆▆▅▄▅▆▅▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁
val_acc,▂▃▄▃▂▃▁▄▄▃▃▂▁▁▁▂▁▂▃▂▆▇▇▆▇▅▇█▇█▇██▆▆▇▆
val_loss,▃▁▁▁▂▁▃▁▁▂▂▃▂▃▂▂▃▂▂█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,37.0
learning_rate,0.01
train_acc,84.81944
train_loss,0.47109
val_acc,61.18519
val_loss,1.2107



>>> Testing BS=128, LR=1.0e-01, WD=1.0e-02, Schedule=long
Total samples in folder: 27000, classes: ['AnnualCrop', 'Forest', 'HerbaceousVegetation', 'Highway', 'Industrial', 'Pasture', 'PermanentCrop', 'Residential', 'River', 'SeaLake']
Stratified split sizes: train=21600, val=2700, test=2700
Computed mean: [0.3441525995731354, 0.3800968527793884, 0.407656192779541]
Computed std:  [0.09124630689620972, 0.06498812139034271, 0.055154334753751755]
Train/Val/Test splits: 21600/2700/2700


  Ep1/65: train_acc=42.7%  train_loss=1.6083, val_acc=51.0%, val_loss=1.3646
  Ep2/65: train_acc=59.5%  train_loss=1.1921, val_acc=30.9%, val_loss=2.2272
  Ep3/65: train_acc=64.4%  train_loss=1.0205, val_acc=50.2%, val_loss=1.5006
  Ep4/65: train_acc=64.6%  train_loss=1.0106, val_acc=25.6%, val_loss=3.0075




  Ep5/65: train_acc=66.4%  train_loss=0.9625, val_acc=20.1%, val_loss=3.6842
  Ep6/65: train_acc=67.5%  train_loss=0.9321, val_acc=34.0%, val_loss=2.1947
  Ep7/65: train_acc=67.2%  train_loss=0.9362, val_acc=25.5%, val_loss=4.3641
  Ep8/65: train_acc=66.2%  train_loss=0.9685, val_acc=31.7%, val_loss=2.6308
  Ep9/65: train_acc=58.2%  train_loss=1.1181, val_acc=16.6%, val_loss=5.5139
  Ep10/65: train_acc=62.9%  train_loss=1.0355, val_acc=18.1%, val_loss=6.7130
  Ep11/65: train_acc=59.1%  train_loss=1.1391, val_acc=39.2%, val_loss=1.6384
  Ep12/65: train_acc=61.2%  train_loss=1.0826, val_acc=28.0%, val_loss=2.9579
  Ep13/65: train_acc=60.4%  train_loss=1.0905, val_acc=11.9%, val_loss=14.2652
  Ep14/65: train_acc=54.3%  train_loss=1.1970, val_acc=25.4%, val_loss=1.9681
  Ep15/65: train_acc=58.1%  train_loss=1.1376, val_acc=22.3%, val_loss=3.2624
  Ep16/65: train_acc=51.5%  train_loss=1.2640, val_acc=19.1%, val_loss=4.7405
  Ep17/65: train_acc=54.8%  train_loss=1.2473, val_acc=22.3%, val_lo

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


  Ep65/65: train_acc=69.2%  train_loss=0.8709, val_acc=14.2%, val_loss=3.6229


0,1
epoch,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇██
learning_rate,▃▅▆████████████████████▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_acc,▃▇▇▇█▆▆▆▅▆▄▄▄▃▂▂▁▂▃▃▃▄▆▆▆▇▇▇██▇█████████
train_loss,█▄▂▃▂▃▄▃▄▄▃▅▅▆▇█▇▆▆▆▆▆▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
val_acc,▇▄▇▃▄▄▂▂▄▁▃▃▃▁▃▂▂▁▃▂▃▂▆▆▆▇█▆█▆▇▇▆▄▄▄▂▆▇▂
val_loss,▁▁▁▂▁▁▁▃▁▁▁▁▂▁▁▁█▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,65.0
learning_rate,0.001
train_acc,69.25
train_loss,0.87086
val_acc,14.22222
val_loss,3.62286



>>> Testing BS=128, LR=1.0e-01, WD=1.0e-04, Schedule=short
Total samples in folder: 27000, classes: ['AnnualCrop', 'Forest', 'HerbaceousVegetation', 'Highway', 'Industrial', 'Pasture', 'PermanentCrop', 'Residential', 'River', 'SeaLake']
Stratified split sizes: train=21600, val=2700, test=2700
Computed mean: [0.3441525995731354, 0.3800968527793884, 0.407656192779541]
Computed std:  [0.09124630689620972, 0.06498812139034271, 0.055154334753751755]
Train/Val/Test splits: 21600/2700/2700


  Ep1/16: train_acc=38.1%  train_loss=2.1847, val_acc=11.1%, val_loss=10448.7089




  Ep2/16: train_acc=45.9%  train_loss=1.8155, val_acc=56.2%, val_loss=1.2262
  Ep3/16: train_acc=59.6%  train_loss=1.1339, val_acc=64.7%, val_loss=1.0732
  Ep4/16: train_acc=67.2%  train_loss=0.9207, val_acc=67.3%, val_loss=0.9351
  Ep5/16: train_acc=72.0%  train_loss=0.7965, val_acc=56.3%, val_loss=1.5961
  Ep6/16: train_acc=77.6%  train_loss=0.6454, val_acc=82.9%, val_loss=0.5079
  Ep7/16: train_acc=80.9%  train_loss=0.5317, val_acc=80.5%, val_loss=0.5841
  Ep8/16: train_acc=81.9%  train_loss=0.5151, val_acc=81.9%, val_loss=0.5437
  Ep9/16: train_acc=82.3%  train_loss=0.4960, val_acc=84.4%, val_loss=0.4567
  Ep10/16: train_acc=82.8%  train_loss=0.4812, val_acc=74.2%, val_loss=1.0151
  Ep11/16: train_acc=83.5%  train_loss=0.4573, val_acc=85.3%, val_loss=0.4324
  Ep12/16: train_acc=84.0%  train_loss=0.4483, val_acc=84.9%, val_loss=0.4347
  Ep13/16: train_acc=84.2%  train_loss=0.4412, val_acc=85.4%, val_loss=0.4299
  Ep14/16: train_acc=84.5%  train_loss=0.4345, val_acc=85.5%, val_loss=0

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


  Ep16/16: train_acc=84.4%  train_loss=0.4346, val_acc=85.7%, val_loss=0.4224


0,1
epoch,▁▁▂▂▃▃▄▄▅▅▆▆▇▇██
learning_rate,▇████▂▂▂▂▂▁▁▁▁▁▁
train_acc,▁▂▄▅▆▇▇█████████
train_loss,█▇▄▃▂▂▁▁▁▁▁▁▁▁▁▁
val_acc,▁▅▆▆▅████▇██████
val_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,16.0
learning_rate,0.0001
train_acc,84.40741
train_loss,0.43463
val_acc,85.66667
val_loss,0.4224



>>> Testing BS=128, LR=1.0e-01, WD=1.0e-04, Schedule=medium
Total samples in folder: 27000, classes: ['AnnualCrop', 'Forest', 'HerbaceousVegetation', 'Highway', 'Industrial', 'Pasture', 'PermanentCrop', 'Residential', 'River', 'SeaLake']
Stratified split sizes: train=21600, val=2700, test=2700
Computed mean: [0.3441525995731354, 0.3800968527793884, 0.407656192779541]
Computed std:  [0.09124630689620972, 0.06498812139034271, 0.055154334753751755]
Train/Val/Test splits: 21600/2700/2700


  Ep1/37: train_acc=44.0%  train_loss=1.7020, val_acc=36.6%, val_loss=9.1951
  Ep2/37: train_acc=61.4%  train_loss=1.2114, val_acc=43.3%, val_loss=2.0115




  Ep3/37: train_acc=66.2%  train_loss=1.0039, val_acc=45.9%, val_loss=1.6876
  Ep4/37: train_acc=73.6%  train_loss=0.7864, val_acc=69.5%, val_loss=1.1702
  Ep5/37: train_acc=76.8%  train_loss=0.6768, val_acc=73.3%, val_loss=0.9381
  Ep6/37: train_acc=80.4%  train_loss=0.5743, val_acc=76.5%, val_loss=0.7067
  Ep7/37: train_acc=82.3%  train_loss=0.5254, val_acc=82.1%, val_loss=0.5694
  Ep8/37: train_acc=83.6%  train_loss=0.4794, val_acc=77.6%, val_loss=0.7421
  Ep9/37: train_acc=84.9%  train_loss=0.4460, val_acc=73.3%, val_loss=1.0088
  Ep10/37: train_acc=85.9%  train_loss=0.4078, val_acc=85.8%, val_loss=0.4161
  Ep11/37: train_acc=87.1%  train_loss=0.3703, val_acc=90.2%, val_loss=0.3453
  Ep12/37: train_acc=87.4%  train_loss=0.3682, val_acc=88.9%, val_loss=0.3472
  Ep13/37: train_acc=88.8%  train_loss=0.3297, val_acc=87.0%, val_loss=0.4397
  Ep14/37: train_acc=90.0%  train_loss=0.2982, val_acc=90.4%, val_loss=0.2903
  Ep15/37: train_acc=90.5%  train_loss=0.2860, val_acc=68.9%, val_loss=

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


  Ep37/37: train_acc=96.9%  train_loss=0.0880, val_acc=96.7%, val_loss=0.1019


0,1
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
learning_rate,▃▅██████████████████▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_acc,▁▃▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇█████████████████
train_loss,█▆▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▂▂▅▅▆▆▆▅▇▇▇▇▇▅▇▇▇▇▇█████████████████
val_loss,█▂▂▂▂▁▁▁▂▁▁▁▁▁▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,37.0
learning_rate,0.01
train_acc,96.92593
train_loss,0.08799
val_acc,96.66667
val_loss,0.10193



>>> Testing BS=128, LR=1.0e-01, WD=1.0e-04, Schedule=long
Total samples in folder: 27000, classes: ['AnnualCrop', 'Forest', 'HerbaceousVegetation', 'Highway', 'Industrial', 'Pasture', 'PermanentCrop', 'Residential', 'River', 'SeaLake']
Stratified split sizes: train=21600, val=2700, test=2700
Computed mean: [0.3441525995731354, 0.3800968527793884, 0.407656192779541]
Computed std:  [0.09124630689620972, 0.06498812139034271, 0.055154334753751755]
Train/Val/Test splits: 21600/2700/2700


  Ep1/65: train_acc=43.3%  train_loss=1.5899, val_acc=51.4%, val_loss=1.6902
  Ep2/65: train_acc=63.0%  train_loss=1.1428, val_acc=52.0%, val_loss=2.1884
  Ep3/65: train_acc=62.8%  train_loss=1.1126, val_acc=60.8%, val_loss=1.1820
  Ep4/65: train_acc=73.4%  train_loss=0.7824, val_acc=68.2%, val_loss=1.4160




  Ep5/65: train_acc=73.1%  train_loss=0.7932, val_acc=63.3%, val_loss=1.3238
  Ep6/65: train_acc=79.1%  train_loss=0.6106, val_acc=75.3%, val_loss=0.7291
  Ep7/65: train_acc=79.8%  train_loss=0.5911, val_acc=81.2%, val_loss=0.5652
  Ep8/65: train_acc=83.5%  train_loss=0.4828, val_acc=66.0%, val_loss=1.8607
  Ep9/65: train_acc=85.6%  train_loss=0.4251, val_acc=82.4%, val_loss=0.5446
  Ep10/65: train_acc=86.6%  train_loss=0.3972, val_acc=85.6%, val_loss=0.4184


KeyboardInterrupt: 