In [3]:
# Importing needed modules
import torch
import torch.nn as nn
from tqdm import tqdm
from torch import optim
from timm import data, loss
import torch.nn.functional as F
import torchvision.models as models
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, random_split

In [4]:
# Train augmentation
transform_train = transforms.Compose([
    
    transforms.Resize(224),
    transforms.ColorJitter(0.3, 0.3, 0.3, 0.1),
    transforms.RandomGrayscale(0.2),
    transforms.RandomHorizontalFlip(0.5),
    transforms.RandomRotation(15),
    transforms.RandomAffine(degrees=15, translate=(0.1, 0.1)),
    transforms.RandomPerspective(0.5, 0.2),
    transforms.GaussianBlur(3, (0.1, 0.2)),
    transforms.ToTensor(),
    transforms.Normalize((0.5071,0.4867,0.4408),(0.2675,0.2565,0.2761)),
    transforms.RandomErasing(0.3, (0.05, 0.2))
    
])

In [5]:
# Test augmentation
transform_test = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5071,0.4867,0.4408),(0.2675,0.2565,0.2761))
])

In [6]:
mix = data.Mixup(
    mixup_alpha=0.8,
    cutmix_alpha=0.8,
    prob=1.0,
    switch_prob=0.5,
    mode="batch",
    label_smoothing=0.1,
    num_classes=100
)

In [7]:
# Downloading CIFAR100 dataset
train_dataset = datasets.CIFAR100(root="./data", train=True, download=False, transform=transform_train)
test_dataset = datasets.CIFAR100(root="./data", train=False, download=False, transform=transform_test)

In [8]:
# Split the training dataset into training and validation subsets.
val_ratio = 0.1
train_size = int((1 - val_ratio) * len(train_dataset))
val_size = len(train_dataset) - train_size

train_data, val_data = random_split(train_dataset, [train_size, val_size])

In [9]:
print(f"Train size: {train_size} | Test size: {len(test_dataset)} | Validation size: {val_size}")

Train size: 45000 | Test size: 10000 | Validation size: 5000


In [10]:
# DataLoaders: train (shuffled), val/test (no shuffle)
train_loader = DataLoader(train_data, batch_size=32, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_data, batch_size=32, shuffle=False, num_workers=4, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4, pin_memory=True)

In [11]:
# Train function
def train_one_epoch(model, train_loader, optimizer, loss_fn, device, scheduler=None, mix_fn=None):
    model.train()
    running_loss = 0
    total = 0
    
    for batch_idx, data in enumerate(train_loader):
        
        images, labels = data[0].to(device, non_blocking=True), data[1].to(device, non_blocking=True)

        if mix_fn is not None:
            images, labels = mix_fn(images, labels)

        optimizer.zero_grad()
        
        # Forward
        outputs = model(images)
        loss = loss_fn(outputs, labels)
        
        # Backward
        loss.backward()
        optimizer.step()

        if scheduler is not None:
            scheduler.step()
                    
        running_loss += loss.item() * images.size(0)
        total += labels.size(0)

    avg_loss = running_loss / total
    
    return avg_loss

In [12]:
# Evaluation function
def evaluate(model, test_loader, loss_fn, device):
    model.eval()
    running_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)

            outputs = model(images)
            loss = loss_fn(outputs, labels)

            running_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs, dim=1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    avg_loss = running_loss / total
    accuracy = 100 * (correct / total)

    return avg_loss, accuracy

In [13]:
def unfreeze_layers(model, stage):

    for p in model.parameters():
        p.requires_grad = False

    if stage >= 1:
        for p in model.fc.parameters():
            p.requires_grad = True

    if stage >= 2:
        for p in model.layer4.parameters():
            p.requires_grad = True
        for p in model.layer3.parameters():
            p.requires_grad=True

    if stage >= 3:
        for p in model.parameters():
            p.requires_grad = True


    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total_params = sum(p.numel() for p in model.parameters())

    tqdm.write(f"Trainable params: {trainable_params:,} / {total_params:,}")

In [14]:
resnet50 = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)
num_features = resnet50.fc.in_features
resnet50.fc = nn.Linear(num_features, 100)

In [15]:
device = "cuda" if torch.cuda.is_available() else "cpu"
resnet50.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [16]:
loss_fn_train = loss.SoftTargetCrossEntropy()
loss_fn_eval = nn.CrossEntropyLoss()
optimizer = optim.SGD(filter(lambda p: p.requires_grad, resnet50.parameters()), lr=0.01, weight_decay=5e-4, momentum=0.9, nesterov=True)

In [17]:
best_val_loss = float("inf")
patience_counter = 1
max_patience = 10
current_stage = 1

In [16]:
for epoch in tqdm(range(100), desc="Training Epochs", ncols=100):

    if epoch == 0 and current_stage == 1:

        tqdm.write("Stage 1: Training only FC layer")
        unfreeze_layers(resnet50, current_stage)
        
        scheduler = optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.01, steps_per_epoch=len(train_loader), epochs=10, pct_start=0.3, anneal_strategy="cos")
        current_stage = 2
    
    if epoch == 10 and current_stage == 2:
        
        tqdm.write("Stage 2: Unfreezing layer4 + layer3")
        unfreeze_layers(resnet50, current_stage)

        optimizer = optim.SGD([
            {'params': resnet50.layer3.parameters(), 'lr': 0.0005},
            {'params': resnet50.layer4.parameters(), 'lr': 0.001},
            {'params': resnet50.fc.parameters(), 'lr': 0.005}
        ], momentum=0.9, weight_decay=5e-4, nesterov=True)
        scheduler = optim.lr_scheduler.OneCycleLR(optimizer, max_lr=[0.0005, 0.001, 0.005], steps_per_epoch=len(train_loader), epochs=15, pct_start=0.3, anneal_strategy="cos")
        patience_counter = 0
        current_stage = 3
        
    if epoch == 25 and current_stage == 3:

        tqdm.write("Stage 3: Unfreezing all layers")
        unfreeze_layers(resnet50, current_stage)

        optimizer = optim.SGD([
            {'params': resnet50.layer1.parameters(), 'lr': 0.00005},
            {'params': resnet50.layer2.parameters(), 'lr': 0.0001},
            {'params': resnet50.layer3.parameters(), 'lr': 0.0003},
            {'params': resnet50.layer4.parameters(), 'lr': 0.0008},
            {'params': resnet50.fc.parameters(), 'lr': 0.002}
        ], momentum=0.9, weight_decay=5e-4, nesterov=True)
        scheduler = optim.lr_scheduler.OneCycleLR(optimizer, max_lr=[0.00005, 0.0001, 0.0003, 0.0008, 0.002], steps_per_epoch=len(train_loader), epochs=75, pct_start=0.3, anneal_strategy="cos")
        patience_counter = 0
        
    train_loss = train_one_epoch(resnet50, train_loader, optimizer, loss_fn_train, device, scheduler, mix)
    val_loss, val_acc = evaluate(resnet50, val_loader, loss_fn_eval, device)

    tqdm.write(f"Epoch: [{epoch + 1:03d}/100] | "
    f"Train Loss: {train_loss:.4f} | "
    f"Val Loss: {val_loss:.4f} | Acc: {val_acc:6.2f}%", end=" ")

    lrs = [f"{pg['lr']:.6f}" for pg in optimizer.param_groups]
    tqdm.write(f"| Current LR(s): [{', '.join(lrs)}]")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
        
        torch.save(
            {
                "epoch": epoch,
                "model_state_dict": resnet50.state_dict(),
                "optimizer_state_dict": optimizer.state_dict(),
                "scheduler_state_dict": scheduler.state_dict(),
                "val_loss": val_loss,
                "val_acc": val_acc,
                "train_loss": train_loss,
                "total_params": sum(p.numel() for p in resnet50.parameters())
            },
            f"model_checkpoint.pth"
        )

        tqdm.write(f"Best model saved at epoch [{epoch + 1:03d}/100] | Val Loss: {val_loss:.5f}")
        
    else:
        patience_counter += 1
        tqdm.write(f"No improvement for {patience_counter} epochs.")
        if patience_counter >= max_patience:
            tqdm.write(f"Early stopping triggered in epoch [{epoch + 1:03d}/100] | Best Val Loss: [{best_val_loss:.6f}]")
            break

Training Epochs:   0%|                                                      | 0/100 [00:00<?, ?it/s]

Stage 1: Training only FC layer
Trainable params: 204,900 / 23,712,932


Training Epochs:   0%|                                                      | 0/100 [09:13<?, ?it/s]

Epoch: [001/100] | Train Loss: 4.4893 | Val Loss: 4.1869 | Acc:  19.72% | Current LR(s): [0.002801]


Training Epochs:   1%|▍                                         | 1/100 [09:41<15:58:57, 581.19s/it]

Best model saved at epoch [001/100] | Val Loss: 4.18686


Training Epochs:   1%|▍                                         | 1/100 [18:52<15:58:57, 581.19s/it]

Epoch: [002/100] | Train Loss: 4.1252 | Val Loss: 3.6931 | Acc:  28.60% | Current LR(s): [0.007602]


Training Epochs:   2%|▊                                         | 2/100 [19:16<15:43:09, 577.45s/it]

Best model saved at epoch [002/100] | Val Loss: 3.69313


Training Epochs:   2%|▊                                         | 2/100 [28:25<15:43:09, 577.45s/it]

Epoch: [003/100] | Train Loss: 3.9124 | Val Loss: 3.3788 | Acc:  33.86% | Current LR(s): [0.010000]


Training Epochs:   3%|█▎                                        | 3/100 [28:55<15:35:17, 578.53s/it]

Best model saved at epoch [003/100] | Val Loss: 3.37882


Training Epochs:   3%|█▎                                        | 3/100 [38:04<15:35:17, 578.53s/it]

Epoch: [004/100] | Train Loss: 3.7892 | Val Loss: 3.1834 | Acc:  35.64% | Current LR(s): [0.009504]


Training Epochs:   4%|█▋                                        | 4/100 [38:28<15:21:41, 576.05s/it]

Best model saved at epoch [004/100] | Val Loss: 3.18339


Training Epochs:   4%|█▋                                        | 4/100 [48:35<15:21:41, 576.05s/it]

Epoch: [005/100] | Train Loss: 3.7332 | Val Loss: 3.0662 | Acc:  36.60% | Current LR(s): [0.008116]


Training Epochs:   5%|██                                        | 5/100 [48:52<15:39:26, 593.33s/it]

Best model saved at epoch [005/100] | Val Loss: 3.06622


Training Epochs:   5%|██                                      | 5/100 [1:00:43<15:39:26, 593.33s/it]

Epoch: [006/100] | Train Loss: 3.7090 | Val Loss: 3.0191 | Acc:  35.54% | Current LR(s): [0.006111]


Training Epochs:   6%|██▍                                     | 6/100 [1:01:06<16:44:41, 641.29s/it]

Best model saved at epoch [006/100] | Val Loss: 3.01914


Training Epochs:   6%|██▍                                     | 6/100 [1:11:34<16:44:41, 641.29s/it]

Epoch: [007/100] | Train Loss: 3.6943 | Val Loss: 2.9689 | Acc:  37.64% | Current LR(s): [0.003886]


Training Epochs:   7%|██▊                                     | 7/100 [1:11:49<16:34:55, 641.89s/it]

Best model saved at epoch [007/100] | Val Loss: 2.96891


Training Epochs:   7%|██▊                                     | 7/100 [1:22:06<16:34:55, 641.89s/it]

Epoch: [008/100] | Train Loss: 3.6624 | Val Loss: 2.9411 | Acc:  39.68% | Current LR(s): [0.001881]


Training Epochs:   8%|███▏                                    | 8/100 [1:22:24<16:20:43, 639.60s/it]

Best model saved at epoch [008/100] | Val Loss: 2.94107


Training Epochs:   9%|███▌                                    | 9/100 [1:32:31<15:54:38, 629.43s/it]

Epoch: [009/100] | Train Loss: 3.6488 | Val Loss: 2.9733 | Acc:  38.78% | Current LR(s): [0.000495]
No improvement for 1 epochs.


Training Epochs:  10%|███▉                                   | 10/100 [1:42:56<15:42:05, 628.06s/it]

Epoch: [010/100] | Train Loss: 3.6269 | Val Loss: 2.9907 | Acc:  38.84% | Current LR(s): [0.000000]
No improvement for 2 epochs.
Stage 2: Unfreezing layer4 + layer3
Trainable params: 22,268,004 / 23,712,932


Training Epochs:  10%|███▉                                   | 10/100 [1:54:09<15:42:05, 628.06s/it]

Epoch: [011/100] | Train Loss: 3.5093 | Val Loss: 2.3225 | Acc:  46.36% | Current LR(s): [0.000076, 0.000152, 0.000762]


Training Epochs:  11%|████▎                                  | 11/100 [1:54:59<16:14:54, 657.24s/it]

Best model saved at epoch [011/100] | Val Loss: 2.32248


Training Epochs:  11%|████▎                                  | 11/100 [2:06:03<16:14:54, 657.24s/it]

Epoch: [012/100] | Train Loss: 3.2858 | Val Loss: 1.9450 | Acc:  55.74% | Current LR(s): [0.000218, 0.000437, 0.002184]


Training Epochs:  12%|████▋                                  | 12/100 [2:06:52<16:28:56, 674.28s/it]

Best model saved at epoch [012/100] | Val Loss: 1.94502


Training Epochs:  12%|████▋                                  | 12/100 [2:18:00<16:28:56, 674.28s/it]

Epoch: [013/100] | Train Loss: 3.1333 | Val Loss: 1.7549 | Acc:  59.80% | Current LR(s): [0.000380, 0.000760, 0.003801]


Training Epochs:  13%|█████                                  | 13/100 [2:18:39<16:31:38, 683.89s/it]

Best model saved at epoch [013/100] | Val Loss: 1.75493


Training Epochs:  13%|█████                                  | 13/100 [2:29:39<16:31:38, 683.89s/it]

Epoch: [014/100] | Train Loss: 3.0327 | Val Loss: 1.5926 | Acc:  63.24% | Current LR(s): [0.000486, 0.000971, 0.004856]


Training Epochs:  14%|█████▍                                 | 14/100 [2:30:17<16:26:38, 688.36s/it]

Best model saved at epoch [014/100] | Val Loss: 1.59263


Training Epochs:  15%|█████▊                                 | 15/100 [2:41:38<16:12:07, 686.21s/it]

Epoch: [015/100] | Train Loss: 2.9220 | Val Loss: 1.6032 | Acc:  64.30% | Current LR(s): [0.000497, 0.000994, 0.004972]
No improvement for 1 epochs.


Training Epochs:  15%|█████▊                                 | 15/100 [2:52:49<16:12:07, 686.21s/it]

Epoch: [016/100] | Train Loss: 2.8823 | Val Loss: 1.4377 | Acc:  67.28% | Current LR(s): [0.000475, 0.000950, 0.004752]


Training Epochs:  16%|██████▏                                | 16/100 [2:53:35<16:13:38, 695.46s/it]

Best model saved at epoch [016/100] | Val Loss: 1.43770


Training Epochs:  16%|██████▏                                | 16/100 [3:04:41<16:13:38, 695.46s/it]

Epoch: [017/100] | Train Loss: 2.8185 | Val Loss: 1.4299 | Acc:  68.56% | Current LR(s): [0.000433, 0.000866, 0.004332]


Training Epochs:  17%|██████▋                                | 17/100 [3:05:28<16:09:18, 700.71s/it]

Best model saved at epoch [017/100] | Val Loss: 1.42990


Training Epochs:  17%|██████▋                                | 17/100 [3:16:39<16:09:18, 700.71s/it]

Epoch: [018/100] | Train Loss: 2.7758 | Val Loss: 1.4131 | Acc:  69.06% | Current LR(s): [0.000375, 0.000750, 0.003750]


Training Epochs:  18%|███████                                | 18/100 [3:17:31<16:06:44, 707.37s/it]

Best model saved at epoch [018/100] | Val Loss: 1.41313


Training Epochs:  18%|███████                                | 18/100 [3:28:42<16:06:44, 707.37s/it]

Epoch: [019/100] | Train Loss: 2.7557 | Val Loss: 1.3542 | Acc:  69.30% | Current LR(s): [0.000306, 0.000611, 0.003056]


Training Epochs:  19%|███████▍                               | 19/100 [3:29:32<16:00:15, 711.30s/it]

Best model saved at epoch [019/100] | Val Loss: 1.35416


Training Epochs:  20%|███████▊                               | 20/100 [3:40:44<15:32:55, 699.69s/it]

Epoch: [020/100] | Train Loss: 2.7304 | Val Loss: 1.3589 | Acc:  70.46% | Current LR(s): [0.000231, 0.000463, 0.002313]
No improvement for 1 epochs.


Training Epochs:  20%|███████▊                               | 20/100 [3:52:20<15:32:55, 699.69s/it]

Epoch: [021/100] | Train Loss: 2.7140 | Val Loss: 1.3202 | Acc:  71.40% | Current LR(s): [0.000159, 0.000317, 0.001586]


Training Epochs:  21%|████████▏                              | 21/100 [3:53:09<15:39:05, 713.23s/it]

Best model saved at epoch [021/100] | Val Loss: 1.32023


Training Epochs:  21%|████████▏                              | 21/100 [4:05:02<15:39:05, 713.23s/it]

Epoch: [022/100] | Train Loss: 2.7137 | Val Loss: 1.2924 | Acc:  71.88% | Current LR(s): [0.000094, 0.000188, 0.000941]


Training Epochs:  22%|████████▌                              | 22/100 [4:05:49<15:45:17, 727.15s/it]

Best model saved at epoch [022/100] | Val Loss: 1.29242


Training Epochs:  22%|████████▌                              | 22/100 [4:17:39<15:45:17, 727.15s/it]

Epoch: [023/100] | Train Loss: 2.6700 | Val Loss: 1.2440 | Acc:  72.34% | Current LR(s): [0.000043, 0.000087, 0.000434]


Training Epochs:  23%|████████▉                              | 23/100 [4:18:13<15:39:43, 732.26s/it]

Best model saved at epoch [023/100] | Val Loss: 1.24402


Training Epochs:  24%|█████████▎                             | 24/100 [4:29:59<15:17:48, 724.58s/it]

Epoch: [024/100] | Train Loss: 2.6432 | Val Loss: 1.3184 | Acc:  71.24% | Current LR(s): [0.000011, 0.000022, 0.000111]
No improvement for 1 epochs.


Training Epochs:  25%|█████████▊                             | 25/100 [4:41:43<14:57:55, 718.34s/it]

Epoch: [025/100] | Train Loss: 2.6433 | Val Loss: 1.3016 | Acc:  71.82% | Current LR(s): [0.000000, 0.000000, 0.000000]
No improvement for 2 epochs.
Stage 3: Unfreezing all layers
Trainable params: 23,712,932 / 23,712,932


Training Epochs:  26%|██████████▏                            | 26/100 [4:56:52<15:56:18, 775.39s/it]

Epoch: [026/100] | Train Loss: 2.6425 | Val Loss: 1.3472 | Acc:  71.54% | Current LR(s): [0.000002, 0.000004, 0.000013, 0.000036, 0.000089]
No improvement for 1 epochs.


Training Epochs:  27%|██████████▌                            | 27/100 [5:11:58<16:31:18, 814.77s/it]

Epoch: [027/100] | Train Loss: 2.6300 | Val Loss: 1.2757 | Acc:  73.36% | Current LR(s): [0.000003, 0.000006, 0.000018, 0.000047, 0.000117]
No improvement for 2 epochs.


Training Epochs:  27%|██████████▌                            | 27/100 [5:27:05<16:31:18, 814.77s/it]

Epoch: [028/100] | Train Loss: 2.6551 | Val Loss: 1.2145 | Acc:  72.78% | Current LR(s): [0.000004, 0.000008, 0.000024, 0.000065, 0.000163]


Training Epochs:  28%|██████████▉                            | 28/100 [5:27:48<17:06:23, 855.32s/it]

Best model saved at epoch [028/100] | Val Loss: 1.21453


Training Epochs:  29%|███████████▎                           | 29/100 [5:42:59<17:11:37, 871.79s/it]

Epoch: [029/100] | Train Loss: 2.6222 | Val Loss: 1.2398 | Acc:  72.62% | Current LR(s): [0.000006, 0.000011, 0.000034, 0.000090, 0.000226]
No improvement for 1 epochs.


Training Epochs:  30%|███████████▋                           | 30/100 [5:58:24<17:15:45, 887.80s/it]

Epoch: [030/100] | Train Loss: 2.6219 | Val Loss: 1.2154 | Acc:  72.70% | Current LR(s): [0.000008, 0.000015, 0.000046, 0.000122, 0.000305]
No improvement for 2 epochs.


Training Epochs:  31%|████████████                           | 31/100 [6:13:48<17:13:32, 898.73s/it]

Epoch: [031/100] | Train Loss: 2.6076 | Val Loss: 1.2383 | Acc:  73.30% | Current LR(s): [0.000010, 0.000020, 0.000060, 0.000159, 0.000398]
No improvement for 3 epochs.


Training Epochs:  32%|████████████▍                          | 32/100 [6:29:03<17:04:07, 903.64s/it]

Epoch: [032/100] | Train Loss: 2.6149 | Val Loss: 1.2146 | Acc:  74.10% | Current LR(s): [0.000013, 0.000025, 0.000075, 0.000201, 0.000503]
No improvement for 4 epochs.


Training Epochs:  33%|████████████▊                          | 33/100 [6:44:11<16:50:21, 904.79s/it]

Epoch: [033/100] | Train Loss: 2.5874 | Val Loss: 1.2542 | Acc:  74.30% | Current LR(s): [0.000015, 0.000031, 0.000093, 0.000248, 0.000619]
No improvement for 5 epochs.


Training Epochs:  33%|████████████▊                          | 33/100 [6:59:25<16:50:21, 904.79s/it]

Epoch: [034/100] | Train Loss: 2.5675 | Val Loss: 1.1820 | Acc:  74.42% | Current LR(s): [0.000019, 0.000037, 0.000112, 0.000297, 0.000743]


Training Epochs:  34%|█████████████▎                         | 34/100 [7:00:07<16:52:22, 920.34s/it]

Best model saved at epoch [034/100] | Val Loss: 1.18196


Training Epochs:  35%|█████████████▋                         | 35/100 [7:15:22<16:35:09, 918.61s/it]

Epoch: [035/100] | Train Loss: 2.5549 | Val Loss: 1.1942 | Acc:  74.00% | Current LR(s): [0.000022, 0.000044, 0.000131, 0.000349, 0.000873]
No improvement for 1 epochs.


Training Epochs:  36%|██████████████                         | 36/100 [7:30:32<16:17:11, 916.12s/it]

Epoch: [036/100] | Train Loss: 2.5407 | Val Loss: 1.1883 | Acc:  74.34% | Current LR(s): [0.000025, 0.000050, 0.000151, 0.000403, 0.001007]
No improvement for 2 epochs.


Training Epochs:  36%|██████████████                         | 36/100 [7:45:41<16:17:11, 916.12s/it]

Epoch: [037/100] | Train Loss: 2.5293 | Val Loss: 1.1146 | Acc:  74.76% | Current LR(s): [0.000029, 0.000057, 0.000171, 0.000456, 0.001140]


Training Epochs:  37%|██████████████▍                        | 37/100 [7:46:27<16:14:15, 927.87s/it]

Best model saved at epoch [037/100] | Val Loss: 1.11461


Training Epochs:  37%|██████████████▍                        | 37/100 [8:01:52<16:14:15, 927.87s/it]

Epoch: [038/100] | Train Loss: 2.5296 | Val Loss: 1.1125 | Acc:  75.58% | Current LR(s): [0.000032, 0.000064, 0.000191, 0.000509, 0.001272]


Training Epochs:  38%|██████████████▊                        | 38/100 [8:02:37<16:11:37, 940.29s/it]

Best model saved at epoch [038/100] | Val Loss: 1.11249


Training Epochs:  39%|███████████████▏                       | 39/100 [8:17:54<15:48:54, 933.36s/it]

Epoch: [039/100] | Train Loss: 2.5095 | Val Loss: 1.1250 | Acc:  75.44% | Current LR(s): [0.000035, 0.000070, 0.000210, 0.000560, 0.001400]
No improvement for 1 epochs.


Training Epochs:  40%|███████████████▌                       | 40/100 [8:33:15<15:29:36, 929.60s/it]

Epoch: [040/100] | Train Loss: 2.5283 | Val Loss: 1.1797 | Acc:  75.16% | Current LR(s): [0.000038, 0.000076, 0.000228, 0.000608, 0.001520]
No improvement for 2 epochs.


Training Epochs:  41%|███████████████▉                       | 41/100 [8:48:34<15:11:01, 926.46s/it]

Epoch: [041/100] | Train Loss: 2.4885 | Val Loss: 1.1338 | Acc:  76.36% | Current LR(s): [0.000041, 0.000082, 0.000245, 0.000652, 0.001631]
No improvement for 3 epochs.


Training Epochs:  42%|████████████████▍                      | 42/100 [9:03:40<14:49:35, 920.27s/it]

Epoch: [042/100] | Train Loss: 2.4731 | Val Loss: 1.1129 | Acc:  76.60% | Current LR(s): [0.000043, 0.000087, 0.000260, 0.000692, 0.001731]
No improvement for 4 epochs.


Training Epochs:  43%|████████████████▊                      | 43/100 [9:18:42<14:29:13, 914.97s/it]

Epoch: [043/100] | Train Loss: 2.4764 | Val Loss: 1.1363 | Acc:  76.04% | Current LR(s): [0.000045, 0.000091, 0.000273, 0.000727, 0.001817]
No improvement for 5 epochs.


Training Epochs:  43%|████████████████▊                      | 43/100 [9:33:50<14:29:13, 914.97s/it]

Epoch: [044/100] | Train Loss: 2.4547 | Val Loss: 1.0447 | Acc:  77.06% | Current LR(s): [0.000047, 0.000094, 0.000283, 0.000755, 0.001888]


Training Epochs:  44%|█████████████████▏                     | 44/100 [9:34:23<14:21:06, 922.62s/it]

Best model saved at epoch [044/100] | Val Loss: 1.04471


Training Epochs:  45%|█████████████████▌                     | 45/100 [9:49:37<14:03:21, 920.02s/it]

Epoch: [045/100] | Train Loss: 2.4364 | Val Loss: 1.0693 | Acc:  76.44% | Current LR(s): [0.000049, 0.000097, 0.000291, 0.000777, 0.001942]
No improvement for 1 epochs.


Training Epochs:  46%|█████████████████▍                    | 46/100 [10:05:03<13:49:46, 921.97s/it]

Epoch: [046/100] | Train Loss: 2.4108 | Val Loss: 1.0825 | Acc:  77.22% | Current LR(s): [0.000049, 0.000099, 0.000297, 0.000792, 0.001979]
No improvement for 2 epochs.


Training Epochs:  47%|█████████████████▊                    | 47/100 [10:20:44<13:39:32, 927.78s/it]

Epoch: [047/100] | Train Loss: 2.3959 | Val Loss: 1.0665 | Acc:  77.58% | Current LR(s): [0.000050, 0.000100, 0.000300, 0.000799, 0.001998]
No improvement for 3 epochs.


Training Epochs:  47%|█████████████████▊                    | 47/100 [10:36:23<13:39:32, 927.78s/it]

Epoch: [048/100] | Train Loss: 2.4040 | Val Loss: 1.0288 | Acc:  78.06% | Current LR(s): [0.000050, 0.000100, 0.000300, 0.000800, 0.002000]


Training Epochs:  48%|██████████████████▏                   | 48/100 [10:37:01<13:36:44, 942.40s/it]

Best model saved at epoch [048/100] | Val Loss: 1.02882


Training Epochs:  49%|██████████████████▌                   | 49/100 [10:52:44<13:21:19, 942.74s/it]

Epoch: [049/100] | Train Loss: 2.4065 | Val Loss: 1.0530 | Acc:  77.08% | Current LR(s): [0.000050, 0.000100, 0.000299, 0.000798, 0.001996]
No improvement for 1 epochs.


Training Epochs:  49%|██████████████████▌                   | 49/100 [11:08:37<13:21:19, 942.74s/it]

Epoch: [050/100] | Train Loss: 2.3980 | Val Loss: 1.0060 | Acc:  78.22% | Current LR(s): [0.000050, 0.000099, 0.000298, 0.000796, 0.001989]


Training Epochs:  50%|███████████████████                   | 50/100 [11:09:13<13:17:00, 956.42s/it]

Best model saved at epoch [050/100] | Val Loss: 1.00600


Training Epochs:  50%|███████████████████                   | 50/100 [11:24:54<13:17:00, 956.42s/it]

Epoch: [051/100] | Train Loss: 2.3886 | Val Loss: 0.9876 | Acc:  77.96% | Current LR(s): [0.000049, 0.000099, 0.000297, 0.000791, 0.001978]


Training Epochs:  51%|███████████████████▍                  | 51/100 [11:25:33<13:06:56, 963.61s/it]

Best model saved at epoch [051/100] | Val Loss: 0.98757


Training Epochs:  52%|███████████████████▏                 | 52/100 [11:43:58<13:24:42, 1005.88s/it]

Epoch: [052/100] | Train Loss: 2.3898 | Val Loss: 0.9966 | Acc:  78.74% | Current LR(s): [0.000049, 0.000098, 0.000295, 0.000786, 0.001964]
No improvement for 1 epochs.


Training Epochs:  53%|████████████████████▏                 | 53/100 [11:59:53<12:56:00, 990.64s/it]

Epoch: [053/100] | Train Loss: 2.3844 | Val Loss: 1.0227 | Acc:  78.90% | Current LR(s): [0.000049, 0.000097, 0.000292, 0.000779, 0.001946]
No improvement for 2 epochs.


Training Epochs:  54%|████████████████████▌                 | 54/100 [12:15:41<12:29:38, 977.79s/it]

Epoch: [054/100] | Train Loss: 2.3495 | Val Loss: 0.9940 | Acc:  78.14% | Current LR(s): [0.000048, 0.000096, 0.000289, 0.000770, 0.001925]
No improvement for 3 epochs.


Training Epochs:  55%|████████████████████▉                 | 55/100 [12:31:25<12:05:51, 967.81s/it]

Epoch: [055/100] | Train Loss: 2.3653 | Val Loss: 1.0683 | Acc:  77.66% | Current LR(s): [0.000048, 0.000095, 0.000285, 0.000760, 0.001901]
No improvement for 4 epochs.


Training Epochs:  55%|████████████████████▉                 | 55/100 [12:47:11<12:05:51, 967.81s/it]

Epoch: [056/100] | Train Loss: 2.3571 | Val Loss: 0.9729 | Acc:  78.62% | Current LR(s): [0.000047, 0.000094, 0.000281, 0.000749, 0.001873]


Training Epochs:  56%|█████████████████████▎                | 56/100 [12:47:59<11:55:31, 975.71s/it]

Best model saved at epoch [056/100] | Val Loss: 0.97290


Training Epochs:  57%|█████████████████████▋                | 57/100 [13:03:44<11:32:40, 966.53s/it]

Epoch: [057/100] | Train Loss: 2.3525 | Val Loss: 1.0304 | Acc:  78.94% | Current LR(s): [0.000046, 0.000092, 0.000276, 0.000737, 0.001843]
No improvement for 1 epochs.


Training Epochs:  58%|██████████████████████                | 58/100 [13:19:25<11:11:07, 958.76s/it]

Epoch: [058/100] | Train Loss: 2.3516 | Val Loss: 1.0017 | Acc:  79.68% | Current LR(s): [0.000045, 0.000090, 0.000271, 0.000724, 0.001809]
No improvement for 2 epochs.


Training Epochs:  59%|██████████████████████▍               | 59/100 [13:35:03<10:50:59, 952.68s/it]

Epoch: [059/100] | Train Loss: 2.3263 | Val Loss: 1.0246 | Acc:  78.46% | Current LR(s): [0.000044, 0.000089, 0.000266, 0.000709, 0.001772]
No improvement for 3 epochs.


Training Epochs:  60%|██████████████████████▊               | 60/100 [13:50:45<10:32:54, 949.36s/it]

Epoch: [060/100] | Train Loss: 2.3179 | Val Loss: 1.0810 | Acc:  78.58% | Current LR(s): [0.000043, 0.000087, 0.000260, 0.000693, 0.001733]
No improvement for 4 epochs.


Training Epochs:  61%|███████████████████████▏              | 61/100 [14:06:50<10:20:07, 954.04s/it]

Epoch: [061/100] | Train Loss: 2.3090 | Val Loss: 1.0115 | Acc:  79.18% | Current LR(s): [0.000042, 0.000085, 0.000254, 0.000676, 0.001691]
No improvement for 5 epochs.


Training Epochs:  62%|███████████████████████▌              | 62/100 [14:23:14<10:09:49, 962.89s/it]

Epoch: [062/100] | Train Loss: 2.3075 | Val Loss: 1.0850 | Acc:  78.96% | Current LR(s): [0.000041, 0.000082, 0.000247, 0.000659, 0.001647]
No improvement for 6 epochs.


Training Epochs:  63%|████████████████████████▌              | 63/100 [14:39:34<9:57:01, 968.15s/it]

Epoch: [063/100] | Train Loss: 2.2986 | Val Loss: 0.9976 | Acc:  78.74% | Current LR(s): [0.000040, 0.000080, 0.000240, 0.000640, 0.001600]
No improvement for 7 epochs.


Training Epochs:  64%|████████████████████████▉              | 64/100 [14:56:05<9:44:55, 974.87s/it]

Epoch: [064/100] | Train Loss: 2.2875 | Val Loss: 1.0269 | Acc:  79.38% | Current LR(s): [0.000039, 0.000078, 0.000233, 0.000620, 0.001551]
No improvement for 8 epochs.


Training Epochs:  65%|█████████████████████████▎             | 65/100 [15:12:38<9:31:55, 980.45s/it]

Epoch: [065/100] | Train Loss: 2.3004 | Val Loss: 1.0223 | Acc:  79.34% | Current LR(s): [0.000037, 0.000075, 0.000225, 0.000600, 0.001500]
No improvement for 9 epochs.


Training Epochs:  65%|█████████████████████████▎             | 65/100 [15:33:08<8:22:27, 861.36s/it]


Epoch: [066/100] | Train Loss: 2.2987 | Val Loss: 1.0014 | Acc:  79.66% | Current LR(s): [0.000036, 0.000072, 0.000217, 0.000579, 0.001447]
No improvement for 10 epochs.
Early stopping triggered in epoch [066/100] | Best Val Loss: [0.972897]


In [22]:
# Final Evaluation
print("Loading best model...")

final_model = models.resnet50(weights="IMAGENET1K_V2")
num_features = final_model.fc.in_features
final_model.fc = nn.Linear(num_features, 100)

checkpoint = torch.load("model_checkpoint.pth")

final_model.load_state_dict(checkpoint["model_state_dict"])
final_model.to(device)

test_loss, test_acc = evaluate(final_model, test_loader, loss_fn_eval, device)
print(f"Test loss: {test_loss:.6f} | Test acc: {test_acc:.2f}%")

Loading best model...
Test loss: 0.762229 | Test acc: 84.35%
