# Imports and Data Preparation

In [1]:
import os
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch.quantization import (
    prepare_qat,
    convert,
    enable_observer,
    disable_observer,
    enable_fake_quant,
    disable_fake_quant,
)
from torchvision.models.quantization import resnet50
import numpy as np
from tqdm import tqdm

In [2]:
def get_cifar100_data_loaders(data_dir='./data'):
    train_tf = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)),
    ])
    test_tf = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)),
    ])

    train_ds = torchvision.datasets.CIFAR100(data_dir, train=True, download=True, transform=train_tf)
    test_ds = torchvision.datasets.CIFAR100(data_dir, train=False, download=True, transform=test_tf)

    train_size = len(train_ds)
    indices = list(range(train_size))
    np.random.seed(42)
    np.random.shuffle(indices)
    split = int(0.1 * train_size)
    train_idx, val_idx = indices[split:], indices[:split]

    train_loader = DataLoader(train_ds, batch_size=128, sampler=SubsetRandomSampler(train_idx), num_workers=4, pin_memory=True)
    val_loader = DataLoader(train_ds, batch_size=256, sampler=SubsetRandomSampler(val_idx), num_workers=4, pin_memory=True)
    test_loader = DataLoader(test_ds, batch_size=8, shuffle=False, num_workers=4, pin_memory=False)

    return train_loader, val_loader, test_loader, train_idx

def cutmix(data, targets, alpha=1.0):
    batch_size = data.size(0)
    indices = torch.randperm(batch_size)
    shuffled_data = data[indices]
    shuffled_targets = targets[indices]

    lam = np.random.beta(alpha, alpha)
    bbx1, bby1, bbx2, bby2 = rand_bbox(data.size(), lam)
    data[:, :, bbx1:bbx2, bby1:bby2] = shuffled_data[:, :, bbx1:bbx2, bby1:bby2]
    lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (data.size(-1) * data.size(-2)))

    return data, targets, shuffled_targets, lam

def rand_bbox(size, lam):
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = int(W * cut_rat)
    cut_h = int(H * cut_rat)

    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2

train_loader, val_loader, test_loader, train_idx = get_cifar100_data_loaders()

Files already downloaded and verified
Files already downloaded and verified


# fp32 with quantized resnet model

In [6]:

def evaluate_accuracy_and_latency(model, loader, device, num_batches=None, desc="Evaluating"):
    model.eval()
    model.to(device)
    correct = total = 0
    latencies = []
    with torch.no_grad():
        for i, (imgs, labels) in enumerate(tqdm(loader, desc=desc, leave=False)):
            if num_batches is not None and i >= num_batches:
                break
            imgs, labels = imgs.to(device), labels.to(device)
            start_time = time.time()
            outputs = model(imgs)
            latency = (time.time() - start_time) * 1000  # Latency in ms/batch
            latencies.append(latency)
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    accuracy = 100.0 * correct / total
    avg_latency = sum(latencies) / len(latencies)
    return accuracy, avg_latency

# Main training loop (same structure as provided, with CutMix logic from QAT code)
def main():
    train_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    assert train_device.type == 'cuda', "GPU not available for training/validation"
    test_device = torch.device('cpu')

    # train_loader, val_loader, test_loader, train_idx = get_cifar100_data_loaders()

    model = resnet50(pretrained=False, quantize=False, num_classes=100)
    # Modify conv1 and maxpool for 32x32 inputs
    model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=0, bias=False)
    model.maxpool = nn.Identity()  # Remove maxpool to preserve spatial dimensions
    model.to(train_device)

    optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    best_val_acc = 0.0
    best_model_path = 'best_fp32_resnet50_cifar100_32x32_100_cutmix.pth'
    epochs = 100
    patience = 20
    patience_counter = 0
    cutmix_prob = 0.5

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for imgs, targets in tqdm(train_loader, desc=f"Training Epoch {epoch+1}/{epochs}", leave=True):
            imgs, targets = imgs.to(train_device), targets.to(train_device)
            optimizer.zero_grad()
            if np.random.random() < cutmix_prob:
                imgs, targets_a, targets_b, lam = cutmix(imgs, targets, alpha=1.0)
                outputs = model(imgs)
                loss = lam * criterion(outputs, targets_a) + (1 - lam) * criterion(outputs, targets_b)
            else:
                outputs = model(imgs)
                loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * imgs.size(0)

        val_acc, _ = evaluate_accuracy_and_latency(model, val_loader, train_device, desc="Validating")
        print(f"Epoch {epoch+1}/{epochs} – Loss: {running_loss/len(train_idx):.4f}, Val Accuracy: {val_acc:.2f}%")

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), best_model_path)
            print(f"Saved best FP32 model with Val Accuracy: {best_val_acc:.2f}%")
            patience_counter = 0
        else:
            patience_counter += 1
            print(f"No improvement in validation accuracy. Patience counter: {patience_counter}/{patience}")

        if patience_counter >= patience:
            print(f"Early stopping triggered after {epoch+1} epochs.")
            break

        scheduler.step()

    model.load_state_dict(torch.load(best_model_path))
    test_acc, avg_latency = evaluate_accuracy_and_latency(model, test_loader, test_device, desc="Testing FP32")
    print(f"\nBest FP32 Model – Test Accuracy: {test_acc:.2f}%")
    print(f"Average FP32 Inference Latency (CPU): {avg_latency:.2f} ms/batch")

if __name__ == '__main__':
    main()

Training Epoch 1/100: 100%|██████████| 352/352 [00:19<00:00, 17.63it/s]
                                                           

Epoch 1/100 – Loss: 4.3153, Val Accuracy: 13.44%
Saved best FP32 model with Val Accuracy: 13.44%


Training Epoch 2/100: 100%|██████████| 352/352 [00:20<00:00, 17.53it/s]
                                                           

Epoch 2/100 – Loss: 3.9347, Val Accuracy: 18.54%
Saved best FP32 model with Val Accuracy: 18.54%


Training Epoch 3/100: 100%|██████████| 352/352 [00:19<00:00, 17.67it/s]
                                                           

Epoch 3/100 – Loss: 3.6401, Val Accuracy: 26.36%
Saved best FP32 model with Val Accuracy: 26.36%


Training Epoch 4/100: 100%|██████████| 352/352 [00:19<00:00, 17.62it/s]
                                                           

Epoch 4/100 – Loss: 3.4129, Val Accuracy: 35.50%
Saved best FP32 model with Val Accuracy: 35.50%


Training Epoch 5/100: 100%|██████████| 352/352 [00:20<00:00, 17.46it/s]
                                                           

Epoch 5/100 – Loss: 3.1587, Val Accuracy: 37.34%
Saved best FP32 model with Val Accuracy: 37.34%


Training Epoch 6/100: 100%|██████████| 352/352 [00:20<00:00, 17.58it/s]
                                                           

Epoch 6/100 – Loss: 3.0546, Val Accuracy: 42.62%
Saved best FP32 model with Val Accuracy: 42.62%


Training Epoch 7/100: 100%|██████████| 352/352 [00:20<00:00, 17.45it/s]
                                                           

Epoch 7/100 – Loss: 2.9393, Val Accuracy: 46.62%
Saved best FP32 model with Val Accuracy: 46.62%


Training Epoch 8/100: 100%|██████████| 352/352 [00:19<00:00, 17.64it/s]
                                                           

Epoch 8/100 – Loss: 2.8389, Val Accuracy: 48.34%
Saved best FP32 model with Val Accuracy: 48.34%


Training Epoch 9/100: 100%|██████████| 352/352 [00:20<00:00, 17.48it/s]
                                                           

Epoch 9/100 – Loss: 2.8216, Val Accuracy: 49.78%
Saved best FP32 model with Val Accuracy: 49.78%


Training Epoch 10/100: 100%|██████████| 352/352 [00:19<00:00, 17.62it/s]
                                                           

Epoch 10/100 – Loss: 2.6850, Val Accuracy: 52.48%
Saved best FP32 model with Val Accuracy: 52.48%


Training Epoch 11/100: 100%|██████████| 352/352 [00:20<00:00, 17.51it/s]
                                                           

Epoch 11/100 – Loss: 2.5140, Val Accuracy: 56.72%
Saved best FP32 model with Val Accuracy: 56.72%


Training Epoch 12/100: 100%|██████████| 352/352 [00:20<00:00, 17.58it/s]
                                                           

Epoch 12/100 – Loss: 2.5248, Val Accuracy: 54.76%
No improvement in validation accuracy. Patience counter: 1/20


Training Epoch 13/100: 100%|██████████| 352/352 [00:20<00:00, 17.52it/s]
                                                           

Epoch 13/100 – Loss: 2.4979, Val Accuracy: 58.66%
Saved best FP32 model with Val Accuracy: 58.66%


Training Epoch 14/100: 100%|██████████| 352/352 [00:19<00:00, 17.63it/s]
                                                           

Epoch 14/100 – Loss: 2.4577, Val Accuracy: 59.52%
Saved best FP32 model with Val Accuracy: 59.52%


Training Epoch 15/100: 100%|██████████| 352/352 [00:19<00:00, 17.62it/s]
                                                           

Epoch 15/100 – Loss: 2.4030, Val Accuracy: 60.26%
Saved best FP32 model with Val Accuracy: 60.26%


Training Epoch 16/100: 100%|██████████| 352/352 [00:20<00:00, 17.47it/s]
                                                           

Epoch 16/100 – Loss: 2.3608, Val Accuracy: 63.32%
Saved best FP32 model with Val Accuracy: 63.32%


Training Epoch 17/100: 100%|██████████| 352/352 [00:19<00:00, 17.65it/s]
                                                           

Epoch 17/100 – Loss: 2.2525, Val Accuracy: 64.02%
Saved best FP32 model with Val Accuracy: 64.02%


Training Epoch 18/100: 100%|██████████| 352/352 [00:20<00:00, 17.54it/s]
                                                           

Epoch 18/100 – Loss: 2.2988, Val Accuracy: 61.34%
No improvement in validation accuracy. Patience counter: 1/20


Training Epoch 19/100: 100%|██████████| 352/352 [00:19<00:00, 17.67it/s]
                                                           

Epoch 19/100 – Loss: 2.3230, Val Accuracy: 64.78%
Saved best FP32 model with Val Accuracy: 64.78%


Training Epoch 20/100: 100%|██████████| 352/352 [00:20<00:00, 17.54it/s]
                                                           

Epoch 20/100 – Loss: 2.1720, Val Accuracy: 63.62%
No improvement in validation accuracy. Patience counter: 1/20


Training Epoch 21/100: 100%|██████████| 352/352 [00:19<00:00, 17.61it/s]
                                                           

Epoch 21/100 – Loss: 2.0947, Val Accuracy: 66.24%
Saved best FP32 model with Val Accuracy: 66.24%


Training Epoch 22/100: 100%|██████████| 352/352 [00:19<00:00, 17.60it/s]
                                                           

Epoch 22/100 – Loss: 2.1347, Val Accuracy: 67.54%
Saved best FP32 model with Val Accuracy: 67.54%


Training Epoch 23/100: 100%|██████████| 352/352 [00:20<00:00, 17.52it/s]
                                                           

Epoch 23/100 – Loss: 2.0554, Val Accuracy: 67.86%
Saved best FP32 model with Val Accuracy: 67.86%


Training Epoch 24/100: 100%|██████████| 352/352 [00:19<00:00, 17.69it/s]
                                                           

Epoch 24/100 – Loss: 2.0416, Val Accuracy: 68.96%
Saved best FP32 model with Val Accuracy: 68.96%


Training Epoch 25/100: 100%|██████████| 352/352 [00:20<00:00, 17.52it/s]
                                                           

Epoch 25/100 – Loss: 2.0106, Val Accuracy: 67.68%
No improvement in validation accuracy. Patience counter: 1/20


Training Epoch 26/100: 100%|██████████| 352/352 [00:19<00:00, 17.66it/s]
                                                           

Epoch 26/100 – Loss: 2.0431, Val Accuracy: 69.46%
Saved best FP32 model with Val Accuracy: 69.46%


Training Epoch 27/100: 100%|██████████| 352/352 [00:20<00:00, 17.53it/s]
                                                           

Epoch 27/100 – Loss: 1.9424, Val Accuracy: 69.02%
No improvement in validation accuracy. Patience counter: 1/20


Training Epoch 28/100: 100%|██████████| 352/352 [00:19<00:00, 17.69it/s]
                                                           

Epoch 28/100 – Loss: 1.9593, Val Accuracy: 69.04%
No improvement in validation accuracy. Patience counter: 2/20


Training Epoch 29/100: 100%|██████████| 352/352 [00:19<00:00, 17.70it/s]
                                                           

Epoch 29/100 – Loss: 1.9024, Val Accuracy: 68.46%
No improvement in validation accuracy. Patience counter: 3/20


Training Epoch 30/100: 100%|██████████| 352/352 [00:20<00:00, 17.52it/s]
                                                           

Epoch 30/100 – Loss: 1.9016, Val Accuracy: 70.48%
Saved best FP32 model with Val Accuracy: 70.48%


Training Epoch 31/100: 100%|██████████| 352/352 [00:20<00:00, 17.58it/s]
                                                           

Epoch 31/100 – Loss: 1.8306, Val Accuracy: 70.26%
No improvement in validation accuracy. Patience counter: 1/20


Training Epoch 32/100: 100%|██████████| 352/352 [00:20<00:00, 17.49it/s]
                                                           

Epoch 32/100 – Loss: 1.8713, Val Accuracy: 70.86%
Saved best FP32 model with Val Accuracy: 70.86%


Training Epoch 33/100: 100%|██████████| 352/352 [00:19<00:00, 17.64it/s]
                                                           

Epoch 33/100 – Loss: 1.8177, Val Accuracy: 70.92%
Saved best FP32 model with Val Accuracy: 70.92%


Training Epoch 34/100: 100%|██████████| 352/352 [00:19<00:00, 17.65it/s]
                                                           

Epoch 34/100 – Loss: 1.7299, Val Accuracy: 71.34%
Saved best FP32 model with Val Accuracy: 71.34%


Training Epoch 35/100: 100%|██████████| 352/352 [00:20<00:00, 17.46it/s]
                                                           

Epoch 35/100 – Loss: 1.7888, Val Accuracy: 70.54%
No improvement in validation accuracy. Patience counter: 1/20


Training Epoch 36/100: 100%|██████████| 352/352 [00:19<00:00, 17.62it/s]
                                                           

Epoch 36/100 – Loss: 1.7638, Val Accuracy: 71.08%
No improvement in validation accuracy. Patience counter: 2/20


Training Epoch 37/100: 100%|██████████| 352/352 [00:20<00:00, 17.56it/s]
                                                           

Epoch 37/100 – Loss: 1.7364, Val Accuracy: 71.64%
Saved best FP32 model with Val Accuracy: 71.64%


Training Epoch 38/100: 100%|██████████| 352/352 [00:19<00:00, 17.68it/s]
                                                           

Epoch 38/100 – Loss: 1.7173, Val Accuracy: 71.58%
No improvement in validation accuracy. Patience counter: 1/20


Training Epoch 39/100: 100%|██████████| 352/352 [00:20<00:00, 17.54it/s]
                                                           

Epoch 39/100 – Loss: 1.6887, Val Accuracy: 71.84%
Saved best FP32 model with Val Accuracy: 71.84%


Training Epoch 40/100: 100%|██████████| 352/352 [00:19<00:00, 17.70it/s]
                                                           

Epoch 40/100 – Loss: 1.6728, Val Accuracy: 71.96%
Saved best FP32 model with Val Accuracy: 71.96%


Training Epoch 41/100: 100%|██████████| 352/352 [00:20<00:00, 17.46it/s]
                                                           

Epoch 41/100 – Loss: 1.6420, Val Accuracy: 72.24%
Saved best FP32 model with Val Accuracy: 72.24%


Training Epoch 42/100: 100%|██████████| 352/352 [00:19<00:00, 17.60it/s]
                                                           

Epoch 42/100 – Loss: 1.7140, Val Accuracy: 72.08%
No improvement in validation accuracy. Patience counter: 1/20


Training Epoch 43/100: 100%|██████████| 352/352 [00:19<00:00, 17.73it/s]
                                                           

Epoch 43/100 – Loss: 1.5828, Val Accuracy: 72.16%
No improvement in validation accuracy. Patience counter: 2/20


Training Epoch 44/100: 100%|██████████| 352/352 [00:20<00:00, 17.52it/s]
                                                           

Epoch 44/100 – Loss: 1.6553, Val Accuracy: 72.62%
Saved best FP32 model with Val Accuracy: 72.62%


Training Epoch 45/100: 100%|██████████| 352/352 [00:19<00:00, 17.72it/s]
                                                           

Epoch 45/100 – Loss: 1.5929, Val Accuracy: 72.36%
No improvement in validation accuracy. Patience counter: 1/20


Training Epoch 46/100: 100%|██████████| 352/352 [00:20<00:00, 17.51it/s]
                                                           

Epoch 46/100 – Loss: 1.5948, Val Accuracy: 73.02%
Saved best FP32 model with Val Accuracy: 73.02%


Training Epoch 47/100: 100%|██████████| 352/352 [00:19<00:00, 17.68it/s]
                                                           

Epoch 47/100 – Loss: 1.5855, Val Accuracy: 72.56%
No improvement in validation accuracy. Patience counter: 1/20


Training Epoch 48/100: 100%|██████████| 352/352 [00:20<00:00, 17.52it/s]
                                                           

Epoch 48/100 – Loss: 1.5590, Val Accuracy: 73.46%
Saved best FP32 model with Val Accuracy: 73.46%


Training Epoch 49/100: 100%|██████████| 352/352 [00:19<00:00, 17.61it/s]
                                                           

Epoch 49/100 – Loss: 1.5591, Val Accuracy: 72.24%
No improvement in validation accuracy. Patience counter: 1/20


Training Epoch 50/100: 100%|██████████| 352/352 [00:19<00:00, 17.60it/s]
                                                           

Epoch 50/100 – Loss: 1.5800, Val Accuracy: 73.58%
Saved best FP32 model with Val Accuracy: 73.58%


Training Epoch 51/100: 100%|██████████| 352/352 [00:19<00:00, 17.62it/s]
                                                           

Epoch 51/100 – Loss: 1.5493, Val Accuracy: 72.94%
No improvement in validation accuracy. Patience counter: 1/20


Training Epoch 52/100: 100%|██████████| 352/352 [00:19<00:00, 17.60it/s]
                                                           

Epoch 52/100 – Loss: 1.5851, Val Accuracy: 73.50%
No improvement in validation accuracy. Patience counter: 2/20


Training Epoch 53/100: 100%|██████████| 352/352 [00:20<00:00, 17.50it/s]
                                                           

Epoch 53/100 – Loss: 1.5385, Val Accuracy: 74.14%
Saved best FP32 model with Val Accuracy: 74.14%


Training Epoch 54/100: 100%|██████████| 352/352 [00:19<00:00, 17.63it/s]
                                                           

Epoch 54/100 – Loss: 1.5499, Val Accuracy: 73.40%
No improvement in validation accuracy. Patience counter: 1/20


Training Epoch 55/100: 100%|██████████| 352/352 [00:20<00:00, 17.54it/s]
                                                           

Epoch 55/100 – Loss: 1.5062, Val Accuracy: 73.92%
No improvement in validation accuracy. Patience counter: 2/20


Training Epoch 56/100: 100%|██████████| 352/352 [00:19<00:00, 17.68it/s]
                                                           

Epoch 56/100 – Loss: 1.4930, Val Accuracy: 73.70%
No improvement in validation accuracy. Patience counter: 3/20


Training Epoch 57/100: 100%|██████████| 352/352 [00:20<00:00, 17.55it/s]
                                                           

Epoch 57/100 – Loss: 1.4531, Val Accuracy: 74.30%
Saved best FP32 model with Val Accuracy: 74.30%


Training Epoch 58/100: 100%|██████████| 352/352 [00:19<00:00, 17.67it/s]
                                                           

Epoch 58/100 – Loss: 1.5112, Val Accuracy: 74.14%
No improvement in validation accuracy. Patience counter: 1/20


Training Epoch 59/100: 100%|██████████| 352/352 [00:19<00:00, 17.70it/s]
                                                           

Epoch 59/100 – Loss: 1.5169, Val Accuracy: 74.56%
Saved best FP32 model with Val Accuracy: 74.56%


Training Epoch 60/100: 100%|██████████| 352/352 [00:20<00:00, 17.54it/s]
                                                           

Epoch 60/100 – Loss: 1.4366, Val Accuracy: 74.86%
Saved best FP32 model with Val Accuracy: 74.86%


Training Epoch 61/100: 100%|██████████| 352/352 [00:19<00:00, 17.72it/s]
                                                           

Epoch 61/100 – Loss: 1.4520, Val Accuracy: 74.42%
No improvement in validation accuracy. Patience counter: 1/20


Training Epoch 62/100: 100%|██████████| 352/352 [00:20<00:00, 17.49it/s]
                                                           

Epoch 62/100 – Loss: 1.4985, Val Accuracy: 74.76%
No improvement in validation accuracy. Patience counter: 2/20


Training Epoch 63/100: 100%|██████████| 352/352 [00:20<00:00, 17.59it/s]
                                                           

Epoch 63/100 – Loss: 1.4695, Val Accuracy: 75.10%
Saved best FP32 model with Val Accuracy: 75.10%


Training Epoch 64/100: 100%|██████████| 352/352 [00:20<00:00, 17.51it/s]
                                                           

Epoch 64/100 – Loss: 1.5021, Val Accuracy: 74.58%
No improvement in validation accuracy. Patience counter: 1/20


Training Epoch 65/100: 100%|██████████| 352/352 [00:19<00:00, 17.65it/s]
                                                           

Epoch 65/100 – Loss: 1.4302, Val Accuracy: 74.92%
No improvement in validation accuracy. Patience counter: 2/20


Training Epoch 66/100: 100%|██████████| 352/352 [00:19<00:00, 17.65it/s]
                                                           

Epoch 66/100 – Loss: 1.4774, Val Accuracy: 74.90%
No improvement in validation accuracy. Patience counter: 3/20


Training Epoch 67/100: 100%|██████████| 352/352 [00:20<00:00, 17.47it/s]
                                                           

Epoch 67/100 – Loss: 1.4804, Val Accuracy: 75.56%
Saved best FP32 model with Val Accuracy: 75.56%


Training Epoch 68/100: 100%|██████████| 352/352 [00:20<00:00, 17.60it/s]
                                                           

Epoch 68/100 – Loss: 1.4610, Val Accuracy: 75.52%
No improvement in validation accuracy. Patience counter: 1/20


Training Epoch 69/100: 100%|██████████| 352/352 [00:20<00:00, 17.53it/s]
                                                           

Epoch 69/100 – Loss: 1.3772, Val Accuracy: 75.58%
Saved best FP32 model with Val Accuracy: 75.58%


Training Epoch 70/100: 100%|██████████| 352/352 [00:19<00:00, 17.69it/s]
                                                           

Epoch 70/100 – Loss: 1.4701, Val Accuracy: 75.70%
Saved best FP32 model with Val Accuracy: 75.70%


Training Epoch 71/100: 100%|██████████| 352/352 [00:20<00:00, 17.55it/s]
                                                           

Epoch 71/100 – Loss: 1.4021, Val Accuracy: 76.56%
Saved best FP32 model with Val Accuracy: 76.56%


Training Epoch 72/100: 100%|██████████| 352/352 [00:19<00:00, 17.70it/s]
                                                           

Epoch 72/100 – Loss: 1.4367, Val Accuracy: 75.76%
No improvement in validation accuracy. Patience counter: 1/20


Training Epoch 73/100: 100%|██████████| 352/352 [00:20<00:00, 17.56it/s]
                                                           

Epoch 73/100 – Loss: 1.5039, Val Accuracy: 75.96%
No improvement in validation accuracy. Patience counter: 2/20


Training Epoch 74/100: 100%|██████████| 352/352 [00:19<00:00, 17.65it/s]
                                                           

Epoch 74/100 – Loss: 1.4195, Val Accuracy: 75.74%
No improvement in validation accuracy. Patience counter: 3/20


Training Epoch 75/100: 100%|██████████| 352/352 [00:19<00:00, 17.67it/s]
                                                           

Epoch 75/100 – Loss: 1.4329, Val Accuracy: 76.10%
No improvement in validation accuracy. Patience counter: 4/20


Training Epoch 76/100: 100%|██████████| 352/352 [00:20<00:00, 17.51it/s]
                                                           

Epoch 76/100 – Loss: 1.4015, Val Accuracy: 75.46%
No improvement in validation accuracy. Patience counter: 5/20


Training Epoch 77/100: 100%|██████████| 352/352 [00:19<00:00, 17.62it/s]
                                                           

Epoch 77/100 – Loss: 1.3718, Val Accuracy: 75.68%
No improvement in validation accuracy. Patience counter: 6/20


Training Epoch 78/100: 100%|██████████| 352/352 [00:20<00:00, 17.48it/s]
                                                           

Epoch 78/100 – Loss: 1.3673, Val Accuracy: 75.88%
No improvement in validation accuracy. Patience counter: 7/20


Training Epoch 79/100: 100%|██████████| 352/352 [00:19<00:00, 17.64it/s]
                                                           

Epoch 79/100 – Loss: 1.4057, Val Accuracy: 75.84%
No improvement in validation accuracy. Patience counter: 8/20


Training Epoch 80/100: 100%|██████████| 352/352 [00:20<00:00, 17.52it/s]
                                                           

Epoch 80/100 – Loss: 1.4047, Val Accuracy: 76.16%
No improvement in validation accuracy. Patience counter: 9/20


Training Epoch 81/100: 100%|██████████| 352/352 [00:20<00:00, 17.59it/s]
                                                           

Epoch 81/100 – Loss: 1.3467, Val Accuracy: 75.86%
No improvement in validation accuracy. Patience counter: 10/20


Training Epoch 82/100: 100%|██████████| 352/352 [00:20<00:00, 17.49it/s]
                                                           

Epoch 82/100 – Loss: 1.3968, Val Accuracy: 76.14%
No improvement in validation accuracy. Patience counter: 11/20


Training Epoch 83/100: 100%|██████████| 352/352 [00:19<00:00, 17.69it/s]
                                                           

Epoch 83/100 – Loss: 1.4262, Val Accuracy: 75.52%
No improvement in validation accuracy. Patience counter: 12/20


Training Epoch 84/100: 100%|██████████| 352/352 [00:19<00:00, 17.67it/s]
                                                           

Epoch 84/100 – Loss: 1.4056, Val Accuracy: 76.40%
No improvement in validation accuracy. Patience counter: 13/20


Training Epoch 85/100: 100%|██████████| 352/352 [00:20<00:00, 17.56it/s]
                                                           

Epoch 85/100 – Loss: 1.3675, Val Accuracy: 76.24%
No improvement in validation accuracy. Patience counter: 14/20


Training Epoch 86/100: 100%|██████████| 352/352 [00:19<00:00, 17.69it/s]
                                                           

Epoch 86/100 – Loss: 1.3922, Val Accuracy: 75.70%
No improvement in validation accuracy. Patience counter: 15/20


Training Epoch 87/100: 100%|██████████| 352/352 [00:20<00:00, 17.54it/s]
                                                           

Epoch 87/100 – Loss: 1.3710, Val Accuracy: 76.64%
Saved best FP32 model with Val Accuracy: 76.64%


Training Epoch 88/100: 100%|██████████| 352/352 [00:19<00:00, 17.65it/s]
                                                           

Epoch 88/100 – Loss: 1.2740, Val Accuracy: 76.58%
No improvement in validation accuracy. Patience counter: 1/20


Training Epoch 89/100: 100%|██████████| 352/352 [00:20<00:00, 17.52it/s]
                                                           

Epoch 89/100 – Loss: 1.3365, Val Accuracy: 76.62%
No improvement in validation accuracy. Patience counter: 2/20


Training Epoch 90/100: 100%|██████████| 352/352 [00:20<00:00, 17.58it/s]
                                                           

Epoch 90/100 – Loss: 1.2620, Val Accuracy: 76.20%
No improvement in validation accuracy. Patience counter: 3/20


Training Epoch 91/100: 100%|██████████| 352/352 [00:19<00:00, 17.65it/s]
                                                           

Epoch 91/100 – Loss: 1.3411, Val Accuracy: 76.22%
No improvement in validation accuracy. Patience counter: 4/20


Training Epoch 92/100: 100%|██████████| 352/352 [00:20<00:00, 17.56it/s]
                                                           

Epoch 92/100 – Loss: 1.4517, Val Accuracy: 76.86%
Saved best FP32 model with Val Accuracy: 76.86%


Training Epoch 93/100: 100%|██████████| 352/352 [00:19<00:00, 17.69it/s]
                                                           

Epoch 93/100 – Loss: 1.3439, Val Accuracy: 76.76%
No improvement in validation accuracy. Patience counter: 1/20


Training Epoch 94/100: 100%|██████████| 352/352 [00:20<00:00, 17.53it/s]
                                                           

Epoch 94/100 – Loss: 1.3202, Val Accuracy: 76.48%
No improvement in validation accuracy. Patience counter: 2/20


Training Epoch 95/100: 100%|██████████| 352/352 [00:19<00:00, 17.67it/s]
                                                           

Epoch 95/100 – Loss: 1.2882, Val Accuracy: 76.30%
No improvement in validation accuracy. Patience counter: 3/20


Training Epoch 96/100: 100%|██████████| 352/352 [00:20<00:00, 17.50it/s]
                                                           

Epoch 96/100 – Loss: 1.3615, Val Accuracy: 75.88%
No improvement in validation accuracy. Patience counter: 4/20


Training Epoch 97/100: 100%|██████████| 352/352 [00:19<00:00, 17.66it/s]
                                                           

Epoch 97/100 – Loss: 1.3480, Val Accuracy: 77.30%
Saved best FP32 model with Val Accuracy: 77.30%


Training Epoch 98/100: 100%|██████████| 352/352 [00:20<00:00, 17.55it/s]
                                                           

Epoch 98/100 – Loss: 1.4152, Val Accuracy: 75.94%
No improvement in validation accuracy. Patience counter: 1/20


Training Epoch 99/100: 100%|██████████| 352/352 [00:19<00:00, 17.66it/s]
                                                           

Epoch 99/100 – Loss: 1.3812, Val Accuracy: 76.02%
No improvement in validation accuracy. Patience counter: 2/20


Training Epoch 100/100: 100%|██████████| 352/352 [00:20<00:00, 17.58it/s]
                                                           

Epoch 100/100 – Loss: 1.3134, Val Accuracy: 76.42%
No improvement in validation accuracy. Patience counter: 3/20


                                                                 


Best FP32 Model – Test Accuracy: 76.69%
Average FP32 Inference Latency (CPU): 267.59 ms/batch




# ENT KD

In [13]:
import os
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch.quantization import (
    prepare_qat,
    convert,
    enable_observer,
    disable_observer,
    enable_fake_quant,
    disable_fake_quant,
)
from torchvision.models.quantization import resnet50
import numpy as np
from tqdm import tqdm

torch.backends.quantized.engine = 'fbgemm'



class EntKDLoss(nn.Module):
    def __init__(self, base_temperature=3.0, alpha=0.7, beta=0.1):
        super(EntKDLoss, self).__init__()
        self.base_temperature = base_temperature
        self.alpha = alpha
        self.beta = beta
        self.kld_loss = nn.KLDivLoss(reduction='batchmean')

    def forward(self, student_outputs, teacher_outputs, targets_a, targets_b, lam):
        # Compute teacher entropy
        teacher_probs = F.softmax(teacher_outputs, dim=1)
        entropy = -torch.sum(teacher_probs * torch.log(teacher_probs + 1e-10), dim=1).mean()
        
        # Dynamic temperature
        temperature = self.base_temperature / (1 + self.beta * entropy)
        temperature = torch.clamp(temperature, min=1.0, max=10.0)
        
        # Compute mixed CE loss
        log_prob = F.log_softmax(student_outputs, dim=1)
        ce_loss = - (lam * log_prob[torch.arange(student_outputs.size(0)), targets_a] + 
                     (1 - lam) * log_prob[torch.arange(student_outputs.size(0)), targets_b]).mean()
        
        # Compute KLD loss
        kld = self.kld_loss(
            F.log_softmax(student_outputs / temperature, dim=1),
            F.softmax(teacher_outputs / temperature, dim=1)
        ) * (temperature ** 2)
        
        return self.alpha * ce_loss + (1 - self.alpha) * kld

def main():
    train_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    assert train_device.type == 'cuda', "GPU not available for training/validation"
    test_device = torch.device('cpu')

    train_loader, val_loader, test_loader, train_idx = get_cifar100_data_loaders()

    # Load pre-trained FP32 teacher model
    teacher_model = resnet50(pretrained=False, num_classes=100)
    teacher_model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=0, bias=False)
    teacher_model.maxpool = nn.Identity()
    teacher_model.load_state_dict(torch.load('best_fp32_resnet50_cifar100_32x32_100_cutmix.pth'))
    teacher_model.to(train_device)
    teacher_model.eval()

    # Initialize QAT student model
    student_model = resnet50(pretrained=False, quantize=False, num_classes=100)
    student_model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=0, bias=False)
    student_model.maxpool = nn.Identity()
    student_model.fuse_model()
    student_model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
    prepare_qat(student_model, inplace=True)
    student_model.to(train_device)

    # Calibration
    student_model.eval()
    disable_fake_quant(student_model)
    enable_observer(student_model)
    with torch.no_grad():
        for i, (imgs, _) in enumerate(tqdm(train_loader, desc="Calibrating", leave=False)):
            if i >= 200:
                break
            imgs = imgs.to(train_device)
            student_model(imgs)
    disable_observer(student_model)
    enable_fake_quant(student_model)

    optimizer = optim.AdamW(student_model.parameters(), lr=1e-3, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)
    criterion = EntKDLoss(base_temperature=3.0, alpha=0.3, beta=0.1)
    best_val_acc = 0.0
    best_model_path = 'qat_entkd_cutmix_resnet50_cifar100_float_32x32_100.pth'
    epochs = 100
    patience = 20
    patience_counter = 0

    for epoch in range(epochs):
        student_model.train()
        enable_fake_quant(student_model)
        running_loss = 0.0
        for imgs, targets in tqdm(train_loader, desc=f"Training Epoch {epoch+1}/{epochs}", leave=False):
            imgs, targets = imgs.to(train_device), targets.to(train_device)
            if np.random.rand() < 0.5:  # Apply CutMix with 50% probability
                mixed_imgs, targets_a, targets_b, lam = cutmix(imgs, targets)
            else:
                mixed_imgs = imgs
                targets_a = targets
                targets_b = targets
                lam = 1.0
            optimizer.zero_grad()
            with torch.no_grad():
                teacher_outputs = teacher_model(mixed_imgs)
            student_outputs = student_model(mixed_imgs)
            loss = criterion(student_outputs, teacher_outputs, targets_a, targets_b, lam)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * imgs.size(0)

        val_acc, _ = evaluate_accuracy_and_latency(student_model, val_loader, train_device, desc="Validating")
        print(f"Epoch {epoch+1}/{epochs} – Loss: {running_loss/len(train_idx):.4f}, ValAcc: {val_acc:.2f}%")

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(student_model.state_dict(), best_model_path)
            print(f"Saved best QAT+EntKD+CutMix model with ValAcc: {best_val_acc:.2f}%")
            patience_counter = 0
        else:
            patience_counter += 1
            print(f"No improvement in validation accuracy. Patience counter: {patience_counter}/{patience}")

        if patience_counter >= patience:
            print(f"Early stopping triggered after {epoch+1} epochs.")
            break

        scheduler.step()

    student_model.load_state_dict(torch.load(best_model_path))
    fake_quant_acc, fake_quant_latency = evaluate_accuracy_and_latency(student_model, test_loader, train_device, desc="Testing Fake-Quantized")
    print(f"\nBest Fake-Quantized Model – Test Accuracy: {fake_quant_acc:.2f}%")
    print(f"Average Fake-Quantized Inference Latency (GPU): {fake_quant_latency:.2f} ms/batch")

    student_model.cpu()
    student_model.eval()
    quantized_model = convert(student_model, inplace=False)

    int8_acc, int8_latency = evaluate_accuracy_and_latency(quantized_model, test_loader, test_device, desc="Testing INT8")
    print(f"Best INT8 Model – Test Accuracy: {int8_acc:.2f}%")
    print(f"Average INT8 Inference Latency (CPU): {int8_latency:.2f} ms/batch")

    torch.save(quantized_model.state_dict(), 'qat_entkd_cutmix_resnet50_cifar100_int8_final_32x32_100.pth')

if __name__ == '__main__':
    main()

Files already downloaded and verified
Files already downloaded and verified


                                                                       

Epoch 1/100 – Loss: 2.1634, ValAcc: 11.82%
Saved best QAT+EntKD+CutMix model with ValAcc: 11.82%


                                                                       

Epoch 2/100 – Loss: 1.9454, ValAcc: 19.42%
Saved best QAT+EntKD+CutMix model with ValAcc: 19.42%


                                                                       

Epoch 3/100 – Loss: 1.8255, ValAcc: 29.62%
Saved best QAT+EntKD+CutMix model with ValAcc: 29.62%


                                                                       

Epoch 4/100 – Loss: 1.6762, ValAcc: 34.98%
Saved best QAT+EntKD+CutMix model with ValAcc: 34.98%


                                                                       

Epoch 5/100 – Loss: 1.5531, ValAcc: 41.04%
Saved best QAT+EntKD+CutMix model with ValAcc: 41.04%


                                                                       

Epoch 6/100 – Loss: 1.4415, ValAcc: 45.50%
Saved best QAT+EntKD+CutMix model with ValAcc: 45.50%


                                                                       

Epoch 7/100 – Loss: 1.3490, ValAcc: 48.92%
Saved best QAT+EntKD+CutMix model with ValAcc: 48.92%


                                                                       

Epoch 8/100 – Loss: 1.2727, ValAcc: 53.22%
Saved best QAT+EntKD+CutMix model with ValAcc: 53.22%


                                                                       

Epoch 9/100 – Loss: 1.2166, ValAcc: 51.28%
No improvement in validation accuracy. Patience counter: 1/20


                                                                        

Epoch 10/100 – Loss: 1.1828, ValAcc: 51.52%
No improvement in validation accuracy. Patience counter: 2/20


                                                                        

Epoch 11/100 – Loss: 1.1338, ValAcc: 58.92%
Saved best QAT+EntKD+CutMix model with ValAcc: 58.92%


                                                                        

Epoch 12/100 – Loss: 1.0646, ValAcc: 60.48%
Saved best QAT+EntKD+CutMix model with ValAcc: 60.48%


                                                                        

Epoch 13/100 – Loss: 1.0432, ValAcc: 61.74%
Saved best QAT+EntKD+CutMix model with ValAcc: 61.74%


                                                                        

Epoch 14/100 – Loss: 1.0058, ValAcc: 59.56%
No improvement in validation accuracy. Patience counter: 1/20


                                                                        

Epoch 15/100 – Loss: 0.9595, ValAcc: 63.90%
Saved best QAT+EntKD+CutMix model with ValAcc: 63.90%


                                                                        

Epoch 16/100 – Loss: 0.9073, ValAcc: 63.90%
No improvement in validation accuracy. Patience counter: 1/20


                                                                        

Epoch 17/100 – Loss: 0.8670, ValAcc: 65.94%
Saved best QAT+EntKD+CutMix model with ValAcc: 65.94%


                                                                        

Epoch 18/100 – Loss: 0.8475, ValAcc: 67.20%
Saved best QAT+EntKD+CutMix model with ValAcc: 67.20%


                                                                        

Epoch 19/100 – Loss: 0.8327, ValAcc: 67.84%
Saved best QAT+EntKD+CutMix model with ValAcc: 67.84%


                                                                        

Epoch 20/100 – Loss: 0.7982, ValAcc: 68.66%
Saved best QAT+EntKD+CutMix model with ValAcc: 68.66%


                                                                        

Epoch 21/100 – Loss: 0.7873, ValAcc: 68.60%
No improvement in validation accuracy. Patience counter: 1/20


                                                                        

Epoch 22/100 – Loss: 0.7491, ValAcc: 68.22%
No improvement in validation accuracy. Patience counter: 2/20


                                                                        

Epoch 23/100 – Loss: 0.7239, ValAcc: 69.84%
Saved best QAT+EntKD+CutMix model with ValAcc: 69.84%


                                                                        

Epoch 24/100 – Loss: 0.7481, ValAcc: 70.12%
Saved best QAT+EntKD+CutMix model with ValAcc: 70.12%


                                                                        

Epoch 25/100 – Loss: 0.6945, ValAcc: 70.54%
Saved best QAT+EntKD+CutMix model with ValAcc: 70.54%


                                                                        

Epoch 26/100 – Loss: 0.6890, ValAcc: 70.58%
Saved best QAT+EntKD+CutMix model with ValAcc: 70.58%


                                                                        

Epoch 27/100 – Loss: 0.6365, ValAcc: 71.72%
Saved best QAT+EntKD+CutMix model with ValAcc: 71.72%


                                                                        

Epoch 28/100 – Loss: 0.5840, ValAcc: 71.82%
Saved best QAT+EntKD+CutMix model with ValAcc: 71.82%


                                                                        

Epoch 29/100 – Loss: 0.6108, ValAcc: 71.98%
Saved best QAT+EntKD+CutMix model with ValAcc: 71.98%


                                                                        

Epoch 30/100 – Loss: 0.5594, ValAcc: 72.88%
Saved best QAT+EntKD+CutMix model with ValAcc: 72.88%


                                                                        

Epoch 31/100 – Loss: 0.5863, ValAcc: 72.88%
No improvement in validation accuracy. Patience counter: 1/20


                                                                        

Epoch 32/100 – Loss: 0.5770, ValAcc: 72.46%
No improvement in validation accuracy. Patience counter: 2/20


                                                                        

Epoch 33/100 – Loss: 0.5606, ValAcc: 72.84%
No improvement in validation accuracy. Patience counter: 3/20


                                                                        

Epoch 34/100 – Loss: 0.5372, ValAcc: 73.40%
Saved best QAT+EntKD+CutMix model with ValAcc: 73.40%


                                                                        

Epoch 35/100 – Loss: 0.5441, ValAcc: 73.68%
Saved best QAT+EntKD+CutMix model with ValAcc: 73.68%


                                                                        

Epoch 36/100 – Loss: 0.5087, ValAcc: 73.74%
Saved best QAT+EntKD+CutMix model with ValAcc: 73.74%


                                                                        

Epoch 37/100 – Loss: 0.4867, ValAcc: 74.38%
Saved best QAT+EntKD+CutMix model with ValAcc: 74.38%


                                                                        

Epoch 38/100 – Loss: 0.4807, ValAcc: 74.06%
No improvement in validation accuracy. Patience counter: 1/20


                                                                        

Epoch 39/100 – Loss: 0.4879, ValAcc: 73.18%
No improvement in validation accuracy. Patience counter: 2/20


                                                                        

Epoch 40/100 – Loss: 0.4658, ValAcc: 74.46%
Saved best QAT+EntKD+CutMix model with ValAcc: 74.46%


                                                                        

Epoch 41/100 – Loss: 0.4822, ValAcc: 74.12%
No improvement in validation accuracy. Patience counter: 1/20


                                                                        

Epoch 42/100 – Loss: 0.4695, ValAcc: 74.62%
Saved best QAT+EntKD+CutMix model with ValAcc: 74.62%


                                                                        

Epoch 43/100 – Loss: 0.4716, ValAcc: 74.16%
No improvement in validation accuracy. Patience counter: 1/20


                                                                        

Epoch 44/100 – Loss: 0.4340, ValAcc: 74.60%
No improvement in validation accuracy. Patience counter: 2/20


                                                                        

Epoch 45/100 – Loss: 0.4572, ValAcc: 74.96%
Saved best QAT+EntKD+CutMix model with ValAcc: 74.96%


                                                                        

Epoch 46/100 – Loss: 0.4327, ValAcc: 75.46%
Saved best QAT+EntKD+CutMix model with ValAcc: 75.46%


                                                                        

Epoch 47/100 – Loss: 0.4145, ValAcc: 75.04%
No improvement in validation accuracy. Patience counter: 1/20


                                                                        

Epoch 48/100 – Loss: 0.4294, ValAcc: 75.76%
Saved best QAT+EntKD+CutMix model with ValAcc: 75.76%


                                                                        

Epoch 49/100 – Loss: 0.4052, ValAcc: 75.14%
No improvement in validation accuracy. Patience counter: 1/20


                                                                        

Epoch 50/100 – Loss: 0.4308, ValAcc: 74.22%
No improvement in validation accuracy. Patience counter: 2/20


                                                                        

Epoch 51/100 – Loss: 0.3909, ValAcc: 75.26%
No improvement in validation accuracy. Patience counter: 3/20


                                                                        

Epoch 52/100 – Loss: 0.4020, ValAcc: 75.96%
Saved best QAT+EntKD+CutMix model with ValAcc: 75.96%


                                                                        

Epoch 53/100 – Loss: 0.3992, ValAcc: 76.28%
Saved best QAT+EntKD+CutMix model with ValAcc: 76.28%


                                                                        

Epoch 54/100 – Loss: 0.3901, ValAcc: 75.60%
No improvement in validation accuracy. Patience counter: 1/20


                                                                        

Epoch 55/100 – Loss: 0.3626, ValAcc: 76.10%
No improvement in validation accuracy. Patience counter: 2/20


                                                                        

Epoch 56/100 – Loss: 0.3796, ValAcc: 76.54%
Saved best QAT+EntKD+CutMix model with ValAcc: 76.54%


                                                                        

Epoch 57/100 – Loss: 0.3587, ValAcc: 75.90%
No improvement in validation accuracy. Patience counter: 1/20


                                                                        

Epoch 58/100 – Loss: 0.3666, ValAcc: 76.40%
No improvement in validation accuracy. Patience counter: 2/20


                                                                        

Epoch 59/100 – Loss: 0.3796, ValAcc: 76.34%
No improvement in validation accuracy. Patience counter: 3/20


                                                                        

Epoch 60/100 – Loss: 0.3514, ValAcc: 76.92%
Saved best QAT+EntKD+CutMix model with ValAcc: 76.92%


                                                                        

Epoch 61/100 – Loss: 0.3817, ValAcc: 76.72%
No improvement in validation accuracy. Patience counter: 1/20


                                                                        

Epoch 62/100 – Loss: 0.3593, ValAcc: 75.82%
No improvement in validation accuracy. Patience counter: 2/20


                                                                        

Epoch 63/100 – Loss: 0.3472, ValAcc: 76.68%
No improvement in validation accuracy. Patience counter: 3/20


                                                                        

Epoch 64/100 – Loss: 0.3608, ValAcc: 76.72%
No improvement in validation accuracy. Patience counter: 4/20


                                                                        

Epoch 65/100 – Loss: 0.3477, ValAcc: 77.36%
Saved best QAT+EntKD+CutMix model with ValAcc: 77.36%


                                                                        

Epoch 66/100 – Loss: 0.3428, ValAcc: 76.60%
No improvement in validation accuracy. Patience counter: 1/20


                                                                        

Epoch 67/100 – Loss: 0.3628, ValAcc: 76.60%
No improvement in validation accuracy. Patience counter: 2/20


                                                                        

Epoch 68/100 – Loss: 0.3426, ValAcc: 76.72%
No improvement in validation accuracy. Patience counter: 3/20


                                                                        

Epoch 69/100 – Loss: 0.3524, ValAcc: 77.30%
No improvement in validation accuracy. Patience counter: 4/20


                                                                        

Epoch 70/100 – Loss: 0.3522, ValAcc: 77.68%
Saved best QAT+EntKD+CutMix model with ValAcc: 77.68%


                                                                        

Epoch 71/100 – Loss: 0.3498, ValAcc: 77.00%
No improvement in validation accuracy. Patience counter: 1/20


                                                                        

Epoch 72/100 – Loss: 0.3200, ValAcc: 76.70%
No improvement in validation accuracy. Patience counter: 2/20


                                                                        

Epoch 73/100 – Loss: 0.3417, ValAcc: 76.98%
No improvement in validation accuracy. Patience counter: 3/20


                                                                        

Epoch 74/100 – Loss: 0.2999, ValAcc: 77.42%
No improvement in validation accuracy. Patience counter: 4/20


                                                                        

Epoch 75/100 – Loss: 0.3706, ValAcc: 77.50%
No improvement in validation accuracy. Patience counter: 5/20


                                                                        

Epoch 76/100 – Loss: 0.3228, ValAcc: 76.86%
No improvement in validation accuracy. Patience counter: 6/20


                                                                        

Epoch 77/100 – Loss: 0.3171, ValAcc: 76.72%
No improvement in validation accuracy. Patience counter: 7/20


                                                                        

Epoch 78/100 – Loss: 0.3251, ValAcc: 77.64%
No improvement in validation accuracy. Patience counter: 8/20


                                                                        

Epoch 79/100 – Loss: 0.3370, ValAcc: 77.76%
Saved best QAT+EntKD+CutMix model with ValAcc: 77.76%


                                                                        

Epoch 80/100 – Loss: 0.3237, ValAcc: 77.48%
No improvement in validation accuracy. Patience counter: 1/20


                                                                        

Epoch 81/100 – Loss: 0.3521, ValAcc: 77.26%
No improvement in validation accuracy. Patience counter: 2/20


                                                                        

Epoch 82/100 – Loss: 0.3238, ValAcc: 77.78%
Saved best QAT+EntKD+CutMix model with ValAcc: 77.78%


                                                                        

Epoch 83/100 – Loss: 0.3220, ValAcc: 77.96%
Saved best QAT+EntKD+CutMix model with ValAcc: 77.96%


                                                                        

Epoch 84/100 – Loss: 0.3514, ValAcc: 78.26%
Saved best QAT+EntKD+CutMix model with ValAcc: 78.26%


                                                                        

Epoch 85/100 – Loss: 0.3279, ValAcc: 77.60%
No improvement in validation accuracy. Patience counter: 1/20


                                                                        

Epoch 86/100 – Loss: 0.3237, ValAcc: 77.64%
No improvement in validation accuracy. Patience counter: 2/20


                                                                        

Epoch 87/100 – Loss: 0.3130, ValAcc: 77.50%
No improvement in validation accuracy. Patience counter: 3/20


                                                                        

Epoch 88/100 – Loss: 0.3291, ValAcc: 78.20%
No improvement in validation accuracy. Patience counter: 4/20


                                                                        

Epoch 89/100 – Loss: 0.3392, ValAcc: 78.08%
No improvement in validation accuracy. Patience counter: 5/20


                                                                        

Epoch 90/100 – Loss: 0.3434, ValAcc: 77.86%
No improvement in validation accuracy. Patience counter: 6/20


                                                                        

Epoch 91/100 – Loss: 0.3270, ValAcc: 77.98%
No improvement in validation accuracy. Patience counter: 7/20


                                                                        

Epoch 92/100 – Loss: 0.3201, ValAcc: 77.66%
No improvement in validation accuracy. Patience counter: 8/20


                                                                        

Epoch 93/100 – Loss: 0.3225, ValAcc: 78.32%
Saved best QAT+EntKD+CutMix model with ValAcc: 78.32%


                                                                        

Epoch 94/100 – Loss: 0.3051, ValAcc: 78.00%
No improvement in validation accuracy. Patience counter: 1/20


                                                                        

Epoch 95/100 – Loss: 0.3199, ValAcc: 77.64%
No improvement in validation accuracy. Patience counter: 2/20


                                                                        

Epoch 96/100 – Loss: 0.3236, ValAcc: 77.84%
No improvement in validation accuracy. Patience counter: 3/20


                                                                        

Epoch 97/100 – Loss: 0.3137, ValAcc: 78.28%
No improvement in validation accuracy. Patience counter: 4/20


                                                                        

Epoch 98/100 – Loss: 0.3102, ValAcc: 77.72%
No improvement in validation accuracy. Patience counter: 5/20


                                                                        

Epoch 99/100 – Loss: 0.3395, ValAcc: 77.78%
No improvement in validation accuracy. Patience counter: 6/20


                                                                         

Epoch 100/100 – Loss: 0.3269, ValAcc: 78.00%
No improvement in validation accuracy. Patience counter: 7/20


                                                                           


Best Fake-Quantized Model – Test Accuracy: 78.42%
Average Fake-Quantized Inference Latency (GPU): 25.87 ms/batch


                                                                 

Best INT8 Model – Test Accuracy: 78.40%
Average INT8 Inference Latency (CPU): 130.78 ms/batch
