This model achieved a score of 0.86851.

In [None]:
# model.py
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

# ------------------------------------------------------------------------
#                       Squeeze-and-Excitation (SE) Block
# ------------------------------------------------------------------------
class SEBlock(nn.Module):
    """
    Squeeze-and-Excitation block that recalibrates channel-wise feature responses.

    Every channel is globally average-pooled ("squeeze"), the pooled vector is
    passed through a two-layer bottleneck ending in a sigmoid ("excitation"),
    and the resulting per-channel gate rescales the input feature map.

    Args:
        channels: Number of channels of the 4-D input feature map.
        reduction: Bottleneck reduction ratio. The hidden width is
            ``channels // reduction`` but never smaller than 1.
    """
    def __init__(self, channels, reduction=16):
        super().__init__()
        # Clamp the bottleneck width: with channels < reduction the integer
        # division yields 0, which would make the gate independent of the input.
        hidden = max(1, channels // reduction)
        # First fully connected layer reduces the dimensionality
        self.fc1 = nn.Linear(channels, hidden)
        # Second fully connected layer restores the dimensionality
        self.fc2 = nn.Linear(hidden, channels)

    def forward(self, x):
        """
        Gate the channels of ``x``.

        Args:
            x: Tensor with four dimensions (batch, channels, height, width).

        Returns:
            Tensor of the same shape as ``x``, scaled per channel by a
            sigmoid gate.
        """
        b, c, _, _ = x.size()  # Batch size, channels, height, width
        # Squeeze: global spatial average pooling. flatten() (unlike view())
        # also accepts non-contiguous inputs such as transposed tensors.
        squeeze = x.flatten(2).mean(dim=2)
        # Excitation: two fully connected layers with ReLU and Sigmoid activations
        excitation = F.relu(self.fc1(squeeze))
        excitation = torch.sigmoid(self.fc2(excitation)).view(b, c, 1, 1)
        # Broadcast the (b, c, 1, 1) gate over the spatial dimensions
        return x * excitation

# ------------------------------------------------------------------------
#                       Stochastic Depth
# ------------------------------------------------------------------------
class StochasticDepth(nn.Module):
    """
    Stochastic depth: randomly drops the residual branch during training.

    One random draw is made per forward call, so the decision applies to the
    whole batch. When the branch is kept during training it is scaled by
    ``1 / (1 - p)`` so that the expected training output equals the
    evaluation output ``x + residual``.

    Args:
        p: Probability of dropping the residual branch, in ``[0, 1]``.

    Raises:
        ValueError: If ``p`` lies outside ``[0, 1]``.
    """
    def __init__(self, p: float = 0.1):
        super().__init__()
        if not 0.0 <= p <= 1.0:
            raise ValueError(f"drop probability must be in [0, 1], got {p}")
        self.p = p  # Probability of dropping the residual branch

    def forward(self, x, residual):
        """
        Combine the skip path ``x`` with the residual branch ``residual``.

        Returns ``x + residual`` in eval mode or when ``p == 0``. In training
        mode returns ``x`` with probability ``p`` and
        ``x + residual / (1 - p)`` otherwise.
        """
        # Deterministic path: evaluation, or dropping disabled
        if not self.training or self.p == 0.0:
            return x + residual
        # torch.rand is in [0, 1), so p == 1.0 always takes this branch and
        # the division below can never see a zero denominator.
        if torch.rand(1).item() < self.p:
            return x
        # Rescale the surviving branch to keep the expectation unchanged
        return x + residual / (1.0 - self.p)

# ------------------------------------------------------------------------
#       PreAct Residual Block with SE and Stochastic Depth
# ------------------------------------------------------------------------
class PreActBlock(nn.Module):
    """
    Pre-activation residual unit with channel gating and stochastic depth.

    Data flow: BN -> ReLU -> SE gate -> 3x3 conv -> BN -> ReLU -> 3x3 conv.
    The result is merged with the skip path by a StochasticDepth module.

    Args:
        in_planes: Channel count of the input.
        out_planes: Channel count produced by both convolutions.
        stride: Stride of the first 3x3 convolution (and of the projection).
        drop_prob: Drop probability handed to StochasticDepth.
    """
    def __init__(self, in_planes, out_planes, stride=1, drop_prob=0.0):
        super().__init__()
        # A projection is required whenever the skip path would otherwise
        # disagree with the residual branch in resolution or width.
        needs_projection = (stride != 1) or (in_planes != out_planes)

        # Pre-activation normalisation followed by the SE gate
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.se = SEBlock(in_planes)

        # Residual branch: two bias-free 3x3 convolutions with BN in between
        self.conv1 = nn.Conv2d(
            in_planes, out_planes,
            kernel_size=3, stride=stride, padding=1, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_planes)
        self.conv2 = nn.Conv2d(
            out_planes, out_planes,
            kernel_size=3, stride=1, padding=1, bias=False,
        )

        # Skip path: identity (None) or a bias-free 1x1 projection
        self.shortcut = (
            nn.Conv2d(
                in_planes, out_planes,
                kernel_size=1, stride=stride, padding=0, bias=False,
            )
            if needs_projection
            else None
        )

        # Module that merges skip path and residual branch
        self.sd = StochasticDepth(p=drop_prob)

    def forward(self, x):
        """Run the block on ``x`` and return the merged output."""
        # Normalise, activate and gate the input
        pre = self.se(F.relu(self.bn1(x)))

        # The identity skip uses the raw input; the projection consumes the
        # gated pre-activation instead.
        if self.shortcut is None:
            skip = x
        else:
            skip = self.shortcut(pre)

        # Residual branch
        branch = self.conv1(pre)
        branch = self.conv2(F.relu(self.bn2(branch)))

        # StochasticDepth takes (skip, residual) in this order
        return self.sd(skip, branch)

# ------------------------------------------------------------------------
#       Modified LightResNet for CIFAR-10 with ~4.4M parameters
#       (Channels: 40, 80, 160, 320)
# ------------------------------------------------------------------------
class LightResNet18_v2(nn.Module):
    """
    ResNet18-style classifier built from PreActBlocks.

    A 3x3 stem convolution feeds four stages of two blocks each with widths
    40, 80, 160 and 320. The first stage keeps the resolution; the other
    three halve it. A final BN + ReLU, global average pooling and a linear
    layer produce the class logits.

    Args:
        num_classes: Size of the output logit vector.
        drop_prob: Maximum stochastic-depth drop probability. Stage ``k``
            (1-based) uses ``drop_prob * k / 4`` as its base, and block ``i``
            (0-based) inside a stage uses ``base * (i + 1) / blocks``.
    """
    def __init__(self, num_classes=10, drop_prob=0.1):
        super().__init__()
        stem_width = 40
        # Running channel count, updated by _make_layer as blocks are added
        self.in_planes = stem_width
        # Stem: bias-free 3x3 convolution from RGB to the base width
        self.conv1 = nn.Conv2d(3, stem_width, kernel_size=3, stride=1,
                               padding=1, bias=False)

        # (width, stride) per stage; built in order because _make_layer
        # reads and updates self.in_planes.
        stage_specs = ((40, 1), (80, 2), (160, 2), (320, 2))
        n_stages = len(stage_specs)
        stages = [
            self._make_layer(width, 2, stride=stage_stride,
                             base_p=drop_prob * k / n_stages)
            for k, (width, stage_stride) in enumerate(stage_specs, start=1)
        ]
        self.layer1, self.layer2, self.layer3, self.layer4 = stages

        # Head: final normalisation and linear classifier
        self.bn = nn.BatchNorm2d(320)
        self.linear = nn.Linear(320, num_classes)

        # Re-initialise every convolution, batch-norm and linear layer in the
        # network (including those nested inside the blocks).
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out')
                continue
            if isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
                continue
            if isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.constant_(module.bias, 0)

    def _make_layer(self, out_planes, blocks, stride, base_p):
        """
        Build one stage as an ``nn.Sequential`` of ``blocks`` PreActBlocks.

        Only the first block uses ``stride``; the rest use stride 1. The drop
        probability grows linearly inside the stage up to ``base_p``.
        """
        stride_plan = [stride] + [1] * (blocks - 1)
        stage = []
        for idx, block_stride in zip(range(blocks), stride_plan):
            # Linearly increasing drop probability across the stage
            block_p = base_p * (idx + 1) / blocks
            stage.append(
                PreActBlock(self.in_planes, out_planes,
                            stride=block_stride, drop_prob=block_p)
            )
            # Subsequent blocks consume this block's output width
            self.in_planes = out_planes
        return nn.Sequential(*stage)

    def forward(self, x):
        """
        Compute class logits for a batch of images.
        """
        feats = self.conv1(x)  # Stem
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            feats = stage(feats)  # Residual stages, in order
        feats = F.relu(self.bn(feats))  # Final BN + ReLU
        pooled = F.adaptive_avg_pool2d(feats, 1)  # Global average pooling
        flat = pooled.view(pooled.size(0), -1)  # (batch, channels)
        return self.linear(flat)  # Logits

# Quick parameter check
if __name__ == "__main__":
    # Smoke test: build the default model and push a dummy batch through it
    model = LightResNet18_v2(num_classes=10, drop_prob=0.1)
    x = torch.randn(2, 3, 32, 32)  # Two random 32x32 RGB images
    y = model(x)
    print("Output shape:", y.shape)
    # Count only parameters that receive gradients
    total_params = sum(
        param.numel() for param in model.parameters() if param.requires_grad
    )
    print(f"Total trainable parameters: {total_params:,} (~{total_params/1e6:.2f}M)")


Output shape: torch.Size([2, 10])
Total trainable parameters: 4,389,826 (~4.39M)


In [None]:
"""
Training script for the LightResNet18_v2 model on CIFAR-10 with MixUp and CutMix augmentation.
Uses torchvision's CIFAR-10 dataset, RandAugment, label smoothing, and cosine learning-rate scheduling;
pickle-based CIFAR-10 loading helpers are also defined here but are not used by train().
"""

# train.py
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision import datasets
import numpy as np
import pickle
from torch.utils.data import DataLoader, Dataset
from PIL import Image
# from model import LightResNet18  # Uncomment when actual model is available
from tqdm import tqdm

# Helper functions and custom collate function for MixUp/CutMix data augmentation
def rand_bbox(size, lam):
    """
    Sample a random box for CutMix.

    Args:
        size: Indexable batch shape; ``size[2]`` is read as the height and
            ``size[3]`` as the width.
        lam: Mixing coefficient. The requested box covers a ``1 - lam``
            fraction of the image area before clipping.

    Returns:
        ``(x1, y1, x2, y2)`` box corners clipped to the image bounds. Because
        of the clipping the box may be smaller than requested.
    """
    height, width = size[2], size[3]

    # Side ratio sqrt(1 - lam) gives an area ratio of (1 - lam)
    side_ratio = np.sqrt(1. - lam)
    box_w = int(width * side_ratio)
    box_h = int(height * side_ratio)

    # Box centre: x is drawn before y
    centre_x = np.random.randint(width)
    centre_y = np.random.randint(height)

    half_w, half_h = box_w // 2, box_h // 2
    left = np.clip(centre_x - half_w, 0, width)
    top = np.clip(centre_y - half_h, 0, height)
    right = np.clip(centre_x + half_w, 0, width)
    bottom = np.clip(centre_y + half_h, 0, height)
    return left, top, right, bottom

def mixup_data(x, y, alpha=1.0):
    """
    MixUp: blend every sample with a randomly chosen partner from the batch.

    Args:
        x: Batch tensor, indexed along dimension 0.
        y: Labels aligned with ``x``.
        alpha: Both shape parameters of the Beta distribution for ``lam``.

    Returns:
        ``(mixed_x, y_a, y_b, lam)`` where ``mixed_x`` is
        ``lam * x + (1 - lam) * x[perm]``, ``y_a`` is ``y`` and ``y_b`` holds
        the partners' labels.
    """
    # Single mixing coefficient shared by the whole batch
    lam = np.random.beta(alpha, alpha)
    # Partner assignment: a random permutation of the batch indices
    perm = torch.randperm(x.size(0))
    partner_x = x[perm, :]
    mixed_x = lam * x + (1 - lam) * partner_x
    return mixed_x, y, y[perm], lam

def cutmix_data(x, y, alpha=1.0):
    """
    CutMix: paste a random rectangular patch from a partner sample.

    The input batch is left untouched; the patched images are written into a
    copy.

    Args:
        x: Batch tensor whose last two dimensions are the spatial ones.
        y: Labels aligned with ``x``.
        alpha: Both shape parameters of the Beta distribution used to draw
            the initial mixing coefficient.

    Returns:
        ``(mixed_x, y_a, y_b, lam)`` where ``y_a`` is ``y``, ``y_b`` holds
        the partners' labels and ``lam`` (a Python float) is the fraction of
        each image that was NOT replaced.
    """
    lam = np.random.beta(alpha, alpha)
    batch_size = x.size(0)
    index = torch.randperm(batch_size)
    # Box corners, already clipped to the image bounds
    bbx1, bby1, bbx2, bby2 = rand_bbox(x.size(), lam)
    # Work on a copy so the caller's tensor is not modified as a side effect
    mixed_x = x.clone()
    # Replace the selected region with the partner image's region
    mixed_x[:, :, bby1:bby2, bbx1:bbx2] = x[index, :, bby1:bby2, bbx1:bbx2]
    # Clipping may have shrunk the box, so recompute lam from the actual area
    patch_area = (bbx2 - bbx1) * (bby2 - bby1)
    lam = float(1 - patch_area / (x.size(-1) * x.size(-2)))
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

def mixup_cutmix_collate(batch, alpha=1.0, p=0.5):
    """
    Collate ``(image, label)`` pairs and augment the batch.

    Every batch is augmented: MixUp is chosen with probability ``p`` and
    CutMix otherwise.

    Args:
        batch: Sequence of ``(image_tensor, label)`` pairs.
        alpha: Beta-distribution parameter forwarded to the augmentation.
        p: Probability of choosing MixUp.

    Returns:
        ``(images, (y_a, y_b, lam, method))`` with ``method`` equal to
        ``'mixup'`` or ``'cutmix'``.
    """
    # Split the pairs into parallel sequences and tensorise them
    image_seq, label_seq = zip(*batch)
    images = torch.stack(image_seq, 0)
    labels = torch.tensor(label_seq, dtype=torch.long)

    # One draw decides which augmentation this batch receives
    use_mixup = np.random.rand() < p
    if use_mixup:
        augment, method = mixup_data, 'mixup'
    else:
        augment, method = cutmix_data, 'cutmix'

    aug_x, y_a, y_b, lam = augment(images, labels, alpha)
    return aug_x, (y_a, y_b, lam, method)

def unpickle(file):
    """
    Load one pickled object from the path ``file``.

    The file is decoded with ``encoding='bytes'``, so string keys pickled by
    Python 2 come back as ``bytes``.

    NOTE: unpickling can execute arbitrary code; only use this on files from
    a trusted source.
    """
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')

class CIFARDataset(Dataset):
    """
    Dataset over raw CIFAR-style image rows.

    Args:
        data: Array that can be reshaped to ``(-1, 3, 32, 32)``; it is stored
            as float32 divided by 255.
        labels: Indexable collection of labels aligned with ``data``.
        transform: Optional callable applied to the PIL image.
    """
    def __init__(self, data, labels, transform=None):
        # Stored channel-first as float32 (raw values divided by 255)
        self.data = data.reshape(-1, 3, 32, 32).astype("float32") / 255.0
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of images."""
        return len(self.data)

    def _to_uint8_hwc(self, idx):
        """Return image ``idx`` as a uint8 array in height-width-channel order."""
        # The float32 round trip (/255 then *255) is not guaranteed to be
        # exact and astype("uint8") truncates, so round to the nearest
        # integer first to avoid losing one intensity level.
        scaled = np.rint(self.data[idx] * 255)
        return scaled.astype("uint8").transpose(1, 2, 0)  # CHW -> HWC

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``, applying ``transform`` if set."""
        img = Image.fromarray(self._to_uint8_hwc(idx))
        label = self.labels[idx]
        # Explicit None check: a transform object may define its own truthiness
        if self.transform is not None:
            img = self.transform(img)
        return img, label

def load_cifar10_batches(root_dir):
    """
    Read ``data_batch_1`` .. ``data_batch_5`` from ``root_dir`` and merge them.

    Returns:
        ``(X, y)``: the batches' ``b'data'`` arrays concatenated along axis 0
        and their ``b'labels'`` lists joined into one NumPy array.
    """
    # Load the five batch files in ascending order
    batches = [unpickle(f"{root_dir}/data_batch_{i}") for i in range(1, 6)]
    X = np.concatenate([batch[b'data'] for batch in batches], axis=0)
    y = np.array([label for batch in batches for label in batch[b'labels']])
    return X, y

def train():
    """
    Train LightResNet18_v2 on CIFAR-10 and save the best checkpoint.

    Pipeline: torchvision CIFAR-10 with crop/flip/RandAugment, MixUp or
    CutMix applied per batch in the collate function, SGD with Nesterov
    momentum, label smoothing and cosine learning-rate annealing over the
    full run. After each epoch the model is evaluated and its weights are
    written to ``best_model.pth`` whenever the accuracy improves.

    NOTE(review): the "validation" set is the CIFAR-10 ``train=False`` split,
    so checkpoint selection is done on the test data.
    """
    # Local import keeps this function self-contained
    from functools import partial

    # Set device configuration
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f"Using device: {device}")

    # Data loading and preprocessing
    # Note: torchvision's CIFAR-10 dataset is used here; the pickle-based
    # loader defined in this file is not called.
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.RandAugment(),  # Using RandAugment for strong data augmentation
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),  # CIFAR-10 mean/std
                             (0.2470, 0.2435, 0.2616))
    ])

    transform_val = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2470, 0.2435, 0.2616))
    ])

    # Using torchvision's built-in CIFAR-10 dataset instead of custom loader
    train_dataset = datasets.CIFAR10(root='./data', train=True,
                                   download=True, transform=transform_train)
    val_dataset = datasets.CIFAR10(root='./data', train=False,
                                  download=True, transform=transform_val)

    # Create data loaders with mixed augmentation for training.
    # functools.partial (unlike a lambda) can be pickled, which worker
    # processes need on platforms that start them with "spawn".
    train_loader = DataLoader(
        train_dataset,
        batch_size=128,
        shuffle=True,
        num_workers=4,
        collate_fn=partial(mixup_cutmix_collate, alpha=1.0, p=0.5)  # MixUp with prob. 0.5, else CutMix
    )
    val_loader = DataLoader(
        val_dataset,
        batch_size=128,
        shuffle=False,
        num_workers=4
    )

    # Model initialization
    model = LightResNet18_v2(num_classes=10, drop_prob=0.1).to(device)  # Assuming model definition exists

    best_acc = 0.0
    num_epochs = 300  # Total training epochs

    # Training configuration
    base_criterion = nn.CrossEntropyLoss(label_smoothing=0.1)  # Regularization through label smoothing
    optimizer = optim.SGD(
        model.parameters(),
        lr=0.1,
        momentum=0.9,
        nesterov=True,
        weight_decay=5e-4  # L2 regularization
    )
    # Tie the cosine period to the epoch count so the two cannot drift apart
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct_approx = 0.0
        total_approx = 0

        train_pbar = tqdm(train_loader, desc=f"Epoch [{epoch+1}/{num_epochs}]", leave=False)
        for images, label_info in train_pbar:
            # Unpack augmentation information (method is not used below)
            y_a, y_b, lam, method = label_info
            images = images.to(device)
            y_a, y_b = y_a.to(device), y_b.to(device)

            # Forward pass and loss calculation
            optimizer.zero_grad()
            outputs = model(images)

            # Both augmentations use the same lam-weighted two-label loss
            loss = lam * base_criterion(outputs, y_a) + (1 - lam) * base_criterion(outputs, y_b)
            loss.backward()
            optimizer.step()

            # Track metrics (loss is a batch mean, so weight it by batch size)
            running_loss += loss.item() * images.size(0)
            _, preds = torch.max(outputs, 1)
            # Approximate accuracy accounting for mixed labels
            correct_approx += (preds == y_a).sum().item() * lam + (preds == y_b).sum().item() * (1 - lam)
            total_approx += images.size(0)

            train_pbar.set_postfix(loss=f"{loss.item():.3f}")

        # Update learning rate once per epoch
        scheduler.step()

        # Calculate epoch metrics
        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = 100.0 * correct_approx / total_approx

        # Validation phase
        model.eval()
        val_correct = 0
        val_total = 0
        with torch.no_grad():
            for imgs, labels in val_loader:
                imgs, labels = imgs.to(device), labels.to(device)
                outputs = model(imgs)
                _, pred = torch.max(outputs, 1)
                val_correct += (pred == labels).sum().item()
                val_total += labels.size(0)
        val_acc = 100.0 * val_correct / val_total

        # Print epoch summary
        print(f"Epoch {epoch+1}/{num_epochs} | "
              f"Train Loss: {epoch_loss:.4f}, Approx Train Acc: {epoch_acc:.2f}% | "
              f"Val Acc: {val_acc:.2f}%")

        # Save best model
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), "best_model.pth")
            print(f"   [*] Saved new best model: Val Acc = {val_acc:.2f}%")

    print(f"Training complete. Best validation accuracy: {best_acc:.2f}%")
    print("Best model saved as best_model.pth")

# Start training only when this file is executed as a script
if __name__ == "__main__":
    train()

Using device: cuda
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:18<00:00, 9.17MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified




Epoch 1/300 | Train Loss: 2.0815, Approx Train Acc: 26.68% | Val Acc: 47.50%
   [*] Saved new best model: Val Acc = 47.50%




Epoch 2/300 | Train Loss: 1.8909, Approx Train Acc: 38.38% | Val Acc: 58.55%
   [*] Saved new best model: Val Acc = 58.55%




Epoch 3/300 | Train Loss: 1.8000, Approx Train Acc: 43.69% | Val Acc: 65.01%
   [*] Saved new best model: Val Acc = 65.01%




Epoch 4/300 | Train Loss: 1.7335, Approx Train Acc: 47.48% | Val Acc: 63.09%




Epoch 5/300 | Train Loss: 1.6937, Approx Train Acc: 49.51% | Val Acc: 73.21%
   [*] Saved new best model: Val Acc = 73.21%




Epoch 6/300 | Train Loss: 1.6966, Approx Train Acc: 49.44% | Val Acc: 73.01%




Epoch 7/300 | Train Loss: 1.6556, Approx Train Acc: 51.53% | Val Acc: 67.80%




Epoch 8/300 | Train Loss: 1.6611, Approx Train Acc: 51.33% | Val Acc: 75.21%
   [*] Saved new best model: Val Acc = 75.21%




Epoch 9/300 | Train Loss: 1.6617, Approx Train Acc: 51.34% | Val Acc: 75.57%
   [*] Saved new best model: Val Acc = 75.57%




Epoch 10/300 | Train Loss: 1.6628, Approx Train Acc: 51.16% | Val Acc: 68.57%




Epoch 11/300 | Train Loss: 1.6243, Approx Train Acc: 53.23% | Val Acc: 74.52%




Epoch 12/300 | Train Loss: 1.6171, Approx Train Acc: 53.61% | Val Acc: 75.34%




Epoch 13/300 | Train Loss: 1.6194, Approx Train Acc: 53.32% | Val Acc: 78.15%
   [*] Saved new best model: Val Acc = 78.15%




Epoch 14/300 | Train Loss: 1.5990, Approx Train Acc: 54.47% | Val Acc: 79.45%
   [*] Saved new best model: Val Acc = 79.45%




Epoch 15/300 | Train Loss: 1.5842, Approx Train Acc: 55.05% | Val Acc: 77.78%




Epoch 16/300 | Train Loss: 1.6106, Approx Train Acc: 53.65% | Val Acc: 77.85%




Epoch 17/300 | Train Loss: 1.6014, Approx Train Acc: 54.01% | Val Acc: 74.70%




Epoch 18/300 | Train Loss: 1.5722, Approx Train Acc: 55.56% | Val Acc: 77.23%




Epoch 19/300 | Train Loss: 1.6029, Approx Train Acc: 54.14% | Val Acc: 81.35%
   [*] Saved new best model: Val Acc = 81.35%




Epoch 20/300 | Train Loss: 1.5753, Approx Train Acc: 55.39% | Val Acc: 72.71%




Epoch 21/300 | Train Loss: 1.6071, Approx Train Acc: 54.01% | Val Acc: 78.78%




Epoch 22/300 | Train Loss: 1.5705, Approx Train Acc: 56.10% | Val Acc: 77.03%




Epoch 23/300 | Train Loss: 1.5692, Approx Train Acc: 55.94% | Val Acc: 76.99%




Epoch 24/300 | Train Loss: 1.5765, Approx Train Acc: 55.58% | Val Acc: 79.66%




Epoch 25/300 | Train Loss: 1.5799, Approx Train Acc: 55.44% | Val Acc: 81.25%




Epoch 26/300 | Train Loss: 1.5852, Approx Train Acc: 55.03% | Val Acc: 80.05%




Epoch 27/300 | Train Loss: 1.5840, Approx Train Acc: 54.94% | Val Acc: 83.84%
   [*] Saved new best model: Val Acc = 83.84%




Epoch 28/300 | Train Loss: 1.5630, Approx Train Acc: 56.07% | Val Acc: 77.88%




Epoch 29/300 | Train Loss: 1.5814, Approx Train Acc: 55.01% | Val Acc: 78.58%




Epoch 30/300 | Train Loss: 1.5677, Approx Train Acc: 55.94% | Val Acc: 77.51%




Epoch 31/300 | Train Loss: 1.5814, Approx Train Acc: 55.66% | Val Acc: 79.28%




Epoch 32/300 | Train Loss: 1.5881, Approx Train Acc: 55.06% | Val Acc: 78.82%




Epoch 33/300 | Train Loss: 1.5936, Approx Train Acc: 54.66% | Val Acc: 78.48%




Epoch 34/300 | Train Loss: 1.5905, Approx Train Acc: 54.72% | Val Acc: 80.18%




Epoch 35/300 | Train Loss: 1.5723, Approx Train Acc: 55.78% | Val Acc: 77.26%




Epoch 36/300 | Train Loss: 1.5669, Approx Train Acc: 56.06% | Val Acc: 78.30%




Epoch 37/300 | Train Loss: 1.5645, Approx Train Acc: 55.88% | Val Acc: 80.23%




Epoch 38/300 | Train Loss: 1.5563, Approx Train Acc: 56.20% | Val Acc: 77.62%




Epoch 39/300 | Train Loss: 1.5665, Approx Train Acc: 56.04% | Val Acc: 80.97%




Epoch 40/300 | Train Loss: 1.5493, Approx Train Acc: 56.85% | Val Acc: 80.78%




Epoch 41/300 | Train Loss: 1.5468, Approx Train Acc: 57.02% | Val Acc: 82.95%




Epoch 42/300 | Train Loss: 1.5629, Approx Train Acc: 55.95% | Val Acc: 82.76%




Epoch 43/300 | Train Loss: 1.5668, Approx Train Acc: 56.06% | Val Acc: 79.27%




Epoch 44/300 | Train Loss: 1.5628, Approx Train Acc: 55.98% | Val Acc: 81.07%




Epoch 45/300 | Train Loss: 1.5496, Approx Train Acc: 56.74% | Val Acc: 82.73%




Epoch 46/300 | Train Loss: 1.5562, Approx Train Acc: 56.68% | Val Acc: 77.82%




Epoch 47/300 | Train Loss: 1.5492, Approx Train Acc: 56.96% | Val Acc: 78.07%




Epoch 48/300 | Train Loss: 1.5401, Approx Train Acc: 57.20% | Val Acc: 82.10%




Epoch 49/300 | Train Loss: 1.5455, Approx Train Acc: 56.97% | Val Acc: 81.52%




Epoch 50/300 | Train Loss: 1.5629, Approx Train Acc: 55.89% | Val Acc: 81.46%




Epoch 51/300 | Train Loss: 1.5462, Approx Train Acc: 56.85% | Val Acc: 80.88%




Epoch 52/300 | Train Loss: 1.5295, Approx Train Acc: 57.69% | Val Acc: 70.61%




Epoch 53/300 | Train Loss: 1.5307, Approx Train Acc: 57.43% | Val Acc: 81.67%




Epoch 54/300 | Train Loss: 1.5447, Approx Train Acc: 56.87% | Val Acc: 82.62%




Epoch 55/300 | Train Loss: 1.5338, Approx Train Acc: 57.57% | Val Acc: 81.60%




Epoch 56/300 | Train Loss: 1.5186, Approx Train Acc: 58.19% | Val Acc: 83.23%




Epoch 57/300 | Train Loss: 1.5727, Approx Train Acc: 55.50% | Val Acc: 80.46%




Epoch 58/300 | Train Loss: 1.5309, Approx Train Acc: 57.46% | Val Acc: 78.75%




Epoch 59/300 | Train Loss: 1.5330, Approx Train Acc: 57.36% | Val Acc: 78.74%




Epoch 60/300 | Train Loss: 1.5198, Approx Train Acc: 58.25% | Val Acc: 81.61%




Epoch 61/300 | Train Loss: 1.5610, Approx Train Acc: 56.31% | Val Acc: 84.52%
   [*] Saved new best model: Val Acc = 84.52%




Epoch 62/300 | Train Loss: 1.5554, Approx Train Acc: 56.48% | Val Acc: 80.25%




Epoch 63/300 | Train Loss: 1.5458, Approx Train Acc: 57.03% | Val Acc: 81.79%




Epoch 64/300 | Train Loss: 1.5316, Approx Train Acc: 57.75% | Val Acc: 83.73%




Epoch 65/300 | Train Loss: 1.5586, Approx Train Acc: 56.31% | Val Acc: 83.29%




Epoch 66/300 | Train Loss: 1.5360, Approx Train Acc: 57.44% | Val Acc: 79.87%




Epoch 67/300 | Train Loss: 1.5585, Approx Train Acc: 56.35% | Val Acc: 78.75%




Epoch 68/300 | Train Loss: 1.5742, Approx Train Acc: 55.25% | Val Acc: 81.14%




Epoch 69/300 | Train Loss: 1.5539, Approx Train Acc: 56.17% | Val Acc: 82.05%




Epoch 70/300 | Train Loss: 1.5401, Approx Train Acc: 57.08% | Val Acc: 83.31%




Epoch 71/300 | Train Loss: 1.5198, Approx Train Acc: 57.76% | Val Acc: 81.57%




Epoch 72/300 | Train Loss: 1.5292, Approx Train Acc: 57.63% | Val Acc: 83.27%




Epoch 73/300 | Train Loss: 1.5314, Approx Train Acc: 57.38% | Val Acc: 83.93%




Epoch 74/300 | Train Loss: 1.5242, Approx Train Acc: 58.17% | Val Acc: 81.29%




Epoch 75/300 | Train Loss: 1.5208, Approx Train Acc: 58.12% | Val Acc: 78.34%




Epoch 76/300 | Train Loss: 1.5133, Approx Train Acc: 58.60% | Val Acc: 83.34%




Epoch 77/300 | Train Loss: 1.5276, Approx Train Acc: 57.77% | Val Acc: 79.97%




Epoch 78/300 | Train Loss: 1.5108, Approx Train Acc: 58.52% | Val Acc: 81.51%




Epoch 79/300 | Train Loss: 1.5543, Approx Train Acc: 56.10% | Val Acc: 81.17%




Epoch 80/300 | Train Loss: 1.5317, Approx Train Acc: 57.52% | Val Acc: 85.14%
   [*] Saved new best model: Val Acc = 85.14%




Epoch 81/300 | Train Loss: 1.5298, Approx Train Acc: 57.65% | Val Acc: 79.26%




Epoch 82/300 | Train Loss: 1.5197, Approx Train Acc: 58.37% | Val Acc: 84.49%




Epoch 83/300 | Train Loss: 1.5531, Approx Train Acc: 56.20% | Val Acc: 84.11%




Epoch 84/300 | Train Loss: 1.5075, Approx Train Acc: 58.73% | Val Acc: 80.83%




Epoch 85/300 | Train Loss: 1.5302, Approx Train Acc: 57.37% | Val Acc: 82.62%




Epoch 86/300 | Train Loss: 1.5149, Approx Train Acc: 58.23% | Val Acc: 79.37%




Epoch 87/300 | Train Loss: 1.5067, Approx Train Acc: 58.81% | Val Acc: 78.40%




Epoch 88/300 | Train Loss: 1.5041, Approx Train Acc: 58.93% | Val Acc: 83.46%




Epoch 89/300 | Train Loss: 1.4942, Approx Train Acc: 59.34% | Val Acc: 84.50%




Epoch 90/300 | Train Loss: 1.5183, Approx Train Acc: 58.20% | Val Acc: 80.10%




Epoch 91/300 | Train Loss: 1.5134, Approx Train Acc: 58.20% | Val Acc: 84.68%




Epoch 92/300 | Train Loss: 1.5213, Approx Train Acc: 57.89% | Val Acc: 84.31%




Epoch 93/300 | Train Loss: 1.5064, Approx Train Acc: 58.56% | Val Acc: 84.73%




Epoch 94/300 | Train Loss: 1.5233, Approx Train Acc: 58.05% | Val Acc: 82.86%




Epoch 95/300 | Train Loss: 1.5098, Approx Train Acc: 58.45% | Val Acc: 83.28%




Epoch 96/300 | Train Loss: 1.5246, Approx Train Acc: 57.77% | Val Acc: 83.18%




Epoch 97/300 | Train Loss: 1.5195, Approx Train Acc: 57.90% | Val Acc: 84.47%




Epoch 98/300 | Train Loss: 1.5239, Approx Train Acc: 57.88% | Val Acc: 83.74%




Epoch 99/300 | Train Loss: 1.5147, Approx Train Acc: 58.05% | Val Acc: 80.86%




Epoch 100/300 | Train Loss: 1.5237, Approx Train Acc: 57.69% | Val Acc: 83.88%




Epoch 101/300 | Train Loss: 1.5077, Approx Train Acc: 58.51% | Val Acc: 84.25%




Epoch 102/300 | Train Loss: 1.5236, Approx Train Acc: 57.90% | Val Acc: 81.47%




Epoch 103/300 | Train Loss: 1.5088, Approx Train Acc: 58.62% | Val Acc: 84.51%




Epoch 104/300 | Train Loss: 1.4905, Approx Train Acc: 59.37% | Val Acc: 84.03%




Epoch 105/300 | Train Loss: 1.5159, Approx Train Acc: 58.07% | Val Acc: 82.76%




Epoch 106/300 | Train Loss: 1.4916, Approx Train Acc: 59.45% | Val Acc: 81.69%




Epoch 107/300 | Train Loss: 1.4818, Approx Train Acc: 59.77% | Val Acc: 84.74%




Epoch 108/300 | Train Loss: 1.5043, Approx Train Acc: 58.68% | Val Acc: 83.67%




Epoch 109/300 | Train Loss: 1.4992, Approx Train Acc: 59.23% | Val Acc: 82.01%




Epoch 110/300 | Train Loss: 1.5084, Approx Train Acc: 58.27% | Val Acc: 85.75%
   [*] Saved new best model: Val Acc = 85.75%




Epoch 111/300 | Train Loss: 1.5003, Approx Train Acc: 58.98% | Val Acc: 84.07%




Epoch 112/300 | Train Loss: 1.5083, Approx Train Acc: 58.53% | Val Acc: 85.08%




Epoch 113/300 | Train Loss: 1.5130, Approx Train Acc: 58.29% | Val Acc: 86.24%
   [*] Saved new best model: Val Acc = 86.24%




Epoch 114/300 | Train Loss: 1.5085, Approx Train Acc: 58.62% | Val Acc: 86.36%
   [*] Saved new best model: Val Acc = 86.36%




Epoch 115/300 | Train Loss: 1.4949, Approx Train Acc: 59.00% | Val Acc: 81.01%




Epoch 116/300 | Train Loss: 1.5072, Approx Train Acc: 58.63% | Val Acc: 85.59%




Epoch 117/300 | Train Loss: 1.5179, Approx Train Acc: 58.02% | Val Acc: 86.04%




Epoch 118/300 | Train Loss: 1.4732, Approx Train Acc: 60.15% | Val Acc: 82.17%




Epoch 119/300 | Train Loss: 1.5117, Approx Train Acc: 58.26% | Val Acc: 84.47%




Epoch 120/300 | Train Loss: 1.5114, Approx Train Acc: 58.52% | Val Acc: 83.87%




Epoch 121/300 | Train Loss: 1.5028, Approx Train Acc: 58.71% | Val Acc: 83.91%




Epoch 122/300 | Train Loss: 1.4889, Approx Train Acc: 59.33% | Val Acc: 85.92%




Epoch 123/300 | Train Loss: 1.5170, Approx Train Acc: 58.10% | Val Acc: 83.98%




Epoch 124/300 | Train Loss: 1.5014, Approx Train Acc: 58.81% | Val Acc: 86.43%
   [*] Saved new best model: Val Acc = 86.43%




Epoch 125/300 | Train Loss: 1.4927, Approx Train Acc: 59.18% | Val Acc: 85.18%




Epoch 126/300 | Train Loss: 1.4952, Approx Train Acc: 59.15% | Val Acc: 87.32%
   [*] Saved new best model: Val Acc = 87.32%




Epoch 127/300 | Train Loss: 1.5054, Approx Train Acc: 58.46% | Val Acc: 81.61%




Epoch 128/300 | Train Loss: 1.4885, Approx Train Acc: 59.61% | Val Acc: 87.41%
   [*] Saved new best model: Val Acc = 87.41%




Epoch 129/300 | Train Loss: 1.5044, Approx Train Acc: 58.71% | Val Acc: 85.34%




Epoch 130/300 | Train Loss: 1.4795, Approx Train Acc: 59.61% | Val Acc: 87.37%




Epoch 131/300 | Train Loss: 1.5008, Approx Train Acc: 58.89% | Val Acc: 85.25%




Epoch 132/300 | Train Loss: 1.4682, Approx Train Acc: 60.47% | Val Acc: 85.45%




Epoch 133/300 | Train Loss: 1.4666, Approx Train Acc: 60.41% | Val Acc: 85.94%




Epoch 134/300 | Train Loss: 1.4834, Approx Train Acc: 59.74% | Val Acc: 79.67%




Epoch 135/300 | Train Loss: 1.4950, Approx Train Acc: 59.11% | Val Acc: 87.00%




Epoch 136/300 | Train Loss: 1.4819, Approx Train Acc: 59.53% | Val Acc: 85.65%




Epoch 137/300 | Train Loss: 1.4602, Approx Train Acc: 60.80% | Val Acc: 81.42%




Epoch 138/300 | Train Loss: 1.4776, Approx Train Acc: 59.96% | Val Acc: 83.33%




Epoch 139/300 | Train Loss: 1.4817, Approx Train Acc: 59.70% | Val Acc: 87.94%
   [*] Saved new best model: Val Acc = 87.94%




Epoch 140/300 | Train Loss: 1.4780, Approx Train Acc: 59.66% | Val Acc: 87.11%




Epoch 141/300 | Train Loss: 1.4846, Approx Train Acc: 59.52% | Val Acc: 87.05%




Epoch 142/300 | Train Loss: 1.4777, Approx Train Acc: 59.60% | Val Acc: 83.63%




Epoch 143/300 | Train Loss: 1.4939, Approx Train Acc: 59.01% | Val Acc: 87.97%
   [*] Saved new best model: Val Acc = 87.97%




Epoch 144/300 | Train Loss: 1.4632, Approx Train Acc: 60.51% | Val Acc: 84.54%




Epoch 145/300 | Train Loss: 1.4592, Approx Train Acc: 60.46% | Val Acc: 86.73%




Epoch 146/300 | Train Loss: 1.4609, Approx Train Acc: 60.80% | Val Acc: 86.85%




Epoch 147/300 | Train Loss: 1.4667, Approx Train Acc: 60.39% | Val Acc: 85.46%




Epoch 148/300 | Train Loss: 1.4636, Approx Train Acc: 60.56% | Val Acc: 86.08%




Epoch 149/300 | Train Loss: 1.4494, Approx Train Acc: 61.25% | Val Acc: 85.17%




Epoch 150/300 | Train Loss: 1.4708, Approx Train Acc: 60.15% | Val Acc: 85.26%




Epoch 151/300 | Train Loss: 1.4804, Approx Train Acc: 59.89% | Val Acc: 85.38%




Epoch 152/300 | Train Loss: 1.4582, Approx Train Acc: 60.89% | Val Acc: 87.82%




Epoch 153/300 | Train Loss: 1.4516, Approx Train Acc: 61.06% | Val Acc: 86.61%




Epoch 154/300 | Train Loss: 1.4506, Approx Train Acc: 61.16% | Val Acc: 88.35%
   [*] Saved new best model: Val Acc = 88.35%




Epoch 155/300 | Train Loss: 1.4331, Approx Train Acc: 61.89% | Val Acc: 86.73%




Epoch 156/300 | Train Loss: 1.4440, Approx Train Acc: 61.71% | Val Acc: 86.84%




Epoch 157/300 | Train Loss: 1.4529, Approx Train Acc: 61.06% | Val Acc: 87.28%




Epoch 158/300 | Train Loss: 1.4524, Approx Train Acc: 60.80% | Val Acc: 87.09%




Epoch 159/300 | Train Loss: 1.4803, Approx Train Acc: 59.69% | Val Acc: 86.26%




Epoch 160/300 | Train Loss: 1.4404, Approx Train Acc: 61.62% | Val Acc: 87.58%




Epoch 161/300 | Train Loss: 1.4275, Approx Train Acc: 62.05% | Val Acc: 86.89%




Epoch 162/300 | Train Loss: 1.4644, Approx Train Acc: 60.59% | Val Acc: 87.13%




Epoch 163/300 | Train Loss: 1.4516, Approx Train Acc: 60.84% | Val Acc: 88.24%




Epoch 164/300 | Train Loss: 1.4293, Approx Train Acc: 62.33% | Val Acc: 88.86%
   [*] Saved new best model: Val Acc = 88.86%




Epoch 165/300 | Train Loss: 1.4876, Approx Train Acc: 59.24% | Val Acc: 88.52%




Epoch 166/300 | Train Loss: 1.4435, Approx Train Acc: 61.42% | Val Acc: 86.71%




Epoch 167/300 | Train Loss: 1.4314, Approx Train Acc: 62.05% | Val Acc: 87.96%




Epoch 168/300 | Train Loss: 1.4501, Approx Train Acc: 61.06% | Val Acc: 88.37%




Epoch 169/300 | Train Loss: 1.4469, Approx Train Acc: 61.28% | Val Acc: 87.18%




Epoch 170/300 | Train Loss: 1.4328, Approx Train Acc: 61.77% | Val Acc: 88.13%




Epoch 171/300 | Train Loss: 1.4249, Approx Train Acc: 62.16% | Val Acc: 87.63%




Epoch 172/300 | Train Loss: 1.4552, Approx Train Acc: 60.68% | Val Acc: 88.52%




Epoch 173/300 | Train Loss: 1.4250, Approx Train Acc: 62.27% | Val Acc: 83.53%




Epoch 174/300 | Train Loss: 1.4519, Approx Train Acc: 60.97% | Val Acc: 89.04%
   [*] Saved new best model: Val Acc = 89.04%




Epoch 175/300 | Train Loss: 1.4189, Approx Train Acc: 62.58% | Val Acc: 86.77%




Epoch 176/300 | Train Loss: 1.4198, Approx Train Acc: 62.30% | Val Acc: 89.18%
   [*] Saved new best model: Val Acc = 89.18%




Epoch 177/300 | Train Loss: 1.4308, Approx Train Acc: 62.06% | Val Acc: 86.93%




Epoch 178/300 | Train Loss: 1.4296, Approx Train Acc: 62.28% | Val Acc: 87.65%




Epoch 179/300 | Train Loss: 1.4320, Approx Train Acc: 61.59% | Val Acc: 88.15%




Epoch 180/300 | Train Loss: 1.4077, Approx Train Acc: 63.07% | Val Acc: 86.62%




Epoch 181/300 | Train Loss: 1.4451, Approx Train Acc: 61.21% | Val Acc: 86.05%




Epoch 182/300 | Train Loss: 1.4457, Approx Train Acc: 61.09% | Val Acc: 89.26%
   [*] Saved new best model: Val Acc = 89.26%




Epoch 183/300 | Train Loss: 1.4448, Approx Train Acc: 60.98% | Val Acc: 88.71%




Epoch 184/300 | Train Loss: 1.4477, Approx Train Acc: 60.78% | Val Acc: 89.92%
   [*] Saved new best model: Val Acc = 89.92%




Epoch 185/300 | Train Loss: 1.4369, Approx Train Acc: 61.63% | Val Acc: 89.46%




Epoch 186/300 | Train Loss: 1.4220, Approx Train Acc: 62.28% | Val Acc: 88.51%




Epoch 187/300 | Train Loss: 1.4380, Approx Train Acc: 61.25% | Val Acc: 88.82%




Epoch 188/300 | Train Loss: 1.4211, Approx Train Acc: 62.54% | Val Acc: 89.18%




Epoch 189/300 | Train Loss: 1.4129, Approx Train Acc: 62.86% | Val Acc: 86.04%




Epoch 190/300 | Train Loss: 1.3890, Approx Train Acc: 63.86% | Val Acc: 88.77%




Epoch 191/300 | Train Loss: 1.4253, Approx Train Acc: 62.29% | Val Acc: 88.27%




Epoch 192/300 | Train Loss: 1.4421, Approx Train Acc: 61.21% | Val Acc: 88.96%




Epoch 193/300 | Train Loss: 1.4038, Approx Train Acc: 63.15% | Val Acc: 90.05%
   [*] Saved new best model: Val Acc = 90.05%




Epoch 194/300 | Train Loss: 1.3832, Approx Train Acc: 64.08% | Val Acc: 90.44%
   [*] Saved new best model: Val Acc = 90.44%




Epoch 195/300 | Train Loss: 1.4287, Approx Train Acc: 61.69% | Val Acc: 90.57%
   [*] Saved new best model: Val Acc = 90.57%




Epoch 196/300 | Train Loss: 1.4159, Approx Train Acc: 62.56% | Val Acc: 90.12%




Epoch 197/300 | Train Loss: 1.3848, Approx Train Acc: 63.86% | Val Acc: 89.80%




Epoch 198/300 | Train Loss: 1.4087, Approx Train Acc: 62.90% | Val Acc: 89.95%




Epoch 199/300 | Train Loss: 1.4201, Approx Train Acc: 62.04% | Val Acc: 89.12%




Epoch 200/300 | Train Loss: 1.4045, Approx Train Acc: 62.83% | Val Acc: 90.19%




Epoch 201/300 | Train Loss: 1.3864, Approx Train Acc: 63.90% | Val Acc: 91.41%
   [*] Saved new best model: Val Acc = 91.41%




Epoch 202/300 | Train Loss: 1.3979, Approx Train Acc: 63.27% | Val Acc: 88.60%




Epoch 203/300 | Train Loss: 1.4166, Approx Train Acc: 62.16% | Val Acc: 91.09%




Epoch 204/300 | Train Loss: 1.3874, Approx Train Acc: 63.54% | Val Acc: 88.60%




Epoch 205/300 | Train Loss: 1.4134, Approx Train Acc: 62.40% | Val Acc: 91.01%




Epoch 206/300 | Train Loss: 1.3998, Approx Train Acc: 63.23% | Val Acc: 90.27%




Epoch 207/300 | Train Loss: 1.3926, Approx Train Acc: 63.18% | Val Acc: 91.37%




Epoch 208/300 | Train Loss: 1.3923, Approx Train Acc: 63.47% | Val Acc: 91.40%




Epoch 209/300 | Train Loss: 1.3872, Approx Train Acc: 63.52% | Val Acc: 90.82%




Epoch 210/300 | Train Loss: 1.3930, Approx Train Acc: 63.26% | Val Acc: 90.99%




Epoch 211/300 | Train Loss: 1.3935, Approx Train Acc: 63.28% | Val Acc: 90.74%




Epoch 212/300 | Train Loss: 1.3807, Approx Train Acc: 63.78% | Val Acc: 89.16%




Epoch 213/300 | Train Loss: 1.3868, Approx Train Acc: 63.47% | Val Acc: 88.63%




Epoch 214/300 | Train Loss: 1.3614, Approx Train Acc: 64.67% | Val Acc: 88.83%




Epoch 215/300 | Train Loss: 1.3773, Approx Train Acc: 64.23% | Val Acc: 91.10%




Epoch 216/300 | Train Loss: 1.3793, Approx Train Acc: 63.84% | Val Acc: 90.45%




Epoch 217/300 | Train Loss: 1.3852, Approx Train Acc: 63.88% | Val Acc: 91.05%




Epoch 218/300 | Train Loss: 1.3679, Approx Train Acc: 64.56% | Val Acc: 92.17%
   [*] Saved new best model: Val Acc = 92.17%




Epoch 219/300 | Train Loss: 1.3681, Approx Train Acc: 64.44% | Val Acc: 90.90%




Epoch 220/300 | Train Loss: 1.3795, Approx Train Acc: 64.13% | Val Acc: 92.02%




Epoch 221/300 | Train Loss: 1.3539, Approx Train Acc: 65.26% | Val Acc: 90.44%




Epoch 222/300 | Train Loss: 1.3599, Approx Train Acc: 64.57% | Val Acc: 90.85%




Epoch 223/300 | Train Loss: 1.3496, Approx Train Acc: 65.40% | Val Acc: 92.51%
   [*] Saved new best model: Val Acc = 92.51%




Epoch 224/300 | Train Loss: 1.3619, Approx Train Acc: 64.81% | Val Acc: 92.18%




Epoch 225/300 | Train Loss: 1.3467, Approx Train Acc: 65.52% | Val Acc: 90.06%




Epoch 226/300 | Train Loss: 1.3678, Approx Train Acc: 64.14% | Val Acc: 92.31%




Epoch 227/300 | Train Loss: 1.3519, Approx Train Acc: 64.98% | Val Acc: 92.07%




Epoch 228/300 | Train Loss: 1.3627, Approx Train Acc: 64.87% | Val Acc: 91.24%




Epoch 229/300 | Train Loss: 1.3538, Approx Train Acc: 64.67% | Val Acc: 91.56%




Epoch 230/300 | Train Loss: 1.3382, Approx Train Acc: 66.21% | Val Acc: 92.16%




Epoch 231/300 | Train Loss: 1.3399, Approx Train Acc: 65.55% | Val Acc: 92.44%




Epoch 232/300 | Train Loss: 1.3218, Approx Train Acc: 66.77% | Val Acc: 92.95%
   [*] Saved new best model: Val Acc = 92.95%




Epoch 233/300 | Train Loss: 1.3386, Approx Train Acc: 65.60% | Val Acc: 93.28%
   [*] Saved new best model: Val Acc = 93.28%




Epoch 234/300 | Train Loss: 1.3436, Approx Train Acc: 65.40% | Val Acc: 92.58%




Epoch 235/300 | Train Loss: 1.3577, Approx Train Acc: 64.47% | Val Acc: 92.34%




Epoch 236/300 | Train Loss: 1.3293, Approx Train Acc: 65.90% | Val Acc: 92.74%




Epoch 237/300 | Train Loss: 1.3539, Approx Train Acc: 64.59% | Val Acc: 91.98%




Epoch 238/300 | Train Loss: 1.3175, Approx Train Acc: 66.71% | Val Acc: 92.69%




Epoch 239/300 | Train Loss: 1.3264, Approx Train Acc: 65.95% | Val Acc: 92.24%




Epoch 240/300 | Train Loss: 1.3356, Approx Train Acc: 65.53% | Val Acc: 93.15%




Epoch 241/300 | Train Loss: 1.3354, Approx Train Acc: 65.48% | Val Acc: 92.95%




Epoch 242/300 | Train Loss: 1.3193, Approx Train Acc: 66.10% | Val Acc: 93.67%
   [*] Saved new best model: Val Acc = 93.67%




Epoch 243/300 | Train Loss: 1.3217, Approx Train Acc: 66.02% | Val Acc: 93.29%




Epoch 244/300 | Train Loss: 1.3400, Approx Train Acc: 65.02% | Val Acc: 93.01%




Epoch 245/300 | Train Loss: 1.3252, Approx Train Acc: 65.81% | Val Acc: 93.57%




Epoch 246/300 | Train Loss: 1.3257, Approx Train Acc: 66.15% | Val Acc: 93.33%




Epoch 247/300 | Train Loss: 1.2798, Approx Train Acc: 68.41% | Val Acc: 93.55%




Epoch 248/300 | Train Loss: 1.3253, Approx Train Acc: 65.86% | Val Acc: 93.48%




Epoch 249/300 | Train Loss: 1.3380, Approx Train Acc: 65.25% | Val Acc: 93.71%
   [*] Saved new best model: Val Acc = 93.71%




Epoch 250/300 | Train Loss: 1.3269, Approx Train Acc: 65.66% | Val Acc: 93.99%
   [*] Saved new best model: Val Acc = 93.99%




Epoch 251/300 | Train Loss: 1.3038, Approx Train Acc: 67.12% | Val Acc: 93.59%




Epoch 252/300 | Train Loss: 1.3295, Approx Train Acc: 65.43% | Val Acc: 94.07%
   [*] Saved new best model: Val Acc = 94.07%




Epoch 253/300 | Train Loss: 1.2967, Approx Train Acc: 67.46% | Val Acc: 94.05%




Epoch 254/300 | Train Loss: 1.2965, Approx Train Acc: 67.27% | Val Acc: 93.08%




Epoch 255/300 | Train Loss: 1.2751, Approx Train Acc: 68.33% | Val Acc: 94.15%
   [*] Saved new best model: Val Acc = 94.15%




Epoch 256/300 | Train Loss: 1.2845, Approx Train Acc: 68.08% | Val Acc: 94.00%




Epoch 257/300 | Train Loss: 1.2977, Approx Train Acc: 67.20% | Val Acc: 94.15%




Epoch 258/300 | Train Loss: 1.3007, Approx Train Acc: 66.85% | Val Acc: 94.07%




Epoch 259/300 | Train Loss: 1.2695, Approx Train Acc: 68.33% | Val Acc: 94.33%
   [*] Saved new best model: Val Acc = 94.33%




Epoch 260/300 | Train Loss: 1.2884, Approx Train Acc: 67.43% | Val Acc: 94.51%
   [*] Saved new best model: Val Acc = 94.51%




Epoch 261/300 | Train Loss: 1.2627, Approx Train Acc: 68.87% | Val Acc: 94.19%




Epoch 262/300 | Train Loss: 1.2597, Approx Train Acc: 68.59% | Val Acc: 94.31%




Epoch 263/300 | Train Loss: 1.2729, Approx Train Acc: 67.91% | Val Acc: 94.43%




Epoch 264/300 | Train Loss: 1.2877, Approx Train Acc: 67.46% | Val Acc: 95.05%
   [*] Saved new best model: Val Acc = 95.05%




Epoch 265/300 | Train Loss: 1.2878, Approx Train Acc: 67.43% | Val Acc: 94.73%




Epoch 266/300 | Train Loss: 1.2667, Approx Train Acc: 68.13% | Val Acc: 95.15%
   [*] Saved new best model: Val Acc = 95.15%




Epoch 267/300 | Train Loss: 1.2575, Approx Train Acc: 68.92% | Val Acc: 94.49%




Epoch 268/300 | Train Loss: 1.2641, Approx Train Acc: 68.40% | Val Acc: 94.72%




Epoch 269/300 | Train Loss: 1.2782, Approx Train Acc: 67.59% | Val Acc: 94.63%




Epoch 270/300 | Train Loss: 1.2381, Approx Train Acc: 69.42% | Val Acc: 95.19%
   [*] Saved new best model: Val Acc = 95.19%




Epoch 271/300 | Train Loss: 1.2540, Approx Train Acc: 69.09% | Val Acc: 95.07%




Epoch 272/300 | Train Loss: 1.2515, Approx Train Acc: 68.92% | Val Acc: 95.39%
   [*] Saved new best model: Val Acc = 95.39%




Epoch 273/300 | Train Loss: 1.2710, Approx Train Acc: 67.93% | Val Acc: 95.18%




Epoch 274/300 | Train Loss: 1.2484, Approx Train Acc: 68.66% | Val Acc: 95.55%
   [*] Saved new best model: Val Acc = 95.55%




Epoch 275/300 | Train Loss: 1.2455, Approx Train Acc: 69.29% | Val Acc: 95.27%




Epoch 276/300 | Train Loss: 1.2471, Approx Train Acc: 68.82% | Val Acc: 95.42%




Epoch 277/300 | Train Loss: 1.2314, Approx Train Acc: 69.76% | Val Acc: 95.40%




Epoch 278/300 | Train Loss: 1.2411, Approx Train Acc: 69.20% | Val Acc: 95.33%




Epoch 279/300 | Train Loss: 1.2421, Approx Train Acc: 69.34% | Val Acc: 95.64%
   [*] Saved new best model: Val Acc = 95.64%




Epoch 280/300 | Train Loss: 1.2173, Approx Train Acc: 70.27% | Val Acc: 95.62%




Epoch 281/300 | Train Loss: 1.2455, Approx Train Acc: 68.83% | Val Acc: 95.54%




Epoch 282/300 | Train Loss: 1.2147, Approx Train Acc: 70.68% | Val Acc: 95.40%




Epoch 283/300 | Train Loss: 1.2407, Approx Train Acc: 68.91% | Val Acc: 95.68%
   [*] Saved new best model: Val Acc = 95.68%




Epoch 284/300 | Train Loss: 1.2549, Approx Train Acc: 68.26% | Val Acc: 95.58%




Epoch 285/300 | Train Loss: 1.2111, Approx Train Acc: 70.80% | Val Acc: 95.78%
   [*] Saved new best model: Val Acc = 95.78%




Epoch 286/300 | Train Loss: 1.2308, Approx Train Acc: 69.47% | Val Acc: 95.56%




Epoch 287/300 | Train Loss: 1.2166, Approx Train Acc: 70.23% | Val Acc: 95.62%




Epoch 288/300 | Train Loss: 1.2361, Approx Train Acc: 69.58% | Val Acc: 95.88%
   [*] Saved new best model: Val Acc = 95.88%




Epoch 289/300 | Train Loss: 1.2435, Approx Train Acc: 68.87% | Val Acc: 95.74%




Epoch 290/300 | Train Loss: 1.2251, Approx Train Acc: 69.77% | Val Acc: 95.70%




Epoch 291/300 | Train Loss: 1.2254, Approx Train Acc: 70.13% | Val Acc: 95.74%




Epoch 292/300 | Train Loss: 1.2117, Approx Train Acc: 70.21% | Val Acc: 95.90%
   [*] Saved new best model: Val Acc = 95.90%




Epoch 293/300 | Train Loss: 1.2163, Approx Train Acc: 70.52% | Val Acc: 96.03%
   [*] Saved new best model: Val Acc = 96.03%




Epoch 294/300 | Train Loss: 1.2086, Approx Train Acc: 70.49% | Val Acc: 96.08%
   [*] Saved new best model: Val Acc = 96.08%




Epoch 295/300 | Train Loss: 1.2264, Approx Train Acc: 69.90% | Val Acc: 96.06%




Epoch 296/300 | Train Loss: 1.2306, Approx Train Acc: 69.55% | Val Acc: 96.01%




Epoch 297/300 | Train Loss: 1.2342, Approx Train Acc: 69.25% | Val Acc: 95.88%




Epoch 298/300 | Train Loss: 1.2306, Approx Train Acc: 69.69% | Val Acc: 96.11%
   [*] Saved new best model: Val Acc = 96.11%




Epoch 299/300 | Train Loss: 1.2150, Approx Train Acc: 70.00% | Val Acc: 95.87%




Epoch 300/300 | Train Loss: 1.2110, Approx Train Acc: 70.83% | Val Acc: 95.75%
Training complete. Best validation accuracy: 96.11%
Best model saved as best_model.pth


In [None]:
# inference.py
import torch
import torchvision.transforms as transforms
import pandas as pd
import pickle
import numpy as np
from torch.utils.data import DataLoader, Dataset
#from model import LightResNet18
import torch.nn.functional as F

def unpickle(file):
    """
    Load and return the object stored in the pickle file at ``file``.

    The file is decoded with ``encoding='bytes'``, so string keys written by
    Python 2 pickles come back as ``bytes`` (e.g. ``b'data'``).
    """
    # NOTE: pickle can execute arbitrary code while loading; only use this on
    # trusted dataset files.
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')

class CIFARTestDataset(Dataset):
    """
    Map-style dataset pairing unlabeled test images with their IDs.

    Args:
        data: indexable collection of images; the original author documents
            the expected shape as (N, 32, 32, 3).
        ids: array/list of image IDs, indexed in parallel with ``data``.
        transform: optional callable applied to each image after the uint8 cast.
    """
    def __init__(self, data, ids, transform=None):
        self.data, self.ids, self.transform = data, ids, transform

    def __len__(self):
        # One sample per stored image.
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, image_id)`` for position ``idx``."""
        pixels = self.data[idx].astype("uint8")  # force byte range 0..255
        sample_id = self.ids[idx]
        if not self.transform:
            # No transform configured: hand back the raw uint8 array.
            return pixels, sample_id
        return self.transform(pixels), sample_id

def inference():
    """
    Run 2-pass test-time-augmented inference and write ``submission.csv``.

    Steps:
      1. Build ``LightResNet18_v2`` and load the weights from ``best_model.pth``.
      2. Read the unlabeled test pickle and bring the images into
         (N, 32, 32, 3) layout.
      3. Predict every image by averaging the softmax probabilities of the
         original and the horizontally flipped input.
      4. Save the ``ID`` / ``Labels`` columns to ``submission.csv``.

    Returns:
        None. The result is the CSV file written to the working directory.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f"Using device: {device}")

    # 1) Load the best model
    model = LightResNet18_v2(num_classes=10, drop_prob=0.1).to(device)
    # weights_only=True limits unpickling to tensors and plain containers,
    # which is all a state_dict needs; it also silences the FutureWarning
    # torch emits when the flag is left unspecified.
    state_dict = torch.load("best_model.pth", map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()

    # 2) Load custom test set .pkl
    test_file = "/content/cifar_test_nolabel.pkl"
    test_dict = unpickle(test_file)
    print("Keys in test_dict:", test_dict.keys())

    raw_images = np.asarray(test_dict[b'data'])
    if raw_images.ndim == 2:
        # Flat (N, 3072) CIFAR rows are stored channel-major (all R, then G,
        # then B), so they must be viewed as (N, 3, 32, 32) and moved to
        # channels-last; a direct reshape to (N, 32, 32, 3) scrambles pixels.
        test_images = np.ascontiguousarray(
            raw_images.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)
        )
    else:
        # Already image-shaped: the reshape only normalizes to (N, 32, 32, 3).
        test_images = raw_images.reshape(-1, 32, 32, 3)

    # Prefer the IDs shipped with the test file so each prediction is tied to
    # the ID the data provider assigned; fall back to positional IDs when the
    # key is missing or does not line up with the images.
    raw_ids = test_dict.get(b'ids')
    if raw_ids is not None and len(raw_ids) == len(test_images):
        test_ids = [
            item.decode() if isinstance(item, bytes) else str(item)
            for item in raw_ids
        ]
    else:
        test_ids = [str(i) for i in range(len(test_images))]

    # 3) Define test transforms
    transform_test = transforms.Compose([
        transforms.ToPILImage(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2470, 0.2435, 0.2616))
    ])

    test_dataset = CIFARTestDataset(test_images, test_ids, transform=transform_test)
    # shuffle=False keeps predictions in the same order as the IDs.
    test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False, num_workers=4)

    # 4) Inference with 2-pass TTA
    predictions = []
    image_ids = []
    with torch.no_grad():
        for imgs, ids in test_loader:
            imgs = imgs.to(device)

            # Pass 1: Normal
            probs_normal = F.softmax(model(imgs), dim=1)

            # Pass 2: Horizontal flip (dim 3 is the width axis of NCHW batches)
            probs_flipped = F.softmax(model(torch.flip(imgs, dims=[3])), dim=1)

            # Average probabilities and take the most likely class per image
            final_probs = (probs_normal + probs_flipped) / 2.0
            predicted = final_probs.argmax(dim=1)

            predictions.extend(predicted.tolist())
            image_ids.extend(ids)

    # 5) Save submission
    submission_df = pd.DataFrame({"ID": image_ids, "Labels": predictions})
    submission_df.to_csv("submission.csv", index=False)
    print("Submission file saved as submission.csv")

# Entry point: run inference only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    inference()


Using device: cuda
Keys in test_dict: dict_keys([b'data', b'ids'])


  model.load_state_dict(torch.load("best_model.pth", map_location=device))


Submission file saved as submission.csv


In [None]:
import pandas as pd

# Sanity-check the submission: reload submission.csv from the working
# directory and look at its contents before uploading.
submission_df = pd.read_csv("submission.csv")
print(submission_df.head())  # Show first few rows (ID and Labels columns)
print(submission_df["Labels"].value_counts())  # Predictions per class; a heavily skewed count hints at a problem


   ID  Labels
0   0       6
1   1       1
2   2       8
3   3       6
4   4       9
Labels
3    1119
1    1066
8    1053
5    1040
7    1006
2     983
9     980
4     949
6     918
0     886
Name: count, dtype: int64
