This is our best-scoring model, which achieved 0.84517 in public scoring.

In [None]:
# model.py
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

# ------------------------------------------------------------------------
#                       Squeeze-and-Excitation Block
# ------------------------------------------------------------------------
class SEBlock(nn.Module):
    """
    Squeeze-and-Excitation Block:
    - Squeezes every channel of a (B, C, H, W) input to one scalar by
      averaging over the spatial dimensions.
    - Feeds the (B, C) summary through a two-layer bottleneck
      (Linear -> ReLU -> Linear -> sigmoid) to get one gate per channel.
    - Multiplies each input channel by its gate.

    Args:
        channels: Number of channels C of the input feature map.
        reduction: Bottleneck ratio. The hidden width is
            ``channels // reduction``, clamped to at least 1 so that inputs
            with fewer channels than ``reduction`` still get a usable
            (non-empty) bottleneck.
    """
    def __init__(self, channels, reduction=16):
        super().__init__()
        hidden = max(1, channels // reduction)
        self.fc1 = nn.Linear(channels, hidden)
        self.fc2 = nn.Linear(hidden, channels)

    def forward(self, x):
        """Return ``x`` rescaled channel-wise; output has the same shape as ``x``."""
        b, c, _, _ = x.size()
        # Squeeze: global average pooling across spatial dimensions.
        # reshape (rather than view) also accepts inputs whose spatial dims
        # cannot be merged without a copy, e.g. transposed tensors.
        squeeze = x.reshape(b, c, -1).mean(dim=2)
        # Excitation: bottleneck MLP producing one gate in (0, 1) per channel
        excitation = F.relu(self.fc1(squeeze))
        excitation = torch.sigmoid(self.fc2(excitation)).view(b, c, 1, 1)
        return x * excitation  # Broadcast the gates over H and W


# ------------------------------------------------------------------------
#                       Stochastic Depth
# ------------------------------------------------------------------------
class StochasticDepth(nn.Module):
    """
    Stochastic Depth (also known as DropPath):
    - During training, the residual branch is dropped for the whole batch
      with probability ``p`` (a single random draw per forward call).
    - A branch that survives is scaled by ``1 / (1 - p)`` so the expected
      training output equals ``x + residual``, which is exactly what the
      module returns in evaluation mode.

    Args:
        p: Probability in [0, 1] of dropping the residual branch.

    Raises:
        ValueError: If ``p`` is outside [0, 1].
    """
    def __init__(self, p: float = 0.1):
        super().__init__()
        if not 0.0 <= p <= 1.0:
            raise ValueError(f"drop probability must be in [0, 1], got {p}")
        self.p = p

    def forward(self, x, residual):
        """Combine the shortcut ``x`` with the (possibly dropped) ``residual``."""
        if not self.training or self.p == 0.0:
            return x + residual  # No drop during evaluation
        # p == 1 always drops; handled explicitly to avoid dividing by zero below
        if self.p >= 1.0 or torch.rand(1).item() < self.p:
            return x  # Drop residual branch
        # Keep residual branch, rescaled to compensate for the dropped passes
        return x + residual / (1.0 - self.p)


# ------------------------------------------------------------------------
#       Pre-Activation Residual Block with SE + Stochastic Depth
# ------------------------------------------------------------------------
class PreActBlock(nn.Module):
    """
    Pre-Activation Residual Block:
    - BatchNorm + ReLU come before each 3x3 convolution.
    - An SE block gates the pre-activated input before the first convolution.
    - When the stride or channel count changes, a 1x1 convolution projects
      the SE-gated activations onto the shortcut path; otherwise the raw
      input is used as the shortcut.
    - Shortcut and residual branch are merged through Stochastic Depth.

    Args:
        in_planes: Channels of the incoming feature map.
        out_planes: Channels produced by the block.
        stride: Stride of the first convolution (and of the projection).
        drop_prob: Drop probability handed to the Stochastic Depth module.
    """
    def __init__(self, in_planes, out_planes, stride=1, drop_prob=0.0):
        super().__init__()
        conv3x3 = dict(kernel_size=3, padding=1, bias=False)

        self.bn1 = nn.BatchNorm2d(in_planes)
        self.se = SEBlock(in_planes)
        self.conv1 = nn.Conv2d(in_planes, out_planes, stride=stride, **conv3x3)
        self.bn2 = nn.BatchNorm2d(out_planes)
        self.conv2 = nn.Conv2d(out_planes, out_planes, stride=1, **conv3x3)

        # A projection is only required when the output shape differs from the input
        needs_projection = stride != 1 or in_planes != out_planes
        self.shortcut = (
            nn.Conv2d(in_planes, out_planes, kernel_size=1,
                      stride=stride, padding=0, bias=False)
            if needs_projection else None
        )

        self.sd = StochasticDepth(p=drop_prob)

    def forward(self, x):
        # Pre-activation followed by channel re-weighting
        pre = self.se(F.relu(self.bn1(x)))

        if self.shortcut is None:
            identity = x
        else:
            identity = self.shortcut(pre)

        branch = self.conv1(pre)
        branch = self.conv2(F.relu(self.bn2(branch)))
        return self.sd(identity, branch)  # Merge via Stochastic Depth


# ------------------------------------------------------------------------
#       LightResNet-18 for CIFAR-10 (Initial Channels=32)
# ------------------------------------------------------------------------
class LightResNet18(nn.Module):
    """
    Lightweight ResNet-18-style classifier:
    - Stem is a single 3x3 convolution producing 32 channels.
    - Four stages of two PreActBlocks each, with widths 32/64/128/256;
      stages 2-4 start with a stride-2 block.
    - The stochastic-depth probability grows with the stage and with the
      block position inside the stage, reaching ``drop_prob`` in the last block.
    - Head: BatchNorm -> ReLU -> global average pool -> Linear.

    Args:
        num_classes: Size of the output logits vector.
        drop_prob: Maximum stochastic-depth drop probability.
    """
    def __init__(self, num_classes=10, drop_prob=0.1):
        super().__init__()
        self.in_planes = 32
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1,
                               padding=1, bias=False)

        # (stage number, output width, stride of the stage's first block)
        stage_specs = ((1, 32, 1), (2, 64, 2), (3, 128, 2), (4, 256, 2))
        for stage, width, first_stride in stage_specs:
            setattr(self, f"layer{stage}",
                    self._make_layer(width, 2, stride=first_stride,
                                     base_p=drop_prob*stage/4))

        self.bn = nn.BatchNorm2d(256)
        self.linear = nn.Linear(256, num_classes)

        # Re-initialise every Conv / BatchNorm / Linear submodule
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out')
                continue
            if isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
                continue
            if isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.constant_(module.bias, 0)

    def _make_layer(self, out_planes, blocks, stride, base_p):
        """Build one stage: ``blocks`` PreActBlocks, only the first one strided.

        Block number k (1-based) receives drop probability ``base_p * k / blocks``.
        ``self.in_planes`` is updated so the next stage sees the new width.
        """
        stride_plan = [stride] + [1]*(blocks-1)
        stack = []
        for position, block_stride in zip(range(1, blocks + 1), stride_plan):
            stack.append(PreActBlock(self.in_planes, out_planes,
                                     stride=block_stride,
                                     drop_prob=base_p * position/blocks))
            self.in_planes = out_planes
        return nn.Sequential(*stack)

    def forward(self, x):
        feats = self.conv1(x)
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            feats = stage(feats)
        feats = F.relu(self.bn(feats))
        # Global average pool, then flatten to (batch, channels)
        pooled = F.adaptive_avg_pool2d(feats, 1)
        flat = pooled.view(feats.size(0), -1)
        return self.linear(flat)


# Quick parameter check for debugging
if __name__ == "__main__":
    # Smoke test: build the default model and run one random batch through it
    model = LightResNet18(num_classes=10, drop_prob=0.1)
    x = torch.randn(2, 3, 32, 32)  # two random inputs of shape (3, 32, 32)
    y = model(x)  # no eval() call is made, so the module is in its default training mode
    print("Output shape:", y.shape)
    # Count only the parameters that are marked as trainable
    total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Total trainable parameters: {total_params:,} (~{total_params/1e6:.2f}M)")

Output shape: torch.Size([2, 10])
Total trainable parameters: 2,811,192 (~2.81M)


In [None]:
# train.py
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import numpy as np
import pickle
from torch.utils.data import DataLoader, Dataset
from PIL import Image
#from model import LightResNet18  # Assuming this model is defined elsewhere
from tqdm import tqdm  # For progress bars

# Helper function for CutMix to generate random bounding box coordinates
def rand_bbox(size, lam):
    """Sample a random rectangle for CutMix.

    The rectangle's side lengths are ``sqrt(1 - lam)`` times the image sides,
    its centre is drawn uniformly over the image, and the corners are clipped
    to the image bounds (so the final box may be smaller than requested).

    Args:
        size: Sequence indexed as [batch_size, channels, height, width]
        lam: Mixing coefficient; larger values give a smaller box
    Returns:
        Tuple of (bbx1, bby1, bbx2, bby2) corner coordinates
    """
    height, width = size[2], size[3]

    side_ratio = np.sqrt(1. - lam)
    half_w = int(width * side_ratio) // 2
    half_h = int(height * side_ratio) // 2

    # Draw the centre: x first, then y (keeps the random stream order stable)
    center_x = np.random.randint(width)
    center_y = np.random.randint(height)

    left = np.clip(center_x - half_w, 0, width)
    top = np.clip(center_y - half_h, 0, height)
    right = np.clip(center_x + half_w, 0, width)
    bottom = np.clip(center_y + half_h, 0, height)
    return left, top, right, bottom

def mixup_data(x, y, alpha=1.0):
    """Blend every sample in a batch with a randomly chosen partner (MixUp).

    Args:
        x: Input images tensor, batch on the first dimension
        y: Target labels tensor
        alpha: Both shape parameters of the Beta distribution that lam is drawn from
    Returns:
        (mixed images, original labels, partner labels, lam), where the
        mixed images are ``lam * x + (1 - lam) * x[partner]``
    """
    lam = np.random.beta(alpha, alpha)
    partner = torch.randperm(x.size(0))  # which sample each one is blended with

    shuffled_x = x[partner, :]
    blended = lam * x + (1 - lam) * shuffled_x
    return blended, y, y[partner], lam

def cutmix_data(x, y, alpha=1.0):
    """Applies CutMix augmentation to a batch of data.

    A single random rectangle is chosen for the whole batch; inside it every
    image receives the pixels of a randomly chosen partner image. The input
    tensor is left untouched: the pasted result is returned as a new tensor.

    Args:
        x: Input images tensor of shape (batch, channels, height, width)
        y: Target labels tensor
        alpha: Both shape parameters of the Beta distribution for the initial lambda
    Returns:
        (mixed images, original labels, partner labels, lam), where lam is a
        Python float equal to the fraction of each image that was NOT
        replaced, recomputed from the clipped box actually used.
    """
    lam = np.random.beta(alpha, alpha)
    batch_size = x.size(0)
    index = torch.randperm(batch_size)

    # Generate random bounding box coordinates
    bbx1, bby1, bbx2, bby2 = rand_bbox(x.size(), lam)

    # Paste into a copy so the caller's tensor is not modified as a side effect
    mixed_x = x.clone()
    mixed_x[:, :, bby1:bby2, bbx1:bbx2] = x[index, :, bby1:bby2, bbx1:bbx2]

    # The box may have been clipped at the border, so derive lambda from its real area.
    # float(...) turns the NumPy scalar into a plain Python float.
    box_area = (bbx2 - bbx1) * (bby2 - bby1)
    lam = float(1 - (box_area / (x.size(-1) * x.size(-2))))
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

def mixup_cutmix_collate(batch, alpha=1.0, p=0.5):
    """Collate samples into a batch and augment it with MixUp or CutMix.

    Args:
        batch: Sequence of (image tensor, label) pairs from the dataset
        alpha: Beta distribution parameter forwarded to the augmentation
        p: Probability of applying MixUp; CutMix is applied otherwise
    Returns:
        Flat 5-tuple ``(images, y_a, y_b, lam, method)`` where ``method`` is
        the string 'mixup' or 'cutmix'
    """
    # Split the (image, label) pairs into two parallel sequences
    sample_images, sample_labels = zip(*batch)
    images = torch.stack(sample_images, 0)
    labels = torch.tensor(sample_labels, dtype=torch.long)

    # One coin flip per batch decides which augmentation is used
    use_mixup = np.random.rand() < p
    augment, tag = (mixup_data, 'mixup') if use_mixup else (cutmix_data, 'cutmix')
    return augment(images, labels, alpha) + (tag,)

def unpickle(file):
    """Read one pickled CIFAR-10 batch file and return the unpickled object.

    The file is decoded with ``encoding='bytes'``, so string keys of the
    stored dictionary come back as ``bytes`` (e.g. ``b'data'``).

    NOTE: unpickling can execute arbitrary code - only use on trusted files.
    """
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')

class CIFARDataset(Dataset):
    """Dataset over raw CIFAR-style arrays, yielding (PIL image -> transform, label).

    Args:
        data: Array reshapeable to (N, 3, 32, 32); stored as float32 scaled by 1/255
        labels: Indexable collection of labels, one per image
        transform: Optional callable applied to the PIL image
    """
    def __init__(self, data, labels, transform=None):
        images = data.reshape(-1, 3, 32, 32)
        self.data = images.astype("float32") / 255.0
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        # Undo the 1/255 scaling and go back to 8-bit pixels in CHW order
        chw = (self.data[idx]*255).astype("uint8")
        # PIL expects channels last (HWC)
        picture = Image.fromarray(chw.transpose(1,2,0))
        target = self.labels[idx]

        if not self.transform:
            return picture, target
        return self.transform(picture), target

def load_cifar10_batches(root_dir):
    """Read ``data_batch_1`` .. ``data_batch_5`` from ``root_dir`` and merge them.

    Returns:
        (X, y): the batches' ``b'data'`` arrays concatenated along axis 0 and
        a NumPy array of all ``b'labels'`` entries in the same order.
    """
    batches = [unpickle(f"{root_dir}/data_batch_{i}") for i in range(1, 6)]
    X = np.concatenate([batch[b'data'] for batch in batches], axis=0)
    y = np.array([label for batch in batches for label in batch[b'labels']])
    return X, y

def train(root_dir="/content/cifar-10-python/cifar-10-batches-py", num_epochs=300):
    """Train LightResNet18 on CIFAR-10 with MixUp/CutMix and keep the best checkpoint.

    Pipeline: load the five training batches, hold out 5000 random samples for
    validation, train with SGD (Nesterov momentum, weight decay) under a cosine
    learning-rate schedule, evaluate after every epoch and save the weights
    with the highest validation accuracy to ``best_model.pth``.

    Args:
        root_dir: Directory containing ``data_batch_1`` .. ``data_batch_5``.
        num_epochs: Number of training epochs; also the cosine schedule length.
    """
    # Local import: partial() gives a picklable collate_fn, which DataLoader
    # workers need on platforms that start processes with "spawn".
    from functools import partial

    # Device configuration
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f"Using device: {device}")

    # 1) Load and split CIFAR-10 dataset
    X, y = load_cifar10_batches(root_dir)

    # Create train/validation split (5000 validation samples)
    total_size = len(X)  # 50000 total samples
    val_size = 5000
    train_size = total_size - val_size
    indices = np.arange(total_size)
    np.random.shuffle(indices)
    train_idx = indices[:train_size]
    val_idx   = indices[train_size:]

    X_train, y_train = X[train_idx], y[train_idx]
    X_val,   y_val   = X[val_idx],   y[val_idx]

    # 2) Define data transformations
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.RandAugment(),  # Random augmentation policy
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),  # CIFAR-10 mean
        (0.2470, 0.2435, 0.2616))   # CIFAR-10 std
    ])
    transform_val = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
        (0.2470, 0.2435, 0.2616))
    ])

    # Create datasets
    train_dataset = CIFARDataset(X_train, y_train, transform=transform_train)
    val_dataset   = CIFARDataset(X_val,   y_val,   transform=transform_val)

    # 3) Create data loaders with MixUp/CutMix augmentation
    train_loader = DataLoader(
        train_dataset, batch_size=128,
        shuffle=True, num_workers=4,
        collate_fn=partial(mixup_cutmix_collate, alpha=1.0, p=0.5)
    )
    val_loader   = DataLoader(val_dataset, batch_size=128, shuffle=False, num_workers=4)

    # 4) Initialize model
    model = LightResNet18(num_classes=10, drop_prob=0.1).to(device)

    # 5) Define loss function, optimizer, and scheduler
    base_criterion = nn.CrossEntropyLoss(label_smoothing=0.1)  # Regularization
    optimizer = optim.SGD(
        model.parameters(),
        lr=0.1,
        momentum=0.9,
        nesterov=True,
        weight_decay=5e-4  # L2 regularization
    )
    # Cosine decay spanning the whole run (kept in sync with num_epochs)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)

    best_acc = 0.0

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct_approx = 0.0
        total_approx = 0

        # Training phase with progress bar
        train_pbar = tqdm(train_loader, desc=f"Epoch [{epoch+1}/{num_epochs}]", leave=False)
        # The collate function yields a flat 5-tuple:
        # (images, labels_a, labels_b, lambda, method name)
        for images, y_a, y_b, lam, _method in train_pbar:
            lam = float(lam)  # plain float regardless of which augmentation produced it
            images = images.to(device)
            y_a = y_a.to(device)
            y_b = y_b.to(device)

            # Forward pass
            optimizer.zero_grad()
            outputs = model(images)

            # Compute MixUp/CutMix loss: lambda-weighted sum over both label sets
            loss = lam * base_criterion(outputs, y_a) + (1 - lam) * base_criterion(outputs, y_b)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Update statistics
            running_loss += loss.item() * images.size(0)
            _, preds = torch.max(outputs, 1)
            # Calculate approximate accuracy (weighted by lambda)
            correct_approx += (preds == y_a).sum().item() * lam + (preds == y_b).sum().item() * (1 - lam)
            total_approx += images.size(0)

            train_pbar.set_postfix(loss=f"{loss.item():.3f}")

        # Update learning rate
        scheduler.step()

        # Calculate epoch metrics
        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc  = 100.0 * correct_approx / total_approx

        # Validation phase (clean images, no MixUp/CutMix)
        model.eval()
        val_correct = 0
        val_total = 0
        with torch.no_grad():
            for imgs, labels in val_loader:
                imgs, labels = imgs.to(device), labels.to(device)
                outputs = model(imgs)
                _, pred = torch.max(outputs, 1)
                val_correct += (pred == labels).sum().item()
                val_total += labels.size(0)
        val_acc = 100.0 * val_correct / val_total

        # Print epoch summary
        print(f"Epoch {epoch+1}/{num_epochs} | "
              f"Train Loss: {epoch_loss:.4f}, Approx Train Acc: {epoch_acc:.2f}% | "
              f"Val Acc: {val_acc:.2f}%")

        # Save best model
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), "best_model.pth")
            print(f"   [*] Saved new best model: Val Acc = {val_acc:.2f}%")

    # Final output
    print(f"Training complete. Best validation accuracy: {best_acc:.2f}%")
    print("Best model saved as best_model.pth")

# Entry point: start the full training run when this code is executed directly
if __name__ == "__main__":
    train()

Using device: cuda




Epoch 1/300 | Train Loss: 2.1250, Approx Train Acc: 23.84% | Val Acc: 39.90%
   [*] Saved new best model: Val Acc = 39.90%




Epoch 2/300 | Train Loss: 1.9533, Approx Train Acc: 34.79% | Val Acc: 50.34%
   [*] Saved new best model: Val Acc = 50.34%




Epoch 3/300 | Train Loss: 1.8435, Approx Train Acc: 41.25% | Val Acc: 59.78%
   [*] Saved new best model: Val Acc = 59.78%




Epoch 4/300 | Train Loss: 1.7906, Approx Train Acc: 44.13% | Val Acc: 67.26%
   [*] Saved new best model: Val Acc = 67.26%




Epoch 5/300 | Train Loss: 1.7532, Approx Train Acc: 46.17% | Val Acc: 61.60%




Epoch 6/300 | Train Loss: 1.7188, Approx Train Acc: 48.07% | Val Acc: 70.34%
   [*] Saved new best model: Val Acc = 70.34%




Epoch 7/300 | Train Loss: 1.7157, Approx Train Acc: 48.40% | Val Acc: 74.14%
   [*] Saved new best model: Val Acc = 74.14%




Epoch 8/300 | Train Loss: 1.6761, Approx Train Acc: 50.28% | Val Acc: 66.10%




Epoch 9/300 | Train Loss: 1.6816, Approx Train Acc: 50.16% | Val Acc: 70.46%




Epoch 10/300 | Train Loss: 1.6709, Approx Train Acc: 50.76% | Val Acc: 75.10%
   [*] Saved new best model: Val Acc = 75.10%




Epoch 11/300 | Train Loss: 1.6889, Approx Train Acc: 49.79% | Val Acc: 71.34%




Epoch 12/300 | Train Loss: 1.6689, Approx Train Acc: 50.80% | Val Acc: 74.88%




Epoch 13/300 | Train Loss: 1.6261, Approx Train Acc: 52.91% | Val Acc: 76.36%
   [*] Saved new best model: Val Acc = 76.36%




Epoch 14/300 | Train Loss: 1.6341, Approx Train Acc: 52.58% | Val Acc: 75.04%




Epoch 15/300 | Train Loss: 1.6174, Approx Train Acc: 53.46% | Val Acc: 74.06%




Epoch 16/300 | Train Loss: 1.6213, Approx Train Acc: 53.18% | Val Acc: 75.58%




Epoch 17/300 | Train Loss: 1.6176, Approx Train Acc: 53.53% | Val Acc: 77.26%
   [*] Saved new best model: Val Acc = 77.26%




Epoch 18/300 | Train Loss: 1.5979, Approx Train Acc: 54.43% | Val Acc: 73.72%




Epoch 19/300 | Train Loss: 1.6278, Approx Train Acc: 53.12% | Val Acc: 70.22%




Epoch 20/300 | Train Loss: 1.6168, Approx Train Acc: 53.40% | Val Acc: 79.38%
   [*] Saved new best model: Val Acc = 79.38%




Epoch 21/300 | Train Loss: 1.6257, Approx Train Acc: 52.86% | Val Acc: 78.56%




Epoch 22/300 | Train Loss: 1.6128, Approx Train Acc: 53.86% | Val Acc: 77.60%




Epoch 23/300 | Train Loss: 1.5950, Approx Train Acc: 54.72% | Val Acc: 79.44%
   [*] Saved new best model: Val Acc = 79.44%




Epoch 24/300 | Train Loss: 1.5937, Approx Train Acc: 54.80% | Val Acc: 73.38%




Epoch 25/300 | Train Loss: 1.5959, Approx Train Acc: 54.74% | Val Acc: 78.78%




Epoch 26/300 | Train Loss: 1.6017, Approx Train Acc: 54.52% | Val Acc: 78.84%




Epoch 27/300 | Train Loss: 1.5936, Approx Train Acc: 54.81% | Val Acc: 79.00%




Epoch 28/300 | Train Loss: 1.6002, Approx Train Acc: 54.19% | Val Acc: 79.12%




Epoch 29/300 | Train Loss: 1.5800, Approx Train Acc: 55.33% | Val Acc: 72.96%




Epoch 30/300 | Train Loss: 1.5948, Approx Train Acc: 54.73% | Val Acc: 81.16%
   [*] Saved new best model: Val Acc = 81.16%




Epoch 31/300 | Train Loss: 1.5784, Approx Train Acc: 55.58% | Val Acc: 76.78%




Epoch 32/300 | Train Loss: 1.5636, Approx Train Acc: 56.27% | Val Acc: 72.84%




Epoch 33/300 | Train Loss: 1.5789, Approx Train Acc: 55.50% | Val Acc: 80.72%




Epoch 34/300 | Train Loss: 1.5895, Approx Train Acc: 54.87% | Val Acc: 74.10%




Epoch 35/300 | Train Loss: 1.6025, Approx Train Acc: 54.37% | Val Acc: 80.94%




Epoch 36/300 | Train Loss: 1.5858, Approx Train Acc: 55.27% | Val Acc: 80.06%




Epoch 37/300 | Train Loss: 1.5835, Approx Train Acc: 55.36% | Val Acc: 75.14%




Epoch 38/300 | Train Loss: 1.5839, Approx Train Acc: 55.06% | Val Acc: 79.96%




Epoch 39/300 | Train Loss: 1.5670, Approx Train Acc: 55.98% | Val Acc: 78.08%




Epoch 40/300 | Train Loss: 1.5866, Approx Train Acc: 55.04% | Val Acc: 80.82%




Epoch 41/300 | Train Loss: 1.5805, Approx Train Acc: 55.26% | Val Acc: 79.56%




Epoch 42/300 | Train Loss: 1.6004, Approx Train Acc: 54.18% | Val Acc: 77.50%




Epoch 43/300 | Train Loss: 1.5672, Approx Train Acc: 56.06% | Val Acc: 79.68%




Epoch 44/300 | Train Loss: 1.5773, Approx Train Acc: 55.50% | Val Acc: 77.60%




Epoch 45/300 | Train Loss: 1.5648, Approx Train Acc: 56.05% | Val Acc: 78.22%




Epoch 46/300 | Train Loss: 1.5721, Approx Train Acc: 55.77% | Val Acc: 81.36%
   [*] Saved new best model: Val Acc = 81.36%




Epoch 47/300 | Train Loss: 1.5916, Approx Train Acc: 54.88% | Val Acc: 79.64%




Epoch 48/300 | Train Loss: 1.6080, Approx Train Acc: 53.87% | Val Acc: 78.02%




Epoch 49/300 | Train Loss: 1.5593, Approx Train Acc: 56.12% | Val Acc: 77.56%




Epoch 50/300 | Train Loss: 1.5485, Approx Train Acc: 57.04% | Val Acc: 79.36%




Epoch 51/300 | Train Loss: 1.5712, Approx Train Acc: 55.50% | Val Acc: 81.38%
   [*] Saved new best model: Val Acc = 81.38%




Epoch 52/300 | Train Loss: 1.5496, Approx Train Acc: 56.78% | Val Acc: 80.32%




Epoch 53/300 | Train Loss: 1.5616, Approx Train Acc: 56.01% | Val Acc: 83.76%
   [*] Saved new best model: Val Acc = 83.76%




Epoch 54/300 | Train Loss: 1.5847, Approx Train Acc: 55.00% | Val Acc: 77.96%




Epoch 55/300 | Train Loss: 1.5631, Approx Train Acc: 56.37% | Val Acc: 79.46%




Epoch 56/300 | Train Loss: 1.5530, Approx Train Acc: 56.62% | Val Acc: 65.50%




Epoch 57/300 | Train Loss: 1.5656, Approx Train Acc: 56.17% | Val Acc: 81.92%




Epoch 58/300 | Train Loss: 1.5495, Approx Train Acc: 56.79% | Val Acc: 83.62%




Epoch 59/300 | Train Loss: 1.5749, Approx Train Acc: 55.66% | Val Acc: 74.94%




Epoch 60/300 | Train Loss: 1.5592, Approx Train Acc: 56.48% | Val Acc: 79.32%




Epoch 61/300 | Train Loss: 1.5537, Approx Train Acc: 56.48% | Val Acc: 81.10%




Epoch 62/300 | Train Loss: 1.5490, Approx Train Acc: 57.07% | Val Acc: 83.56%




Epoch 63/300 | Train Loss: 1.5530, Approx Train Acc: 56.51% | Val Acc: 79.40%




Epoch 64/300 | Train Loss: 1.5385, Approx Train Acc: 57.48% | Val Acc: 82.08%




Epoch 65/300 | Train Loss: 1.5451, Approx Train Acc: 56.87% | Val Acc: 77.18%




Epoch 66/300 | Train Loss: 1.5533, Approx Train Acc: 56.76% | Val Acc: 79.30%




Epoch 67/300 | Train Loss: 1.5683, Approx Train Acc: 56.08% | Val Acc: 83.30%




Epoch 68/300 | Train Loss: 1.5693, Approx Train Acc: 55.70% | Val Acc: 80.32%




Epoch 69/300 | Train Loss: 1.5688, Approx Train Acc: 56.05% | Val Acc: 76.74%




Epoch 70/300 | Train Loss: 1.5523, Approx Train Acc: 56.71% | Val Acc: 82.10%




Epoch 71/300 | Train Loss: 1.5875, Approx Train Acc: 55.05% | Val Acc: 79.96%




Epoch 72/300 | Train Loss: 1.5380, Approx Train Acc: 57.52% | Val Acc: 82.70%




Epoch 73/300 | Train Loss: 1.5449, Approx Train Acc: 57.05% | Val Acc: 79.22%




Epoch 74/300 | Train Loss: 1.5374, Approx Train Acc: 57.47% | Val Acc: 82.88%




Epoch 75/300 | Train Loss: 1.5623, Approx Train Acc: 56.13% | Val Acc: 77.24%




Epoch 76/300 | Train Loss: 1.5613, Approx Train Acc: 56.34% | Val Acc: 80.30%




Epoch 77/300 | Train Loss: 1.5428, Approx Train Acc: 57.02% | Val Acc: 78.88%




Epoch 78/300 | Train Loss: 1.5686, Approx Train Acc: 55.77% | Val Acc: 83.96%
   [*] Saved new best model: Val Acc = 83.96%




Epoch 79/300 | Train Loss: 1.5614, Approx Train Acc: 56.17% | Val Acc: 81.90%




Epoch 80/300 | Train Loss: 1.5294, Approx Train Acc: 57.57% | Val Acc: 81.08%




Epoch 81/300 | Train Loss: 1.5407, Approx Train Acc: 56.89% | Val Acc: 75.04%




Epoch 82/300 | Train Loss: 1.5467, Approx Train Acc: 56.89% | Val Acc: 82.38%




Epoch 83/300 | Train Loss: 1.5467, Approx Train Acc: 57.09% | Val Acc: 83.34%




Epoch 84/300 | Train Loss: 1.5353, Approx Train Acc: 57.56% | Val Acc: 78.68%




Epoch 85/300 | Train Loss: 1.5364, Approx Train Acc: 57.60% | Val Acc: 82.84%




Epoch 86/300 | Train Loss: 1.5513, Approx Train Acc: 56.80% | Val Acc: 83.54%




Epoch 87/300 | Train Loss: 1.5209, Approx Train Acc: 58.03% | Val Acc: 80.74%




Epoch 88/300 | Train Loss: 1.5359, Approx Train Acc: 57.69% | Val Acc: 83.42%




Epoch 89/300 | Train Loss: 1.5527, Approx Train Acc: 56.82% | Val Acc: 81.98%




Epoch 90/300 | Train Loss: 1.5410, Approx Train Acc: 57.07% | Val Acc: 84.14%
   [*] Saved new best model: Val Acc = 84.14%




Epoch 91/300 | Train Loss: 1.5532, Approx Train Acc: 56.31% | Val Acc: 80.72%




Epoch 92/300 | Train Loss: 1.5328, Approx Train Acc: 57.48% | Val Acc: 81.62%




Epoch 93/300 | Train Loss: 1.5440, Approx Train Acc: 56.92% | Val Acc: 81.34%




Epoch 94/300 | Train Loss: 1.5419, Approx Train Acc: 57.02% | Val Acc: 81.40%




Epoch 95/300 | Train Loss: 1.5676, Approx Train Acc: 55.87% | Val Acc: 80.98%




Epoch 96/300 | Train Loss: 1.5471, Approx Train Acc: 56.90% | Val Acc: 82.06%




Epoch 97/300 | Train Loss: 1.5324, Approx Train Acc: 57.48% | Val Acc: 81.34%




Epoch 98/300 | Train Loss: 1.5425, Approx Train Acc: 57.11% | Val Acc: 84.56%
   [*] Saved new best model: Val Acc = 84.56%




Epoch 99/300 | Train Loss: 1.5156, Approx Train Acc: 58.43% | Val Acc: 84.14%




Epoch 100/300 | Train Loss: 1.5284, Approx Train Acc: 57.72% | Val Acc: 79.90%




Epoch 101/300 | Train Loss: 1.5501, Approx Train Acc: 56.80% | Val Acc: 82.82%




Epoch 102/300 | Train Loss: 1.5254, Approx Train Acc: 57.88% | Val Acc: 83.78%




Epoch 103/300 | Train Loss: 1.5283, Approx Train Acc: 57.40% | Val Acc: 79.66%




Epoch 104/300 | Train Loss: 1.5316, Approx Train Acc: 57.66% | Val Acc: 80.60%




Epoch 105/300 | Train Loss: 1.5559, Approx Train Acc: 56.18% | Val Acc: 81.64%




Epoch 106/300 | Train Loss: 1.5052, Approx Train Acc: 58.81% | Val Acc: 85.00%
   [*] Saved new best model: Val Acc = 85.00%




Epoch 107/300 | Train Loss: 1.5160, Approx Train Acc: 58.39% | Val Acc: 79.14%




Epoch 108/300 | Train Loss: 1.5270, Approx Train Acc: 57.63% | Val Acc: 81.58%




Epoch 109/300 | Train Loss: 1.5134, Approx Train Acc: 58.47% | Val Acc: 84.04%




Epoch 110/300 | Train Loss: 1.5271, Approx Train Acc: 57.73% | Val Acc: 81.60%




Epoch 111/300 | Train Loss: 1.5192, Approx Train Acc: 57.98% | Val Acc: 81.72%




Epoch 112/300 | Train Loss: 1.5261, Approx Train Acc: 57.61% | Val Acc: 83.62%




Epoch 113/300 | Train Loss: 1.5252, Approx Train Acc: 58.15% | Val Acc: 84.38%




Epoch 114/300 | Train Loss: 1.5164, Approx Train Acc: 58.12% | Val Acc: 85.18%
   [*] Saved new best model: Val Acc = 85.18%




Epoch 115/300 | Train Loss: 1.5289, Approx Train Acc: 57.67% | Val Acc: 85.34%
   [*] Saved new best model: Val Acc = 85.34%




Epoch 116/300 | Train Loss: 1.4907, Approx Train Acc: 59.70% | Val Acc: 78.78%




Epoch 117/300 | Train Loss: 1.5326, Approx Train Acc: 57.58% | Val Acc: 82.42%




Epoch 118/300 | Train Loss: 1.5049, Approx Train Acc: 58.77% | Val Acc: 85.44%
   [*] Saved new best model: Val Acc = 85.44%




Epoch 119/300 | Train Loss: 1.5381, Approx Train Acc: 57.04% | Val Acc: 85.72%
   [*] Saved new best model: Val Acc = 85.72%




Epoch 120/300 | Train Loss: 1.5218, Approx Train Acc: 58.19% | Val Acc: 83.92%




Epoch 121/300 | Train Loss: 1.5229, Approx Train Acc: 57.75% | Val Acc: 82.72%




Epoch 122/300 | Train Loss: 1.5171, Approx Train Acc: 58.20% | Val Acc: 79.70%




Epoch 123/300 | Train Loss: 1.5100, Approx Train Acc: 58.70% | Val Acc: 82.92%




Epoch 124/300 | Train Loss: 1.4838, Approx Train Acc: 59.86% | Val Acc: 81.86%




Epoch 125/300 | Train Loss: 1.5335, Approx Train Acc: 57.38% | Val Acc: 84.00%




Epoch 126/300 | Train Loss: 1.4956, Approx Train Acc: 59.33% | Val Acc: 84.50%




Epoch 127/300 | Train Loss: 1.5043, Approx Train Acc: 58.69% | Val Acc: 84.86%




Epoch 128/300 | Train Loss: 1.5054, Approx Train Acc: 58.73% | Val Acc: 85.80%
   [*] Saved new best model: Val Acc = 85.80%




Epoch 129/300 | Train Loss: 1.5124, Approx Train Acc: 58.36% | Val Acc: 80.24%




Epoch 130/300 | Train Loss: 1.5114, Approx Train Acc: 58.51% | Val Acc: 83.00%




Epoch 131/300 | Train Loss: 1.5109, Approx Train Acc: 58.11% | Val Acc: 84.92%




Epoch 132/300 | Train Loss: 1.5070, Approx Train Acc: 58.44% | Val Acc: 86.18%
   [*] Saved new best model: Val Acc = 86.18%




Epoch 133/300 | Train Loss: 1.5105, Approx Train Acc: 58.22% | Val Acc: 84.80%




Epoch 134/300 | Train Loss: 1.5104, Approx Train Acc: 58.45% | Val Acc: 84.58%




Epoch 135/300 | Train Loss: 1.4961, Approx Train Acc: 59.16% | Val Acc: 85.94%




Epoch 136/300 | Train Loss: 1.5244, Approx Train Acc: 57.54% | Val Acc: 83.34%




Epoch 137/300 | Train Loss: 1.4918, Approx Train Acc: 59.53% | Val Acc: 80.96%




Epoch 138/300 | Train Loss: 1.4835, Approx Train Acc: 59.58% | Val Acc: 82.14%




Epoch 139/300 | Train Loss: 1.4781, Approx Train Acc: 60.10% | Val Acc: 86.22%
   [*] Saved new best model: Val Acc = 86.22%




Epoch 140/300 | Train Loss: 1.5005, Approx Train Acc: 59.30% | Val Acc: 86.12%




Epoch 141/300 | Train Loss: 1.4959, Approx Train Acc: 59.09% | Val Acc: 84.68%




Epoch 142/300 | Train Loss: 1.5100, Approx Train Acc: 58.43% | Val Acc: 85.60%




Epoch 143/300 | Train Loss: 1.4949, Approx Train Acc: 59.01% | Val Acc: 85.58%




Epoch 144/300 | Train Loss: 1.5241, Approx Train Acc: 57.69% | Val Acc: 84.96%




Epoch 145/300 | Train Loss: 1.4840, Approx Train Acc: 59.61% | Val Acc: 86.04%




Epoch 146/300 | Train Loss: 1.5135, Approx Train Acc: 58.13% | Val Acc: 85.92%




Epoch 147/300 | Train Loss: 1.5139, Approx Train Acc: 58.05% | Val Acc: 85.96%




Epoch 148/300 | Train Loss: 1.4892, Approx Train Acc: 59.43% | Val Acc: 86.16%




Epoch 149/300 | Train Loss: 1.5173, Approx Train Acc: 58.10% | Val Acc: 84.14%




Epoch 150/300 | Train Loss: 1.4719, Approx Train Acc: 60.28% | Val Acc: 85.32%




Epoch 151/300 | Train Loss: 1.4673, Approx Train Acc: 60.36% | Val Acc: 80.58%




Epoch 152/300 | Train Loss: 1.4796, Approx Train Acc: 59.77% | Val Acc: 86.74%
   [*] Saved new best model: Val Acc = 86.74%




Epoch 153/300 | Train Loss: 1.4595, Approx Train Acc: 60.80% | Val Acc: 87.30%
   [*] Saved new best model: Val Acc = 87.30%




Epoch 154/300 | Train Loss: 1.4662, Approx Train Acc: 60.62% | Val Acc: 86.94%




Epoch 155/300 | Train Loss: 1.4893, Approx Train Acc: 59.20% | Val Acc: 85.26%




Epoch 156/300 | Train Loss: 1.4552, Approx Train Acc: 61.03% | Val Acc: 87.70%
   [*] Saved new best model: Val Acc = 87.70%




Epoch 157/300 | Train Loss: 1.4823, Approx Train Acc: 59.74% | Val Acc: 87.94%
   [*] Saved new best model: Val Acc = 87.94%




Epoch 158/300 | Train Loss: 1.4882, Approx Train Acc: 59.21% | Val Acc: 88.44%
   [*] Saved new best model: Val Acc = 88.44%




Epoch 159/300 | Train Loss: 1.4847, Approx Train Acc: 59.42% | Val Acc: 85.30%




Epoch 160/300 | Train Loss: 1.4551, Approx Train Acc: 61.20% | Val Acc: 83.54%




Epoch 161/300 | Train Loss: 1.4704, Approx Train Acc: 60.46% | Val Acc: 86.54%




Epoch 162/300 | Train Loss: 1.4741, Approx Train Acc: 59.80% | Val Acc: 86.90%




Epoch 163/300 | Train Loss: 1.4674, Approx Train Acc: 60.30% | Val Acc: 87.44%




Epoch 164/300 | Train Loss: 1.4845, Approx Train Acc: 59.48% | Val Acc: 82.76%




Epoch 165/300 | Train Loss: 1.4785, Approx Train Acc: 59.74% | Val Acc: 87.12%




Epoch 166/300 | Train Loss: 1.4268, Approx Train Acc: 62.09% | Val Acc: 86.56%




Epoch 167/300 | Train Loss: 1.4602, Approx Train Acc: 61.01% | Val Acc: 82.82%




Epoch 168/300 | Train Loss: 1.4290, Approx Train Acc: 62.21% | Val Acc: 88.08%




Epoch 169/300 | Train Loss: 1.4658, Approx Train Acc: 60.32% | Val Acc: 86.46%




Epoch 170/300 | Train Loss: 1.4789, Approx Train Acc: 59.52% | Val Acc: 87.46%




Epoch 171/300 | Train Loss: 1.4697, Approx Train Acc: 60.45% | Val Acc: 87.82%




Epoch 172/300 | Train Loss: 1.4604, Approx Train Acc: 60.47% | Val Acc: 86.62%




Epoch 173/300 | Train Loss: 1.4855, Approx Train Acc: 59.54% | Val Acc: 88.78%
   [*] Saved new best model: Val Acc = 88.78%




Epoch 174/300 | Train Loss: 1.4690, Approx Train Acc: 60.30% | Val Acc: 86.24%




Epoch 175/300 | Train Loss: 1.4530, Approx Train Acc: 60.80% | Val Acc: 88.84%
   [*] Saved new best model: Val Acc = 88.84%




Epoch 176/300 | Train Loss: 1.4641, Approx Train Acc: 60.36% | Val Acc: 85.82%




Epoch 177/300 | Train Loss: 1.4565, Approx Train Acc: 60.67% | Val Acc: 87.62%




Epoch 178/300 | Train Loss: 1.4686, Approx Train Acc: 60.14% | Val Acc: 88.86%
   [*] Saved new best model: Val Acc = 88.86%




Epoch 179/300 | Train Loss: 1.4684, Approx Train Acc: 60.27% | Val Acc: 87.96%




Epoch 180/300 | Train Loss: 1.3862, Approx Train Acc: 64.31% | Val Acc: 87.40%




Epoch 181/300 | Train Loss: 1.4461, Approx Train Acc: 61.44% | Val Acc: 88.26%




Epoch 182/300 | Train Loss: 1.4261, Approx Train Acc: 62.12% | Val Acc: 85.68%




Epoch 183/300 | Train Loss: 1.4443, Approx Train Acc: 60.99% | Val Acc: 86.56%




Epoch 184/300 | Train Loss: 1.4506, Approx Train Acc: 60.79% | Val Acc: 86.52%




Epoch 185/300 | Train Loss: 1.4308, Approx Train Acc: 62.20% | Val Acc: 89.12%
   [*] Saved new best model: Val Acc = 89.12%




Epoch 186/300 | Train Loss: 1.4587, Approx Train Acc: 60.61% | Val Acc: 86.52%




Epoch 187/300 | Train Loss: 1.4494, Approx Train Acc: 60.92% | Val Acc: 87.26%




Epoch 188/300 | Train Loss: 1.4545, Approx Train Acc: 60.58% | Val Acc: 89.88%
   [*] Saved new best model: Val Acc = 89.88%




Epoch 189/300 | Train Loss: 1.4456, Approx Train Acc: 61.31% | Val Acc: 87.08%




Epoch 190/300 | Train Loss: 1.4550, Approx Train Acc: 60.92% | Val Acc: 89.30%




Epoch 191/300 | Train Loss: 1.4116, Approx Train Acc: 62.94% | Val Acc: 89.20%




Epoch 192/300 | Train Loss: 1.4526, Approx Train Acc: 60.73% | Val Acc: 89.06%




Epoch 193/300 | Train Loss: 1.4536, Approx Train Acc: 60.59% | Val Acc: 88.34%




Epoch 194/300 | Train Loss: 1.4426, Approx Train Acc: 61.21% | Val Acc: 89.56%




Epoch 195/300 | Train Loss: 1.4422, Approx Train Acc: 61.14% | Val Acc: 87.58%




Epoch 196/300 | Train Loss: 1.4295, Approx Train Acc: 62.19% | Val Acc: 87.06%




Epoch 197/300 | Train Loss: 1.4426, Approx Train Acc: 61.45% | Val Acc: 90.18%
   [*] Saved new best model: Val Acc = 90.18%




Epoch 198/300 | Train Loss: 1.4157, Approx Train Acc: 62.81% | Val Acc: 89.90%




Epoch 199/300 | Train Loss: 1.4269, Approx Train Acc: 62.08% | Val Acc: 89.70%




Epoch 200/300 | Train Loss: 1.4592, Approx Train Acc: 60.59% | Val Acc: 90.34%
   [*] Saved new best model: Val Acc = 90.34%




Epoch 201/300 | Train Loss: 1.4098, Approx Train Acc: 63.14% | Val Acc: 89.40%




Epoch 202/300 | Train Loss: 1.4130, Approx Train Acc: 62.75% | Val Acc: 88.94%




Epoch 203/300 | Train Loss: 1.4127, Approx Train Acc: 62.52% | Val Acc: 90.46%
   [*] Saved new best model: Val Acc = 90.46%




Epoch 204/300 | Train Loss: 1.4202, Approx Train Acc: 62.00% | Val Acc: 89.74%




Epoch 205/300 | Train Loss: 1.4129, Approx Train Acc: 62.50% | Val Acc: 91.06%
   [*] Saved new best model: Val Acc = 91.06%




Epoch 206/300 | Train Loss: 1.4372, Approx Train Acc: 61.21% | Val Acc: 89.30%




Epoch 207/300 | Train Loss: 1.4262, Approx Train Acc: 62.04% | Val Acc: 89.54%




Epoch 208/300 | Train Loss: 1.3993, Approx Train Acc: 63.27% | Val Acc: 89.90%




Epoch 209/300 | Train Loss: 1.4230, Approx Train Acc: 62.19% | Val Acc: 89.18%




Epoch 210/300 | Train Loss: 1.4063, Approx Train Acc: 62.92% | Val Acc: 90.70%




Epoch 211/300 | Train Loss: 1.4064, Approx Train Acc: 62.86% | Val Acc: 90.18%




Epoch 212/300 | Train Loss: 1.4491, Approx Train Acc: 60.67% | Val Acc: 89.34%




Epoch 213/300 | Train Loss: 1.3919, Approx Train Acc: 63.50% | Val Acc: 90.92%




Epoch 214/300 | Train Loss: 1.3963, Approx Train Acc: 63.40% | Val Acc: 89.28%




Epoch 215/300 | Train Loss: 1.4281, Approx Train Acc: 61.85% | Val Acc: 90.82%




Epoch 216/300 | Train Loss: 1.4161, Approx Train Acc: 62.42% | Val Acc: 89.82%




Epoch 217/300 | Train Loss: 1.3855, Approx Train Acc: 63.99% | Val Acc: 91.52%
   [*] Saved new best model: Val Acc = 91.52%




Epoch 218/300 | Train Loss: 1.3946, Approx Train Acc: 63.15% | Val Acc: 89.32%




Epoch 219/300 | Train Loss: 1.3968, Approx Train Acc: 63.45% | Val Acc: 91.18%




Epoch 220/300 | Train Loss: 1.3999, Approx Train Acc: 63.19% | Val Acc: 91.12%




Epoch 221/300 | Train Loss: 1.3977, Approx Train Acc: 63.02% | Val Acc: 90.06%




Epoch 222/300 | Train Loss: 1.3744, Approx Train Acc: 64.07% | Val Acc: 91.36%




Epoch 223/300 | Train Loss: 1.4032, Approx Train Acc: 62.99% | Val Acc: 90.58%




Epoch 224/300 | Train Loss: 1.3927, Approx Train Acc: 63.21% | Val Acc: 91.66%
   [*] Saved new best model: Val Acc = 91.66%




Epoch 225/300 | Train Loss: 1.3823, Approx Train Acc: 63.68% | Val Acc: 90.80%




Epoch 226/300 | Train Loss: 1.3915, Approx Train Acc: 63.87% | Val Acc: 92.64%
   [*] Saved new best model: Val Acc = 92.64%




Epoch 227/300 | Train Loss: 1.3783, Approx Train Acc: 64.00% | Val Acc: 90.24%




Epoch 228/300 | Train Loss: 1.3752, Approx Train Acc: 64.24% | Val Acc: 91.50%




Epoch 229/300 | Train Loss: 1.3681, Approx Train Acc: 64.61% | Val Acc: 92.58%




Epoch 230/300 | Train Loss: 1.3683, Approx Train Acc: 64.29% | Val Acc: 91.04%




Epoch 231/300 | Train Loss: 1.3835, Approx Train Acc: 64.01% | Val Acc: 90.38%




Epoch 232/300 | Train Loss: 1.3795, Approx Train Acc: 63.99% | Val Acc: 92.38%




Epoch 233/300 | Train Loss: 1.3652, Approx Train Acc: 64.62% | Val Acc: 92.10%




Epoch 234/300 | Train Loss: 1.3690, Approx Train Acc: 64.50% | Val Acc: 91.66%




Epoch 235/300 | Train Loss: 1.3446, Approx Train Acc: 65.40% | Val Acc: 92.38%




Epoch 236/300 | Train Loss: 1.3868, Approx Train Acc: 63.37% | Val Acc: 91.86%




Epoch 237/300 | Train Loss: 1.3790, Approx Train Acc: 63.90% | Val Acc: 93.14%
   [*] Saved new best model: Val Acc = 93.14%




Epoch 238/300 | Train Loss: 1.3417, Approx Train Acc: 65.85% | Val Acc: 91.52%




Epoch 239/300 | Train Loss: 1.3856, Approx Train Acc: 63.52% | Val Acc: 92.36%




Epoch 240/300 | Train Loss: 1.3311, Approx Train Acc: 66.14% | Val Acc: 92.38%




Epoch 241/300 | Train Loss: 1.3558, Approx Train Acc: 65.16% | Val Acc: 92.92%




Epoch 242/300 | Train Loss: 1.3515, Approx Train Acc: 65.43% | Val Acc: 93.36%
   [*] Saved new best model: Val Acc = 93.36%




Epoch 243/300 | Train Loss: 1.3377, Approx Train Acc: 66.05% | Val Acc: 92.86%




Epoch 244/300 | Train Loss: 1.3355, Approx Train Acc: 65.98% | Val Acc: 93.06%




Epoch 245/300 | Train Loss: 1.3409, Approx Train Acc: 65.67% | Val Acc: 92.96%




Epoch 246/300 | Train Loss: 1.3270, Approx Train Acc: 66.29% | Val Acc: 92.92%




Epoch 247/300 | Train Loss: 1.3341, Approx Train Acc: 65.83% | Val Acc: 91.96%




Epoch 248/300 | Train Loss: 1.3439, Approx Train Acc: 65.07% | Val Acc: 93.12%




Epoch 249/300 | Train Loss: 1.3274, Approx Train Acc: 66.19% | Val Acc: 93.12%




Epoch 250/300 | Train Loss: 1.3424, Approx Train Acc: 65.09% | Val Acc: 93.14%




Epoch 251/300 | Train Loss: 1.3369, Approx Train Acc: 65.71% | Val Acc: 91.64%




Epoch 252/300 | Train Loss: 1.3065, Approx Train Acc: 67.07% | Val Acc: 92.78%




Epoch 253/300 | Train Loss: 1.3331, Approx Train Acc: 65.63% | Val Acc: 93.46%
   [*] Saved new best model: Val Acc = 93.46%




Epoch 254/300 | Train Loss: 1.3339, Approx Train Acc: 65.87% | Val Acc: 93.46%




Epoch 255/300 | Train Loss: 1.3422, Approx Train Acc: 65.26% | Val Acc: 93.48%
   [*] Saved new best model: Val Acc = 93.48%




Epoch 256/300 | Train Loss: 1.2906, Approx Train Acc: 67.75% | Val Acc: 93.14%




Epoch 257/300 | Train Loss: 1.3262, Approx Train Acc: 65.96% | Val Acc: 94.06%
   [*] Saved new best model: Val Acc = 94.06%




Epoch 258/300 | Train Loss: 1.3131, Approx Train Acc: 66.48% | Val Acc: 93.36%




Epoch 259/300 | Train Loss: 1.3101, Approx Train Acc: 66.62% | Val Acc: 93.86%




Epoch 260/300 | Train Loss: 1.3329, Approx Train Acc: 65.35% | Val Acc: 93.98%




Epoch 261/300 | Train Loss: 1.3219, Approx Train Acc: 65.75% | Val Acc: 93.88%




Epoch 262/300 | Train Loss: 1.3231, Approx Train Acc: 65.84% | Val Acc: 93.82%




Epoch 263/300 | Train Loss: 1.3062, Approx Train Acc: 66.88% | Val Acc: 93.80%




Epoch 264/300 | Train Loss: 1.3288, Approx Train Acc: 65.50% | Val Acc: 93.86%




Epoch 265/300 | Train Loss: 1.3055, Approx Train Acc: 66.89% | Val Acc: 94.32%
   [*] Saved new best model: Val Acc = 94.32%




Epoch 266/300 | Train Loss: 1.2904, Approx Train Acc: 67.83% | Val Acc: 93.88%




Epoch 267/300 | Train Loss: 1.2975, Approx Train Acc: 67.21% | Val Acc: 93.88%




Epoch 268/300 | Train Loss: 1.2876, Approx Train Acc: 67.87% | Val Acc: 94.38%
   [*] Saved new best model: Val Acc = 94.38%




Epoch 269/300 | Train Loss: 1.3006, Approx Train Acc: 66.79% | Val Acc: 93.98%




Epoch 270/300 | Train Loss: 1.2699, Approx Train Acc: 68.69% | Val Acc: 94.24%




Epoch 271/300 | Train Loss: 1.2761, Approx Train Acc: 68.43% | Val Acc: 94.42%
   [*] Saved new best model: Val Acc = 94.42%




Epoch 272/300 | Train Loss: 1.2845, Approx Train Acc: 67.88% | Val Acc: 94.52%
   [*] Saved new best model: Val Acc = 94.52%




Epoch 273/300 | Train Loss: 1.3037, Approx Train Acc: 66.86% | Val Acc: 94.96%
   [*] Saved new best model: Val Acc = 94.96%




Epoch 274/300 | Train Loss: 1.2812, Approx Train Acc: 68.07% | Val Acc: 94.38%




Epoch 275/300 | Train Loss: 1.2730, Approx Train Acc: 68.27% | Val Acc: 94.52%




Epoch 276/300 | Train Loss: 1.2828, Approx Train Acc: 67.51% | Val Acc: 94.74%




Epoch 277/300 | Train Loss: 1.2706, Approx Train Acc: 68.17% | Val Acc: 94.56%




Epoch 278/300 | Train Loss: 1.2559, Approx Train Acc: 69.01% | Val Acc: 94.60%




Epoch 279/300 | Train Loss: 1.2820, Approx Train Acc: 67.57% | Val Acc: 94.74%




Epoch 280/300 | Train Loss: 1.2743, Approx Train Acc: 68.21% | Val Acc: 94.54%




Epoch 281/300 | Train Loss: 1.2620, Approx Train Acc: 68.48% | Val Acc: 94.84%




Epoch 282/300 | Train Loss: 1.2695, Approx Train Acc: 68.35% | Val Acc: 95.12%
   [*] Saved new best model: Val Acc = 95.12%




Epoch 283/300 | Train Loss: 1.2722, Approx Train Acc: 67.75% | Val Acc: 94.90%




Epoch 284/300 | Train Loss: 1.2680, Approx Train Acc: 68.32% | Val Acc: 94.94%




Epoch 285/300 | Train Loss: 1.2495, Approx Train Acc: 69.58% | Val Acc: 95.08%




Epoch 286/300 | Train Loss: 1.2657, Approx Train Acc: 68.17% | Val Acc: 95.00%




Epoch 287/300 | Train Loss: 1.2636, Approx Train Acc: 68.22% | Val Acc: 94.74%




Epoch 288/300 | Train Loss: 1.2456, Approx Train Acc: 69.41% | Val Acc: 94.94%




Epoch 289/300 | Train Loss: 1.2587, Approx Train Acc: 68.69% | Val Acc: 94.86%




Epoch 290/300 | Train Loss: 1.2489, Approx Train Acc: 69.45% | Val Acc: 94.74%




Epoch 291/300 | Train Loss: 1.2571, Approx Train Acc: 68.66% | Val Acc: 94.96%




Epoch 292/300 | Train Loss: 1.2691, Approx Train Acc: 68.04% | Val Acc: 95.04%




Epoch 293/300 | Train Loss: 1.2524, Approx Train Acc: 69.23% | Val Acc: 94.96%




Epoch 294/300 | Train Loss: 1.2649, Approx Train Acc: 68.22% | Val Acc: 94.78%




Epoch 295/300 | Train Loss: 1.2452, Approx Train Acc: 69.22% | Val Acc: 94.88%




Epoch 296/300 | Train Loss: 1.2474, Approx Train Acc: 69.38% | Val Acc: 94.88%




Epoch 297/300 | Train Loss: 1.2466, Approx Train Acc: 69.03% | Val Acc: 94.92%




Epoch 298/300 | Train Loss: 1.2715, Approx Train Acc: 67.95% | Val Acc: 94.70%




Epoch 299/300 | Train Loss: 1.2799, Approx Train Acc: 67.47% | Val Acc: 94.88%




Epoch 300/300 | Train Loss: 1.2737, Approx Train Acc: 67.95% | Val Acc: 94.98%
Training complete. Best validation accuracy: 95.12%
Best model saved as best_model.pth


In [None]:
# inference.py
import torch
import torchvision.transforms as transforms
import pandas as pd
import pickle
import numpy as np
from torch.utils.data import DataLoader, Dataset
#from model import LightResNet18
import torch.nn.functional as F

def unpickle(file):
    """
    Load a pickled object from disk:
    - Opens `file` in binary mode and deserializes it with encoding='bytes',
      so string keys written by Python 2 come back as bytes (e.g. b'data').
    - Returns whatever object the pickle contains.

    NOTE: pickle can execute arbitrary code while loading; only use this on
    files from a trusted source.
    """
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')

class CIFARTestDataset(Dataset):
    """
    Unlabeled test dataset:
    - Yields (image, image_id) pairs instead of (image, label).
    - Optionally applies a transform to each image on access.
    """
    def __init__(self, data, ids, transform=None):
        """
        data: indexable collection of images, documented as shape (N, 32, 32, 3);
              each item must support .astype (e.g. a numpy array)
        ids: array/list of image IDs, aligned index-by-index with data
        transform: optional callable applied to each image
        """
        self.data, self.ids, self.transform = data, ids, transform

    def __len__(self):
        # Dataset size is the number of images
        return len(self.data)

    def __getitem__(self, idx):
        # Cast to uint8 before the transform sees the image
        pixels = self.data[idx].astype("uint8")
        sample_id = self.ids[idx]
        if not self.transform:
            return pixels, sample_id  # No transform configured: raw array
        return self.transform(pixels), sample_id

def inference():
    """
    Run test-set inference with the saved checkpoint and write a submission CSV:
    - Builds LightResNet18, loads weights from "best_model.pth", sets eval mode.
    - Loads the unlabeled test pickle and wraps its images in a DataLoader.
    - Predicts with 2-pass test-time augmentation (original + horizontal flip),
      averaging the softmax probabilities of the two passes.
    - Writes "submission.csv" with columns "ID" and "Labels".

    Takes no arguments and returns nothing; results are written to disk and
    progress is printed to stdout.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f"Using device: {device}")

    # 1) Load the best model
    model = LightResNet18(num_classes=10, drop_prob=0.1).to(device)
    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all a state_dict needs. It avoids executing arbitrary pickled
    # code from the checkpoint and silences the torch.load FutureWarning.
    state_dict = torch.load("best_model.pth", map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()  # inference mode: no stochastic residual dropping

    # 2) Load custom test set .pkl
    test_file = "/content/cifar_test_nolabel.pkl"
    test_dict = unpickle(test_file)
    print("Keys in test_dict:", test_dict.keys())

    # Reshape if it's (N, 3072). If it's already (N, 32,32,3), this is a no-op.
    # NOTE(review): a flat (N, 3072) array in the standard CIFAR batch layout is
    # channel-first and would need reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)
    # instead - confirm the layout of this file before relying on the reshape.
    test_images = test_dict[b'data'].reshape(-1, 32, 32, 3)
    # NOTE(review): IDs are generated as "0".."N-1" in file order; if the pickle
    # carries its own b'ids' entry, confirm it matches this numbering.
    test_ids = [str(i) for i in range(len(test_images))]

    # 3) Define test transforms (no augmentation, only tensor conversion + normalization)
    transform_test = transforms.Compose([
        transforms.ToPILImage(),
        transforms.ToTensor(),
        # Per-channel mean / std; presumably the same values used in training - confirm
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2470, 0.2435, 0.2616))
    ])

    test_dataset = CIFARTestDataset(test_images, test_ids, transform=transform_test)
    # shuffle=False keeps predictions aligned with the ID order
    test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False, num_workers=4)

    # 4) Inference with 2-pass TTA
    predictions = []
    image_ids = []
    with torch.no_grad():
        for imgs, ids in test_loader:
            imgs = imgs.to(device)

            # Pass 1: Normal
            probs_normal = F.softmax(model(imgs), dim=1)

            # Pass 2: Horizontal flip (dim 3 is the width axis of an NCHW batch)
            probs_flipped = F.softmax(model(torch.flip(imgs, dims=[3])), dim=1)

            # Average probabilities, then pick the most likely class per image
            final_probs = (probs_normal + probs_flipped) / 2.0
            predicted = final_probs.argmax(dim=1)

            predictions.extend(predicted.cpu().numpy().tolist())
            image_ids.extend(ids)

    # 5) Save submission
    submission_df = pd.DataFrame({"ID": image_ids, "Labels": predictions})
    submission_df.to_csv("submission.csv", index=False)
    print("Submission file saved as submission.csv")

# Script entry point: run inference only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    inference()


Using device: cuda
Keys in test_dict: dict_keys([b'data', b'ids'])


  model.load_state_dict(torch.load("best_model.pth", map_location=device))


Submission file saved as submission.csv


In [None]:
import pandas as pd

# Sanity-check the generated submission: preview rows and per-class counts
submission_df = pd.read_csv("submission.csv")
preview_rows = submission_df.head()
print(preview_rows)  # First few rows (ID / Labels columns)
label_counts = submission_df["Labels"].value_counts()
print(label_counts)  # How many test images were assigned to each label


   ID  Labels
0   0       6
1   1       1
2   2       8
3   3       6
4   4       9
Labels
3    1127
5    1119
1    1040
8    1021
9    1018
7     969
2     958
4     940
0     912
6     896
Name: count, dtype: int64
