### --- 1. Imports ---

In [None]:
import os
import json
import math
import random
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.utils.class_weight import compute_class_weight

### --- 2. Configuration ---

In [None]:
# Path to the landmark archive (.npz) created by the first notebook.
# The dataset class opens it with np.load and looks arrays up by video id.
LANDMARK_FILE = r'../data/Landmarks_GCN_augmented.npz'
# Path to the JSON file defining the 100-class dataset. Each entry's
# 'subset'/'split' and 'action' fields are read to build splits and labels.
SPLIT_FILE = r'../data/nslt_100.json'
# Folder where main() writes its outputs: the best checkpoint,
# training_log.csv and training_curves.png. Created on demand by main().
OUTPUT_DIR = r'../outputs/stgcn_100/'

#### --- Hyperparameters ---

In [None]:
NUM_CLASSES = 100     # Expected number of classes; also used in the checkpoint file name
TOTAL_LANDMARKS = 67  # 15 pose + 2 mid + 42 hands + 8 face
C = 4                 # Channels: x, y, z, confidence
T_FRAMES = 128        # Target number of frames per video (sequences are resampled to this)
BATCH_SIZE = 32       # Batch size for the train, validation and test loaders
EPOCHS = 200          # Number of training epochs; also T_max of the cosine LR schedule
BASE_LR = 3e-4        # Initial learning rate passed to AdamW
WEIGHT_DECAY = 1e-4   # Weight decay passed to AdamW
# Train on the GPU when one is visible to PyTorch, otherwise on the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
USE_AMP = True        # Automatic Mixed Precision (faster training)
MIXUP_ALPHA = 0.4     # Mixup augmentation strength (Beta(alpha, alpha)); <= 0 disables mixup
LABEL_SMOOTHING = 0.1 # Label smoothing passed to CrossEntropyLoss
NUM_WORKERS = 2       # For data loading

#### --- Reproducibility ---

In [None]:
# Seed every random number generator this file draws from so runs are repeatable:
# Python's `random` (rotation angle, temporal jitter), NumPy (coordinate noise,
# mixup lambda) and PyTorch (weight init, shuffling, mixup permutation).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
# Also seed the CUDA generators explicitly when a GPU is available.
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

### --- 3. Preprocessing & Augmentation ---

In [None]:
def temporal_resample(sequence, target_len):
    """
    Linearly resamples a sequence along its first (time) axis to `target_len` frames.
    This ensures all video sequences have the same number of frames.

    Args:
        sequence: NumPy array whose axis 0 is time, e.g. (T, V, C). All trailing
            axes are interpolated element-wise, so any number of them is accepted.
        target_len: Number of frames in the returned array.

    Returns:
        Array of shape (target_len, *sequence.shape[1:]) with the input dtype.
        - If the input already has `target_len` frames, the SAME object is
          returned (no copy).
        - If the input has no frames at all, an all-zero array is returned
          instead of raising.
        - If the input has exactly one frame, that frame is repeated.
    """
    orig_len = sequence.shape[0]
    if orig_len == target_len:
        return sequence

    # Nothing to interpolate from: fall back to an all-zero clip so that one
    # empty recording cannot crash a whole data-loading worker.
    if orig_len == 0:
        return np.zeros((target_len,) + sequence.shape[1:], dtype=sequence.dtype)

    # If only 1 frame, repeat it
    if orig_len == 1:
        return np.repeat(sequence, target_len, axis=0)

    # Fractional source position of every target frame (first/last frames align).
    target_positions = np.linspace(0, orig_len - 1, target_len)

    # Neighbouring source frames and the blend weight between them.
    lower = np.clip(np.floor(target_positions).astype(np.intp), 0, orig_len - 1)
    upper = np.minimum(lower + 1, orig_len - 1)
    frac = (target_positions - lower).reshape((-1,) + (1,) * (sequence.ndim - 1))

    # One vectorised blend over all landmarks/channels instead of a Python
    # loop calling np.interp once per (landmark, channel) pair.
    out = (1.0 - frac) * sequence[lower] + frac * sequence[upper]
    return out.astype(sequence.dtype, copy=False)

class LandmarkPreprocessor:
    """
    Handles normalization and augmentation of the landmark data.
    - Normalization: Makes the data robust to signer's position/size.
    - Augmentation: Creates new variations of data during training.

    Calling an instance runs the full pipeline on one clip:
    clean non-finite values -> resample to `target_frames` -> normalize every
    frame -> (training only) augment.
    """
    def __init__(self, augment=True, target_frames=T_FRAMES):
        """
        Args:
            augment: Master switch for augmentation; augmentation is applied
                only when this is True AND the call is made with is_train=True.
            target_frames: Number of frames every clip is resampled to.
        """
        self.augment = augment
        self.target_frames = target_frames
        # Landmark indices (must match 'extract_landmarks.ipynb')
        self.POSE_IDXS = list(range(0, 17))
        self.LEFT_HAND_IDXS = list(range(17, 38))
        self.RIGHT_HAND_IDXS = list(range(38, 59))
        self.FACE_IDXS = list(range(59, 67))

    def normalize_frame(self, frame):
        """
        Normalizes a single frame of landmarks.

        Args:
            frame: Array of shape (V, C) whose first three channels are x, y, z.

        Returns:
            A new array of the same shape; `frame` itself is not modified.
            Each of the three normalization steps is best-effort: if the frame
            does not have the expected layout (IndexError / ValueError) that
            step is skipped and the original values are kept.
        """
        normalized = frame.copy()

        # --- Pose + face normalization (relative to nose/shoulders) ---
        try:
            pose = frame[self.POSE_IDXS, :2]
            if np.sum(np.abs(pose)) > 0:  # all-zero pose == nothing detected
                # NOTE(review): treats pose index 0 as the nose and 1/2 as the
                # shoulders - confirm against the extraction notebook.
                nose = pose[0]
                l_s = pose[1] if len(pose) > 1 else nose
                r_s = pose[2] if len(pose) > 2 else nose + 0.1
                if np.all(l_s == 0) or np.all(r_s == 0):
                    # A shoulder is missing: fall back to the pose centroid
                    # with a fixed 0.1 offset so the scale stays non-zero.
                    l_s = np.nan_to_num(np.mean(pose, axis=0))
                    r_s = l_s + 0.1
                dist = np.linalg.norm(l_s - r_s)
                if dist < 1e-4:
                    dist = 0.1
                hu = dist / 2.0          # "head unit": half the shoulder distance
                bw = 6 * hu              # bounding-box width
                bh = 7 * hu              # bounding-box height
                bx = nose[0] - 3 * hu    # box left edge
                by = nose[1] - 0.5 * hu  # box top edge
                pf_idxs = self.POSE_IDXS + self.FACE_IDXS
                normalized[pf_idxs, 0] = (frame[pf_idxs, 0] - bx) / max(bw, 1e-6) - 0.5
                normalized[pf_idxs, 1] = (frame[pf_idxs, 1] - by) / max(bh, 1e-6) - 0.5
        except (IndexError, ValueError):
            pass  # Keep original data if normalization fails

        # --- Hand normalization (relative to each hand's bounding box) ---
        for h_idxs in [self.LEFT_HAND_IDXS, self.RIGHT_HAND_IDXS]:
            try:
                hand = frame[h_idxs]
                if np.sum(np.abs(hand)) > 0:
                    xs, ys = hand[:, 0], hand[:, 1]
                    if (np.max(xs) != np.min(xs)) or (np.max(ys) != np.min(ys)):
                        xmin, xmax, ymin, ymax = np.min(xs), np.max(xs), np.min(ys), np.max(ys)
                        # Square box (largest side) so the hand's aspect ratio is preserved.
                        box_size = max(xmax - xmin, ymax - ymin, 1e-4)
                        normalized[h_idxs, 0] = (hand[:, 0] - xmin) / box_size - 0.5
                        normalized[h_idxs, 1] = (hand[:, 1] - ymin) / box_size - 0.5
            except (IndexError, ValueError):
                continue  # Keep original data if normalization fails

        # --- Z-axis normalization (relative to torso) ---
        try:
            torso_z = frame[self.POSE_IDXS, 2]
            torso_z_nonzero = torso_z[torso_z != 0]
            if len(torso_z_nonzero) > 0:
                median_z = np.median(torso_z_nonzero)
                # The shift is applied to every landmark in the frame.
                normalized[:, 2] = (frame[:, 2] - median_z)
        except (IndexError, ValueError):
            pass

        return normalized

    def augment_sequence(self, seq):
        """
        Applies augmentations to a full sequence.

        Args:
            seq: Array of shape (T, V, C) with x, y in the first two channels.

        Returns:
            A new float32 array. The caller's array is left untouched (the
            method works on a private copy). When temporal jitter fires, the
            result is resampled back to `self.target_frames` frames.
        """
        # Work on a copy: everything below writes in place.
        seq = np.array(seq, dtype=np.float32)

        # --- Aug: Rotation noise (random in-plane rotation of +/-12 degrees about the origin) ---
        angle = random.uniform(-12, 12)
        rad = math.radians(angle)
        cos, sin = math.cos(rad), math.sin(rad)
        R = np.array([[cos, -sin], [sin, cos]], dtype=np.float32)
        xy = seq[:, :, :2].reshape(-1, 2)
        xy = xy @ R.T
        seq[:, :, :2] = xy.reshape(seq.shape[0], seq.shape[1], 2)

        # --- Aug: Add small random noise (to every channel) ---
        seq += np.random.normal(0, 0.003, size=seq.shape).astype(np.float32)

        # --- Aug: Temporal jitter (drop/repeat frames) ---
        if random.random() < 0.5:
            T = seq.shape[0]
            jittered = []
            for t in range(T):
                if random.random() < 0.05 and t < T - 1:
                    continue  # 5% chance to drop frame (the last frame is never dropped)
                jittered.append(seq[t])
                if random.random() < 0.05:
                    jittered.append(seq[t])  # 5% chance to repeat frame
            if len(jittered) < 2:
                # Too few frames survived: fall back to the un-jittered frames.
                jittered = [seq[t] for t in range(T)]
            jittered = np.stack(jittered, axis=0)
            seq = temporal_resample(jittered, self.target_frames)  # Resample back to target
        return seq

    def __call__(self, landmark_sequence: np.ndarray, is_train: bool):
        """
        Applies the full preprocessing pipeline.

        Args:
            landmark_sequence: Raw clip of shape (T, V, C). It is never
                modified; all work happens on a float32 copy.
            is_train: True for training samples; augmentation runs only when
                this is True and the instance was built with augment=True.

        Returns:
            float32 array with `self.target_frames` frames.
        """
        # Clean non-finite values. The replacement values MUST be passed by
        # keyword: the second positional parameter of np.nan_to_num is `copy`,
        # so `nan_to_num(x, 0.0)` would silently edit the caller's array in place.
        # Casting to float32 first and mapping +/-inf to 0 ensures no infinity
        # can survive (a float64 "largest finite" overflows back to inf on cast);
        # 0 is the value this pipeline already treats as "landmark missing".
        landmark_sequence = np.nan_to_num(
            np.asarray(landmark_sequence, dtype=np.float32),
            nan=0.0, posinf=0.0, neginf=0.0,
        )
        seq = temporal_resample(landmark_sequence, self.target_frames)

        # Normalize each frame
        seq = np.stack([self.normalize_frame(frame) for frame in seq], axis=0)

        # Apply augmentations only if it's training data
        if is_train and self.augment:
            seq = self.augment_sequence(seq)

        return seq

### --- 4. PyTorch Dataset ---

In [None]:
class WLASLLandmarkDataset(Dataset):
    """
    A PyTorch Dataset class to load landmarks and labels.
    - Loads data from the .npz file (only the videos belonging to this split).
    - Reads the train/val/test split from the .json file.
    - Uses the `label_map` to convert string labels (e.g., "17") to integer indices (e.g., 0).
    - Applies the `preprocessor`.

    Each item is a `(features, label)` pair: `features` is a float32 tensor laid
    out as (C, T, V) and `label` is a 0-dim int64 tensor.
    """
    def __init__(self, landmark_path, split_file_path, split='train', preprocessor=None, label_map=None):
        """
        Args:
            landmark_path: Path to the .npz archive keyed by video id.
            split_file_path: Path to the JSON split file. Entries may sit at
                the top level or under a 'root' key.
            split: Which subset to keep ('train', 'val' or 'test').
            preprocessor: Callable `(sequence, is_train=...) -> array`; a default
                LandmarkPreprocessor is created when omitted.
            label_map: Optional dict from raw label string to class index.
                Videos whose label is not in the map are skipped. When None the
                raw label is converted with int() and used directly.
        """
        self.split = split
        self.preprocessor = preprocessor if preprocessor is not None else LandmarkPreprocessor()

        # Load the train/val/test split definition
        with open(split_file_path, 'r') as f:
            split_data = json.load(f)
        split_dict = split_data.get('root', split_data)

        self.samples = []
        # NOTE(security): allow_pickle=True lets np.load unpickle object arrays,
        # which can execute arbitrary code. Only open archives you produced yourself.
        # The `with` block closes the archive's file handle once the needed arrays
        # are read; arrays are decompressed lazily, so only this split's videos
        # are ever loaded into memory.
        with np.load(landmark_path, allow_pickle=True) as landmarks_data:
            available = set(landmarks_data.files)
            for video_id, info in split_dict.items():
                # Check if this video is in the correct split (train, val, or test)
                subset = info.get('subset') or info.get('split')
                if subset != self.split:
                    continue
                # Check if we have landmarks for this video
                if video_id not in available:
                    continue

                action = info.get('action', [])
                if not isinstance(action, list) or len(action) == 0:
                    continue

                # Get the raw label (e.g., "17", "42")
                raw_label = action[0]

                # Use the label_map to get a 0-indexed integer label
                if label_map is None:
                    label = int(raw_label)  # Fails if labels aren't 0-N
                else:
                    key = str(raw_label)
                    if key not in label_map:
                        continue  # Skip if this label isn't in our 100 classes
                    label = int(label_map[key])

                self.samples.append({
                    "video_id": video_id,
                    "landmarks": landmarks_data[video_id],
                    "label": label,
                })

        print(f"✅ Created '{self.split}' split with {len(self.samples)} samples.")

    def __len__(self):
        """Number of samples kept for this split."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return the preprocessed `(features, label)` pair for sample `idx`."""
        sample = self.samples[idx]
        landmark_sequence = sample["landmarks"]

        # Handle a legacy flattened format: (T, V*C) instead of (T, V, C).
        # Any channel count is accepted as long as the row width is a
        # multiple of the landmark count (the old check was hard-wired to 3).
        if (
            isinstance(landmark_sequence, np.ndarray)
            and landmark_sequence.ndim == 2
            and landmark_sequence.shape[1] > 0
            and landmark_sequence.shape[1] % TOTAL_LANDMARKS == 0
        ):
            channels = landmark_sequence.shape[1] // TOTAL_LANDMARKS
            landmark_sequence = landmark_sequence.reshape(
                landmark_sequence.shape[0], TOTAL_LANDMARKS, channels
            )

        # Apply normalization and augmentation
        processed = self.preprocessor(landmark_sequence, is_train=(self.split == 'train'))

        # Transpose (T, V, C) -> (C, T, V) format for PyTorch Conv2D layers
        processed = np.ascontiguousarray(np.transpose(processed, (2, 0, 1)), dtype=np.float32)

        label = int(sample['label'])

        return torch.from_numpy(processed), torch.tensor(label, dtype=torch.long)

def collate_fn(batch):
    """
    Merge a list of `(features, label)` samples into one batch.

    Returns a `(features, labels)` pair: the feature tensors stacked along a
    new leading batch axis, and the labels gathered into a 1-D int64 tensor.
    """
    features, labels = zip(*batch)
    return torch.stack(features, dim=0), torch.tensor(labels, dtype=torch.long)

### --- 5. ST-GCN Model ---

In [None]:
class GraphConv(nn.Module):
    """
    Spatial Graph Convolution layer.
    It learns a weighted adjacency matrix 'A' and applies a transform 'theta':
    for every frame, `y = A @ (x @ theta) + bias`.

    Args:
        in_channels: Number of input feature channels per node.
        out_channels: Number of output feature channels per node.
        num_nodes: Number of graph nodes V (size of the V x V adjacency).
        bias: Whether to add a learnable per-channel bias.

    Shapes:
        input  (B, in_channels, T, V)  ->  output (B, out_channels, T, V)
    """
    def __init__(self, in_channels, out_channels, num_nodes, bias=True):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels

        # Allocated uninitialised, then filled by xavier_uniform_ below.
        self.theta = nn.Parameter(torch.empty(in_channels, out_channels))
        # A is the learnable adjacency matrix; it starts as the identity
        # (every node initially only sees itself).
        self.A = nn.Parameter(torch.eye(num_nodes, dtype=torch.float32), requires_grad=True)

        if bias:
            self.bias = nn.Parameter(torch.zeros(out_channels))
        else:
            self.register_parameter('bias', None)
        nn.init.xavier_uniform_(self.theta)

    def forward(self, x):
        B, C, T, V = x.shape
        # Reshape for matrix multiplication: (B, C, T, V) -> (B*T, V, C)
        x_perm = x.permute(0, 2, 3, 1).reshape(B * T, V, C)

        # y = A @ (x @ theta). torch.matmul broadcasts the single (V, V)
        # adjacency over the B*T leading dimension, so there is no need to
        # materialise B*T copies of it as repeat() + bmm() would.
        y = torch.matmul(self.A, x_perm @ self.theta)

        # Reshape back to (B, C_out, T, V)
        y = y.reshape(B, T, V, self.out_channels).permute(0, 3, 1, 2).contiguous()
        if self.bias is not None:
            y = y + self.bias.view(1, -1, 1, 1)
        return y

class _ZeroResidual(nn.Module):
    """Parameter-free stand-in for "no residual": always contributes 0."""
    def forward(self, x):
        return 0


class STGCNBlock(nn.Module):
    """
    Spatio-Temporal Graph Convolutional (ST-GCN) block.
    Combines a GraphConv (spatial) with a 2D-Conv (temporal).

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by the block.
        num_nodes: Number of graph nodes V.
        kernel_size: Temporal kernel length (odd values keep T unchanged at stride 1).
        stride: Temporal stride; values > 1 downsample the time axis.
        residual: Whether to add a skip connection around the block.

    Shapes:
        input (B, in_channels, T, V) -> output (B, out_channels, T', V), where
        the time length T' is set by the temporal conv's stride and padding.
    """
    def __init__(self, in_channels, out_channels, num_nodes, kernel_size=9, stride=1, residual=True):
        super().__init__()
        self.gcn = GraphConv(in_channels, out_channels, num_nodes)

        padding = (kernel_size - 1) // 2
        # Temporal convolution (as a 2D conv with kernel (kernel_size, 1))
        self.tcn = nn.Sequential(
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=(kernel_size, 1), padding=(padding, 0), stride=(stride, 1)),
            nn.BatchNorm2d(out_channels),
            nn.Dropout(0.3)
        )

        # Residual connection to help with training deep networks.
        # Real modules are used instead of lambdas so the block stays
        # picklable; neither stand-in has parameters, so state_dict keys
        # are unaffected.
        if not residual:
            self.residual = _ZeroResidual()
        elif (in_channels == out_channels) and stride == 1:
            self.residual = nn.Identity()
        else:
            # 1x1 conv to match dimensions if channels or stride change
            self.residual = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=(stride, 1)),
                nn.BatchNorm2d(out_channels),
            )
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        res = self.residual(x)  # computed from the block INPUT, before gcn/tcn
        x = self.gcn(x)
        x = self.tcn(x)
        x = x + res  # Add residual
        return self.relu(x)

class TemporalSelfAttention(nn.Module):
    """
    Applies self-attention across the TIME dimension.
    This helps the model focus on the most important frames in the video.

    Every landmark's time series is attended independently (landmarks are
    folded into the batch axis), followed by a residual connection and
    LayerNorm over the channel axis.

    Args:
        d_model: Channel count C of the incoming feature map; must be
            divisible by `n_heads`.
        n_heads: Number of attention heads.

    Shapes:
        input (B, C, T, V) -> output (B, C, T, V)
    """
    def __init__(self, d_model, n_heads=4):
        super().__init__()
        self.attn = nn.MultiheadAttention(d_model, n_heads, batch_first=True)
        self.ln = nn.LayerNorm(d_model)  # Layer normalization

    def forward(self, x):
        B, C, T, V = x.shape
        # Reshape for attention: (B, C, T, V) -> (B*V, T, C)
        # This treats each landmark's time-series independently
        x = x.permute(0, 3, 2, 1).reshape(B * V, T, C)

        # Apply self-attention (query=x, key=x, value=x). The attention map
        # itself is never used, so skip building/averaging it; this also lets
        # PyTorch pick its optimised attention implementation.
        out, _ = self.attn(x, x, x, need_weights=False)

        # Add residual connection and normalize
        out = self.ln(out + x)

        # Reshape back to (B, C, T, V)
        out = out.reshape(B, V, T, C).permute(0, 3, 2, 1).contiguous()
        return out

class STGCN(nn.Module):
    """
    Complete classifier: input batch-norm, four ST-GCN blocks, temporal
    self-attention, global average pooling and a linear head.

    Args:
        in_channels: Channels per landmark in the input.
        num_class: Number of output logits.
        num_nodes: Number of landmarks (graph nodes).

    Shapes:
        input (B, in_channels, T, V) -> logits (B, num_class)
    """
    def __init__(self, in_channels, num_class, num_nodes):
        super().__init__()
        # Normalises every (channel, node) pair separately across batch and time.
        self.data_bn = nn.BatchNorm1d(in_channels * num_nodes)

        # (in, out, temporal stride, use residual) for each backbone stage;
        # the two stride-2 stages each downsample the time axis.
        stage_specs = [
            (in_channels, 64, 1, False),
            (64, 64, 1, True),
            (64, 128, 2, True),
            (128, 256, 2, True),
        ]
        self.layers = nn.ModuleList([
            STGCNBlock(c_in, c_out, num_nodes, kernel_size=9, stride=step, residual=skip)
            for c_in, c_out, step, skip in stage_specs
        ])

        # Attention over the (downsampled) time axis.
        self.temporal_attn = TemporalSelfAttention(d_model=256, n_heads=4)

        # Collapse time and nodes, then classify.
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(256, num_class)

    def forward(self, x):
        batch, channels, frames, nodes = x.shape

        # (B, C, T, V) -> (B, C*V, T) for BatchNorm1d, then back to (B, C, T, V).
        flat = x.permute(0, 1, 3, 2).contiguous().view(batch, channels * nodes, frames)
        flat = self.data_bn(flat)
        feats = flat.view(batch, channels, nodes, frames).permute(0, 1, 3, 2).contiguous()

        # Backbone.
        for block in self.layers:
            feats = block(feats)

        # Temporal attention.
        feats = self.temporal_attn(feats)

        # (B, 256, T', V) -> (B, 256, 1, 1) -> (B, 256) -> logits.
        pooled = self.pool(feats).view(batch, -1)
        return self.fc(pooled)

# --------------------------
# --- 6. Metrics & Mixup ---
# --------------------------

def mixup_data(x, y, alpha=1.0, device=None):
    """
    Mixup augmentation: blend every sample with a randomly chosen partner
    from the same batch.

    Args:
        x: Batch of inputs (first axis is the batch axis).
        y: Batch of labels aligned with `x`.
        alpha: Beta(alpha, alpha) concentration for the blend weight.
            A value <= 0 disables mixing.
        device: Device the random permutation is moved to.

    Returns:
        `(mixed_x, y_a, y_b, lam)` where `mixed_x = lam * x + (1 - lam) * x[perm]`,
        `y_a` is the original labels and `y_b` the partner labels. With mixing
        disabled this is `(x, y, y, 1.0)`.
    """
    if alpha <= 0:
        return x, y, y, 1.0

    # One blend weight for the whole batch.
    blend = np.random.beta(alpha, alpha)

    # Partner of sample i is sample perm[i].
    perm = torch.randperm(x.size(0)).to(device)
    partner_x = x[perm, :]
    partner_y = y[perm]

    blended = blend * x + (1 - blend) * partner_x
    return blended, y, partner_y, blend

def accuracy_topk(output, target, topk=(1,5)):
    """
    Computes the accuracy over the k top predictions.

    Args:
        output: Logits/scores of shape (batch, num_classes).
        target: Ground-truth class indices of shape (batch,).
        topk: Iterable of k values to evaluate.

    Returns:
        List of accuracies in percent, one per entry of `topk`
        (e.g. [top1_acc, top5_acc]).
        - A k larger than the number of classes is evaluated over all classes
          instead of raising.
        - An empty batch yields 0.0 for every k instead of dividing by zero.
    """
    batch_size = target.size(0)
    if batch_size == 0:
        return [0.0 for _ in topk]

    num_classes = output.size(1)
    # topk() cannot return more entries than there are classes.
    maxk = min(max(topk), num_classes)

    _, pred = output.topk(maxk, 1, True, True)  # Get top-k indices, best first
    pred = pred.t()                             # (maxk, batch)
    correct = pred.eq(target.view(1, -1).expand_as(pred))  # Compare with ground truth

    res = []
    for k in topk:
        hits = correct[:min(k, num_classes)].reshape(-1).float().sum().item()
        res.append(hits * 100.0 / batch_size)
    return res  # [top1_acc (%), top5_acc (%)]

def count_parameters(model):
    """Return the total number of scalar entries across the model's trainable parameters."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

### --- 7. Training Pipeline ---

In [None]:
def prepare_label_map(split_file, target_num_classes=NUM_CLASSES):
    """
    Build the raw-label -> class-index mapping from the split file.

    The raw labels of all 'train' entries (first element of each entry's
    'action' list, as a string) are collected, sorted by their integer value
    and numbered 0..N-1. The model's logits are indexed 0..N-1, so every
    dataset must use this mapping.

    Args:
        split_file: Path to the JSON split file. Entries may sit at the top
            level or under a 'root' key.
        target_num_classes: Expected number of classes; a warning is printed
            when the number found differs.

    Returns:
        Dict such as {"17": 0, "42": 1, ...}.
    """
    with open(split_file, 'r') as handle:
        payload = json.load(handle)
    entries = payload.get('root', payload)

    # Gather the distinct raw labels that occur in the training subset.
    seen_labels = set()
    for entry in entries.values():
        if (entry.get('subset') or entry.get('split')) != 'train':
            continue
        action = entry.get('action', [])
        if isinstance(action, list) and action:
            seen_labels.add(str(action[0]))

    # Numeric ordering keeps the mapping identical from run to run.
    ordered_labels = sorted(seen_labels, key=int)

    found = len(ordered_labels)
    if found != target_num_classes:
        print(f"[Warning] Found {found} unique train actions, expected {target_num_classes}.")

    return dict(zip(ordered_labels, range(found)))


def _evaluate(model, loader, use_amp, desc, criterion=None):
    """
    Run `model` over `loader` in eval mode without gradients.

    Returns `(mean_loss, top1_percent, top5_percent)` averaged over the samples
    seen. `mean_loss` is 0.0 when no `criterion` is given; all three values are
    0.0 when the loader yields nothing.
    """
    model.eval()
    loss_sum = 0.0
    top1_hits = 0.0
    top5_hits = 0.0
    seen = 0
    with torch.no_grad():
        for x, y in tqdm(loader, desc=desc):
            x = x.to(DEVICE)
            y = y.to(DEVICE)
            n = x.size(0)
            with torch.cuda.amp.autocast(enabled=use_amp):
                logits = model(x)
                if criterion is not None:
                    loss_sum += criterion(logits, y).item() * n
            acc1, acc5 = accuracy_topk(logits.detach().cpu(), y.detach().cpu(), topk=(1, 5))
            # accuracy_topk returns percentages; convert back to hit counts
            # so batches of different sizes are weighted correctly.
            top1_hits += acc1 * n / 100.0
            top5_hits += acc5 * n / 100.0
            seen += n
    if seen == 0:
        return 0.0, 0.0, 0.0
    return loss_sum / seen, top1_hits / seen * 100.0, top5_hits / seen * 100.0


def main():
    """
    Train the ST-GCN model, log metrics, and evaluate the best checkpoint.

    Side effects (all under OUTPUT_DIR): writes the best checkpoint
    `best_stgcn_{NUM_CLASSES}.pth`, `training_log.csv` and
    `training_curves.png`, and prints progress to stdout.

    Raises:
        RuntimeError: If the training split contains no usable samples.
    """
    # --- 1. Create Output Directory ---
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    print(f"All outputs will be saved to: {OUTPUT_DIR}")

    # Mixed precision and pinned host memory only make sense on a GPU.
    on_cuda = DEVICE.type == 'cuda'
    use_amp = USE_AMP and on_cuda

    # --- 2. Load Data & Label Map ---
    print("Preparing label map...")
    label_map = prepare_label_map(SPLIT_FILE, target_num_classes=NUM_CLASSES)

    print("Initializing preprocessor...")
    preprocessor = LandmarkPreprocessor(augment=True, target_frames=T_FRAMES)
    val_preprocessor = LandmarkPreprocessor(augment=False, target_frames=T_FRAMES)  # No augmentation for validation

    # --- 3. Create Datasets & Loaders ---
    print("Loading datasets...")
    train_dataset = WLASLLandmarkDataset(LANDMARK_FILE, SPLIT_FILE, split='train', preprocessor=preprocessor, label_map=label_map)
    val_dataset = WLASLLandmarkDataset(LANDMARK_FILE, SPLIT_FILE, split='val', preprocessor=val_preprocessor, label_map=label_map)
    test_dataset = WLASLLandmarkDataset(LANDMARK_FILE, SPLIT_FILE, split='test', preprocessor=val_preprocessor, label_map=label_map)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, pin_memory=on_cuda, collate_fn=collate_fn)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=on_cuda, collate_fn=collate_fn)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=on_cuda, collate_fn=collate_fn)

    # --- 4. Compute Class Weights (for unbalanced data) ---
    print("Calculating class weights...")
    train_labels = np.asarray([int(s['label']) for s in train_dataset.samples])
    if len(train_labels) == 0:
        raise RuntimeError('No training samples found. Check your split file and landmark file paths.')

    # Labels are indices into the label map, so the model and the weight
    # vector must be sized by the MAP, not by the classes that happen to have
    # training landmarks. A class listed in the split file whose videos are
    # all missing from the landmark archive would otherwise put labels out of
    # range. Such absent classes keep a neutral weight of 1.0.
    num_classes = len(label_map)
    unique_classes = np.unique(train_labels)
    class_weights = np.ones(num_classes, dtype=np.float32)
    class_weights[unique_classes] = compute_class_weight('balanced', classes=unique_classes, y=train_labels)
    class_weights_tensor = torch.tensor(class_weights, dtype=torch.float32).to(DEVICE)
    print(f"Found {len(unique_classes)} classes. Using {len(label_map)} from label map.")

    # --- 5. Initialize Model, Optimizer, Loss ---
    print("Initializing model...")
    model = STGCN(in_channels=C, num_class=num_classes, num_nodes=TOTAL_LANDMARKS).to(DEVICE)
    print(f"Model has {count_parameters(model)} trainable parameters.")

    optimizer = torch.optim.AdamW(model.parameters(), lr=BASE_LR, weight_decay=WEIGHT_DECAY)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

    # Loss function with class weights and label smoothing
    criterion = nn.CrossEntropyLoss(weight=class_weights_tensor, label_smoothing=LABEL_SMOOTHING)

    # --- 6. Start Training Loop ---
    best_val = 0.0
    checkpoint_saved = False  # True once THIS run has written a checkpoint
    save_path = os.path.join(OUTPUT_DIR, f"best_stgcn_{NUM_CLASSES}.pth")
    train_history = {'epoch': [], 'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': [], 'val_top5': []}

    print(f"--- Starting training for {EPOCHS} epochs on {DEVICE} ---")
    for epoch in range(EPOCHS):
        # --- Training Phase ---
        model.train()
        running_loss = 0.0
        running_acc = 0.0
        total_samples = 0

        pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS} Train")
        for x, y in pbar:
            x = x.to(DEVICE)
            y = y.to(DEVICE)
            batch_size = x.size(0)

            # Apply Mixup
            if MIXUP_ALPHA > 0:
                mixed_x, y_a, y_b, lam = mixup_data(x, y, alpha=MIXUP_ALPHA, device=DEVICE)
            else:
                mixed_x, y_a, y_b, lam = x, y, y, 1.0

            optimizer.zero_grad()
            # Use Automatic Mixed Precision (AMP)
            with torch.cuda.amp.autocast(enabled=use_amp):
                logits = model(mixed_x)
                # Calculate loss with mixed labels
                loss = lam * criterion(logits, y_a) + (1 - lam) * criterion(logits, y_b)

            # Backpropagation
            scaler.scale(loss).backward()
            scaler.unscale_(optimizer)  # Unscale gradients before clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            scaler.step(optimizer)
            scaler.update()

            # Accuracy is measured against the original (non-mixed) labels while
            # the logits come from mixed inputs, so with mixup enabled this
            # understates what the model achieves on clean data.
            acc1 = accuracy_topk(logits.detach().cpu(), y.detach().cpu(), topk=(1,))[0]
            running_loss += loss.item() * batch_size
            running_acc += acc1 * batch_size / 100.0
            total_samples += batch_size
            pbar.set_postfix(loss=running_loss / total_samples, acc=(running_acc / total_samples * 100.0))

        avg_train_loss = running_loss / len(train_dataset)
        avg_train_acc = running_acc / len(train_dataset) * 100.0

        # --- Validation Phase ---
        avg_val_loss, avg_val_acc, avg_val_top5 = _evaluate(
            model, val_loader, use_amp, desc="Validation", criterion=criterion
        )

        # Step the learning rate scheduler
        scheduler.step()

        print(f"Epoch {epoch+1}/{EPOCHS} | Train Loss {avg_train_loss:.4f} Acc {avg_train_acc:.2f}% | Val Loss {avg_val_loss:.4f} Acc {avg_val_acc:.2f}% Top5 {avg_val_top5:.2f}%")

        # Log history
        train_history['epoch'].append(epoch+1)
        train_history['train_loss'].append(avg_train_loss)
        train_history['train_acc'].append(avg_train_acc)
        train_history['val_loss'].append(avg_val_loss)
        train_history['val_acc'].append(avg_val_acc)
        train_history['val_top5'].append(avg_val_top5)

        # Save the best model based on validation accuracy. The first epoch
        # always saves, so a checkpoint from this run exists even if
        # validation accuracy never rises above 0 (e.g. empty val split).
        if avg_val_acc > best_val or not checkpoint_saved:
            best_val = avg_val_acc
            torch.save(model.state_dict(), save_path)
            checkpoint_saved = True
            print(f"✅ New best model saved: {best_val:.2f}%")

    print(f"Training complete. Best val acc: {best_val:.2f}%")

    # --- 7. Save History & Plots ---
    df = pd.DataFrame(train_history)
    df.to_csv(os.path.join(OUTPUT_DIR, 'training_log.csv'), index=False)
    print(f"Training log saved to {os.path.join(OUTPUT_DIR, 'training_log.csv')}")

    # Plot curves
    plt.figure(figsize=(12,5))
    plt.subplot(1,2,1)
    plt.plot(df['epoch'], df['train_loss'], label='Train Loss')
    plt.plot(df['epoch'], df['val_loss'], label='Val Loss')
    plt.xlabel('Epoch'); plt.ylabel('Loss'); plt.legend(); plt.title('Loss Curve')
    plt.subplot(1,2,2)
    plt.plot(df['epoch'], df['train_acc'], label='Train Acc')
    plt.plot(df['epoch'], df['val_acc'], label='Val Acc')
    plt.xlabel('Epoch'); plt.ylabel('Accuracy (%)'); plt.legend(); plt.title('Accuracy Curve')
    plt.savefig(os.path.join(OUTPUT_DIR, 'training_curves.png'))
    plt.close()
    print(f"Training curves saved to {os.path.join(OUTPUT_DIR, 'training_curves.png')}")

    # --- 8. Final Test ---
    print("Loading best model for final testing...")
    # Only reload a checkpoint written by this run; otherwise (EPOCHS == 0)
    # a stale file from an earlier run could be picked up silently.
    if checkpoint_saved:
        model.load_state_dict(torch.load(save_path, map_location=DEVICE))
    _, avg_test_acc, avg_test_top5 = _evaluate(model, test_loader, use_amp, desc="Testing")

    print("--- 🏁 Final Test Accuracy ---")
    print(f"Top-1: {avg_test_acc:.2f}%")
    print(f"Top-5: {avg_test_top5:.2f}%")

In [None]:
# Run the full train/validate/test pipeline only when this file is executed
# directly (script or notebook cell), not when it is imported as a module.
if __name__ == '__main__':
    main()

✅ Created 'train' split with 1442 samples.
✅ Created 'val' split with 338 samples.
✅ Created 'test' split with 258 samples.
Model has 1209376 trainable parameters.


Epoch 1/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.02it/s, acc=0.832, loss=4.62]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.22it/s]


Epoch 1/200 | Train Loss 4.6159 Acc 0.83% | Val Loss 4.4655 Acc 2.66% Top5 12.13%
✅ New best model: 2.66%


Epoch 2/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.98it/s, acc=2.43, loss=4.38]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.28it/s]


Epoch 2/200 | Train Loss 4.3758 Acc 2.43% | Val Loss 4.2024 Acc 5.33% Top5 18.34%
✅ New best model: 5.33%


Epoch 3/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.00it/s, acc=4.51, loss=4.25]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.30it/s]


Epoch 3/200 | Train Loss 4.2510 Acc 4.51% | Val Loss 4.1183 Acc 7.40% Top5 22.49%
✅ New best model: 7.40%


Epoch 4/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.07it/s, acc=4.23, loss=4.07]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.20it/s]


Epoch 4/200 | Train Loss 4.0749 Acc 4.23% | Val Loss 3.9360 Acc 9.47% Top5 30.77%
✅ New best model: 9.47%


Epoch 5/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.97it/s, acc=6.66, loss=3.99]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.23it/s]


Epoch 5/200 | Train Loss 3.9904 Acc 6.66% | Val Loss 3.8397 Acc 9.76% Top5 35.80%
✅ New best model: 9.76%


Epoch 6/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.04it/s, acc=7.21, loss=3.96]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.32it/s]


Epoch 6/200 | Train Loss 3.9587 Acc 7.21% | Val Loss 3.8081 Acc 13.91% Top5 36.69%
✅ New best model: 13.91%


Epoch 7/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.05it/s, acc=5.89, loss=3.81]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.28it/s]


Epoch 7/200 | Train Loss 3.8121 Acc 5.89% | Val Loss 3.7181 Acc 16.27% Top5 42.60%
✅ New best model: 16.27%


Epoch 8/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.04it/s, acc=7.35, loss=3.81]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.24it/s]


Epoch 8/200 | Train Loss 3.8071 Acc 7.35% | Val Loss 3.7156 Acc 15.38% Top5 42.60%


Epoch 9/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.05it/s, acc=12.4, loss=3.62]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.29it/s]


Epoch 9/200 | Train Loss 3.6167 Acc 12.41% | Val Loss 3.5291 Acc 20.71% Top5 48.82%
✅ New best model: 20.71%


Epoch 10/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.98it/s, acc=13.7, loss=3.61]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.22it/s]


Epoch 10/200 | Train Loss 3.6091 Acc 13.66% | Val Loss 3.4699 Acc 23.37% Top5 54.14%
✅ New best model: 23.37%


Epoch 11/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.02it/s, acc=14.5, loss=3.47]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.23it/s]


Epoch 11/200 | Train Loss 3.4720 Acc 14.49% | Val Loss 3.4580 Acc 25.74% Top5 53.55%
✅ New best model: 25.74%


Epoch 12/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.05it/s, acc=16.8, loss=3.39]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.21it/s]


Epoch 12/200 | Train Loss 3.3922 Acc 16.78% | Val Loss 3.3368 Acc 29.88% Top5 55.62%
✅ New best model: 29.88%


Epoch 13/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.02it/s, acc=17.6, loss=3.4] 
Validation: 100%|██████████| 11/11 [00:05<00:00,  2.15it/s]


Epoch 13/200 | Train Loss 3.3969 Acc 17.61% | Val Loss 3.3319 Acc 23.96% Top5 57.69%


Epoch 14/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.96it/s, acc=17.1, loss=3.35]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.29it/s]


Epoch 14/200 | Train Loss 3.3470 Acc 17.06% | Val Loss 3.2094 Acc 30.47% Top5 59.47%
✅ New best model: 30.47%


Epoch 15/200 Train: 100%|██████████| 46/46 [00:23<00:00,  2.00it/s, acc=22.4, loss=3.18]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.26it/s]


Epoch 15/200 | Train Loss 3.1810 Acc 22.40% | Val Loss 3.1832 Acc 29.29% Top5 65.38%


Epoch 16/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.01it/s, acc=21.9, loss=3.06]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.27it/s]


Epoch 16/200 | Train Loss 3.0603 Acc 21.91% | Val Loss 2.9824 Acc 37.87% Top5 68.05%
✅ New best model: 37.87%


Epoch 17/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.06it/s, acc=21.5, loss=3.02]
Validation: 100%|██████████| 11/11 [00:05<00:00,  2.16it/s]


Epoch 17/200 | Train Loss 3.0223 Acc 21.50% | Val Loss 2.9670 Acc 35.21% Top5 68.64%


Epoch 18/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.06it/s, acc=25.1, loss=2.96]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.24it/s]


Epoch 18/200 | Train Loss 2.9609 Acc 25.10% | Val Loss 2.8612 Acc 40.24% Top5 71.89%
✅ New best model: 40.24%


Epoch 19/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.99it/s, acc=23.1, loss=2.84]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.31it/s]


Epoch 19/200 | Train Loss 2.8391 Acc 23.09% | Val Loss 2.7791 Acc 41.12% Top5 74.56%
✅ New best model: 41.12%


Epoch 20/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.02it/s, acc=25.2, loss=2.74]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Epoch 20/200 | Train Loss 2.7423 Acc 25.24% | Val Loss 2.6919 Acc 44.67% Top5 75.15%
✅ New best model: 44.67%


Epoch 21/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.08it/s, acc=30.5, loss=2.72]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.26it/s]


Epoch 21/200 | Train Loss 2.7182 Acc 30.51% | Val Loss 2.6092 Acc 45.86% Top5 77.22%
✅ New best model: 45.86%


Epoch 22/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.02it/s, acc=34.5, loss=2.52]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.22it/s]


Epoch 22/200 | Train Loss 2.5243 Acc 34.54% | Val Loss 2.4898 Acc 50.89% Top5 78.70%
✅ New best model: 50.89%


Epoch 23/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.97it/s, acc=37.7, loss=2.6] 
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.28it/s]


Epoch 23/200 | Train Loss 2.5984 Acc 37.73% | Val Loss 2.4680 Acc 51.78% Top5 80.77%
✅ New best model: 51.78%


Epoch 24/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.04it/s, acc=28.8, loss=2.42]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Epoch 24/200 | Train Loss 2.4241 Acc 28.78% | Val Loss 2.4034 Acc 53.55% Top5 79.88%
✅ New best model: 53.55%


Epoch 25/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.08it/s, acc=35.8, loss=2.36]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.21it/s]


Epoch 25/200 | Train Loss 2.3586 Acc 35.78% | Val Loss 2.4108 Acc 51.48% Top5 81.95%


Epoch 26/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.01it/s, acc=46.5, loss=2.24]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.23it/s]


Epoch 26/200 | Train Loss 2.2388 Acc 46.46% | Val Loss 2.3309 Acc 51.18% Top5 83.73%


Epoch 27/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.04it/s, acc=45, loss=2.2]   
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.26it/s]


Epoch 27/200 | Train Loss 2.2029 Acc 45.01% | Val Loss 2.2633 Acc 58.58% Top5 83.73%
✅ New best model: 58.58%


Epoch 28/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.04it/s, acc=39, loss=2.29]  
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.27it/s]


Epoch 28/200 | Train Loss 2.2946 Acc 39.04% | Val Loss 2.3283 Acc 52.66% Top5 83.43%


Epoch 29/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.05it/s, acc=43.3, loss=2.18]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.21it/s]


Epoch 29/200 | Train Loss 2.1756 Acc 43.27% | Val Loss 2.2779 Acc 57.10% Top5 84.62%


Epoch 30/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.98it/s, acc=39.9, loss=2.32]
Validation: 100%|██████████| 11/11 [00:05<00:00,  2.18it/s]


Epoch 30/200 | Train Loss 2.3207 Acc 39.94% | Val Loss 2.2914 Acc 54.73% Top5 84.91%


Epoch 31/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=43.8, loss=2.19]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.22it/s]


Epoch 31/200 | Train Loss 2.1889 Acc 43.83% | Val Loss 2.2432 Acc 57.69% Top5 84.62%


Epoch 32/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.06it/s, acc=42.1, loss=2.12]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.29it/s]


Epoch 32/200 | Train Loss 2.1229 Acc 42.09% | Val Loss 2.2714 Acc 56.21% Top5 84.91%


Epoch 33/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.04it/s, acc=28.8, loss=2.16]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.26it/s]


Epoch 33/200 | Train Loss 2.1570 Acc 28.78% | Val Loss 2.1294 Acc 62.13% Top5 87.87%
✅ New best model: 62.13%


Epoch 34/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=57.8, loss=2.02]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.22it/s]


Epoch 34/200 | Train Loss 2.0234 Acc 57.84% | Val Loss 2.1799 Acc 60.65% Top5 86.09%


Epoch 35/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=35.2, loss=2.12]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.28it/s]


Epoch 35/200 | Train Loss 2.1240 Acc 35.16% | Val Loss 2.1918 Acc 57.99% Top5 86.09%


Epoch 36/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=57.6, loss=1.97]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.29it/s]


Epoch 36/200 | Train Loss 1.9654 Acc 57.56% | Val Loss 2.1675 Acc 59.76% Top5 86.39%


Epoch 37/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.04it/s, acc=44.5, loss=1.95]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.21it/s]


Epoch 37/200 | Train Loss 1.9484 Acc 44.52% | Val Loss 2.0539 Acc 64.20% Top5 87.87%
✅ New best model: 64.20%


Epoch 38/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.99it/s, acc=54.2, loss=2.07]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.22it/s]


Epoch 38/200 | Train Loss 2.0678 Acc 54.23% | Val Loss 2.1009 Acc 60.65% Top5 88.17%


Epoch 39/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=42.2, loss=1.99]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.28it/s]


Epoch 39/200 | Train Loss 1.9936 Acc 42.16% | Val Loss 2.0777 Acc 64.20% Top5 89.05%
✅ New best model: 64.20%


Epoch 40/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=50, loss=2.06]  
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.30it/s]


Epoch 40/200 | Train Loss 2.0591 Acc 50.00% | Val Loss 2.0282 Acc 65.98% Top5 88.17%
✅ New best model: 65.98%


Epoch 41/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.01it/s, acc=35.6, loss=2.03]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.22it/s]


Epoch 41/200 | Train Loss 2.0258 Acc 35.64% | Val Loss 2.0771 Acc 62.72% Top5 87.28%


Epoch 42/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.02it/s, acc=39.5, loss=2.21]
Validation: 100%|██████████| 11/11 [00:05<00:00,  2.15it/s]


Epoch 42/200 | Train Loss 2.2073 Acc 39.53% | Val Loss 1.9980 Acc 65.68% Top5 88.46%


Epoch 43/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.04it/s, acc=49.9, loss=1.87]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.26it/s]


Epoch 43/200 | Train Loss 1.8689 Acc 49.86% | Val Loss 2.0014 Acc 64.79% Top5 89.64%


Epoch 44/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.98it/s, acc=50.7, loss=1.87]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.21it/s]


Epoch 44/200 | Train Loss 1.8729 Acc 50.69% | Val Loss 2.0069 Acc 66.57% Top5 87.57%
✅ New best model: 66.57%


Epoch 45/200 Train: 100%|██████████| 46/46 [00:24<00:00,  1.91it/s, acc=44.1, loss=1.76]
Validation: 100%|██████████| 11/11 [00:05<00:00,  2.17it/s]


Epoch 45/200 | Train Loss 1.7570 Acc 44.11% | Val Loss 2.0132 Acc 66.57% Top5 89.94%


Epoch 46/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.99it/s, acc=43.8, loss=1.85]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.22it/s]


Epoch 46/200 | Train Loss 1.8509 Acc 43.83% | Val Loss 2.0101 Acc 65.98% Top5 88.46%


Epoch 47/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=53.7, loss=1.91]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.29it/s]


Epoch 47/200 | Train Loss 1.9140 Acc 53.74% | Val Loss 1.9189 Acc 68.93% Top5 90.53%
✅ New best model: 68.93%


Epoch 48/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=48.7, loss=1.78]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.20it/s]


Epoch 48/200 | Train Loss 1.7811 Acc 48.68% | Val Loss 2.0562 Acc 62.43% Top5 88.46%


Epoch 49/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.04it/s, acc=44.7, loss=1.86]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.23it/s]


Epoch 49/200 | Train Loss 1.8591 Acc 44.66% | Val Loss 1.9788 Acc 68.05% Top5 88.76%


Epoch 50/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.01it/s, acc=44.3, loss=1.86]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.24it/s]


Epoch 50/200 | Train Loss 1.8632 Acc 44.31% | Val Loss 2.0537 Acc 64.79% Top5 86.39%


Epoch 51/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.98it/s, acc=50.8, loss=1.72]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.21it/s]


Epoch 51/200 | Train Loss 1.7242 Acc 50.76% | Val Loss 2.0052 Acc 65.98% Top5 87.28%


Epoch 52/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=39.1, loss=2]   
Validation: 100%|██████████| 11/11 [00:05<00:00,  2.16it/s]


Epoch 52/200 | Train Loss 1.9980 Acc 39.11% | Val Loss 2.0639 Acc 63.02% Top5 86.39%


Epoch 53/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=55.8, loss=1.72]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.22it/s]


Epoch 53/200 | Train Loss 1.7236 Acc 55.83% | Val Loss 1.9438 Acc 68.34% Top5 88.76%


Epoch 54/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.99it/s, acc=49.9, loss=1.72]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.23it/s]


Epoch 54/200 | Train Loss 1.7184 Acc 49.86% | Val Loss 2.0506 Acc 64.79% Top5 86.98%


Epoch 55/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=44.9, loss=1.83]
Validation: 100%|██████████| 11/11 [00:05<00:00,  2.17it/s]


Epoch 55/200 | Train Loss 1.8308 Acc 44.94% | Val Loss 1.9381 Acc 68.05% Top5 87.87%


Epoch 56/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.00it/s, acc=52.1, loss=1.68]
Validation: 100%|██████████| 11/11 [00:05<00:00,  2.16it/s]


Epoch 56/200 | Train Loss 1.6842 Acc 52.15% | Val Loss 1.9687 Acc 66.27% Top5 87.87%


Epoch 57/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.02it/s, acc=34, loss=1.75]  
Validation: 100%|██████████| 11/11 [00:05<00:00,  2.20it/s]


Epoch 57/200 | Train Loss 1.7480 Acc 34.05% | Val Loss 1.8669 Acc 70.71% Top5 91.42%
✅ New best model: 70.71%


Epoch 58/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.95it/s, acc=51.6, loss=1.78]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.20it/s]


Epoch 58/200 | Train Loss 1.7845 Acc 51.60% | Val Loss 2.0033 Acc 68.34% Top5 87.28%


Epoch 59/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.00it/s, acc=37.7, loss=1.89]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.21it/s]


Epoch 59/200 | Train Loss 1.8854 Acc 37.73% | Val Loss 1.9310 Acc 66.86% Top5 89.35%


Epoch 60/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.99it/s, acc=64.4, loss=1.48]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.26it/s]


Epoch 60/200 | Train Loss 1.4819 Acc 64.36% | Val Loss 1.9564 Acc 67.75% Top5 89.64%


Epoch 61/200 Train: 100%|██████████| 46/46 [00:23<00:00,  2.00it/s, acc=47.6, loss=1.77]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.28it/s]


Epoch 61/200 | Train Loss 1.7706 Acc 47.64% | Val Loss 1.9293 Acc 68.93% Top5 90.53%


Epoch 62/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.94it/s, acc=53.7, loss=1.8] 
Validation: 100%|██████████| 11/11 [00:05<00:00,  1.94it/s]


Epoch 62/200 | Train Loss 1.8026 Acc 53.68% | Val Loss 1.9681 Acc 69.53% Top5 89.05%


Epoch 63/200 Train: 100%|██████████| 46/46 [00:25<00:00,  1.82it/s, acc=58.7, loss=1.68]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.30it/s]


Epoch 63/200 | Train Loss 1.6788 Acc 58.74% | Val Loss 1.8421 Acc 72.49% Top5 90.83%
✅ New best model: 72.49%


Epoch 64/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.06it/s, acc=59.6, loss=1.77]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.31it/s]


Epoch 64/200 | Train Loss 1.7679 Acc 59.57% | Val Loss 1.9343 Acc 69.23% Top5 88.76%


Epoch 65/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.05it/s, acc=52.6, loss=1.82]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.22it/s]


Epoch 65/200 | Train Loss 1.8195 Acc 52.64% | Val Loss 1.9351 Acc 68.05% Top5 88.46%


Epoch 66/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.07it/s, acc=50.2, loss=1.72]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Epoch 66/200 | Train Loss 1.7165 Acc 50.21% | Val Loss 1.9090 Acc 71.01% Top5 88.76%


Epoch 67/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.01it/s, acc=49.4, loss=1.69]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.20it/s]


Epoch 67/200 | Train Loss 1.6912 Acc 49.38% | Val Loss 1.9432 Acc 69.82% Top5 89.35%


Epoch 68/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.99it/s, acc=40.9, loss=1.66]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.27it/s]


Epoch 68/200 | Train Loss 1.6629 Acc 40.92% | Val Loss 1.9215 Acc 69.53% Top5 88.46%


Epoch 69/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.04it/s, acc=55.3, loss=1.91]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.24it/s]


Epoch 69/200 | Train Loss 1.9090 Acc 55.27% | Val Loss 1.9255 Acc 69.82% Top5 88.76%


Epoch 70/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.05it/s, acc=41.7, loss=1.85]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.29it/s]


Epoch 70/200 | Train Loss 1.8513 Acc 41.75% | Val Loss 1.9122 Acc 68.93% Top5 88.46%


Epoch 71/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=52.7, loss=1.89]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.23it/s]


Epoch 71/200 | Train Loss 1.8891 Acc 52.70% | Val Loss 1.9069 Acc 70.12% Top5 89.64%


Epoch 72/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=37.9, loss=1.51]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Epoch 72/200 | Train Loss 1.5080 Acc 37.93% | Val Loss 1.8600 Acc 72.78% Top5 89.94%
✅ New best model: 72.78%


Epoch 73/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=39.9, loss=1.68]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.28it/s]


Epoch 73/200 | Train Loss 1.6779 Acc 39.94% | Val Loss 1.8993 Acc 71.01% Top5 88.76%


Epoch 74/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.02it/s, acc=57.8, loss=1.68]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Epoch 74/200 | Train Loss 1.6775 Acc 57.84% | Val Loss 1.8906 Acc 69.23% Top5 88.76%


Epoch 75/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.04it/s, acc=56.4, loss=1.83]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Epoch 75/200 | Train Loss 1.8290 Acc 56.45% | Val Loss 1.8753 Acc 71.60% Top5 89.35%


Epoch 76/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.02it/s, acc=60.8, loss=1.6] 
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.27it/s]


Epoch 76/200 | Train Loss 1.6031 Acc 60.82% | Val Loss 1.8397 Acc 73.67% Top5 88.17%
✅ New best model: 73.67%


Epoch 77/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.04it/s, acc=39, loss=1.63]  
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.23it/s]


Epoch 77/200 | Train Loss 1.6295 Acc 39.04% | Val Loss 1.9246 Acc 70.71% Top5 87.57%


Epoch 78/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.05it/s, acc=54.2, loss=1.79]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.21it/s]


Epoch 78/200 | Train Loss 1.7936 Acc 54.16% | Val Loss 1.9273 Acc 68.05% Top5 89.64%


Epoch 79/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.04it/s, acc=46.9, loss=1.61]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.21it/s]


Epoch 79/200 | Train Loss 1.6054 Acc 46.88% | Val Loss 1.8691 Acc 71.89% Top5 90.53%


Epoch 80/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.95it/s, acc=50.4, loss=1.72]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.30it/s]


Epoch 80/200 | Train Loss 1.7178 Acc 50.42% | Val Loss 1.8509 Acc 72.19% Top5 90.83%


Epoch 81/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.07it/s, acc=47.9, loss=1.78]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.26it/s]


Epoch 81/200 | Train Loss 1.7760 Acc 47.85% | Val Loss 1.8716 Acc 70.71% Top5 88.46%


Epoch 82/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=54, loss=1.73]  
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.27it/s]


Epoch 82/200 | Train Loss 1.7270 Acc 54.02% | Val Loss 1.8743 Acc 71.01% Top5 88.76%


Epoch 83/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.04it/s, acc=47.7, loss=1.63]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.28it/s]


Epoch 83/200 | Train Loss 1.6283 Acc 47.71% | Val Loss 1.8334 Acc 72.78% Top5 90.24%


Epoch 84/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=41.3, loss=1.86]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.24it/s]


Epoch 84/200 | Train Loss 1.8647 Acc 41.33% | Val Loss 1.8886 Acc 72.19% Top5 89.05%


Epoch 85/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.04it/s, acc=43.6, loss=1.81]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.26it/s]


Epoch 85/200 | Train Loss 1.8088 Acc 43.62% | Val Loss 1.9172 Acc 71.89% Top5 90.53%


Epoch 86/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.97it/s, acc=51.6, loss=1.76]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.22it/s]


Epoch 86/200 | Train Loss 1.7622 Acc 51.60% | Val Loss 1.8685 Acc 72.19% Top5 90.24%


Epoch 87/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.01it/s, acc=46.8, loss=1.76]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.23it/s]


Epoch 87/200 | Train Loss 1.7633 Acc 46.81% | Val Loss 1.8527 Acc 73.37% Top5 90.24%


Epoch 88/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.06it/s, acc=54.9, loss=1.64]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.27it/s]


Epoch 88/200 | Train Loss 1.6425 Acc 54.85% | Val Loss 1.8732 Acc 72.19% Top5 89.64%


Epoch 89/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.02it/s, acc=42.1, loss=1.82]
Validation: 100%|██████████| 11/11 [00:05<00:00,  2.20it/s]


Epoch 89/200 | Train Loss 1.8234 Acc 42.09% | Val Loss 1.8438 Acc 73.37% Top5 89.64%


Epoch 90/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.96it/s, acc=54.2, loss=1.71]
Validation: 100%|██████████| 11/11 [00:05<00:00,  2.18it/s]


Epoch 90/200 | Train Loss 1.7136 Acc 54.16% | Val Loss 1.8465 Acc 76.33% Top5 89.35%
✅ New best model: 76.33%


Epoch 91/200 Train: 100%|██████████| 46/46 [00:23<00:00,  2.00it/s, acc=38.6, loss=1.78]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.21it/s]


Epoch 91/200 | Train Loss 1.7777 Acc 38.63% | Val Loss 1.8613 Acc 71.89% Top5 89.64%


Epoch 92/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.05it/s, acc=47.9, loss=1.66]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.26it/s]


Epoch 92/200 | Train Loss 1.6613 Acc 47.92% | Val Loss 1.8423 Acc 71.60% Top5 89.35%


Epoch 93/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=49, loss=1.65]  
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Epoch 93/200 | Train Loss 1.6503 Acc 48.96% | Val Loss 1.8992 Acc 71.60% Top5 89.64%


Epoch 94/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.00it/s, acc=61.9, loss=1.6] 
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Epoch 94/200 | Train Loss 1.6039 Acc 61.86% | Val Loss 1.8473 Acc 74.26% Top5 90.83%


Epoch 95/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.98it/s, acc=55.5, loss=1.91]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.26it/s]


Epoch 95/200 | Train Loss 1.9073 Acc 55.55% | Val Loss 1.8633 Acc 70.41% Top5 88.76%


Epoch 96/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.01it/s, acc=62.3, loss=1.65]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.21it/s]


Epoch 96/200 | Train Loss 1.6468 Acc 62.34% | Val Loss 1.8265 Acc 73.08% Top5 89.64%


Epoch 97/200 Train: 100%|██████████| 46/46 [00:23<00:00,  2.00it/s, acc=45.8, loss=1.72]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.29it/s]


Epoch 97/200 | Train Loss 1.7188 Acc 45.84% | Val Loss 1.8067 Acc 72.78% Top5 90.24%


Epoch 98/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=55.4, loss=1.88]
Validation: 100%|██████████| 11/11 [00:05<00:00,  2.18it/s]


Epoch 98/200 | Train Loss 1.8779 Acc 55.41% | Val Loss 1.8124 Acc 71.60% Top5 90.83%


Epoch 99/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.99it/s, acc=43.6, loss=1.87]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.23it/s]


Epoch 99/200 | Train Loss 1.8652 Acc 43.55% | Val Loss 1.8240 Acc 71.60% Top5 90.53%


Epoch 100/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.96it/s, acc=58.3, loss=1.61]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.27it/s]


Epoch 100/200 | Train Loss 1.6071 Acc 58.32% | Val Loss 1.8257 Acc 72.19% Top5 90.24%


Epoch 101/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.01it/s, acc=59.8, loss=1.64]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.22it/s]


Epoch 101/200 | Train Loss 1.6395 Acc 59.78% | Val Loss 1.8344 Acc 72.19% Top5 89.35%


Epoch 102/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.00it/s, acc=56.9, loss=1.59]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.24it/s]


Epoch 102/200 | Train Loss 1.5852 Acc 56.93% | Val Loss 1.8279 Acc 71.89% Top5 90.53%


Epoch 103/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.01it/s, acc=49.7, loss=1.66]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.28it/s]


Epoch 103/200 | Train Loss 1.6576 Acc 49.72% | Val Loss 1.8051 Acc 76.04% Top5 90.83%


Epoch 104/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.00it/s, acc=46.3, loss=1.59]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Epoch 104/200 | Train Loss 1.5889 Acc 46.32% | Val Loss 1.7982 Acc 76.04% Top5 89.94%


Epoch 105/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.02it/s, acc=45.6, loss=1.64]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.24it/s]


Epoch 105/200 | Train Loss 1.6443 Acc 45.63% | Val Loss 1.8122 Acc 73.96% Top5 90.53%


Epoch 106/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.01it/s, acc=62, loss=1.51]  
Validation: 100%|██████████| 11/11 [00:05<00:00,  2.18it/s]


Epoch 106/200 | Train Loss 1.5139 Acc 62.00% | Val Loss 1.8389 Acc 71.89% Top5 89.94%


Epoch 107/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.01it/s, acc=48.3, loss=1.63]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.24it/s]


Epoch 107/200 | Train Loss 1.6300 Acc 48.27% | Val Loss 1.7981 Acc 75.44% Top5 90.83%


Epoch 108/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.01it/s, acc=52.1, loss=1.65]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Epoch 108/200 | Train Loss 1.6533 Acc 52.08% | Val Loss 1.8203 Acc 75.15% Top5 90.24%


Epoch 109/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=40.5, loss=1.72]
Validation: 100%|██████████| 11/11 [00:05<00:00,  2.16it/s]


Epoch 109/200 | Train Loss 1.7172 Acc 40.50% | Val Loss 1.8449 Acc 73.08% Top5 89.64%


Epoch 110/200 Train: 100%|██████████| 46/46 [00:23<00:00,  2.00it/s, acc=50.9, loss=1.65]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.27it/s]


Epoch 110/200 | Train Loss 1.6534 Acc 50.90% | Val Loss 1.7968 Acc 76.04% Top5 89.64%


Epoch 111/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.06it/s, acc=53.1, loss=1.51]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Epoch 111/200 | Train Loss 1.5138 Acc 53.12% | Val Loss 1.7823 Acc 75.15% Top5 90.53%


Epoch 112/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.01it/s, acc=60.3, loss=1.62]
Validation: 100%|██████████| 11/11 [00:05<00:00,  2.20it/s]


Epoch 112/200 | Train Loss 1.6152 Acc 60.26% | Val Loss 1.8103 Acc 74.56% Top5 89.94%


Epoch 113/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.97it/s, acc=43.1, loss=1.67]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.33it/s]


Epoch 113/200 | Train Loss 1.6682 Acc 43.13% | Val Loss 1.8199 Acc 73.67% Top5 90.24%


Epoch 114/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=48.5, loss=1.75]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Epoch 114/200 | Train Loss 1.7513 Acc 48.54% | Val Loss 1.7889 Acc 73.37% Top5 89.94%


Epoch 115/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.99it/s, acc=48.5, loss=1.52]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.30it/s]


Epoch 115/200 | Train Loss 1.5235 Acc 48.47% | Val Loss 1.8133 Acc 74.56% Top5 90.24%


Epoch 116/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.05it/s, acc=64.4, loss=1.6] 
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Epoch 116/200 | Train Loss 1.6048 Acc 64.42% | Val Loss 1.8072 Acc 75.74% Top5 89.94%


Epoch 117/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.01it/s, acc=52, loss=1.65]  
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Epoch 117/200 | Train Loss 1.6510 Acc 52.01% | Val Loss 1.7867 Acc 75.15% Top5 89.35%


Epoch 118/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.02it/s, acc=50.8, loss=1.82]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Epoch 118/200 | Train Loss 1.8225 Acc 50.76% | Val Loss 1.7849 Acc 74.56% Top5 90.83%


Epoch 119/200 Train: 100%|██████████| 46/46 [00:23<00:00,  2.00it/s, acc=42.8, loss=1.71]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.21it/s]


Epoch 119/200 | Train Loss 1.7084 Acc 42.79% | Val Loss 1.7917 Acc 75.44% Top5 90.83%


Epoch 120/200 Train: 100%|██████████| 46/46 [00:23<00:00,  2.00it/s, acc=56.2, loss=1.85]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Epoch 120/200 | Train Loss 1.8507 Acc 56.17% | Val Loss 1.8004 Acc 75.44% Top5 90.53%


Epoch 121/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.98it/s, acc=47.8, loss=1.53]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.31it/s]


Epoch 121/200 | Train Loss 1.5316 Acc 47.78% | Val Loss 1.7946 Acc 74.56% Top5 90.83%


Epoch 122/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.01it/s, acc=54.8, loss=1.56]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.26it/s]


Epoch 122/200 | Train Loss 1.5572 Acc 54.79% | Val Loss 1.7763 Acc 72.49% Top5 91.72%


Epoch 123/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.96it/s, acc=40.2, loss=1.65]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.28it/s]


Epoch 123/200 | Train Loss 1.6460 Acc 40.15% | Val Loss 1.7954 Acc 75.44% Top5 89.64%


Epoch 124/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.02it/s, acc=57.1, loss=1.66]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.30it/s]


Epoch 124/200 | Train Loss 1.6573 Acc 57.07% | Val Loss 1.7924 Acc 75.44% Top5 89.64%


Epoch 125/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.04it/s, acc=35.2, loss=1.6] 
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.22it/s]


Epoch 125/200 | Train Loss 1.5962 Acc 35.23% | Val Loss 1.7902 Acc 73.67% Top5 90.53%


Epoch 126/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.00it/s, acc=43.5, loss=1.67]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.28it/s]


Epoch 126/200 | Train Loss 1.6747 Acc 43.48% | Val Loss 1.8054 Acc 73.96% Top5 89.05%


Epoch 127/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.04it/s, acc=50.5, loss=1.82]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.28it/s]


Epoch 127/200 | Train Loss 1.8248 Acc 50.49% | Val Loss 1.7937 Acc 73.08% Top5 90.24%


Epoch 128/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.01it/s, acc=58.3, loss=1.69]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.30it/s]


Epoch 128/200 | Train Loss 1.6900 Acc 58.32% | Val Loss 1.7631 Acc 74.56% Top5 90.53%


Epoch 129/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.00it/s, acc=62.6, loss=1.5] 
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.22it/s]


Epoch 129/200 | Train Loss 1.5027 Acc 62.62% | Val Loss 1.7734 Acc 75.74% Top5 89.94%


Epoch 130/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.00it/s, acc=47.7, loss=1.65]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.21it/s]


Epoch 130/200 | Train Loss 1.6537 Acc 47.71% | Val Loss 1.7879 Acc 74.26% Top5 89.94%


Epoch 131/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=55.5, loss=1.61]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Epoch 131/200 | Train Loss 1.6143 Acc 55.55% | Val Loss 1.7542 Acc 76.33% Top5 90.83%


Epoch 132/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.96it/s, acc=47.5, loss=1.69]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.21it/s]


Epoch 132/200 | Train Loss 1.6860 Acc 47.50% | Val Loss 1.7629 Acc 76.33% Top5 90.83%


Epoch 133/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.99it/s, acc=50.3, loss=1.71]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.21it/s]


Epoch 133/200 | Train Loss 1.7058 Acc 50.28% | Val Loss 1.8121 Acc 74.85% Top5 89.94%


Epoch 134/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.04it/s, acc=55.1, loss=1.54]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.21it/s]


Epoch 134/200 | Train Loss 1.5416 Acc 55.06% | Val Loss 1.7666 Acc 74.85% Top5 91.72%


Epoch 135/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.02it/s, acc=50.8, loss=1.68]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.27it/s]


Epoch 135/200 | Train Loss 1.6841 Acc 50.76% | Val Loss 1.7811 Acc 75.44% Top5 90.24%


Epoch 136/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.01it/s, acc=45, loss=1.7]   
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Epoch 136/200 | Train Loss 1.6960 Acc 45.01% | Val Loss 1.7603 Acc 74.56% Top5 91.42%


Epoch 137/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.04it/s, acc=55.1, loss=1.67]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.28it/s]


Epoch 137/200 | Train Loss 1.6691 Acc 55.13% | Val Loss 1.7656 Acc 74.85% Top5 90.24%


Epoch 138/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.04it/s, acc=47.3, loss=1.66]
Validation: 100%|██████████| 11/11 [00:05<00:00,  2.19it/s]


Epoch 138/200 | Train Loss 1.6595 Acc 47.30% | Val Loss 1.7884 Acc 73.67% Top5 91.12%


Epoch 139/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.01it/s, acc=50.2, loss=1.74]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Epoch 139/200 | Train Loss 1.7410 Acc 50.21% | Val Loss 1.7573 Acc 76.33% Top5 90.83%


Epoch 140/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.98it/s, acc=39.9, loss=1.4] 
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.23it/s]


Epoch 140/200 | Train Loss 1.4043 Acc 39.88% | Val Loss 1.7516 Acc 75.15% Top5 91.72%


Epoch 141/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=51, loss=1.79]  
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.27it/s]


Epoch 141/200 | Train Loss 1.7903 Acc 50.97% | Val Loss 1.7690 Acc 75.44% Top5 91.12%


Epoch 142/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.06it/s, acc=47.5, loss=1.51]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.24it/s]


Epoch 142/200 | Train Loss 1.5136 Acc 47.50% | Val Loss 1.7805 Acc 73.37% Top5 91.42%


Epoch 143/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.99it/s, acc=58.9, loss=1.55]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.26it/s]


Epoch 143/200 | Train Loss 1.5484 Acc 58.95% | Val Loss 1.7591 Acc 74.85% Top5 90.83%


Epoch 144/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.06it/s, acc=55.2, loss=1.54]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.24it/s]


Epoch 144/200 | Train Loss 1.5389 Acc 55.20% | Val Loss 1.7528 Acc 75.15% Top5 90.24%


Epoch 145/200 Train: 100%|██████████| 46/46 [00:23<00:00,  2.00it/s, acc=57.3, loss=1.71]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.26it/s]


Epoch 145/200 | Train Loss 1.7079 Acc 57.28% | Val Loss 1.7598 Acc 74.85% Top5 91.12%


Epoch 146/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=50.7, loss=1.43]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.20it/s]


Epoch 146/200 | Train Loss 1.4346 Acc 50.69% | Val Loss 1.7574 Acc 75.44% Top5 90.83%


Epoch 147/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.96it/s, acc=48.3, loss=1.59]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.20it/s]


Epoch 147/200 | Train Loss 1.5873 Acc 48.34% | Val Loss 1.7768 Acc 75.74% Top5 91.12%


Epoch 148/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=57.1, loss=1.66]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.32it/s]


Epoch 148/200 | Train Loss 1.6554 Acc 57.07% | Val Loss 1.7888 Acc 74.85% Top5 90.24%


Epoch 149/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.05it/s, acc=57.3, loss=1.55]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.22it/s]


Epoch 149/200 | Train Loss 1.5455 Acc 57.28% | Val Loss 1.7681 Acc 76.33% Top5 90.53%


Epoch 150/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.94it/s, acc=59.6, loss=1.65]
Validation: 100%|██████████| 11/11 [00:05<00:00,  2.05it/s]


Epoch 150/200 | Train Loss 1.6483 Acc 59.64% | Val Loss 1.7842 Acc 75.74% Top5 90.24%


Epoch 151/200 Train: 100%|██████████| 46/46 [00:23<00:00,  2.00it/s, acc=41.6, loss=1.66]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.22it/s]


Epoch 151/200 | Train Loss 1.6622 Acc 41.61% | Val Loss 1.7634 Acc 75.15% Top5 90.53%


Epoch 152/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.04it/s, acc=42.4, loss=1.69]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.23it/s]


Epoch 152/200 | Train Loss 1.6925 Acc 42.37% | Val Loss 1.7943 Acc 74.26% Top5 90.24%


Epoch 153/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.99it/s, acc=44.1, loss=1.61]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.20it/s]


Epoch 153/200 | Train Loss 1.6129 Acc 44.11% | Val Loss 1.8071 Acc 73.37% Top5 89.94%


Epoch 154/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.00it/s, acc=43.3, loss=1.67]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.26it/s]


Epoch 154/200 | Train Loss 1.6671 Acc 43.27% | Val Loss 1.7851 Acc 73.96% Top5 89.94%


Epoch 155/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.02it/s, acc=61.9, loss=1.75]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.27it/s]


Epoch 155/200 | Train Loss 1.7514 Acc 61.86% | Val Loss 1.8139 Acc 71.89% Top5 89.35%


Epoch 156/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.02it/s, acc=53.1, loss=1.69]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Epoch 156/200 | Train Loss 1.6919 Acc 53.12% | Val Loss 1.7657 Acc 75.15% Top5 90.83%


Epoch 157/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.05it/s, acc=53.1, loss=1.73]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.22it/s]


Epoch 157/200 | Train Loss 1.7349 Acc 53.05% | Val Loss 1.7682 Acc 75.15% Top5 89.94%


Epoch 158/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.06it/s, acc=59.6, loss=1.56]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.24it/s]


Epoch 158/200 | Train Loss 1.5602 Acc 59.64% | Val Loss 1.7694 Acc 76.33% Top5 91.42%


Epoch 159/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=48.7, loss=1.66]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.22it/s]


Epoch 159/200 | Train Loss 1.6600 Acc 48.68% | Val Loss 1.7546 Acc 75.15% Top5 91.12%


Epoch 160/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.98it/s, acc=39.9, loss=1.86]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Epoch 160/200 | Train Loss 1.8645 Acc 39.88% | Val Loss 1.7687 Acc 76.04% Top5 90.24%


Epoch 161/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=46.9, loss=1.65]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.22it/s]


Epoch 161/200 | Train Loss 1.6495 Acc 46.88% | Val Loss 1.7599 Acc 75.44% Top5 91.12%


Epoch 162/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.02it/s, acc=50.1, loss=1.6] 
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.29it/s]


Epoch 162/200 | Train Loss 1.5958 Acc 50.14% | Val Loss 1.7638 Acc 76.63% Top5 90.24%
✅ New best model: 76.63%


Epoch 163/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.02it/s, acc=49.4, loss=1.76]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.27it/s]


Epoch 163/200 | Train Loss 1.7559 Acc 49.38% | Val Loss 1.7545 Acc 75.44% Top5 90.83%


Epoch 164/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.04it/s, acc=50.3, loss=1.66]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.23it/s]


Epoch 164/200 | Train Loss 1.6569 Acc 50.28% | Val Loss 1.7642 Acc 74.26% Top5 90.83%


Epoch 165/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.98it/s, acc=46.3, loss=1.65]
Validation: 100%|██████████| 11/11 [00:05<00:00,  2.01it/s]


Epoch 165/200 | Train Loss 1.6515 Acc 46.26% | Val Loss 1.7462 Acc 75.15% Top5 90.53%


Epoch 166/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.95it/s, acc=44.7, loss=1.71]
Validation: 100%|██████████| 11/11 [00:05<00:00,  2.19it/s]


Epoch 166/200 | Train Loss 1.7058 Acc 44.73% | Val Loss 1.7573 Acc 75.15% Top5 90.83%


Epoch 167/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.97it/s, acc=46.9, loss=1.53]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.23it/s]


Epoch 167/200 | Train Loss 1.5286 Acc 46.95% | Val Loss 1.7617 Acc 74.56% Top5 90.83%


Epoch 168/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.97it/s, acc=49, loss=1.65]  
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.21it/s]


Epoch 168/200 | Train Loss 1.6454 Acc 48.96% | Val Loss 1.7547 Acc 75.74% Top5 90.53%


Epoch 169/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.04it/s, acc=43.4, loss=1.57]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.27it/s]


Epoch 169/200 | Train Loss 1.5722 Acc 43.41% | Val Loss 1.7565 Acc 75.15% Top5 90.53%


Epoch 170/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.98it/s, acc=46, loss=1.57]  
Validation: 100%|██████████| 11/11 [00:05<00:00,  2.19it/s]


Epoch 170/200 | Train Loss 1.5743 Acc 46.05% | Val Loss 1.7735 Acc 73.08% Top5 91.12%


Epoch 171/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.97it/s, acc=52.8, loss=1.7] 
Validation: 100%|██████████| 11/11 [00:05<00:00,  2.17it/s]


Epoch 171/200 | Train Loss 1.6984 Acc 52.84% | Val Loss 1.7685 Acc 74.26% Top5 90.53%


Epoch 172/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=42, loss=1.71]  
Validation: 100%|██████████| 11/11 [00:05<00:00,  2.18it/s]


Epoch 172/200 | Train Loss 1.7070 Acc 42.02% | Val Loss 1.7761 Acc 74.26% Top5 91.12%


Epoch 173/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.98it/s, acc=39.2, loss=1.61]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.23it/s]


Epoch 173/200 | Train Loss 1.6078 Acc 39.18% | Val Loss 1.7704 Acc 75.44% Top5 90.24%


Epoch 174/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.97it/s, acc=42.6, loss=1.53]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Epoch 174/200 | Train Loss 1.5254 Acc 42.65% | Val Loss 1.7676 Acc 74.85% Top5 90.83%


Epoch 175/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.98it/s, acc=44.4, loss=1.55]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.20it/s]


Epoch 175/200 | Train Loss 1.5499 Acc 44.38% | Val Loss 1.7821 Acc 74.26% Top5 91.12%


Epoch 176/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.99it/s, acc=52.5, loss=1.56]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.22it/s]


Epoch 176/200 | Train Loss 1.5558 Acc 52.50% | Val Loss 1.7892 Acc 74.26% Top5 90.53%


Epoch 177/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.98it/s, acc=51.5, loss=1.63]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.23it/s]


Epoch 177/200 | Train Loss 1.6263 Acc 51.46% | Val Loss 1.7484 Acc 76.63% Top5 89.94%


Epoch 178/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=51.9, loss=1.63]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.26it/s]


Epoch 178/200 | Train Loss 1.6297 Acc 51.94% | Val Loss 1.7856 Acc 75.44% Top5 90.83%


Epoch 179/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.02it/s, acc=46.6, loss=1.69]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.26it/s]


Epoch 179/200 | Train Loss 1.6899 Acc 46.60% | Val Loss 1.7794 Acc 75.44% Top5 90.24%


Epoch 180/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.04it/s, acc=63.2, loss=1.64]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.28it/s]


Epoch 180/200 | Train Loss 1.6448 Acc 63.18% | Val Loss 1.7599 Acc 76.04% Top5 90.83%


Epoch 181/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.01it/s, acc=54.2, loss=1.68]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.23it/s]


Epoch 181/200 | Train Loss 1.6829 Acc 54.16% | Val Loss 1.7664 Acc 76.04% Top5 91.42%


Epoch 182/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=56, loss=1.69]  
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Epoch 182/200 | Train Loss 1.6937 Acc 56.03% | Val Loss 1.7555 Acc 76.33% Top5 91.72%


Epoch 183/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.01it/s, acc=56, loss=1.61]  
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.24it/s]


Epoch 183/200 | Train Loss 1.6073 Acc 56.03% | Val Loss 1.7617 Acc 76.04% Top5 91.12%


Epoch 184/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.00it/s, acc=43.3, loss=1.53]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Epoch 184/200 | Train Loss 1.5325 Acc 43.27% | Val Loss 1.7512 Acc 75.74% Top5 91.12%


Epoch 185/200 Train: 100%|██████████| 46/46 [00:23<00:00,  2.00it/s, acc=60.1, loss=1.62]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Epoch 185/200 | Train Loss 1.6180 Acc 60.06% | Val Loss 1.7530 Acc 76.33% Top5 90.83%


Epoch 186/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=57.3, loss=1.69]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.23it/s]


Epoch 186/200 | Train Loss 1.6943 Acc 57.28% | Val Loss 1.7635 Acc 76.33% Top5 91.42%


Epoch 187/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.97it/s, acc=62.6, loss=1.58]
Validation: 100%|██████████| 11/11 [00:05<00:00,  2.14it/s]


Epoch 187/200 | Train Loss 1.5773 Acc 62.55% | Val Loss 1.7729 Acc 74.26% Top5 91.72%


Epoch 188/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.97it/s, acc=53.3, loss=1.59]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.20it/s]


Epoch 188/200 | Train Loss 1.5889 Acc 53.33% | Val Loss 1.7714 Acc 75.15% Top5 90.83%


Epoch 189/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.99it/s, acc=45.4, loss=1.48]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.22it/s]


Epoch 189/200 | Train Loss 1.4792 Acc 45.35% | Val Loss 1.7596 Acc 76.04% Top5 91.72%


Epoch 190/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.02it/s, acc=44.5, loss=1.8] 
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.26it/s]


Epoch 190/200 | Train Loss 1.8015 Acc 44.52% | Val Loss 1.7642 Acc 76.04% Top5 91.12%


Epoch 191/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.02it/s, acc=61.7, loss=1.7] 
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.27it/s]


Epoch 191/200 | Train Loss 1.7008 Acc 61.72% | Val Loss 1.7729 Acc 74.85% Top5 90.83%


Epoch 192/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.05it/s, acc=56.4, loss=1.62]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.28it/s]


Epoch 192/200 | Train Loss 1.6206 Acc 56.38% | Val Loss 1.7741 Acc 75.15% Top5 90.24%


Epoch 193/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.02it/s, acc=61.4, loss=1.67]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.29it/s]


Epoch 193/200 | Train Loss 1.6687 Acc 61.37% | Val Loss 1.7896 Acc 75.15% Top5 90.53%


Epoch 194/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.02it/s, acc=55, loss=1.6]   
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.22it/s]


Epoch 194/200 | Train Loss 1.6034 Acc 54.99% | Val Loss 1.7766 Acc 74.56% Top5 91.12%


Epoch 195/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, acc=48.1, loss=1.56]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.27it/s]


Epoch 195/200 | Train Loss 1.5584 Acc 48.13% | Val Loss 1.7633 Acc 76.33% Top5 90.53%


Epoch 196/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.02it/s, acc=51.5, loss=1.6] 
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.27it/s]


Epoch 196/200 | Train Loss 1.5957 Acc 51.46% | Val Loss 1.7885 Acc 74.56% Top5 90.83%


Epoch 197/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.04it/s, acc=50.1, loss=1.61]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Epoch 197/200 | Train Loss 1.6099 Acc 50.14% | Val Loss 1.7698 Acc 75.15% Top5 90.83%


Epoch 198/200 Train: 100%|██████████| 46/46 [00:22<00:00,  2.07it/s, acc=58.7, loss=1.63]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.23it/s]


Epoch 198/200 | Train Loss 1.6280 Acc 58.67% | Val Loss 1.7803 Acc 74.56% Top5 90.53%


Epoch 199/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.99it/s, acc=60.8, loss=1.41]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.23it/s]


Epoch 199/200 | Train Loss 1.4118 Acc 60.82% | Val Loss 1.7626 Acc 74.26% Top5 91.72%


Epoch 200/200 Train: 100%|██████████| 46/46 [00:23<00:00,  1.99it/s, acc=41.2, loss=1.46]
Validation: 100%|██████████| 11/11 [00:04<00:00,  2.28it/s]


Epoch 200/200 | Train Loss 1.4591 Acc 41.19% | Val Loss 1.7536 Acc 75.44% Top5 91.42%
Training complete. Best val: 76.62721866404517
Training log saved to training_log.csv
Training curves saved to training_curves.png


Testing: 100%|██████████| 9/9 [00:03<00:00,  2.40it/s]

Final Test Accuracy: 72.09% Top5: 88.76%



