In [None]:
from google.colab import drive
import os

# Mount Google Drive
drive.mount('/content/drive')

# TODO: Update this path to YOUR project folder location in Google Drive
# Example: '/content/drive/MyDrive/tile_classifier_project'
PROJECT_PATH = '/content/drive/MyDrive/2109_miniproj'

# Change to project directory
os.chdir(PROJECT_PATH)
print(f"‚úÖ Working directory: {os.getcwd()}")
print(f"\nüìÇ Contents:")
!ls -la

Mounted at /content/drive
‚úÖ Working directory: /content/drive/MyDrive/2109_miniproj

üìÇ Contents:
total 18354
-rw------- 1 root root 2411814 Nov 11 19:41 cnnwithkai.ipynb
drwx------ 2 root root    4096 Nov 12 05:31 data
-rw------- 1 root root   65963 Nov 12 05:10 debug_inference_tile_0_0.png
-rw------- 1 root root   17310 Nov 12 02:29 debug_training_overlay_after_transforms.png
-rw------- 1 root root   17248 Nov 12 02:29 debug_training_overlay_before_transforms.png
-rw------- 1 root root   18498 Nov 12 02:28 debug_training_sample_processed.png
-rw------- 1 root root   18479 Nov 12 02:28 debug_training_sample_resized.png
-rw------- 1 root root     595 Nov  1 08:40 environment.yml
-rw------- 1 root root 6872430 Nov  2 11:00 grid-universe.ipynb
drwx------ 2 root root    4096 Nov 12 05:31 .ipynb_checkpoints
drwx------ 2 root root    4096 Nov 12 05:31 __MACOSX
-rw------- 1 root root 2779078 Nov 12 05:08 mini-project.ipynb
-rw------- 1 root root 4252088 Nov 12 05:10 mp2.ipynb
-rw------- 

In [None]:
import torch
import subprocess

# Check GPU
print("üñ•Ô∏è  GPU Information:")
print("=" * 70)
if torch.cuda.is_available():
    print(f"‚úÖ GPU Available: {torch.cuda.get_device_name(0)}")
    print(f"‚úÖ CUDA Version: {torch.version.cuda}")
    print(f"‚úÖ GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
else:
    print("‚ùå No GPU found! Make sure you selected GPU runtime.")
    print("   Go to: Runtime ‚Üí Change runtime type ‚Üí Hardware accelerator ‚Üí GPU")

print("\n" + "=" * 70)

# Check dataset
print("\nüìÇ Checking Dataset:")
print("=" * 70)
data_path = 'data/assets/imagen2'
if os.path.exists(data_path):
    print(f"‚úÖ Dataset found at: {data_path}")
    classes = [d for d in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, d))]
    print(f"‚úÖ Found {len(classes)} classes: {classes}")

    # Count images per class
    print("\nüìä Images per class:")
    for cls in sorted(classes):
        cls_path = os.path.join(data_path, cls)
        num_images = len([f for f in os.listdir(cls_path) if f.endswith(('.png', '.jpg', '.jpeg'))])
        print(f"   {cls:20s}: {num_images:4d} images")
else:
    print(f"‚ùå Dataset NOT found at: {data_path}")
    print("   Please update PROJECT_PATH in Step 1!")

üñ•Ô∏è  GPU Information:
‚úÖ GPU Available: Tesla T4
‚úÖ CUDA Version: 12.6
‚úÖ GPU Memory: 15.83 GB


üìÇ Checking Dataset:
‚úÖ Dataset found at: data/assets/imagen2
‚úÖ Found 21 classes: ['wall', 'robot', 'wolf', 'spike', 'sleeping', 'shield', 'portal', 'opened', 'metalbox', 'locked', 'key', 'human', 'ghost', 'lava', 'gem', 'floor', 'coin', 'dragon', 'box', 'exit', 'boots']

üìä Images per class:
   boots               :   22 images
   box                 :   19 images
   coin                :   25 images
   dragon              :   25 images
   exit                :   22 images
   floor               :   25 images
   gem                 :   25 images
   ghost               :   25 images
   human               :   32 images
   key                 :   22 images
   lava                :   25 images
   locked              :   22 images
   metalbox            :   22 images
   opened              :   25 images
   portal              :   25 images
   robot               :   25 images
   

In [None]:
# Most packages are pre-installed in Colab, but just in case:
!pip install -q torch torchvision pillow numpy
print("‚úÖ Dependencies ready!")

‚úÖ Dependencies ready!


In [None]:
import numpy as np
from pathlib import Path
from PIL import Image
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import time

# =========================
# GPU OPTIMIZATION
# =========================
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"üöÄ Using device: {device}")

# Enable cuDNN autotuner for optimal performance
if torch.cuda.is_available():
    torch.backends.cudnn.benchmark = True
    print("‚úÖ cuDNN autotuner enabled")

# =========================
# Config
# =========================
DATA_ROOT   = "data/assets/imagen2"
SAVE_PATH   = "tile_rgba_cnn.pth"
IMG_SIZE    = 128
BATCH_SIZE  = 8         # Smaller batch = more frequent updates
EPOCHS      = 150         # More epochs to reach plateau
LR          = 4.5e-4      # Higher LR for better convergence
VAL_SPLIT   = 0.15
TEST_SPLIT  = 0.15
SEED        = 42
NUM_WORKERS = 2           # For Colab
LABEL_SMOOTHING = 0.05    # Less smoothing = sharper predictions

# RGBA normalization
MEAN = [0.485, 0.456, 0.406, 0.5]
STD  = [0.229, 0.224, 0.225, 0.5]

torch.manual_seed(SEED)
np.random.seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)

print("‚úÖ Configuration loaded")

üöÄ Using device: cuda
‚úÖ cuDNN autotuner enabled
‚úÖ Configuration loaded


In [None]:
# =========================
# Data Transforms
# =========================

class EnsureRGBA:
    """Convert images to RGBA format"""
    def __call__(self, img):
        return img.convert("RGBA")

print("‚úÖ Transform classes defined")

‚úÖ Transform classes defined


In [None]:
# Compact model - ~350K parameters (770KB smaller)
class CompactTileCNN(nn.Module):
    def __init__(self, num_classes=21, in_channels=4, img_size=128):
        super().__init__()

        self.conv1 = nn.Conv2d(in_channels, 28, kernel_size=3, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(28)
        self.pool1 = nn.MaxPool2d(2, 2)

        self.conv2 = nn.Conv2d(28, 48, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(48)
        self.pool2 = nn.MaxPool2d(2, 2)

        self.conv3 = nn.Conv2d(48, 80, kernel_size=3, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(80)
        self.pool3 = nn.MaxPool2d(2, 2)

        self.conv4a = nn.Conv2d(80, 96, kernel_size=3, padding=1, bias=False)
        self.bn4a = nn.BatchNorm2d(96)
        self.conv4b = nn.Conv2d(96, 96, kernel_size=3, padding=1, bias=False)
        self.bn4b = nn.BatchNorm2d(96)
        self.pool4 = nn.MaxPool2d(2, 2)

        self.conv5 = nn.Conv2d(96, 108, kernel_size=3, padding=1, bias=False)
        self.bn5 = nn.BatchNorm2d(108)
        self.pool5 = nn.MaxPool2d(2, 2)

        self.gap = nn.AdaptiveAvgPool2d(1)

        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(108, 72),
            nn.ReLU(inplace=True),
            nn.Dropout(0.2),
            nn.Linear(72, num_classes)
        )

    def forward(self, x):
        x = torch.relu(self.bn1(self.conv1(x)))
        x = self.pool1(x)
        x = torch.relu(self.bn2(self.conv2(x)))
        x = self.pool2(x)
        x = torch.relu(self.bn3(self.conv3(x)))
        x = self.pool3(x)
        x = torch.relu(self.bn4a(self.conv4a(x)))
        x = torch.relu(self.bn4b(self.conv4b(x)))
        x = self.pool4(x)
        x = torch.relu(self.bn5(self.conv5(x)))
        x = self.pool5(x)
        x = self.gap(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

print("‚úÖ Compact model defined (~350K params)")

‚úÖ Compact model defined (~350K params)


In [None]:
# =========================
# Helper Functions
# =========================

def evaluate(model, loader, criterion=None, return_per_class=False, return_confusion=False):
    model.eval()
    total, correct, loss_sum = 0, 0, 0.0

    if return_per_class or return_confusion:
        num_classes = len(loader.dataset.classes)
        class_correct = torch.zeros(num_classes)
        class_total = torch.zeros(num_classes)

    if return_confusion:
        confusion = {}

    with torch.no_grad():
        for xb, yb in loader:
            xb, yb = xb.to(device), yb.to(device)
            out = model(xb)
            if criterion is not None:
                loss_sum += criterion(out, yb).item() * xb.size(0)
            preds = out.argmax(1)
            correct += (preds == yb).sum().item()
            total += xb.size(0)

            if return_per_class or return_confusion:
                for pred, label in zip(preds.cpu(), yb.cpu()):
                    class_total[label] += 1
                    if pred == label:
                        class_correct[label] += 1

                    if return_confusion:
                        true_idx = label.item()
                        pred_idx = pred.item()
                        if true_idx not in confusion:
                            confusion[true_idx] = {}
                        confusion[true_idx][pred_idx] = confusion[true_idx].get(pred_idx, 0) + 1

    acc = correct / max(1, total)
    loss = loss_sum / max(1, total) if criterion is not None else 0.0

    if return_confusion:
        return acc, loss, class_correct, class_total, confusion
    if return_per_class:
        return acc, loss, class_correct, class_total
    return acc, loss

def init_weights(m):
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
    elif isinstance(m, nn.Linear):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)
    elif isinstance(m, (nn.BatchNorm2d, nn.BatchNorm1d)):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)

print("‚úÖ Helper functions defined")

‚úÖ Helper functions defined


In [None]:
print("üìÇ Loading dataset...")
print("="*70)

# Training transforms (with augmentation)
train_transform = transforms.Compose([
    EnsureRGBA(),
    transforms.Resize((IMG_SIZE, IMG_SIZE), interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.RandomHorizontalFlip(p=0.35),
    transforms.RandomRotation(degrees=12, interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.RandomApply([
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.04)
    ], p=0.35),
    transforms.RandomApply([
        transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.08)
    ], p=0.25),
    transforms.RandomAffine(degrees=0, translate=(0.06, 0.06), scale=(0.94, 1.06)),
    transforms.ToTensor(),
    transforms.Normalize(MEAN, STD),
])

# Val/test transforms (no augmentation)
eval_transform = transforms.Compose([
    EnsureRGBA(),
    transforms.Resize((IMG_SIZE, IMG_SIZE), interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.ToTensor(),
    transforms.Normalize(MEAN, STD),
])

# Load base dataset without transforms
full_ds = datasets.ImageFolder(DATA_ROOT)
num_classes = len(full_ds.classes)
print(f"‚úÖ Dataset loaded: {len(full_ds)} images across {num_classes} classes")
print(f"‚úÖ Classes: {full_ds.classes}")

# Split dataset by indices
N = len(full_ds)
n_test = int(N * TEST_SPLIT)
n_val  = int(N * VAL_SPLIT)
n_train = N - n_val - n_test

split_generator = torch.Generator().manual_seed(SEED)
train_indices, val_indices, test_indices = random_split(
    range(N), [n_train, n_val, n_test], generator=split_generator
)

# Extract indices
train_idx = list(train_indices)
val_idx = list(val_indices)
test_idx = list(test_indices)

print(f"\n‚úÖ Split complete:")
print(f"   Train: {len(train_idx)} samples")
print(f"   Val:   {len(val_idx)} samples")
print(f"   Test:  {len(test_idx)} samples")

# Create subsets with appropriate transforms
class TransformSubset(torch.utils.data.Dataset):
    def __init__(self, dataset, indices, transform):
        self.dataset = dataset
        self.indices = indices
        self.transform = transform
        self.classes = dataset.classes
        self.class_to_idx = dataset.class_to_idx

    def __len__(self):
        return len(self.indices)

    def __getitem__(self, idx):
        img, label = self.dataset[self.indices[idx]]
        if self.transform:
            img = self.transform(img)
        return img, label

train_ds = TransformSubset(full_ds, train_idx, train_transform)
val_ds = TransformSubset(full_ds, val_idx, eval_transform)
test_ds = TransformSubset(full_ds, test_idx, eval_transform)

# Create data loaders
print(f"\n{'='*70}")
print("Creating data loaders...")
print("="*70)

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,
                          num_workers=NUM_WORKERS, pin_memory=True)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False,
                          num_workers=NUM_WORKERS, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False,
                          num_workers=NUM_WORKERS, pin_memory=True)

print(f"\n{'='*70}")
print("‚úÖ DATASET READY!")
print("="*70)

üìÇ Loading dataset...
‚úÖ Dataset loaded: 505 images across 21 classes
‚úÖ Classes: ['boots', 'box', 'coin', 'dragon', 'exit', 'floor', 'gem', 'ghost', 'human', 'key', 'lava', 'locked', 'metalbox', 'opened', 'portal', 'robot', 'shield', 'sleeping', 'spike', 'wall', 'wolf']

‚úÖ Split complete:
   Train: 355 samples
   Val:   75 samples
   Test:  75 samples

Creating data loaders...

‚úÖ DATASET READY!


In [None]:
print("\n" + "="*70)
print("üöÄ TRAINING COMPACT MODEL")
print("="*70 + "\n")

model = CompactTileCNN(num_classes=num_classes, in_channels=4, img_size=IMG_SIZE).to(device)
model.apply(init_weights)

total_params = sum(p.numel() for p in model.parameters())
model_size_mb = (total_params * 4) / (1024 * 1024)
print(f"üìä Model parameters: {total_params:,} ({model_size_mb:.2f} MB)")
print(f"üìâ Size reduction: ~770KB vs previous model")
if model_size_mb > 2.0:
    print(f"‚ö†Ô∏è  WARNING: Model exceeds 2 MB!")
else:
    print(f"‚úÖ Under 2 MB limit")

criterion = nn.CrossEntropyLoss(label_smoothing=LABEL_SMOOTHING)
optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=0.002)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS, eta_min=5e-7)

best_val_acc = 0.0
best_state = None
patience_counter = 0
patience_limit = 30

print("\n" + "="*70)
print("üìà TRAINING PROGRESS")
print("="*70 + "\n")

training_start = time.time()

for ep in range(1, EPOCHS + 1):
    epoch_start = time.time()
    model.train()
    tl_sum, correct_sum, n_seen = 0.0, 0, 0

    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)

        optimizer.zero_grad()
        out = model(xb)
        loss = criterion(out, yb)
        loss.backward()
        optimizer.step()

        tl_sum += loss.item() * xb.size(0)
        correct_sum += (out.argmax(1) == yb).sum().item()
        n_seen += xb.size(0)

    train_loss = tl_sum / max(1, n_seen)
    train_acc  = correct_sum / max(1, n_seen)

    val_acc, val_loss = evaluate(model, val_loader, criterion)

    scheduler.step()
    current_lr = optimizer.param_groups[0]['lr']

    epoch_time = time.time() - epoch_start
    print(f"Epoch {ep:02d} [{epoch_time:5.1f}s] | Train {train_loss:.4f}/{train_acc:.3f} "
          f"| Val {val_loss:.4f}/{val_acc:.3f} | LR {current_lr:.6f}")

    # Per-class accuracy every 10 epochs
    if ep % 10 == 0 or ep == 1 or ep == EPOCHS:
        _, _, class_correct, class_total = evaluate(model, val_loader, criterion, return_per_class=True)
        print(f"  üìä Per-class validation accuracy:")
        poor_classes = []
        for i, cls_name in enumerate(full_ds.classes):
            if class_total[i] > 0:
                cls_acc = class_correct[i] / class_total[i]
                marker = "‚ö†Ô∏è" if cls_acc < 0.90 else "‚úÖ"
                print(f"     {marker} {cls_name:20s}: {cls_acc:.3f} ({int(class_correct[i])}/{int(class_total[i])})")
                if cls_acc < 0.90:
                    poor_classes.append(cls_name)
        if poor_classes:
            print(f"  ‚ö†Ô∏è  Classes below 90%: {', '.join(poor_classes)}")

    # Save best model
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
        patience_counter = 0
        print(f"  ‚úÖ New best model: {best_val_acc:.4f}")
    else:
        patience_counter += 1

    # Early stopping
    if patience_counter >= patience_limit:
        print(f"\n‚èπÔ∏è  Early stopping at epoch {ep} (no improvement for {patience_limit} epochs)")
        break

training_time = time.time() - training_start
print(f"\n‚è±Ô∏è  Total training time: {training_time/60:.1f} minutes")

# Load best model
if best_state is not None:
    model.load_state_dict(best_state)
    model.to(device)

print("\n‚úÖ Training complete!")


üöÄ TRAINING COMPACT MODEL

üìä Model parameters: 303,333 (1.16 MB)
üìâ Size reduction: ~770KB vs previous model
‚úÖ Under 2 MB limit

üìà TRAINING PROGRESS

Epoch 01 [108.3s] | Train 4.6452/0.104 | Val 2.2734/0.413 | LR 0.000450
  üìä Per-class validation accuracy:
     ‚ö†Ô∏è boots               : 0.000 (0/2)
     ‚úÖ box                 : 1.000 (2/2)
     ‚úÖ coin                : 1.000 (5/5)
     ‚ö†Ô∏è dragon              : 0.000 (0/3)
     ‚ö†Ô∏è exit                : 0.800 (4/5)
     ‚úÖ floor               : 1.000 (3/3)
     ‚ö†Ô∏è gem                 : 0.000 (0/3)
     ‚ö†Ô∏è ghost               : 0.667 (4/6)
     ‚ö†Ô∏è human               : 0.000 (0/6)
     ‚ö†Ô∏è key                 : 0.250 (1/4)
     ‚úÖ lava                : 1.000 (6/6)
     ‚ö†Ô∏è locked              : 0.333 (1/3)
     ‚ö†Ô∏è metalbox            : 0.667 (2/3)
     ‚ö†Ô∏è opened              : 0.000 (0/3)
     ‚ö†Ô∏è portal              : 0.000 (0/4)
     ‚úÖ robot               : 1.000 (1/1)
     ‚

In [None]:
# =========================
# Generate Model Loader Snippet
# =========================

print("üíæ Generating compressed model snippet...")
print("="*70)

try:
    # Mock grid_universe imports that are at module level in utils.py
    import sys
    import types

    # Create mock modules so utils.py can import
    for module_name in ['grid_universe', 'grid_universe.state',
                        'grid_universe.step', 'grid_universe.actions']:
        if module_name not in sys.modules:
            sys.modules[module_name] = types.ModuleType(module_name)

    # Add dummy State, step, Action to the mocks
    sys.modules['grid_universe.state'].State = type('State', (), {})
    sys.modules['grid_universe.step'].step = lambda *args: None
    sys.modules['grid_universe.actions'].Action = type('Action', (), {})

    # NOW we can import from utils
    from utils import generate_torch_loader_snippet

    # Create example input
    example_input = torch.randn(1, 4, IMG_SIZE, IMG_SIZE).to(device)

    # Generate snippet
    snippet = generate_torch_loader_snippet(
        model,
        example_inputs=example_input,
        prefer="auto",
        compression="zlib"
    )

    print("\n" + snippet + "\n")
    print("="*70)
    print("\n‚úÖ Copy the snippet above to use in your assignment!")
    print(f"   Model size: {model_size_mb:.2f} MB")

except Exception as e:
    print(f"\n‚ùå Error: {e}")


üíæ Generating compressed model snippet...

def get_model(device: str = "cpu", dtype: str | None = None):
    """
    Return a TorchScript model loaded from an embedded, base64-encoded compressed blob.
    Self-contained: no need for the original Python class.

    Args:
        device: Where to map the model (e.g., "cpu", "cuda", "cuda:0").
        dtype: Optional dtype to convert parameters/buffers to (e.g., "float32", "float16").
    """
    import base64, io, torch
    import zlib as _z; _decomp = _z.decompress
    _blob_b64 = "eNqcuXk019H3Lm6eypQxIsk8RYp4n/0iDSLRQEKpNEoTpYQyy6yiTClThkxRxvfZryZKo0yFUpQGTRRJo9vnrvvPvfe7fr+7vnut88f54+z1nPNa+9nPs18rl/ML8PCIiPD8XyHBI8uz2W/LTu9D2+Zs3Xxw8xzjpTZSPO7/R0Twn+EG9pkx67g+9Gz/AVh2cC6rkxAPBukF+M2gCO9GbgApaX42WVWbXX9eHAda3GmUkw7Z6XEX0XMWM2KajF2WcVCsfxjKWxdRZyEB5mS4HeYkvaJzHUSZKeelcPrDAaz96MZo5q/Fb3wd5N7CPtx2ToodKRzHM5vywTBAggmfdgr8LyiBfJcMGyzpDSMq8ox9UAqeFs0Eo/ZJ7r6zGRzpwTy67BIhiVsjcXrXI1zBy0ve+FnjF3oALfZT1LBYAWNrqskrlzJqsvk+2A58xqQj6hjhvQtzc6Xh0

In [None]:
print("\n" + "="*70)
print("üìä FINAL EVALUATION")
print("="*70 + "\n")

test_acc, _, class_correct, class_total, confusion = evaluate(
    model, test_loader, None, return_per_class=True, return_confusion=True
)

print(f"{'='*70}")
print(f"üìà FINAL TEST RESULTS")
print(f"{'='*70}")
print(f"Overall Test Accuracy: {test_acc:.4f}")
print(f"\nPer-class test accuracy:")

failed_classes = []
for i, cls_name in enumerate(full_ds.classes):
    if class_total[i] > 0:
        cls_acc = class_correct[i] / class_total[i]
        marker = "‚ùå" if cls_acc < 0.90 else "‚úÖ"
        print(f"  {marker} {cls_name:20s}: {cls_acc:.3f} ({int(class_correct[i])}/{int(class_total[i])})")
        if cls_acc < 0.90:
            failed_classes.append((i, cls_name, cls_acc))
    else:
        print(f"  ‚ö†Ô∏è  {cls_name:20s}: N/A (no test samples)")

# Show confusion for failed classes
if failed_classes:
    print(f"\n{'='*70}")
    print(f"üîç CONFUSION ANALYSIS")
    print(f"{'='*70}")
    for true_idx, cls_name, acc in failed_classes:
        print(f"\n‚ùå {cls_name} ({acc:.3f} accuracy) confused with:")
        if true_idx in confusion:
            conf_items = [(pred_idx, count) for pred_idx, count in confusion[true_idx].items()
                         if pred_idx != true_idx]
            conf_items.sort(key=lambda x: x[1], reverse=True)
            for pred_idx, count in conf_items[:3]:
                print(f"   ‚Üí {full_ds.classes[pred_idx]:20s}: {count} times")

print(f"{'='*70}\n")

if test_acc >= 0.98:
    print("üéâ SUCCESS! Achieved 98%+ accuracy with optimized CNN + expanded dataset!")
elif test_acc >= 0.95:
    print("‚ö†Ô∏è  Close! 95%+ achieved - consider adjusting hyperparameters or augmentation")
elif test_acc >= 0.90:
    print("‚úÖ Good progress! 90%+ achieved - review confusion matrix for problematic classes")
else:
    print("‚ùå Below 90%. Check confusion matrix above for insights")


üìä FINAL EVALUATION

üìà FINAL TEST RESULTS
Overall Test Accuracy: 0.9867

Per-class test accuracy:
  ‚úÖ boots               : 1.000 (5/5)
  ‚ùå box                 : 0.500 (1/2)
  ‚úÖ coin                : 1.000 (2/2)
  ‚úÖ dragon              : 1.000 (3/3)
  ‚úÖ exit                : 1.000 (1/1)
  ‚úÖ floor               : 1.000 (4/4)
  ‚úÖ gem                 : 1.000 (2/2)
  ‚úÖ ghost               : 1.000 (7/7)
  ‚úÖ human               : 1.000 (3/3)
  ‚úÖ key                 : 1.000 (3/3)
  ‚úÖ lava                : 1.000 (4/4)
  ‚úÖ locked              : 1.000 (3/3)
  ‚úÖ metalbox            : 1.000 (3/3)
  ‚úÖ opened              : 1.000 (5/5)
  ‚úÖ portal              : 1.000 (6/6)
  ‚úÖ robot               : 1.000 (6/6)
  ‚úÖ shield              : 1.000 (4/4)
  ‚úÖ sleeping            : 1.000 (2/2)
  ‚ö†Ô∏è  spike               : N/A (no test samples)
  ‚úÖ wall                : 1.000 (4/4)
  ‚úÖ wolf                : 1.000 (6/6)

üîç CONFUSION ANALYSIS

‚ùå box (0.500 a