# 🎴 Card Recognition Training V2

**Train on Colab → Deploy on Jetson Nano**

Optimized for 14K+ unique cards with improved hyperparameters

---

## 1️⃣ Setup & GPU Check

In [None]:
# Check GPU - Use A100 for best performance!
!nvidia-smi

import torch
# Report the PyTorch build and whether CUDA is usable, then rate the
# attached GPU by looking for a known model name in the device string.
cuda_ready = torch.cuda.is_available()
print(f"\nPyTorch: {torch.__version__}")
print(f"CUDA: {cuda_ready}")
if cuda_ready:
    gpu_name = torch.cuda.get_device_name(0)
    print(f"GPU: {gpu_name}")
    # Checked in order; the first tag found in the device name wins.
    known_gpus = (
        ('A100', "‚úÖ A100 detected - optimal performance!"),
        ('V100', "‚úÖ V100 detected - good performance"),
    )
    fallback_hint = "‚ö†Ô∏è Consider switching to A100 in Runtime > Change runtime type"
    print(next((msg for tag, msg in known_gpus if tag in gpu_name), fallback_hint))

In [None]:
# Install dependencies
!pip install -q timm albumentations opencv-python-headless tqdm tensorboard imagehash

## 2️⃣ Mount Google Drive & Extract Data

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset archive and the model
# output folder used later in this notebook both live under that mount point.
drive.mount('/content/drive')

In [None]:
import os
import zipfile
from pathlib import Path

import shutil

# Notebook-wide locations: the dataset archive on Drive, the local working
# directories, and the Drive folder that receives the trained artifacts.
ZIP_PATH = "/content/drive/MyDrive/CardData/card_images.zip"
IMAGE_DIR = "/content/card_images"
CHECKPOINT_DIR = '/content/checkpoints'
DRIVE_OUTPUT = '/content/drive/MyDrive/CardRecognition_Models'

os.makedirs(CHECKPOINT_DIR, exist_ok=True)
os.makedirs(DRIVE_OUTPUT, exist_ok=True)


def _list_images(image_dir):
    """Return the image files located directly inside *image_dir*.

    Uses the same case-insensitive extension set as the validation and
    dataset cells (.jpg, .jpeg, .png, .webp), so the count printed here
    matches what training will actually load. The previous glob pattern
    ``*.[jp][pn][g]*`` silently skipped ``.jpeg`` and ``.webp`` files.
    """
    return [p for p in Path(image_dir).iterdir()
            if p.is_file() and p.suffix.lower() in ('.jpg', '.jpeg', '.png', '.webp')]


# Extract if needed. The ".extracted" marker file is written only after a
# complete extraction, so re-running the cell skips the unzip step.
if os.path.exists(f"{IMAGE_DIR}/.extracted"):
    images = _list_images(IMAGE_DIR)
    print(f"‚úì Already extracted: {len(images):,} images")
elif os.path.exists(ZIP_PATH):
    print(f"Extracting {ZIP_PATH}...")
    # Start from an empty directory so leftovers from an interrupted
    # extraction cannot linger (pure-Python equivalent of `rm -rf`).
    shutil.rmtree(IMAGE_DIR, ignore_errors=True)
    os.makedirs(IMAGE_DIR, exist_ok=True)
    with zipfile.ZipFile(ZIP_PATH, 'r') as z:
        z.extractall(IMAGE_DIR)
    Path(f"{IMAGE_DIR}/.extracted").touch()
    images = _list_images(IMAGE_DIR)
    print(f"‚úì Extracted {len(images):,} images")
else:
    print(f"‚ùå ZIP not found: {ZIP_PATH}")

In [None]:
# Validate images (remove corrupted ones)
from PIL import Image
from tqdm.notebook import tqdm
import json
from datetime import datetime

# One-time integrity pass: every image that cannot be fully decoded is deleted
# and its name recorded. The ".validated" marker makes later runs a no-op.
if os.path.exists(f"{IMAGE_DIR}/.validated"):
    print("‚úì Images already validated")
else:
    print("Validating images...")
    corrupted = []
    candidates = [p for p in Path(IMAGE_DIR).glob("*")
                  if p.suffix.lower() in ['.jpg', '.jpeg', '.png', '.webp']]
    for img_path in tqdm(candidates):
        try:
            # verify() checks file structure but leaves the image object
            # unusable, so the file is reopened for a full decode with load().
            with Image.open(img_path) as img:
                img.verify()
            with Image.open(img_path) as img:
                img.load()
        except Exception:
            # Deliberately not a bare `except:` - that would also catch
            # KeyboardInterrupt, so stopping the cell would delete a healthy
            # image and record it as corrupted.
            corrupted.append(img_path.name)
            img_path.unlink()

    # Save corrupted list
    with open('/content/drive/MyDrive/CardData/corrupted_images.json', 'w') as f:
        json.dump({'date': datetime.now().isoformat(), 'files': corrupted}, f)

    Path(f"{IMAGE_DIR}/.validated").touch()
    print(f"‚úì Removed {len(corrupted)} corrupted images")

## 3️⃣ Model Architecture

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import timm
import numpy as np
import cv2

class GeM(nn.Module):
    """Generalized-mean pooling with a learnable exponent.

    Each channel of the input feature map is reduced to a single value by
    clamping to ``eps``, raising to the power ``p``, averaging over the
    spatial dimensions and taking the ``p``-th root.

    Args:
        p: Initial value of the learnable exponent.
        eps: Lower clamp applied before the power.
    """

    def __init__(self, p=3.0, eps=1e-6):
        super().__init__()
        self.eps = eps
        self.p = nn.Parameter(torch.ones(1) * p)

    def forward(self, x):
        """Pool ``x`` down to one value per channel, flattened per sample."""
        powered = torch.pow(torch.clamp(x, min=self.eps), self.p)
        averaged = F.adaptive_avg_pool2d(powered, 1)
        rooted = torch.pow(averaged, 1.0 / self.p)
        batch = rooted.size(0)
        return rooted.view(batch, -1)

class ColorHistogramBranch(nn.Module):
    """Embed a normalized HSV color histogram of each input image.

    The input batch is de-normalized with the stored per-channel mean/std,
    converted to 8-bit RGB, and per-channel H/S/V histograms are computed on
    the CPU with OpenCV/NumPy. The concatenated, sum-normalized histogram
    (``3 * bins`` values) is then projected by a small MLP.

    The histogram is built outside autograd, so gradients reach only the MLP
    (``fc``), never the input image.

    Args:
        bins: Number of histogram bins per HSV channel.
        output_dim: Size of the returned color feature vector.
    """

    def __init__(self, bins=32, output_dim=64):
        super().__init__()
        self.bins = bins
        self.fc = nn.Sequential(
            nn.Linear(bins * 3, 128), nn.ReLU(), nn.Dropout(0.3), nn.Linear(128, output_dim)
        )
        # Channel statistics used to undo the input normalization; they match
        # the values passed to A.Normalize in the transform pipelines.
        self.register_buffer('mean', torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1))
        self.register_buffer('std', torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1))

    def _hsv_histogram(self, rgb_uint8):
        """Return the concatenated, sum-normalized H/S/V histogram of one HWC uint8 image."""
        hsv = cv2.cvtColor(rgb_uint8, cv2.COLOR_RGB2HSV)
        # Hue is binned over [0, 180) while S and V use [0, 256).
        h = np.histogram(hsv[:, :, 0], bins=self.bins, range=(0, 180))[0]
        s = np.histogram(hsv[:, :, 1], bins=self.bins, range=(0, 256))[0]
        v = np.histogram(hsv[:, :, 2], bins=self.bins, range=(0, 256))[0]
        hist = np.concatenate([h, s, v]).astype(np.float32)
        return hist / (hist.sum() + 1e-8)

    def forward(self, x):
        """Map a normalized (N, 3, H, W) batch to (N, output_dim) color features."""
        x_denorm = ((x * self.std + self.mean) * 255).clamp(0, 255)
        # One device-to-host copy for the whole batch instead of one per image.
        # detach() keeps .numpy() valid even if the input requires grad, and the
        # contiguous copy gives OpenCV a plain HWC layout for every image.
        batch_hwc = np.ascontiguousarray(
            x_denorm.detach().permute(0, 2, 3, 1).cpu().numpy().astype(np.uint8)
        )
        histograms = [self._hsv_histogram(img) for img in batch_hwc]
        return self.fc(torch.tensor(np.stack(histograms), device=x.device, dtype=torch.float32))

class CardEmbeddingNetV2(nn.Module):
    """Card embedding network: CNN backbone + GeM pooling + color-histogram branch.

    The backbone feature map is pooled with GeM, concatenated with the output
    of ``ColorHistogramBranch``, projected to ``embedding_dim``, passed
    through BatchNorm and Dropout, and finally L2-normalized.

    Args:
        embedding_dim: Size of the returned embedding.
        color_dim: Size of the color-branch feature vector.
        pretrained: Forwarded to ``timm.create_model``.
    """

    def __init__(self, embedding_dim=512, color_dim=64, pretrained=True):
        super().__init__()
        self.backbone = timm.create_model('mobilenetv3_small_100', pretrained=pretrained,
                                          num_classes=0, global_pool='')
        # Probe the backbone's output channel count with a dummy forward pass.
        # Run it in eval mode: in train mode any BatchNorm layers would fold
        # the random input into their running statistics.
        was_training = self.backbone.training
        self.backbone.eval()
        with torch.no_grad():
            self.num_features = self.backbone(torch.randn(1, 3, 224, 224)).shape[1]
        self.backbone.train(was_training)
        self.gem = GeM(p=3.0)
        self.color_branch = ColorHistogramBranch(bins=32, output_dim=color_dim)
        self.fc = nn.Linear(self.num_features + color_dim, embedding_dim)
        self.bn = nn.BatchNorm1d(embedding_dim)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return unit-length embeddings of shape (N, embedding_dim) for image batch ``x``."""
        visual = self.gem(self.backbone(x))
        color = self.color_branch(x)
        embedding = self.dropout(self.bn(self.fc(torch.cat([visual, color], dim=1))))
        return F.normalize(embedding, p=2, dim=1)

# Instantiate once with default arguments (pretrained=True) to confirm the
# architecture builds and to report its parameter count. The training setup
# cell later rebinds `model` to a fresh instance moved to the target device.
model = CardEmbeddingNetV2()
print(f"‚úì Model: {sum(p.numel() for p in model.parameters()):,} params")

## 4️⃣ Loss & Dataset

In [None]:
# IMPROVED: Higher margin and scale for better separation
class CosFaceLoss(nn.Module):
    """Cosine-margin softmax loss with one learnable weight row per class.

    The class weights are L2-normalized and multiplied with the embeddings.
    ``margin`` is subtracted from the target-class entry only, everything is
    multiplied by ``scale``, and cross-entropy is applied. The embeddings are
    not normalized here, so the products are true cosines only when the
    caller passes unit-length embeddings.

    Args:
        num_classes: Number of rows in the class-weight matrix.
        embedding_dim: Dimensionality of the embeddings.
        scale: Multiplier applied to the margin-adjusted logits.
        margin: Value subtracted from the target-class logit.
    """

    def __init__(self, num_classes, embedding_dim, scale=64.0, margin=0.5):
        super().__init__()
        self.scale = scale
        self.margin = margin
        self.weight = nn.Parameter(torch.empty(num_classes, embedding_dim, dtype=torch.float32))
        nn.init.xavier_uniform_(self.weight)

    def forward(self, embeddings, labels):
        """Return the mean margin-softmax loss for ``embeddings`` with integer ``labels``."""
        class_directions = F.normalize(self.weight, p=2, dim=1)
        cosine = F.linear(embeddings, class_directions)
        # 1.0 at each sample's target class, 0.0 elsewhere.
        target_mask = torch.zeros_like(cosine)
        target_mask.scatter_(1, labels.view(-1, 1), 1.0)
        logits = (cosine - target_mask * self.margin) * self.scale
        return F.cross_entropy(logits, labels)

In [None]:
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset, DataLoader
import random

def get_train_transforms(size=224):
    """Build the training augmentation pipeline.

    Images are resized to ``size`` x ``size``, put through mild geometric,
    blur and color augmentations (each applied with its own probability),
    normalized with the fixed mean/std below, and converted to a tensor.
    """
    geometric = [
        A.Perspective(scale=(0.02, 0.05), p=0.3),
        A.Affine(scale=(0.97, 1.03), rotate=(-2, 2), p=0.3),
    ]
    blur_choice = A.OneOf(
        [A.GaussianBlur(blur_limit=(3, 5)), A.MotionBlur(blur_limit=(3, 5))],
        p=0.2,
    )
    photometric = [
        A.RandomBrightnessContrast(brightness_limit=0.15, contrast_limit=0.15, p=0.4),
        A.HueSaturationValue(hue_shift_limit=3, sat_shift_limit=10, val_shift_limit=10, p=0.2),
        A.GaussianBlur(blur_limit=(3, 5), p=0.1),
    ]
    finalize = [
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]
    steps = [A.Resize(size, size), *geometric, blur_choice, *photometric, *finalize]
    return A.Compose(steps)

def get_val_transforms(size=224):
    """Build the deterministic evaluation pipeline: resize, normalize, to tensor."""
    resize = A.Resize(size, size)
    normalize = A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    to_tensor = ToTensorV2()
    return A.Compose([resize, normalize, to_tensor])

class CardDatasetWithRotation(Dataset):
    """Dataset in which every image file is its own class, seen at several rotations.

    Files directly inside ``image_dir`` with a supported extension are sorted
    and indexed; the index of a file in that sorted list is its class label.
    Each (image, rotation) pair is one sample.

    Args:
        image_dir: Directory containing the card images.
        transform: Optional callable invoked as ``transform(image=img)`` whose
            result is read from the ``'image'`` key.
        rotations: Rotation angles in degrees to generate per image. Only 90,
            180 and 270 rotate the image; any other value leaves it unrotated.
    """

    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.webp')
    # Upper bound on substitute samples tried when an image fails to load.
    MAX_LOAD_RETRIES = 10

    def __init__(self, image_dir, transform=None, rotations=(0, 90, 180, 270)):
        self.image_dir = Path(image_dir)
        self.transform = transform
        # Copy into a list: the default is an immutable tuple (no shared
        # mutable default), and callers keep no alias to our state.
        self.rotations = list(rotations)
        self.images = sorted(f for f in self.image_dir.iterdir()
                             if f.suffix.lower() in self.IMAGE_EXTENSIONS)
        self.num_cards = len(self.images)
        self.samples = [(i, r) for i in range(self.num_cards) for r in self.rotations]
        print(f"Dataset: {self.num_cards} cards √ó {len(self.rotations)} rot = {len(self.samples)} samples")

    def __len__(self):
        return len(self.samples)

    def _load(self, img_idx, rotation):
        """Read image ``img_idx`` as RGB, apply ``rotation`` and the transform."""
        with Image.open(self.images[img_idx]) as pil_img:
            img = np.array(pil_img.convert('RGB'))
        if rotation == 90:
            img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
        elif rotation == 180:
            img = cv2.rotate(img, cv2.ROTATE_180)
        elif rotation == 270:
            img = cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE)
        if self.transform:
            img = self.transform(image=img)['image']
        return img

    def __getitem__(self, idx):
        """Return ``(image, class_index)`` for sample ``idx``.

        If loading fails, a randomly chosen substitute sample is returned
        together with the substitute's own label. After ``MAX_LOAD_RETRIES``
        consecutive failures a ``RuntimeError`` chained to the last error is
        raised, so a systematic problem (for example a broken transform)
        surfaces instead of recursing until ``RecursionError``.

        Raises:
            IndexError: If ``idx`` is out of range.
            RuntimeError: If no sample could be loaded within the retry budget.
        """
        img_idx, rotation = self.samples[idx]
        last_error = None
        for _ in range(self.MAX_LOAD_RETRIES):
            try:
                return self._load(img_idx, rotation), img_idx
            except Exception as err:  # not bare: let KeyboardInterrupt through
                last_error = err
                img_idx, rotation = self.samples[random.randint(0, len(self.samples) - 1)]
        raise RuntimeError(
            f"Failed to load a sample after {self.MAX_LOAD_RETRIES} attempts"
        ) from last_error

    def get_num_classes(self):
        """Return the number of classes, i.e. the number of image files found."""
        return self.num_cards

print("‚úì Dataset classes ready")

## 5️⃣ Training Configuration (IMPROVED)

In [None]:
# IMPROVED CONFIG - prevents embedding collapse
# Hyperparameters read by the dataloader, model, loss and training-loop cells.
CONFIG = dict(
    epochs=100,
    batch_size=128,        # Larger batch for A100
    learning_rate=3e-4,    # Lower LR - more stable
    weight_decay=5e-4,     # More regularization
    embedding_dim=512,
    patience=10,           # Epochs without val-loss improvement before stopping
    unfreeze_epoch=20,     # Epoch at which the backbone becomes trainable
    margin=0.5,            # CosFace margin
    scale=64.0,            # CosFace logit scale
)

print("‚úì Improved Config:")
for name, value in CONFIG.items():
    print(f"  {name}: {value}")

In [None]:
# Create dataloaders
def create_dataloaders(image_dir, batch_size=64, val_split=0.15, num_workers=2, seed=None):
    """Build train/validation loaders with a card-level (class-level) split.

    Two datasets are created over the same directory: an augmented one with
    all rotations for training and an un-augmented, rotation-0 one for
    validation. Cards are shuffled and the last ``val_split`` fraction is
    assigned to validation, so no card appears in both loaders.

    NOTE(review): labels are per-card class indices and the training loop
    evaluates the same classification loss on the validation cards. Their
    class-weight rows therefore never see a positive training example, so
    validation loss (and early stopping on it) may not reflect retrieval
    quality - confirm this is the intended protocol.

    Args:
        image_dir: Directory containing the card images.
        batch_size: Batch size for both loaders.
        val_split: Fraction of cards held out for validation, in ``[0, 1)``.
        num_workers: Worker processes per loader.
        seed: Optional seed for a reproducible split. ``None`` keeps the
            previous behaviour of drawing from NumPy's global random state.

    Returns:
        ``(train_loader, val_loader, num_classes, train_ds)`` where
        ``train_ds`` is the full (unsplit) training dataset.

    Raises:
        ValueError: If ``val_split`` is outside ``[0, 1)``.
    """
    if not 0 <= val_split < 1:
        raise ValueError(f"val_split must be in [0, 1), got {val_split}")

    train_ds = CardDatasetWithRotation(image_dir, get_train_transforms())
    val_ds = CardDatasetWithRotation(image_dir, get_val_transforms(), rotations=[0])

    num_cards = train_ds.num_cards
    if seed is None:
        indices = np.random.permutation(num_cards)
    else:
        indices = np.random.RandomState(seed).permutation(num_cards)
    split = int((1 - val_split) * num_cards)

    # Boolean lookup by card index. Both datasets sort the same directory
    # listing, so a card index means the same file in each of them.
    is_train_card = np.zeros(num_cards, dtype=bool)
    is_train_card[indices[:split]] = True

    train_samples = [i for i, (c, _) in enumerate(train_ds.samples) if is_train_card[c]]
    val_samples = [i for i, (c, _) in enumerate(val_ds.samples) if not is_train_card[c]]

    train_loader = DataLoader(torch.utils.data.Subset(train_ds, train_samples),
                              batch_size=batch_size, shuffle=True, num_workers=num_workers,
                              pin_memory=True, drop_last=True)
    val_loader = DataLoader(torch.utils.data.Subset(val_ds, val_samples),
                            batch_size=batch_size, shuffle=False, num_workers=num_workers,
                            pin_memory=True)

    print(f"Train: {len(train_samples):,} | Val: {len(val_samples):,}")
    return train_loader, val_loader, train_ds.get_num_classes(), train_ds

# Build the loaders once for the rest of the notebook. `num_classes` sizes the
# loss's class-weight matrix, and `train_ds` is kept because the test cells
# iterate `train_ds.images` to build reference embeddings.
train_loader, val_loader, num_classes, train_ds = create_dataloaders(IMAGE_DIR, CONFIG['batch_size'])
print(f"‚úì Classes: {num_classes:,}")

In [None]:
# Initialize model and training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")

model = CardEmbeddingNetV2(embedding_dim=CONFIG['embedding_dim']).to(device)

# Freeze backbone initially; the training loop unfreezes it at
# CONFIG['unfreeze_epoch'].
for p in model.backbone.parameters():
    p.requires_grad = False
print("‚úì Backbone frozen")

# Loss with improved margin
criterion = CosFaceLoss(num_classes, CONFIG['embedding_dim'],
                        scale=CONFIG['scale'], margin=CONFIG['margin']).to(device)

# Optimizer. The loss owns a learnable class-weight matrix, so its parameters
# must be optimized together with the model's trainable parameters; otherwise
# the class weights stay at their random initialisation.
trainable_params = [p for p in model.parameters() if p.requires_grad]
trainable_params += list(criterion.parameters())
optimizer = torch.optim.AdamW(trainable_params,
                              lr=CONFIG['learning_rate'], weight_decay=CONFIG['weight_decay'])
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=CONFIG['epochs'])
scaler = torch.amp.GradScaler('cuda')

print("‚úì Ready to train")

## 6️⃣ Training Loop

In [None]:
# Training loop with improved settings
# Train with mixed precision, checkpoint on the best validation loss, and
# stop early after CONFIG['patience'] epochs without improvement.
best_loss = float('inf')
patience_counter = 0
history = {'train': [], 'val': []}

RESUME_PATH = f"{CHECKPOINT_DIR}/best_model.pth"

for epoch in range(1, CONFIG['epochs'] + 1):
    # Unfreeze backbone at specified epoch
    if epoch == CONFIG['unfreeze_epoch']:
        print(f"\nüîì Unfreezing backbone at epoch {epoch}...")
        for p in model.backbone.parameters():
            p.requires_grad = True
        # Rebuild the optimizer so the backbone trains at a 10x smaller LR.
        # The loss's class-weight matrix is included as its own group;
        # leaving it out would stop it being updated from this epoch on.
        optimizer = torch.optim.AdamW([
            {'params': model.backbone.parameters(), 'lr': CONFIG['learning_rate'] / 10},
            {'params': model.gem.parameters()},
            {'params': model.color_branch.parameters()},
            {'params': model.fc.parameters()},
            {'params': model.bn.parameters()},
            {'params': criterion.parameters()},
        ], lr=CONFIG['learning_rate'], weight_decay=CONFIG['weight_decay'])
        # +1 counts the current epoch among the remaining ones, so the last
        # epoch does not run at a learning rate of exactly zero and T_max is
        # never 0 (which would divide by zero inside the scheduler).
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=CONFIG['epochs'] - epoch + 1)

    # Training
    # NOTE(review): model.train() also puts the frozen backbone in train mode;
    # if it contains BatchNorm layers their running statistics keep updating
    # while frozen - confirm that is intended.
    model.train()
    train_loss = 0
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch}", leave=False):
        images, labels = images.to(device), labels.to(device)
        with torch.amp.autocast('cuda'):
            loss = criterion(model(images), labels)
        optimizer.zero_grad()
        scaler.scale(loss).backward()
        # Unscale before clipping so the threshold applies to true gradients.
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
        scaler.step(optimizer)
        scaler.update()
        train_loss += loss.item()
    # max(..., 1) avoids ZeroDivisionError when a loader yields no batches.
    train_loss /= max(len(train_loader), 1)

    # Validation
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            val_loss += criterion(model(images), labels).item()
    val_loss /= max(len(val_loader), 1)

    scheduler.step()
    history['train'].append(train_loss)
    history['val'].append(val_loss)

    print(f"Epoch {epoch}: Train={train_loss:.4f}, Val={val_loss:.4f}, LR={optimizer.param_groups[0]['lr']:.2e}")

    # Save best model
    if val_loss < best_loss:
        best_loss = val_loss
        patience_counter = 0
        torch.save({
            'epoch': epoch, 'model_state_dict': model.state_dict(),
            'val_loss': val_loss, 'num_classes': num_classes, 'config': CONFIG
        }, RESUME_PATH)
        print(f"  üíæ Saved best model")
    else:
        patience_counter += 1
        if patience_counter >= CONFIG['patience']:
            print(f"\n‚ö†Ô∏è Early stopping at epoch {epoch}!")
            break

print(f"\n‚úì Training complete! Best val loss: {best_loss:.4f}")

## 7️⃣ Save Results

In [None]:
import matplotlib.pyplot as plt

# Plot training curves
# Draw train/val loss per epoch on one axes, save the figure next to the
# checkpoints, then display it. Nothing is drawn if no epoch was recorded.
if history['train']:
    fig, ax = plt.subplots(figsize=(10, 4))
    for series, legend_name in (('train', 'Train'), ('val', 'Val')):
        ax.plot(history[series], label=legend_name)
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.legend()
    ax.set_title('Training Progress')
    fig.savefig(f"{CHECKPOINT_DIR}/training.png")
    plt.show()

In [None]:
# Save to Google Drive
import shutil
# Copy whichever artifacts exist from the local checkpoint directory to Drive.
for artifact in ('best_model.pth', 'training.png'):
    local_copy = f"{CHECKPOINT_DIR}/{artifact}"
    if not os.path.exists(local_copy):
        continue
    shutil.copy(local_copy, DRIVE_OUTPUT)
    print(f"‚úì Saved {artifact}")

print(f"\n‚úì All files saved to: {DRIVE_OUTPUT}")

## 8️⃣ Test Model

In [None]:
# Build reference embeddings
print("Building reference embeddings...")

# Reload best model. map_location places the tensors on the active device,
# so a checkpoint written on a GPU runtime also loads on a CPU-only one.
ckpt = torch.load(RESUME_PATH, map_location=device)
model.load_state_dict(ckpt['model_state_dict'])
model.eval()

test_transform = get_val_transforms()
reference_embeddings = []
reference_names = []

with torch.no_grad():
    for img_path in tqdm(train_ds.images, desc="Building refs"):
        try:
            # Context manager closes the file handle after decoding.
            with Image.open(img_path) as pil_img:
                img = np.array(pil_img.convert('RGB'))
            img_tensor = test_transform(image=img)['image'].unsqueeze(0).to(device)
            emb = model(img_tensor)
            # Append both lists together so names stay aligned with embeddings.
            reference_embeddings.append(emb.cpu())
            reference_names.append(img_path.stem)
        except Exception as e:
            print(f"Error: {img_path.name}: {e}")

if not reference_embeddings:
    # torch.cat on an empty list fails with an opaque message; say what happened.
    raise RuntimeError("No reference embeddings were built - check the image directory")
reference_embeddings = torch.cat(reference_embeddings, dim=0)
print(f"‚úì {len(reference_embeddings):,} embeddings built")

In [None]:
# Test on random cards with detailed output
# Sanity check: query with up to 10 random reference images and list the five
# nearest references by cosine similarity. The queries are the same files the
# references were built from, so this checks the pipeline rather than
# generalization to new photos.
test_cards = random.sample(list(train_ds.images), min(10, len(train_ds.images)))

print("="*70)
print("IDENTIFICATION RESULTS")
print("="*70)

correct = 0
for i, card_path in enumerate(test_cards):
    # Context manager closes the file handle after decoding.
    with Image.open(card_path) as pil_img:
        img = np.array(pil_img.convert('RGB'))
    with torch.no_grad():
        query = model(test_transform(image=img)['image'].unsqueeze(0).to(device)).cpu()
    sims = F.cosine_similarity(query, reference_embeddings)

    # Plain ints make the list indexing below explicit.
    top_indices = sims.argsort(descending=True)[:5].tolist()
    actual = card_path.stem
    predicted = reference_names[top_indices[0]]
    is_correct = actual == predicted
    if is_correct: correct += 1

    status = "CORRECT" if is_correct else "WRONG"
    print(f"\n[{status}] Card #{i+1}")
    print(f"   Actual:    {actual}")
    print(f"   Predicted: {predicted} ({sims[top_indices[0]].item()*100:.1f}%)")
    print(f"   Top 5:")
    for rank, idx in enumerate(top_indices):
        marker = "->" if reference_names[idx] == actual else "  "
        print(f"     {marker} {rank+1}. {reference_names[idx]} ({sims[idx].item()*100:.1f}%)")

# Guard the percentage so an empty image list reports 0.0% instead of
# raising ZeroDivisionError.
accuracy = 100 * correct / len(test_cards) if test_cards else 0.0
print(f"\n{'='*70}")
print(f"ACCURACY: {correct}/{len(test_cards)} = {accuracy:.1f}%")
print("="*70)

## ✅ Done!

**Model saved to:** `MyDrive/CardRecognition_Models/best_model.pth`

**Corrupted images list:** `MyDrive/CardData/corrupted_images.json`