# 🎴 Card Recognition Training

**Train on Colab → Deploy on Jetson Nano**

## Features:
- MobileNetV3-Small backbone (60 FPS on Jetson)
- Color histogram branch (distinguishes similar cards)
- CosFace loss (stable for fine-grained recognition)
- On-the-fly augmentation (rotation + sim-to-real)

---

## 1️⃣ Setup - Clone from GitHub

In [None]:
# Check GPU
# Shell out to nvidia-smi to show which GPU (if any) is attached to this runtime.
!nvidia-smi

import torch
# Report the installed PyTorch build and whether it can see a CUDA device.
print(f"\nPyTorch: {torch.__version__}")
print(f"CUDA: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # Only device index 0 is queried for its name.
    print(f"GPU: {torch.cuda.get_device_name(0)}")

In [None]:
# ===== YOUR GITHUB REPO =====
# "<owner>/<repository>" slug; it is interpolated into the clone URL below.
GITHUB_REPO = "Krishan552Patel/Card-recognition-fab"

import os

# IMPORTANT: Reset to /content first to avoid directory errors
# (re-running this cell while the cwd is inside WORK_DIR would otherwise delete the cwd)
os.chdir('/content')

# Local checkout location for the repository.
WORK_DIR = "/content/card_recognition"

# Clean up if exists
# so that every run starts from a fresh clone
if os.path.exists(WORK_DIR):
    !rm -rf {WORK_DIR}

# Clone repository
!git clone https://github.com/{GITHUB_REPO}.git {WORK_DIR}

# Change to work directory
# os.chdir raises if the clone above did not create WORK_DIR
os.chdir(WORK_DIR)
print(f"\n‚úì Working directory: {os.getcwd()}")
!ls -la

In [None]:
# Install dependencies
# Versions are not pinned, so each run installs whatever pip resolves at that moment.
# NOTE(review): tensorboard and imagehash are installed here but not referenced by the
# cells visible in this notebook - confirm whether they are still needed.
!pip install -q timm albumentations opencv-python-headless tqdm tensorboard imagehash

## 2️⃣ Load Card Data from Google Drive

In [None]:
# Mount Google Drive
# The ZIP with the card images is read from, and the trained artifacts are later
# copied to, paths under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# ===== CONFIGURE YOUR DATA PATH =====
ZIP_PATH = "/content/drive/MyDrive/CardData/card_images.zip"
IMAGE_DIR = "/content/card_images"

import zipfile
import os

if os.path.exists(ZIP_PATH):
    print(f"Found: {ZIP_PATH}")
    print("Extracting... (this may take a few minutes)")
    
    if os.path.exists(IMAGE_DIR):
        !rm -rf {IMAGE_DIR}
    
    os.makedirs(IMAGE_DIR, exist_ok=True)
    with zipfile.ZipFile(ZIP_PATH, 'r') as zip_ref:
        zip_ref.extractall(IMAGE_DIR)
    
    images = [f for f in os.listdir(IMAGE_DIR) if f.endswith(('.jpg', '.png', '.jpeg'))]
    print(f"\n‚úì Extracted {len(images):,} card images")
else:
    print(f"‚ùå ZIP not found: {ZIP_PATH}")

In [None]:
# Validate images and remove corrupted ones
from PIL import Image
from pathlib import Path
from tqdm.notebook import tqdm

print("Validating images (removing corrupted files)...")

image_dir = Path(IMAGE_DIR)

# Check exactly the files the training dataset will load: the same suffix set as
# CardDatasetWithRotation, compared case-insensitively. A case-sensitive glob for
# *.jpg / *.png / *.jpeg would leave e.g. "CARD.JPG" or "card.webp" unvalidated.
valid_suffixes = {'.jpg', '.jpeg', '.png', '.webp'}
all_images = sorted(
    p for p in image_dir.iterdir()
    if p.is_file() and p.suffix.lower() in valid_suffixes
)

valid_count = 0
removed_count = 0

for img_path in tqdm(all_images, desc="Checking"):
    try:
        with Image.open(img_path) as img:
            img.verify()  # Check if image is valid
        # verify() leaves the image object unusable, so reopen to force a full decode
        with Image.open(img_path) as img:
            img.load()
        valid_count += 1
    except Exception:
        # Deliberately broad: any failure to open or decode means the file is unusable for training.
        print(f"  ‚ö†Ô∏è Removing corrupted: {img_path.name}")
        img_path.unlink()  # Delete corrupt file
        removed_count += 1

print(f"\n‚úì Valid images: {valid_count:,}")
if removed_count > 0:
    print(f"‚ö†Ô∏è Removed {removed_count} corrupted images")

## 3️⃣ Model Architecture

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import timm
import numpy as np
import cv2


class GeM(nn.Module):
    """Generalized-mean pooling over the spatial dimensions.

    For every channel this computes ``mean(clamp(x, eps) ** p) ** (1 / p)``,
    where ``p`` is a learnable one-element parameter initialised to the given
    value. The pooled map is flattened to ``(batch, -1)``.
    """

    def __init__(self, p=3.0, eps=1e-6):
        super().__init__()
        self.eps = eps
        # Learnable exponent, stored as a tensor of shape (1,).
        self.p = nn.Parameter(p * torch.ones(1))

    def forward(self, x):
        # Clamp first so the power is never taken of a value below eps.
        powered = torch.clamp(x, min=self.eps) ** self.p
        pooled = F.adaptive_avg_pool2d(powered, 1) ** (1.0 / self.p)
        return pooled.view(pooled.size(0), -1)


class ColorHistogramBranch(nn.Module):
    """Embed an HSV colour histogram of each input image.

    The input is expected to be normalised with the mean/std stored in the
    ``mean`` and ``std`` buffers. ``forward`` undoes that normalisation,
    converts each image to HSV with OpenCV, builds ``bins``-bin histograms of
    the H, S and V channels, normalises the concatenated histogram to sum to 1
    and passes it through a small MLP.

    Args:
        bins: Number of histogram bins per HSV channel.
        output_dim: Size of the returned feature vector.

    The histogram is computed with NumPy/OpenCV on the CPU, so no gradient
    flows from this branch back to the input; only the MLP is trained.

    NOTE(review): because the histogram step leaves the torch graph, a traced
    export (e.g. ``torch.onnx.export``) presumably records the histogram of the
    example input as a constant - verify before relying on this branch in an
    exported model.
    """

    def __init__(self, bins=32, output_dim=64):
        super().__init__()
        self.bins = bins
        self.fc = nn.Sequential(
            nn.Linear(bins * 3, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, output_dim)
        )
        # Normalisation constants, shaped to broadcast over (batch, 3, H, W).
        self.register_buffer('mean', torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1))
        self.register_buffer('std', torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1))

    def forward(self, x):
        x_denorm = (x * self.std + self.mean) * 255
        # Round before the uint8 cast: the cast truncates, so a float round-trip
        # value such as 127.9999 would otherwise become 127 instead of 128.
        x_denorm = x_denorm.round().clamp(0, 255)

        # One device-to-host copy for the whole batch. detach() keeps .numpy()
        # working when x is part of an autograd graph; order='C' gives OpenCV
        # contiguous HxWx3 slices.
        pixels = (
            x_denorm.detach().permute(0, 2, 3, 1).cpu().numpy()
            .astype(np.uint8, order='C')
        )

        # Pre-sized output also makes an empty batch yield an empty (0, 3*bins) array.
        histograms = np.empty((pixels.shape[0], 3 * self.bins), dtype=np.float32)
        for i, img in enumerate(pixels):
            hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)

            # Hue is binned over [0, 180); saturation and value over [0, 256).
            h_hist = np.histogram(hsv[:, :, 0], bins=self.bins, range=(0, 180))[0]
            s_hist = np.histogram(hsv[:, :, 1], bins=self.bins, range=(0, 256))[0]
            v_hist = np.histogram(hsv[:, :, 2], bins=self.bins, range=(0, 256))[0]

            hist = np.concatenate([h_hist, s_hist, v_hist]).astype(np.float32)
            histograms[i] = hist / (hist.sum() + 1e-8)

        return self.fc(torch.tensor(histograms, device=x.device, dtype=torch.float32))


class CardEmbeddingNetV2(nn.Module):
    """Card embedding network: CNN backbone + GeM pooling + colour-histogram branch.

    Args:
        embedding_dim: Size of the L2-normalised output embedding.
        color_dim: Size of the colour-histogram feature that is concatenated
            with the pooled backbone feature.
        pretrained: Passed through to ``timm.create_model``.

    ``forward`` returns a tensor of shape ``(batch, embedding_dim)`` whose rows
    have unit L2 norm.
    """

    def __init__(self, embedding_dim=512, color_dim=64, pretrained=True):
        super().__init__()

        # num_classes=0 and global_pool='' ask timm for the model without a
        # classifier or pooling, so the backbone returns a spatial feature map.
        self.backbone = timm.create_model('mobilenetv3_small_100', pretrained=pretrained,
                                          num_classes=0, global_pool='')

        # Probe the backbone once to discover its output channel count. The
        # probe runs in eval mode so the random input cannot update any
        # train-time state of the freshly loaded weights (e.g. BatchNorm
        # running statistics); the previous mode is restored afterwards.
        was_training = self.backbone.training
        self.backbone.eval()
        with torch.no_grad():
            self.num_features = self.backbone(torch.randn(1, 3, 224, 224)).shape[1]
        self.backbone.train(was_training)

        self.gem = GeM(p=3.0)
        self.color_branch = ColorHistogramBranch(bins=32, output_dim=color_dim)
        self.fc = nn.Linear(self.num_features + color_dim, embedding_dim)
        self.bn = nn.BatchNorm1d(embedding_dim)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        visual = self.gem(self.backbone(x))   # pooled backbone feature
        color = self.color_branch(x)          # colour-histogram feature
        combined = torch.cat([visual, color], dim=1)
        embedding = self.dropout(self.bn(self.fc(combined)))
        # Unit-length rows, so dot products between embeddings are cosine similarities.
        return F.normalize(embedding, p=2, dim=1)


# Smoke test: build the model with default arguments and push a random batch of two
# 224x224 images through it, then report the output shape and total parameter count.
model = CardEmbeddingNetV2()
out = model(torch.randn(2, 3, 224, 224))
print(f"‚úì Model output: {out.shape}, Params: {sum(p.numel() for p in model.parameters()):,}")

## 4️⃣ CosFace Loss

In [None]:
class CosFaceLoss(nn.Module):
    """Additive cosine-margin softmax loss.

    Holds one learnable weight row per class. ``forward`` computes the cosine
    between each embedding and every L2-normalised class row, subtracts
    ``margin`` from the cosine of the target class, multiplies by ``scale`` and
    applies cross-entropy.

    Args:
        num_classes: Number of rows in the class-weight matrix.
        embedding_dim: Length of each class-weight row.
        scale: Multiplier applied to the margin-adjusted cosines.
        margin: Value subtracted from the target-class cosine.

    Note that ``forward`` normalises only the class weights; the embeddings
    are used exactly as given.
    """

    def __init__(self, num_classes, embedding_dim, scale=30.0, margin=0.35):
        super().__init__()
        self.margin = margin
        self.scale = scale
        class_rows = torch.empty(num_classes, embedding_dim, dtype=torch.float32)
        nn.init.xavier_uniform_(class_rows)
        self.weight = nn.Parameter(class_rows)

    def forward(self, embeddings, labels):
        unit_rows = F.normalize(self.weight, p=2, dim=1)
        cosine = F.linear(embeddings, unit_rows)
        # 1.0 at each sample's target class, 0.0 everywhere else.
        target_mask = torch.zeros_like(cosine).scatter_(1, labels.view(-1, 1), 1.0)
        logits = (cosine - target_mask * self.margin) * self.scale
        return F.cross_entropy(logits, labels)

## 5️⃣ Dataset with Error Handling

In [None]:
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from pathlib import Path
import random


def get_train_transforms(size=224):
    """Build the training augmentation pipeline.

    Steps run in this order: resize to ``size`` x ``size``, mild geometric
    jitter, optional blur, photometric jitter and noise, normalisation with
    the mean/std listed below, then conversion to a tensor.
    """
    geometric = [
        A.Resize(size, size),
        A.Perspective(scale=(0.02, 0.05), p=0.3),
        A.Affine(scale=(0.97, 1.03), rotate=(-2, 2), p=0.3),
    ]
    # At most one of the two blur types is applied per sample.
    blur = A.OneOf(
        [A.GaussianBlur(blur_limit=(3, 5)), A.MotionBlur(blur_limit=(3, 5))],
        p=0.2,
    )
    photometric = [
        A.RandomBrightnessContrast(brightness_limit=0.15, contrast_limit=0.15, p=0.4),
        A.HueSaturationValue(hue_shift_limit=3, sat_shift_limit=10, val_shift_limit=10, p=0.2),
        A.GaussNoise(var_limit=(5, 20), p=0.2),
    ]
    to_model_input = [
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]
    return A.Compose([*geometric, blur, *photometric, *to_model_input])


def get_val_transforms(size=224):
    """Build the deterministic evaluation pipeline: resize, normalise, to tensor."""
    steps = [
        A.Resize(size, size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]
    return A.Compose(steps)


class CardDatasetWithRotation(Dataset):
    """One-class-per-image card dataset, expanded by fixed rotations.

    Every image file directly inside ``image_dir`` (non-recursive, suffix
    matched case-insensitively) is one class; its label is its position in
    the sorted file list. Each image contributes one sample per entry in
    ``rotations``.

    Args:
        image_dir: Directory containing the card images.
        transform: Optional callable invoked as ``transform(image=array)``;
            its ``['image']`` entry is returned as the sample.
        rotations: Rotation angles in degrees. 90, 180 and 270 are applied
            (90 = clockwise); any other value leaves the image unrotated.

    ``__getitem__`` returns ``(image, class_index)``. If a file cannot be
    loaded, a warning is printed and a randomly chosen replacement sample is
    returned instead; after ``MAX_LOAD_RETRIES`` failed replacements a
    ``RuntimeError`` is raised rather than retrying forever.
    """

    # Lower-cased file suffixes that count as card images.
    IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.png', '.webp')
    # Replacement samples to try after a failed load before giving up.
    MAX_LOAD_RETRIES = 10

    def __init__(self, image_dir, transform=None, rotations=(0, 90, 180, 270)):
        self.image_dir = Path(image_dir)
        self.transform = transform
        # Copy into a fresh list so instances never share (or mutate) the default.
        self.rotations = list(rotations)

        self.images = sorted(
            f for f in self.image_dir.iterdir()
            if f.suffix.lower() in self.IMAGE_SUFFIXES
        )

        self.num_cards = len(self.images)
        self.filename_to_idx = {img.stem: idx for idx, img in enumerate(self.images)}
        self.idx_to_filename = {idx: img.stem for idx, img in enumerate(self.images)}

        # (image index, rotation) pairs, grouped by image.
        self.samples = [
            (img_idx, rot)
            for img_idx in range(self.num_cards)
            for rot in self.rotations
        ]

        print(f"Dataset: {self.num_cards} cards √ó {len(self.rotations)} rotations = {len(self.samples)} samples")

    def __len__(self):
        return len(self.samples)

    def _load_sample(self, img_path, rotation):
        """Load one image as RGB, rotate it and run the transform."""
        with Image.open(img_path) as pil_img:
            img = np.array(pil_img.convert('RGB'))

        if rotation == 90:
            img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
        elif rotation == 180:
            img = cv2.rotate(img, cv2.ROTATE_180)
        elif rotation == 270:
            img = cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE)

        if self.transform:
            img = self.transform(image=img)['image']
        return img

    def __getitem__(self, idx):
        last_error = None
        for _ in range(self.MAX_LOAD_RETRIES + 1):
            # An out-of-range idx raises IndexError here, outside the try.
            img_idx, rotation = self.samples[idx]
            img_path = self.images[img_idx]
            try:
                return self._load_sample(img_path, rotation), img_idx
            except Exception as e:
                # Best effort: substitute a random sample so one unreadable
                # file does not abort an epoch. The replacement is drawn from
                # the whole dataset, so it may be a different card.
                print(f"\n‚ö†Ô∏è Error loading {img_path.name}: {e}")
                last_error = e
                idx = random.randint(0, len(self.samples) - 1)

        raise RuntimeError(
            f"Failed to load a sample after {self.MAX_LOAD_RETRIES} replacement attempts"
        ) from last_error

    def get_num_classes(self):
        return self.num_cards


def create_dataloaders(image_dir, batch_size=64, num_workers=2, val_split=0.15):
    """Create train/validation loaders split by card.

    Two datasets are built over the same directory: one with the training
    augmentations and all default rotations, one with the validation
    transforms and rotation 0 only. Card indices are shuffled with
    ``np.random.permutation`` (unseeded here); the last ``val_split`` fraction
    goes to validation.

    Args:
        image_dir: Directory with the card images.
        batch_size: Batch size for both loaders.
        num_workers: Accepted but not used; both loaders are created with
            ``num_workers=0``.
        val_split: Fraction of cards held out for validation.

    Returns:
        ``(train_loader, val_loader, num_classes, train_dataset)``.

    NOTE(review): labels are card indices, so the held-out validation cards
    are classes that never occur in the training subset - confirm that a
    classification loss on them is the intended validation signal.
    """
    train_ds = CardDatasetWithRotation(image_dir, get_train_transforms())
    val_ds = CardDatasetWithRotation(image_dir, get_val_transforms(), rotations=[0])

    card_count = train_ds.num_cards
    shuffled = np.random.permutation(card_count)
    cut = int((1 - val_split) * card_count)
    train_card_indices, val_card_indices = set(shuffled[:cut]), set(shuffled[cut:])

    def positions_for(dataset, wanted_cards):
        # Sample positions in `dataset` whose card belongs to `wanted_cards`.
        return [pos for pos, (card_idx, _) in enumerate(dataset.samples) if card_idx in wanted_cards]

    train_sample_indices = positions_for(train_ds, train_card_indices)
    val_sample_indices = positions_for(val_ds, val_card_indices)

    # Use num_workers=0 to avoid multiprocessing issues with error handling
    train_subset = torch.utils.data.Subset(train_ds, train_sample_indices)
    val_subset = torch.utils.data.Subset(val_ds, val_sample_indices)
    train_loader = DataLoader(
        train_subset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=0,
        pin_memory=True,
        drop_last=True,
    )
    val_loader = DataLoader(
        val_subset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=0,
        pin_memory=True,
    )

    print(f"Train: {len(train_sample_indices)} samples ({len(train_card_indices)} cards)")
    print(f"Val: {len(val_sample_indices)} samples ({len(val_card_indices)} cards)")

    return train_loader, val_loader, train_ds.get_num_classes(), train_ds


# Smoke test with a tiny batch size; skipped when the images have not been extracted.
# The training cell rebuilds these loaders with CONFIG['batch_size'].
if os.path.exists(IMAGE_DIR):
    train_loader, val_loader, num_classes, train_ds = create_dataloaders(IMAGE_DIR, batch_size=4)
    print(f"‚úì Classes: {num_classes}")

## 6️⃣ Training Configuration

In [None]:
# Hyper-parameters read by the training cells.
CONFIG = dict(
    epochs=100,            # upper bound on training epochs
    batch_size=64,
    learning_rate=1e-3,
    weight_decay=1e-4,
    embedding_dim=512,
    patience=15,           # epochs without a better validation loss before stopping
    unfreeze_epoch=6,      # epoch at which the backbone becomes trainable
)

print("Configuration:")
for name, value in CONFIG.items():
    print(f"  {name}: {value}")

## 7️⃣ Train!

In [None]:
from tqdm.notebook import tqdm

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")

CHECKPOINT_DIR = '/content/checkpoints'
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

train_loader, val_loader, num_classes, train_ds = create_dataloaders(IMAGE_DIR, CONFIG['batch_size'])

model = CardEmbeddingNetV2(embedding_dim=CONFIG['embedding_dim']).to(device)

# Warm-up phase: freeze the backbone so only the layers added on top are trained
# until CONFIG['unfreeze_epoch'].
for p in model.backbone.parameters():
    p.requires_grad = False

print(f"Trainable params: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}")

criterion = CosFaceLoss(num_classes, CONFIG['embedding_dim']).to(device)

# The CosFace class-weight matrix is a learnable parameter of the criterion, not of
# the model, so it has to be handed to the optimizer explicitly; otherwise it would
# stay at its random initialisation for the whole run.
trainable_params = [p for p in model.parameters() if p.requires_grad]
trainable_params += list(criterion.parameters())
optimizer = torch.optim.AdamW(trainable_params,
                              lr=CONFIG['learning_rate'], weight_decay=CONFIG['weight_decay'])
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=CONFIG['epochs'])

# Gradient scaler for mixed-precision training; disabled when running on CPU.
scaler = torch.cuda.amp.GradScaler(enabled=(device.type == 'cuda'))

In [None]:
# Training loop
# Per epoch: optionally unfreeze the backbone, run one mixed-precision training pass,
# compute the validation loss, step the LR schedule, checkpoint on improvement and
# stop early after CONFIG['patience'] epochs without improvement.
best_loss = float('inf')
patience_counter = 0
history = {'train': [], 'val': []}

for epoch in range(1, CONFIG['epochs'] + 1):
    if epoch == CONFIG['unfreeze_epoch']:
        print(f"\nüîì Unfreezing backbone...")
        for p in model.backbone.parameters():
            p.requires_grad = True
        # New optimizer with a 10x smaller learning rate for the backbone. The
        # criterion's class weights are included so they are updated as well.
        optimizer = torch.optim.AdamW([
            {'params': model.backbone.parameters(), 'lr': CONFIG['learning_rate'] / 10},
            {'params': model.gem.parameters()},
            {'params': model.color_branch.parameters()},
            {'params': model.fc.parameters()},
            {'params': model.bn.parameters()},
            {'params': criterion.parameters()},
        ], lr=CONFIG['learning_rate'], weight_decay=CONFIG['weight_decay'])
        # A scheduler is bound to the optimizer it was created with, so it must be
        # rebuilt too; otherwise scheduler.step() would keep adjusting the discarded
        # optimizer and the learning rates used from here on would never decay.
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=CONFIG['epochs'] - epoch + 1)

    model.train()
    train_loss = 0
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch}", leave=False):
        images, labels = images.to(device), labels.to(device)

        # Forward pass under autocast (mixed precision on CUDA)
        with torch.amp.autocast('cuda'):
            loss = criterion(model(images), labels)

        optimizer.zero_grad()
        scaler.scale(loss).backward()
        # Unscale first so the clipping threshold applies to the true gradient norm.
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
        scaler.step(optimizer)
        scaler.update()
        train_loss += loss.item()

    # max(..., 1) guards against an empty loader (e.g. fewer samples than one batch
    # with drop_last=True) instead of raising ZeroDivisionError.
    train_loss /= max(len(train_loader), 1)

    model.eval()
    val_loss = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            val_loss += criterion(model(images), labels).item()
    val_loss /= max(len(val_loader), 1)

    scheduler.step()
    history['train'].append(train_loss)
    history['val'].append(val_loss)

    print(f"Epoch {epoch}: Train={train_loss:.4f}, Val={val_loss:.4f}")

    if val_loss < best_loss:
        best_loss = val_loss
        patience_counter = 0
        # The criterion state is stored alongside the model so the learned class
        # weights are not lost; existing keys are unchanged.
        torch.save({
            'epoch': epoch, 'model_state_dict': model.state_dict(),
            'criterion_state_dict': criterion.state_dict(),
            'val_loss': val_loss, 'num_classes': num_classes, 'config': CONFIG
        }, f"{CHECKPOINT_DIR}/best_model.pth")
        print(f"  üíæ Saved best model")
    else:
        patience_counter += 1
        if patience_counter >= CONFIG['patience']:
            print(f"\n‚ö†Ô∏è Early stopping!")
            break

print(f"\n‚úì Training complete! Best val loss: {best_loss:.4f}")

## 8️⃣ Export & Save

In [None]:
import matplotlib.pyplot as plt

# Plot the per-epoch training and validation loss and save the figure next to the checkpoint.
fig, ax = plt.subplots(figsize=(10, 4))
for split_key, split_label in (('train', 'Train'), ('val', 'Val')):
    ax.plot(history[split_key], label=split_label)
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
ax.set_title('Training Progress')
fig.savefig(f"{CHECKPOINT_DIR}/training.png")
plt.show()

In [None]:
# Export to ONNX
# map_location makes the load work even when the checkpoint was written on a
# different device than the one in use now (e.g. saved on GPU, reloaded on CPU).
checkpoint = torch.load(f"{CHECKPOINT_DIR}/best_model.pth", map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

# Example input used to trace the model; the batch axis is declared dynamic below.
dummy = torch.randn(1, 3, 224, 224).to(device)
onnx_path = f"{CHECKPOINT_DIR}/card_recognition.onnx"

# NOTE(review): the colour-histogram branch computes its histogram with NumPy/OpenCV,
# outside the torch graph. A traced export presumably bakes the histogram of `dummy`
# into the ONNX file as a constant - compare ONNX and PyTorch outputs on real images
# before deploying.
torch.onnx.export(model, dummy, onnx_path,
                  input_names=['image'], output_names=['embedding'],
                  dynamic_axes={'image': {0: 'batch'}, 'embedding': {0: 'batch'}},
                  opset_version=11)

print(f"‚úì ONNX: {onnx_path} ({os.path.getsize(onnx_path)/1024/1024:.1f} MB)")

In [None]:
# Save to Google Drive
DRIVE_OUTPUT = '/content/drive/MyDrive/CardRecognition_Models'
os.makedirs(DRIVE_OUTPUT, exist_ok=True)

import shutil
for f in ['best_model.pth', 'card_recognition.onnx', 'training.png']:
    src = f"{CHECKPOINT_DIR}/{f}"
    if os.path.exists(src):
        shutil.copy(src, DRIVE_OUTPUT)

print(f"\n‚úì Saved to: {DRIVE_OUTPUT}")
!ls -lh {DRIVE_OUTPUT}

## 9️⃣ Test the Model

In [None]:
print("Building reference embeddings...")

# One embedding per card image, computed with the deterministic validation transform.
# reference_names[i] is the file stem belonging to row i of reference_embeddings.
model.eval()
reference_embeddings = []
reference_names = []
test_transform = get_val_transforms()
skipped = 0

with torch.no_grad():
    for img_path in tqdm(train_ds.images, desc="Building refs"):
        try:
            # Context manager closes the file handle as soon as the pixels are copied.
            with Image.open(img_path) as pil_img:
                img = np.array(pil_img.convert('RGB'))
            img_tensor = test_transform(image=img)['image'].unsqueeze(0).to(device)
            embedding = model(img_tensor)
        except Exception as e:
            # Best effort: skip unreadable images, but report them instead of
            # silently dropping cards from the reference set.
            print(f"  WARNING: skipping {img_path.name}: {e}")
            skipped += 1
            continue
        # Append both lists together so names and embeddings stay aligned.
        reference_embeddings.append(embedding.cpu())
        reference_names.append(img_path.stem)

if not reference_embeddings:
    raise RuntimeError("No reference embeddings could be built; check the image directory")

reference_embeddings = torch.cat(reference_embeddings, dim=0)
print(f"‚úì Built {len(reference_embeddings)} reference embeddings")
if skipped:
    print(f"WARNING: skipped {skipped} images that could not be embedded")

In [None]:
def identify_card(image_path, top_k=5):
    """Return the ``top_k`` reference cards most similar to the image at ``image_path``.

    The image is embedded with the module-level ``model`` and ``test_transform``
    and compared by cosine similarity against ``reference_embeddings``.

    Args:
        image_path: Path of the query image.
        top_k: Maximum number of matches to return; fewer are returned when
            there are fewer reference embeddings.

    Returns:
        A list of ``{'name': str, 'similarity': float}`` dicts, best match
        first, where ``similarity`` is the cosine similarity multiplied by 100.

    The model is used in whatever mode it is currently in; the cell that builds
    the reference embeddings calls ``model.eval()``.
    """
    # Context manager closes the file handle once the pixels are copied,
    # matching how the dataset class loads images.
    with Image.open(image_path) as pil_img:
        img = np.array(pil_img.convert('RGB'))
    img_tensor = test_transform(image=img)['image'].unsqueeze(0).to(device)

    with torch.no_grad():
        query_embedding = model(img_tensor).cpu()

    # The (1, D) query broadcasts against the (N, D) references, giving N similarities.
    similarities = F.cosine_similarity(query_embedding, reference_embeddings)
    # Plain ints index both the name list and the similarity tensor.
    top_indices = similarities.argsort(descending=True)[:top_k].tolist()

    return [{'name': reference_names[idx], 'similarity': similarities[idx].item() * 100} for idx in top_indices]

print("‚úì Identification function ready")

In [None]:
# Test on random cards
# The query images are drawn from the same files used to build the reference
# embeddings, so this checks self-retrieval rather than unseen photos.
import random
import matplotlib.pyplot as plt

sample_size = min(5, len(train_ds.images))
test_cards = random.sample(list(train_ds.images), sample_size)

fig, axes = plt.subplots(1, len(test_cards), figsize=(4*len(test_cards), 5))
if len(test_cards) == 1:
    # With a single column plt.subplots returns a bare Axes; wrap it so it can be iterated.
    axes = [axes]

banner = "="*60
print("\n" + banner)
print("IDENTIFICATION RESULTS")
print(banner)

correct = 0
for ax, card_path in zip(axes, test_cards):
    results = identify_card(card_path)
    actual_name = card_path.stem
    best = results[0]
    is_correct = best['name'] == actual_name
    correct += int(is_correct)

    ax.imshow(Image.open(card_path))
    ax.axis('off')
    verdict_icon = '‚úÖ' if is_correct else '‚ùå'
    ax.set_title(f"{verdict_icon} {best['similarity']:.1f}%", fontsize=12)

    print(f"\nCard: {actual_name}")
    status = 'CORRECT ‚úÖ' if is_correct else 'WRONG ‚ùå'
    print(f"Status: {status}")
    for rank, r in enumerate(results, start=1):
        # An arrow marks the row that matches the true card name.
        marker = '‚Üí' if r['name'] == actual_name else ' '
        print(f"  {marker} {rank}. {r['name']} ({r['similarity']:.1f}%)")

plt.tight_layout()
plt.savefig(f"{CHECKPOINT_DIR}/test_results.png")
plt.show()

print("\n" + banner)
print(f"Accuracy: {correct}/{len(test_cards)} = {100*correct/len(test_cards):.1f}%")
print(banner)

## ✅ Done!

**Files saved to Google Drive:**
- `best_model.pth` - PyTorch checkpoint
- `card_recognition.onnx` - For Jetson Nano
- `training.png` - Training curves

**Deploy to Jetson Nano:**
```bash
trtexec --onnx=card_recognition.onnx --saveEngine=card.engine --fp16
```