# 🎴 Card Recognition Training

**Train on Colab → Deploy on Jetson Nano**

## Features:
- MobileNetV3-Small backbone (60 FPS on Jetson)
- Color histogram branch (distinguishes similar cards)
- CosFace loss (stable for fine-grained recognition)
- Sim-to-Real augmentation (train on scans, run on camera)

---

## 1️⃣ Setup - Clone from GitHub

In [None]:
# Check GPU
!nvidia-smi

import torch
print(f"\nPyTorch: {torch.__version__}")
print(f"CUDA: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

In [None]:
# ===== CONFIGURE YOUR GITHUB REPO =====
GITHUB_REPO = "YOUR_USERNAME/YOUR_REPO"  # <-- CHANGE THIS!
BRANCH = "main"

# Clone repository
import os

WORK_DIR = "/content/card_recognition"
if os.path.exists(WORK_DIR):
    !rm -rf {WORK_DIR}

!git clone https://github.com/{GITHUB_REPO}.git {WORK_DIR}
os.chdir(WORK_DIR)
print(f"\n‚úì Working directory: {os.getcwd()}")
!ls -la

In [None]:
# Install dependencies (quietly). timm, albumentations, cv2
# (opencv-python-headless) and tqdm are imported by later cells; tensorboard
# and imagehash are not imported by any cell visible in this notebook dump.
!pip install -q timm albumentations opencv-python-headless tqdm tensorboard imagehash

## 2️⃣ Load Card Data from Google Drive

**Your card data should be in Google Drive as:**
```
MyDrive/CardData/card_images.zip
```

Where `card_images.zip` contains your `D:\SIAMESE DATASET\LARGE SCALE OUTPUT` folder.

In [None]:
# Mount Google Drive at /content/drive. The dataset paths configured in the
# next cell and the model output folder used at the end of the notebook both
# live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# ===== CONFIGURE YOUR DATA PATH =====
# Option 1: ZIP file (recommended for large datasets)
ZIP_PATH = "/content/drive/MyDrive/CardData/card_images.zip"

# Option 2: Folder (if already unzipped)
FOLDER_PATH = "/content/drive/MyDrive/CardData/LARGE SCALE OUTPUT"

# Local destination
IMAGE_DIR = "/content/card_recognition/Images"

import os
import shutil
import zipfile

# The archive is unpacked next to IMAGE_DIR, i.e. into the cloned repo folder.
EXTRACT_ROOT = os.path.dirname(IMAGE_DIR)
# Folder names that must never be mistaken for the dataset.
SKIP_DIRS = {'.git', '__pycache__', 'model'}
# A folder holding more sub-folders than this is treated as card data.
MIN_CARD_FOLDERS = 10


def _count_subdirs(path):
    """Return the number of immediate sub-directories of *path*."""
    with os.scandir(path) as entries:
        return sum(1 for entry in entries if entry.is_dir())


def _remove_path(path):
    """Delete whatever occupies *path*: a symlink (even dangling), file or directory.

    Symlinks are checked first so that a link to a directory is unlinked
    rather than having its target's contents deleted.
    """
    if os.path.islink(path) or os.path.isfile(path):
        os.remove(path)
    elif os.path.isdir(path):
        shutil.rmtree(path)


os.makedirs(EXTRACT_ROOT, exist_ok=True)

# Check which source exists
if os.path.exists(ZIP_PATH):
    print(f"Found ZIP: {ZIP_PATH}")
    print("Extracting... (this may take a few minutes)")

    with zipfile.ZipFile(ZIP_PATH, 'r') as zip_ref:
        # Remember the archive's top-level entries so the search below only
        # considers what this extraction produced, not unrelated repo folders
        # or leftovers from an earlier run of this cell.
        top_level = sorted({
            name.split('/', 1)[0] for name in zip_ref.namelist() if name.strip('/')
        })
        zip_ref.extractall(EXTRACT_ROOT)

    # Find the extracted folder that holds the per-card sub-folders
    data_dir = None
    for item in top_level:
        item_path = os.path.join(EXTRACT_ROOT, item)
        if item in SKIP_DIRS or not os.path.isdir(item_path):
            continue
        if _count_subdirs(item_path) > MIN_CARD_FOLDERS:  # Likely card data
            data_dir = item_path
            break

    if data_dir is None:
        print(
            f"Could not find a card-data folder (more than {MIN_CARD_FOLDERS} "
            f"sub-folders) among the ZIP's top-level entries: {top_level}"
        )
    else:
        if os.path.abspath(data_dir) != os.path.abspath(IMAGE_DIR):
            # os.rename cannot replace an existing non-empty directory, so
            # clear any previous extraction or symlink first.
            _remove_path(IMAGE_DIR)
            os.rename(data_dir, IMAGE_DIR)
        print(f"‚úì Extracted to: {IMAGE_DIR}")

elif os.path.exists(FOLDER_PATH):
    print(f"Found folder: {FOLDER_PATH}")
    print("Creating symlink... (fast, no copy)")

    # Handles a stale symlink, a dangling symlink (invisible to
    # os.path.exists) and a real directory (which os.remove cannot delete).
    _remove_path(IMAGE_DIR)
    os.symlink(FOLDER_PATH, IMAGE_DIR)
    print(f"‚úì Linked to: {IMAGE_DIR}")

else:
    print("‚ùå Data not found!")
    print(f"   Expected ZIP at: {ZIP_PATH}")
    print(f"   Or folder at: {FOLDER_PATH}")
    print("\nüì§ Please upload your card data to Google Drive.")

In [None]:
# Verify data: count the card folders and estimate images per card from a sample.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')  # same suffixes CardDataset accepts
SAMPLE_SIZE = 100  # number of folders inspected for the per-card average

if os.path.exists(IMAGE_DIR):
    # Sorted so the sample (and the printed examples) are the same on every run.
    folders = sorted(
        f for f in os.listdir(IMAGE_DIR) if os.path.isdir(os.path.join(IMAGE_DIR, f))
    )
    print(f"‚úì Found {len(folders):,} card folders")

    if folders:
        # Count images in the first SAMPLE_SIZE folders only; a full scan of a
        # large dataset would be slow.
        sampled = folders[:SAMPLE_SIZE]
        total = sum(
            1
            for folder in sampled
            for f in os.listdir(os.path.join(IMAGE_DIR, folder))
            # Case-insensitive so files such as "CARD.JPG" are counted too.
            if f.lower().endswith(IMAGE_EXTENSIONS)
        )

        avg_per_card = total / len(sampled)
        print(f"‚úì ~{avg_per_card:.1f} images per card")
        print(f"‚úì Sample folders: {folders[:5]}")
    else:
        # Previously this case crashed with ZeroDivisionError.
        print(f"No card folders inside {IMAGE_DIR}; check the extracted data.")
else:
    print("‚ùå Image directory not found!")

## 3️⃣ Model Architecture

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import timm
import numpy as np
import cv2


class GeM(nn.Module):
    """Generalized-mean (GeM) pooling over the two spatial dimensions.

    Each channel is reduced to ``mean(clamp(x, eps) ** p) ** (1 / p)``. The
    exponent ``p`` is a learnable one-element parameter initialised to the
    given value; ``eps`` is the lower clamp applied before the power.
    """

    def __init__(self, p=3.0, eps=1e-6):
        super().__init__()
        self.eps = eps
        self.p = nn.Parameter(torch.full((1,), float(p)))

    def forward(self, x):
        """Pool a ``(batch, channels, H, W)`` map down to ``(batch, channels)``."""
        powered = x.clamp(min=self.eps).pow(self.p)
        pooled = F.adaptive_avg_pool2d(powered, 1)
        restored = pooled.pow(1.0 / self.p)
        # Drop the two trailing singleton spatial dimensions.
        return restored.reshape(restored.size(0), -1)


class ColorHistogramBranch(nn.Module):
    """Explicit color feature extraction for similar card distinction.

    For every image in the batch an HSV histogram is built (``bins`` bins for
    each of hue, saturation and value, concatenated and normalised to sum to
    1) and projected to ``output_dim`` features by a small MLP.

    The histogram is computed with OpenCV/NumPy on the host, so no gradient
    flows back to the input through this branch and every call involves a
    device-to-host copy.

    NOTE(review): because the histogram is computed outside of torch, a traced
    export such as ``torch.onnx.export`` presumably cannot record it as a
    function of the input -- verify the exported model's color branch against
    the PyTorch model before deploying.

    Args:
        bins: Number of histogram bins per HSV channel.
        output_dim: Size of the feature vector returned per image.
    """

    def __init__(self, bins=32, output_dim=64):
        super().__init__()
        self.bins = bins
        self.fc = nn.Sequential(
            nn.Linear(bins * 3, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, output_dim)
        )
        # ImageNet normalisation constants, used to undo the input normalisation.
        self.register_buffer('mean', torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1))
        self.register_buffer('std', torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1))

    def forward(self, x):
        """Return color features of shape ``(batch, output_dim)``.

        ``x`` is expected to be a ``(batch, 3, H, W)`` RGB tensor normalised
        with the ImageNet mean/std stored in this module's buffers.
        """
        # Undo the normalisation and go back to 0..255 pixel values. Round
        # before the integer cast: a plain cast truncates, so a pixel that
        # comes back as 127.99999 would become 127 instead of 128.
        pixels = ((x.detach() * self.std + self.mean) * 255).round().clamp(0, 255)

        # One device-to-host transfer for the whole batch, laid out as
        # (batch, height, width, channel) for OpenCV.
        images = pixels.permute(0, 2, 3, 1).contiguous().to(torch.uint8).cpu().numpy()

        # Pre-sized output also makes an empty batch work (np.stack([]) raises).
        histograms = np.zeros((images.shape[0], self.bins * 3), dtype=np.float32)
        for row, img in zip(histograms, images):
            hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
            # Histogram ranges: hue over [0, 180), saturation/value over [0, 256).
            channel_hists = [
                np.histogram(hsv[:, :, channel], bins=self.bins, range=(0, upper))[0]
                for channel, upper in enumerate((180, 256, 256))
            ]
            hist = np.concatenate(channel_hists).astype(np.float32)
            # Normalise so the features do not depend on the image resolution.
            row[:] = hist / (hist.sum() + 1e-8)

        return self.fc(torch.from_numpy(histograms).to(x.device))


class CardEmbeddingNetV2(nn.Module):
    """Color-aware card embedding network.

    A MobileNetV3-Small feature map is pooled with GeM, concatenated with the
    output of the color-histogram branch, and projected through
    Linear -> BatchNorm1d -> Dropout to an L2-normalised embedding.

    Args:
        embedding_dim: Size of the returned embedding.
        color_dim: Size of the color-branch feature vector.
        pretrained: Passed to ``timm.create_model`` to load pretrained weights.
    """

    def __init__(self, embedding_dim=512, color_dim=64, pretrained=True):
        super().__init__()

        # Visual backbone: no classifier and no global pooling, so it returns
        # a feature map that GeM pools in forward().
        self.backbone = timm.create_model('mobilenetv3_small_100', pretrained=pretrained,
                                          num_classes=0, global_pool='')

        # Probe the backbone's output channel count with a dummy forward pass.
        # Run it in eval mode: in train mode BatchNorm layers update their
        # running statistics even under no_grad, which would nudge the
        # pretrained statistics towards this random input.
        was_training = self.backbone.training
        self.backbone.eval()
        with torch.no_grad():
            self.num_features = self.backbone(torch.randn(1, 3, 224, 224)).shape[1]
        self.backbone.train(was_training)

        self.gem = GeM(p=3.0)
        self.color_branch = ColorHistogramBranch(bins=32, output_dim=color_dim)
        self.fc = nn.Linear(self.num_features + color_dim, embedding_dim)
        self.bn = nn.BatchNorm1d(embedding_dim)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return unit-length embeddings of shape ``(batch, embedding_dim)``."""
        visual = self.gem(self.backbone(x))
        color = self.color_branch(x)
        combined = torch.cat([visual, color], dim=1)
        embedding = self.dropout(self.bn(self.fc(combined)))
        # L2-normalise each embedding along the feature dimension.
        return F.normalize(embedding, p=2, dim=1)


# Smoke test: build the network and push a random two-image batch through it.
model = CardEmbeddingNetV2()
out = model(torch.randn(2, 3, 224, 224))
param_count = sum(p.numel() for p in model.parameters())
print(f"‚úì Model output: {out.shape}")
print(f"‚úì Parameters: {param_count:,}")

## 4️⃣ CosFace Loss

In [None]:
class CosFaceLoss(nn.Module):
    """CosFace (additive cosine margin) classification loss.

    Holds one learnable weight vector per class. The logits are the cosine
    similarities between the embeddings and the L2-normalised class weights;
    ``margin`` is subtracted from the target-class cosine and the result is
    multiplied by ``scale`` before cross-entropy.
    """

    def __init__(self, num_classes, embedding_dim, scale=30.0, margin=0.35):
        super().__init__()
        self.scale = scale
        self.margin = margin
        class_weights = torch.empty(num_classes, embedding_dim)
        nn.init.xavier_uniform_(class_weights)
        self.weight = nn.Parameter(class_weights)

    def forward(self, embeddings, labels):
        """Return the mean loss for ``embeddings`` (batch, dim) and integer ``labels`` (batch,)."""
        class_dirs = F.normalize(self.weight, p=2, dim=1)
        cosine = F.linear(embeddings, class_dirs)
        # 1.0 at each sample's target class, 0.0 elsewhere.
        target_mask = F.one_hot(labels.view(-1), num_classes=cosine.size(1)).to(cosine.dtype)
        logits = (cosine - target_mask * self.margin) * self.scale
        return F.cross_entropy(logits, labels)

## 5️⃣ Dataset with Sim-to-Real Augmentation

In [None]:
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from pathlib import Path


def get_train_transforms(size=224):
    """Build the Sim-to-Real training pipeline (clean scans -> camera-like images).

    The image is resized to ``size`` x ``size``, then randomly perturbed
    (mild perspective/affine warps, blur, brightness/contrast and HSV shifts,
    Gaussian noise), normalised with the ImageNet mean/std and converted to
    a tensor.
    """
    geometric = [
        A.Perspective(scale=(0.02, 0.05), p=0.3),
        A.Affine(scale=(0.97, 1.03), rotate=(-2, 2), p=0.3),
    ]
    # At most one of the two blur types is applied per image.
    blur = A.OneOf(
        [A.GaussianBlur(blur_limit=(3, 5)), A.MotionBlur(blur_limit=(3, 5))],
        p=0.2,
    )
    photometric = [
        A.RandomBrightnessContrast(brightness_limit=0.15, contrast_limit=0.15, p=0.4),
        A.HueSaturationValue(hue_shift_limit=3, sat_shift_limit=10, val_shift_limit=10, p=0.2),
        A.GaussNoise(var_limit=(5, 20), p=0.2),
    ]
    finalize = [
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]
    return A.Compose([A.Resize(size, size), *geometric, blur, *photometric, *finalize])


def get_val_transforms(size=224):
    """Build the deterministic validation pipeline.

    Resizes to ``size`` x ``size``, normalises with the ImageNet mean/std and
    converts to a tensor; no random augmentation is applied.
    """
    steps = [
        A.Resize(size, size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]
    return A.Compose(steps)


class CardDataset(Dataset):
    """Image-folder dataset with one sub-directory of ``root_dir`` per card.

    Sub-directories are sorted by path and numbered from 0; every ``.png``,
    ``.jpg`` or ``.jpeg`` file (case-insensitive) directly inside a
    sub-directory becomes one sample labelled with that directory's index.

    Args:
        root_dir: Directory containing the per-card sub-directories.
        transform: Optional callable invoked as ``transform(image=array)``
            whose result's ``'image'`` entry is returned (the calling
            convention used by the albumentations pipelines in this notebook).
    """

    # Accepted file suffixes, compared in lower case.
    IMAGE_SUFFIXES = ('.png', '.jpg', '.jpeg')

    def __init__(self, root_dir, transform=None):
        self.root_dir = Path(root_dir)
        self.transform = transform

        # Sorted so the class indices are stable between runs.
        self.card_folders = sorted(d for d in self.root_dir.iterdir() if d.is_dir())
        self.class_to_idx = {f.name: i for i, f in enumerate(self.card_folders)}

        # Files are sorted as well: glob() yields them in filesystem order,
        # and a sample index must refer to the same file in every CardDataset
        # built from the same folder.
        self.samples = [
            (img, label)
            for label, folder in enumerate(self.card_folders)
            for img in sorted(folder.glob("*"))
            if img.suffix.lower() in self.IMAGE_SUFFIXES
        ]

        print(f"Dataset: {len(self.card_folders)} cards, {len(self.samples)} images")

    def __len__(self):
        """Return the number of image samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load sample ``idx`` as RGB and return ``(image, label)``."""
        path, label = self.samples[idx]
        # The context manager closes the underlying file promptly.
        with Image.open(path) as pil_img:
            img = np.array(pil_img.convert('RGB'))
        if self.transform is not None:
            img = self.transform(image=img)['image']
        return img, label

    def get_num_classes(self):
        """Return the number of card folders (classes), including empty ones."""
        return len(self.card_folders)


def create_dataloaders(root_dir, batch_size=64, num_workers=2, train_fraction=0.85, seed=None):
    """Split the images under ``root_dir`` at random into train/val loaders.

    The split is made per image, not per card, so images of the same card
    can land on both sides.

    Args:
        root_dir: Dataset root, one sub-folder per card.
        batch_size: Batch size for both loaders.
        num_workers: Worker processes per loader.
        train_fraction: Share of the images used for training (0 < f < 1).
        seed: Optional seed for a reproducible split. ``None`` draws the
            permutation from NumPy's global random state.

    Returns:
        ``(train_loader, val_loader, num_classes)``.

    Raises:
        ValueError: If ``train_fraction`` is outside (0, 1) or no images are found.
    """
    import copy

    if not 0.0 < train_fraction < 1.0:
        raise ValueError(f"train_fraction must be in (0, 1), got {train_fraction}")

    train_ds = CardDataset(root_dir, get_train_transforms())
    # Shallow copy instead of a second directory scan: both datasets share the
    # exact same sample list, so one index always refers to the same file, and
    # only the transform differs.
    val_ds = copy.copy(train_ds)
    val_ds.transform = get_val_transforms()

    n = len(train_ds)
    if n == 0:
        raise ValueError(f"No images found under {root_dir}")

    if seed is None:
        indices = np.random.permutation(n)
    else:
        indices = np.random.default_rng(seed).permutation(n)
    split = int(train_fraction * n)

    # Subset expects a sequence of plain ints.
    train_idx = indices[:split].tolist()
    val_idx = indices[split:].tolist()

    train_loader = DataLoader(
        torch.utils.data.Subset(train_ds, train_idx),
        batch_size=batch_size, shuffle=True, num_workers=num_workers,
        pin_memory=True, drop_last=True
    )
    val_loader = DataLoader(
        torch.utils.data.Subset(val_ds, val_idx),
        batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True
    )

    return train_loader, val_loader, train_ds.get_num_classes()


# Test
# Smoke test: build the loaders with a tiny batch size when the data is present.
# This binds the notebook-level train_loader / val_loader / num_classes; the
# training cell later rebuilds them with CONFIG['batch_size'].
if os.path.exists(IMAGE_DIR):
    train_loader, val_loader, num_classes = create_dataloaders(IMAGE_DIR, batch_size=4)
    print(f"‚úì Classes: {num_classes}")

## 6️⃣ Training Configuration

In [None]:
# Hyper-parameters read by the training cells.
CONFIG = {
    'epochs': 100,
    'batch_size': 64,
    'learning_rate': 1e-3,
    'weight_decay': 1e-4,
    'embedding_dim': 512,
    'patience': 15,        # epochs without a better val loss before early stopping
    'unfreeze_epoch': 6,   # the backbone is unfrozen at the start of this epoch
}

# Header line followed by one indented "key: value" line per setting.
print("Configuration:", *(f"  {k}: {v}" for k, v in CONFIG.items()), sep="\n")

## 7️⃣ Train!

In [None]:
from torch.cuda.amp import autocast, GradScaler
from tqdm.notebook import tqdm

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")

# Directories
CHECKPOINT_DIR = '/content/card_recognition/checkpoints'
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

# Data
train_loader, val_loader, num_classes = create_dataloaders(IMAGE_DIR, CONFIG['batch_size'])

# Model
model = CardEmbeddingNetV2(embedding_dim=CONFIG['embedding_dim']).to(device)

# Freeze backbone initially
for p in model.backbone.parameters():
    p.requires_grad = False

print(f"Trainable params: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}")

# Loss & Optimizer
criterion = CosFaceLoss(num_classes, CONFIG['embedding_dim']).to(device)
optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()),
                              lr=CONFIG['learning_rate'], weight_decay=CONFIG['weight_decay'])
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=CONFIG['epochs'])
scaler = GradScaler()

In [None]:
# Training loop
best_loss = float('inf')
patience_counter = 0
history = {'train': [], 'val': []}

for epoch in range(1, CONFIG['epochs'] + 1):
    # Unfreeze backbone
    if epoch == CONFIG['unfreeze_epoch']:
        print(f"\nüîì Unfreezing backbone...")
        for p in model.backbone.parameters():
            p.requires_grad = True
        optimizer = torch.optim.AdamW([
            {'params': model.backbone.parameters(), 'lr': CONFIG['learning_rate'] / 10},
            {'params': model.gem.parameters()},
            {'params': model.color_branch.parameters()},
            {'params': model.fc.parameters()},
            {'params': model.bn.parameters()},
        ], lr=CONFIG['learning_rate'], weight_decay=CONFIG['weight_decay'])
    
    # Train
    model.train()
    train_loss = 0
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch}", leave=False):
        images, labels = images.to(device), labels.to(device)
        
        with autocast():
            loss = criterion(model(images), labels)
        
        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
        scaler.step(optimizer)
        scaler.update()
        train_loss += loss.item()
    
    train_loss /= len(train_loader)
    
    # Validate
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            val_loss += criterion(model(images), labels).item()
    val_loss /= len(val_loader)
    
    scheduler.step()
    history['train'].append(train_loss)
    history['val'].append(val_loss)
    
    print(f"Epoch {epoch}: Train={train_loss:.4f}, Val={val_loss:.4f}")
    
    # Save best
    if val_loss < best_loss:
        best_loss = val_loss
        patience_counter = 0
        torch.save({
            'epoch': epoch, 'model_state_dict': model.state_dict(),
            'val_loss': val_loss, 'num_classes': num_classes, 'config': CONFIG
        }, f"{CHECKPOINT_DIR}/best_model.pth")
        print(f"  üíæ Saved best model")
    else:
        patience_counter += 1
        if patience_counter >= CONFIG['patience']:
            print(f"\n‚ö†Ô∏è Early stopping!")
            break

print(f"\n‚úì Training complete! Best val loss: {best_loss:.4f}")

## 8️⃣ Export & Save

In [None]:
# Plot training curves and save the figure next to the checkpoints.
import matplotlib.pyplot as plt

# The training loop numbers epochs from 1, so plot against 1..N rather than
# the default 0-based list index.
epochs_run = range(1, len(history['train']) + 1)

plt.figure(figsize=(10, 4))
plt.plot(epochs_run, history['train'], label='Train')
plt.plot(epochs_run, history['val'], label='Val')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.title('Training Progress')
# Save before show(): show() may leave an empty current figure behind.
plt.savefig(f"{CHECKPOINT_DIR}/training.png")
plt.show()

In [None]:
# Export to ONNX
# map_location lets the checkpoint load even when the current runtime's device
# differs from the one it was saved on (e.g. a CPU-only session after a restart).
checkpoint = torch.load(f"{CHECKPOINT_DIR}/best_model.pth", map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()  # disable dropout and use BatchNorm running statistics for export

dummy = torch.randn(1, 3, 224, 224).to(device)
onnx_path = f"{CHECKPOINT_DIR}/card_recognition.onnx"

# NOTE(review): the model's color-histogram branch runs through NumPy/OpenCV,
# which a traced export presumably records as constants computed from `dummy`
# rather than as a function of the input. Compare the ONNX output against the
# PyTorch output on real images before deploying.
torch.onnx.export(model, dummy, onnx_path,
                  input_names=['image'], output_names=['embedding'],
                  # Leave the batch dimension dynamic on both input and output.
                  dynamic_axes={'image': {0: 'batch'}, 'embedding': {0: 'batch'}},
                  opset_version=11)

print(f"‚úì ONNX: {onnx_path} ({os.path.getsize(onnx_path)/1024/1024:.1f} MB)")

In [None]:
# Save to Google Drive
DRIVE_OUTPUT = '/content/drive/MyDrive/CardRecognition_Models'
os.makedirs(DRIVE_OUTPUT, exist_ok=True)

import shutil
for f in ['best_model.pth', 'card_recognition.onnx', 'training.png']:
    src = f"{CHECKPOINT_DIR}/{f}"
    if os.path.exists(src):
        shutil.copy(src, DRIVE_OUTPUT)

print(f"\n‚úì Saved to: {DRIVE_OUTPUT}")
!ls -lh {DRIVE_OUTPUT}

---

## ✅ Done!

**Next steps:**
1. Download from Google Drive: `CardRecognition_Models/`
2. On Jetson Nano, convert to TensorRT:
   ```bash
   trtexec --onnx=card_recognition.onnx --saveEngine=card.engine --fp16
   ```
3. Run inference at ~50 FPS!