# 🎴 Card Recognition V6 - Controlled Environment

**Optimized for card sorting machine with camera**

- Card always in frame (minimal geometric distortion)
- Handles all orientations: 0°, 90°, 180°, 270°
- Focus on lighting, blur, noise variations
- Frozen backbone (prevents overfitting)

---

## 1️⃣ Setup

In [None]:
!nvidia-smi
import torch
print(f"PyTorch: {torch.__version__}, CUDA: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

In [None]:
# Install the notebook's third-party dependencies quietly (-q).
# albumentations is pinned to 1.3.1; the other packages are unpinned.
!pip install -q timm albumentations==1.3.1 opencv-python-headless tqdm imagehash

In [None]:
# Mount Google Drive at /content/drive; the dataset archive, the card JSON and
# the model output folder used below all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

## 2️⃣ Load Data

In [None]:
import os, zipfile, json, random
from pathlib import Path
from PIL import Image, ImageOps
from tqdm.notebook import tqdm
from collections import Counter
import numpy as np
import cv2

import shutil

# Inputs on Google Drive and working directories on the local runtime disk.
ZIP_PATH = "/content/drive/MyDrive/CardData/card_images.zip"
IMAGE_DIR = "/content/card_images"
CHECKPOINT_DIR = '/content/checkpoints'
DRIVE_OUTPUT = '/content/drive/MyDrive/CardRecognition_Models'
CARD_JSON = '/content/drive/MyDrive/CardData/card-flattened-with-phash.json'

for d in [CHECKPOINT_DIR, DRIVE_OUTPUT]:
    os.makedirs(d, exist_ok=True)

# The ".extracted" marker is written only after a complete extraction, so
# re-running this cell is a no-op once the images are in place.
extracted_marker = Path(IMAGE_DIR) / ".extracted"

if extracted_marker.exists():
    print("‚úì Already extracted")
elif os.path.exists(ZIP_PATH):
    print("Extracting...")
    # Start from an empty directory so a previously interrupted extraction
    # cannot leave partial files behind (this also drops any ".validated" marker).
    shutil.rmtree(IMAGE_DIR, ignore_errors=True)
    os.makedirs(IMAGE_DIR, exist_ok=True)
    with zipfile.ZipFile(ZIP_PATH, 'r') as z:
        z.extractall(IMAGE_DIR)
    extracted_marker.touch()
    print("‚úì Done")
else:
    # Previously this case fell through silently and later cells failed with
    # confusing errors; say what is missing instead.
    print(f"WARNING: archive not found at {ZIP_PATH} and {IMAGE_DIR} has no "
          f"extracted images - check that Drive is mounted and the path is correct")

In [None]:
# One-time integrity pass over the extracted images. Files that fail to decode
# are deleted so the dataset never has to deal with them; a ".validated"
# marker makes re-runs a no-op.
if os.path.exists(f"{IMAGE_DIR}/.validated"):
    print("‚úì Already validated")
else:
    print("Validating...")
    corrupted = []
    for p in tqdm(list(Path(IMAGE_DIR).glob('*'))):
        if p.suffix.lower() not in ['.jpg', '.jpeg', '.png', '.webp']:
            continue
        try:
            # Two passes: verify() first, then a fresh open + load() to force
            # a full decode of the pixel data.
            with Image.open(p) as img:
                img.verify()
            with Image.open(p) as img:
                img.load()
        except Exception:
            # `except Exception` (not a bare `except`) so that interrupting
            # the cell with KeyboardInterrupt cannot delete a healthy image.
            corrupted.append(p.name)
            p.unlink()
    Path(f"{IMAGE_DIR}/.validated").touch()
    print(f"‚úì Removed {len(corrupted)} corrupted")

In [None]:
# Load the card metadata. The encoding is given explicitly so that decoding
# does not depend on the runtime's locale.
with open(CARD_JSON, 'r', encoding='utf-8') as f:
    all_cards = json.load(f)

# Index the records by 'printing_unique_id'; image file stems are looked up
# against this key elsewhere in the notebook.
card_lookup = {c['printing_unique_id']: c for c in all_cards}

def get_card_name(printing_id):
    """Return a human-readable label for a printing id.

    The label is the card's 'name' from ``card_lookup``; when the id is
    unknown (or the record has no name) the first 20 characters of the id are
    used instead. A non-empty 'foiling' value other than 'S' is appended in
    parentheses ('S' is presumably the standard, non-foil printing - confirm
    against the data source).
    """
    record = card_lookup.get(printing_id, {})
    label = record.get('name', printing_id[:20])
    foiling = record.get('foiling', '')
    if not foiling or foiling == 'S':
        return label
    return f"{label} ({foiling})"

print(f"‚úì Loaded {len(all_cards):,} cards")

## 3️⃣ Controlled Environment Augmentation

In [None]:
import albumentations as A
from albumentations.pytorch import ToTensorV2

def get_controlled_augmentation(size=224):
    """Build the training-time augmentation pipeline.

    The pipeline resizes to ``size`` x ``size``, applies a small amount of
    geometric jitter, then at most one transform from each of the blur,
    lighting, colour and noise groups, and finally normalises with the
    ImageNet mean/std and converts to a tensor. The 90/180/270 degree
    rotations are not part of this pipeline; the dataset applies them.
    """
    # Slight placement variation only: small rotation and scale changes.
    geometric = [
        A.Rotate(limit=5, border_mode=cv2.BORDER_CONSTANT, value=0, p=0.3),
        A.Affine(scale=(0.95, 1.05), p=0.2),
    ]

    # Camera focus / slight motion.
    blur = A.OneOf(
        [
            A.MotionBlur(blur_limit=(3, 5), p=1.0),
            A.GaussianBlur(blur_limit=(3, 5), p=1.0),
        ],
        p=0.4,
    )

    # Ambient light conditions.
    lighting = A.OneOf(
        [
            A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=1.0),
            A.RandomGamma(gamma_limit=(70, 130), p=1.0),
            A.CLAHE(clip_limit=3.0, p=1.0),
        ],
        p=0.6,
    )

    # White balance / light-source colour.
    color = A.OneOf(
        [
            A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=20, val_shift_limit=15, p=1.0),
            A.RGBShift(r_shift_limit=15, g_shift_limit=15, b_shift_limit=15, p=1.0),
            A.ColorJitter(brightness=0.15, contrast=0.15, saturation=0.15, hue=0.05, p=1.0),
        ],
        p=0.5,
    )

    # Camera sensor noise.
    noise = A.OneOf(
        [
            A.GaussNoise(p=1.0),
            A.ISONoise(intensity=(0.1, 0.3), p=1.0),
        ],
        p=0.3,
    )

    pipeline = [
        A.Resize(size, size),
        *geometric,
        blur,
        lighting,
        color,
        noise,
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]
    return A.Compose(pipeline)

def get_val_transforms(size=224):
    """Build the deterministic pipeline used for validation and inference.

    Only resizes to ``size`` x ``size``, normalises with the same mean/std as
    the training pipeline, and converts to a tensor - no random augmentation.
    """
    steps = [
        A.Resize(size, size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]
    return A.Compose(steps)

print("‚úì Controlled environment augmentation ready")
print("   Rotations: 0¬∞, 90¬∞, 180¬∞, 270¬∞ (in dataset)")
print("   Effects: blur, lighting, color, noise (in augmentation)")

In [None]:
# Visualize ALL rotations and augmentations
import matplotlib.pyplot as plt

# Pick one sample card. Accept every extension CardDataset loads (not only
# PNG) and fail with a clear message instead of an IndexError when the image
# folder is empty.
sample_images = sorted(
    p for p in Path(IMAGE_DIR).glob('*')
    if p.suffix.lower() in ('.jpg', '.jpeg', '.png', '.webp')
)[:1]  # One card
if not sample_images:
    raise FileNotFoundError(f"No card images found in {IMAGE_DIR}")
aug = get_controlled_augmentation()

print("="*60)
print("ROTATION SAMPLES (0¬∞, 90¬∞, 180¬∞, 270¬∞)")
print("="*60)

fig, axes = plt.subplots(1, 4, figsize=(16, 4))
with Image.open(sample_images[0]) as pil_img:
    img = np.array(pil_img.convert('RGB'))

# Same cv2 rotation codes that CardDataset uses for 90 / 180 / 270.
rotations = [
    (img, '0¬∞ (Original)'),
    (cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE), '90¬∞'),
    (cv2.rotate(img, cv2.ROTATE_180), '180¬∞'),
    (cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE), '270¬∞'),
]

for i, (rotated, label) in enumerate(rotations):
    axes[i].imshow(rotated)
    axes[i].set_title(label, fontsize=14)
    axes[i].axis('off')

plt.suptitle('Card Rotations (Applied in Dataset)', fontsize=16)
plt.tight_layout()
plt.show()

print("\n" + "="*60)
print("AUGMENTATION SAMPLES (lighting, blur, color, noise)")
print("="*60)

fig, axes = plt.subplots(2, 4, figsize=(16, 8))

# Mean/std used by A.Normalize in the pipeline; needed to undo the
# normalisation for display. Built once instead of once per panel.
norm_mean = np.array([0.485, 0.456, 0.406])
norm_std = np.array([0.229, 0.224, 0.225])

for row in range(2):
    for col in range(4):
        augmented = aug(image=img)['image']
        # Tensor is channel-first; matplotlib wants channel-last.
        display = augmented.permute(1, 2, 0).numpy()
        display = np.clip(display * norm_std + norm_mean, 0, 1)
        axes[row, col].imshow(display)
        axes[row, col].set_title(f'Aug {row*4 + col + 1}')
        axes[row, col].axis('off')

plt.suptitle('Augmentation Effects (Lighting, Blur, Color, Noise)', fontsize=16)
plt.tight_layout()
plt.show()

print("\n‚úì Each card √ó 4 rotations = 4 training samples")
print("‚úì Each sample gets random augmentation (lighting/blur/color/noise)")

## 4️⃣ Model (Frozen Backbone)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import timm

class GeM(nn.Module):
    """Generalized-mean pooling with a learnable exponent ``p``.

    Computes ``mean(clamp(x, 1e-6) ** p) ** (1 / p)`` over the spatial
    dimensions and flattens the result to one row per batch element.
    Assumes ``x`` is a (batch, channels, height, width) feature map.
    """

    def __init__(self, p=3.0):
        super().__init__()
        # Stored as a 1-element parameter so the exponent is trained too.
        self.p = nn.Parameter(torch.ones(1) * p)

    def forward(self, x):
        batch = x.size(0)
        # Clamp first: non-positive values raised to a fractional/learned
        # power would otherwise produce NaNs.
        powered = x.clamp(min=1e-6).pow(self.p)
        pooled = F.adaptive_avg_pool2d(powered, 1)
        return pooled.pow(1. / self.p).view(batch, -1)

class CardNet(nn.Module):
    """Card embedding network: frozen MobileNetV3 backbone + trainable head.

    The timm ``mobilenetv3_large_100`` backbone (pretrained, no classifier,
    no global pooling) produces a feature map that is pooled with ``GeM`` and
    projected by a small MLP head to an ``emb_dim``-dimensional embedding,
    which is L2-normalised.

    The backbone is frozen in two ways: its parameters have
    ``requires_grad=False`` and it is kept permanently in eval mode (see
    ``train``), so normalisation running statistics and any dropout inside it
    do not change during training.

    Args:
        emb_dim: size of the output embedding.
    """

    def __init__(self, emb_dim=512):
        super().__init__()
        self.backbone = timm.create_model('mobilenetv3_large_100', pretrained=True,
                                          num_classes=0, global_pool='')
        # Freeze backbone permanently
        for p in self.backbone.parameters():
            p.requires_grad = False
        # Switch to eval BEFORE the probe forward pass below: in training
        # mode BatchNorm layers update their running statistics even under
        # no_grad, so probing with random noise would corrupt the pretrained
        # statistics.
        self.backbone.eval()

        # Probe the channel count of the backbone output with a dummy batch.
        with torch.no_grad():
            self.n_feat = self.backbone(torch.randn(1, 3, 224, 224)).shape[1]

        self.gem = GeM()
        self.head = nn.Sequential(
            nn.Linear(self.n_feat, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, emb_dim),
            nn.BatchNorm1d(emb_dim),
        )

    def train(self, mode=True):
        """Set train/eval mode on the head and pooling, never on the backbone.

        ``requires_grad=False`` only stops gradient updates; without this
        override a plain ``model.train()`` would put the backbone's
        normalisation layers back into training mode and let their running
        statistics drift.
        """
        super().train(mode)
        self.backbone.eval()
        return self

    def forward(self, x):
        """Return L2-normalised embeddings of shape (batch, emb_dim)."""
        features = self.gem(self.backbone(x))
        emb = self.head(features)
        return F.normalize(emb, p=2, dim=1)

print("‚úì CardNet ready (backbone FROZEN)")

## 5️⃣ Dataset

In [None]:
from torch.utils.data import Dataset, DataLoader

class CardDataset(Dataset):
    """One class per card image, expanded over a set of right-angle rotations.

    Every image file in ``image_dir`` (jpg/jpeg/png/webp, sorted by path) is
    one class whose label is its index in ``self.images``. Each image
    contributes one sample per entry in ``rotations``.

    Args:
        image_dir: directory containing the card images (not searched
            recursively).
        transform: albumentations-style callable invoked as
            ``transform(image=array)['image']``.
        rotations: rotation angles in degrees. 90, 180 and 270 are applied
            with ``cv2.rotate``; any other value leaves the image unrotated.
    """

    _EXTENSIONS = ('.jpg', '.jpeg', '.png', '.webp')
    # Upper bound on substitute samples tried when an image cannot be loaded.
    _MAX_RETRIES = 10

    def __init__(self, image_dir, transform, rotations=(0, 90, 180, 270)):
        # Tuple default avoids the shared-mutable-default pitfall; callers may
        # still pass a list.
        rotations = tuple(rotations)
        self.images = sorted(f for f in Path(image_dir).iterdir()
                             if f.suffix.lower() in self._EXTENSIONS)
        self.samples = [(i, r) for i in range(len(self.images)) for r in rotations]
        self.transform = transform
        print(f"Dataset: {len(self.images)} cards √ó {len(rotations)} rotations = {len(self.samples)} samples")

    def __len__(self):
        return len(self.samples)

    def _load(self, img_idx, rot):
        """Load, rotate and transform one sample; returns (tensor, label)."""
        with Image.open(self.images[img_idx]) as pil_img:
            img = np.array(pil_img.convert('RGB'))
        # Apply 90/180/270 degree rotations
        if rot == 90:
            img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
        elif rot == 180:
            img = cv2.rotate(img, cv2.ROTATE_180)
        elif rot == 270:
            img = cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE)
        return self.transform(image=img)['image'], img_idx

    def __getitem__(self, idx):
        """Return ``(image_tensor, label)`` for sample ``idx``.

        If the sample cannot be loaded, a randomly chosen substitute sample
        (with its own label) is returned instead so one bad file does not
        abort an epoch. The number of substitutions is bounded; previously an
        unreadable dataset recursed without limit.

        Raises:
            IndexError: if ``idx`` is out of range.
            RuntimeError: if ``_MAX_RETRIES`` substitutes in a row also fail.
        """
        # Looked up outside the try so an invalid index is reported as such
        # instead of being silently replaced by a random sample.
        img_idx, rot = self.samples[idx]
        last_error = None
        for _ in range(self._MAX_RETRIES + 1):
            try:
                return self._load(img_idx, rot)
            except Exception as err:
                last_error = err
                img_idx, rot = self.samples[random.randint(0, len(self.samples) - 1)]
        raise RuntimeError(
            f"Could not load a sample after {self._MAX_RETRIES} substitutions"
        ) from last_error

    def get_num_classes(self):
        """Number of classes, i.e. number of card images found."""
        return len(self.images)

print("‚úì CardDataset ready (with 0¬∞, 90¬∞, 180¬∞, 270¬∞ rotations)")

## 6️⃣ Accuracy Monitor

In [None]:
class AccuracyMonitor:
    """Retrieval-accuracy check: nearest-reference lookup on a random subset.

    Reference embeddings are computed from the clean, upright images. A
    random subset of ``n_test`` images is then embedded again as queries,
    each under a randomly chosen rotation, and ranked against the references
    by cosine similarity.

    Args:
        model: embedding network; called on a (1, C, H, W) tensor.
        images: sequence of image paths; the file stem is the card id.
        device: device the model lives on.
        n_test: maximum number of query images per accuracy check.
        query_rotations: rotations (degrees) sampled for the queries.
            Querying with the exact reference image would match itself and
            always score 100%, so by default queries are rotated like the
            training data. Pass ``(0,)`` to query with unrotated images.
    """

    def __init__(self, model, images, device, n_test=100,
                 query_rotations=(0, 90, 180, 270)):
        self.model = model
        self.images = images
        self.device = device
        self.n_test = n_test
        self.query_rotations = tuple(query_rotations)
        self.transform = get_val_transforms()
        self.ref_embeddings = None
        self.ref_ids = None
        self.confusions = Counter()

    def _embed(self, img_path, rot=0):
        """Embed one image (optionally rotated by 90/180/270); returns a CPU tensor."""
        with Image.open(img_path) as pil_img:
            img = np.array(pil_img.convert('RGB'))
        if rot == 90:
            img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
        elif rot == 180:
            img = cv2.rotate(img, cv2.ROTATE_180)
        elif rot == 270:
            img = cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE)
        batch = self.transform(image=img)['image'].unsqueeze(0).to(self.device)
        return self.model(batch).cpu()

    def build_references(self):
        """(Re)compute reference embeddings for every readable image.

        Leaves the model in eval mode. Unreadable images are left out of the
        reference set. ``ref_embeddings`` is ``None`` when nothing could be
        embedded.
        """
        self.model.eval()
        embeddings, ids = [], []
        with torch.no_grad():
            for img_path in self.images:
                try:
                    emb = self._embed(img_path)
                except Exception:
                    # Best effort: skip files that cannot be read/embedded.
                    continue
                embeddings.append(emb)
                ids.append(img_path.stem)
        self.ref_embeddings = torch.cat(embeddings, dim=0) if embeddings else None
        self.ref_ids = ids

    def compute_accuracy(self, rebuild=True):
        """Return top-1/5/10 retrieval accuracy (percent) on a random subset.

        Args:
            rebuild: recompute the reference embeddings with the current
                model weights first. This defaults to True because references
                cached from an earlier epoch are not comparable with queries
                embedded by the updated model.

        Returns:
            dict with keys 'top1', 'top5', 'top10'. Queries that fail to load
            count as misses. All zeros when there is nothing to evaluate.
        """
        if rebuild or self.ref_embeddings is None:
            self.build_references()

        self.model.eval()
        self.confusions.clear()

        nothing = {'top1': 0.0, 'top5': 0.0, 'top10': 0.0}
        if self.ref_embeddings is None:
            return nothing

        test_idx = random.sample(range(len(self.images)), min(self.n_test, len(self.images)))
        n = len(test_idx)
        if n == 0:
            return nothing
        top1, top5, top10 = 0, 0, 0

        with torch.no_grad():
            for idx in test_idx:
                img_path = self.images[idx]
                actual_id = img_path.stem
                try:
                    query = self._embed(img_path, random.choice(self.query_rotations))
                except Exception:
                    continue  # unreadable query: counted as a miss
                sims = F.cosine_similarity(query, self.ref_embeddings)
                top_idx = sims.argsort(descending=True)[:10]
                top_ids = [self.ref_ids[i] for i in top_idx]

                if actual_id == top_ids[0]:
                    top1 += 1
                else:
                    self.confusions[(get_card_name(actual_id), get_card_name(top_ids[0]))] += 1
                if actual_id in top_ids[:5]:
                    top5 += 1
                if actual_id in top_ids[:10]:
                    top10 += 1

        return {'top1': 100*top1/n, 'top5': 100*top5/n, 'top10': 100*top10/n}

    def print_confusions(self, n=5):
        """Print the ``n`` most frequent (actual, predicted) mix-ups from the last check."""
        top = self.confusions.most_common(n)
        if top:
            print(f"\n   üîÑ TOP {len(top)} CONFUSIONS:")
            for (actual, predicted), count in top:
                print(f"      '{actual}' ‚Üí '{predicted}' ({count}x)")

print("‚úì AccuracyMonitor ready")

## 7️⃣ Training Setup

In [None]:
class CosFaceLoss(nn.Module):
    """Additive-margin cosine softmax loss with learnable class weights.

    Logits are the cosine similarities between the embeddings and the
    L2-normalised class weight rows. ``margin`` is subtracted from the
    target-class cosine and everything is multiplied by ``scale`` before
    cross-entropy. The embeddings are not normalised here - presumably the
    caller passes unit-length embeddings.
    """

    def __init__(self, num_classes, emb_dim, scale=30.0, margin=0.35):
        super().__init__()
        self.scale = scale
        self.margin = margin
        # One weight row per class; contents are overwritten by the init below.
        self.weight = nn.Parameter(torch.FloatTensor(num_classes, emb_dim))
        nn.init.xavier_uniform_(self.weight)

    def forward(self, emb, labels):
        class_dirs = F.normalize(self.weight, p=2, dim=1)
        cos = F.linear(emb, class_dirs)
        # 1.0 at each row's target class, 0.0 elsewhere.
        target_mask = torch.zeros_like(cos)
        target_mask.scatter_(1, labels.view(-1, 1), 1.0)
        logits = (cos - target_mask * self.margin) * self.scale
        return F.cross_entropy(logits, labels)

# Training hyper-parameters read by the setup and training cells.
CONFIG = dict(
    epochs=50,           # maximum number of epochs; also the cosine schedule length
    batch_size=64,
    lr=5e-4,
    weight_decay=1e-4,
    emb_dim=512,         # embedding size shared by the model and the loss
    patience=20,         # epochs without a better validation loss before stopping
    check_interval=5,    # run the accuracy monitor every N epochs
)
print("‚úì Config:", CONFIG)

In [None]:
# Create datasets
# Training views are rotated + augmented; validation views are the clean,
# upright images.
train_ds = CardDataset(IMAGE_DIR, get_controlled_augmentation())
val_ds = CardDataset(IMAGE_DIR, get_val_transforms(), rotations=[0])

# Split
# Every card is its own class (label = image index) backed by a single source
# image. Holding whole cards out of training therefore leaves their class
# weights in the loss without a single positive example, and the validation
# loss on those cards cannot guide checkpointing or early stopping.
# Instead: train on augmented views of ALL cards and validate on the clean
# views of a reproducible 15% subset.
# NOTE(review): validation now measures how well clean images of trained
# cards are classified, not generalisation to unseen cards.
rng = np.random.default_rng(42)  # fixed seed -> same validation subset on every run
num_cards = len(train_ds.images)
indices = rng.permutation(num_cards)
split = int(0.85 * num_cards)
train_idx = set(range(num_cards))
val_idx = {int(i) for i in indices[split:]}

train_samples = [i for i, (c, _) in enumerate(train_ds.samples) if c in train_idx]
val_samples = [i for i, (c, _) in enumerate(val_ds.samples) if c in val_idx]

# drop_last on the training loader only (keeps every training batch full).
train_loader = DataLoader(torch.utils.data.Subset(train_ds, train_samples),
                          batch_size=CONFIG['batch_size'], shuffle=True,
                          num_workers=2, pin_memory=True, drop_last=True)
val_loader = DataLoader(torch.utils.data.Subset(val_ds, val_samples),
                        batch_size=CONFIG['batch_size'], shuffle=False,
                        num_workers=2, pin_memory=True)

num_classes = train_ds.get_num_classes()
print(f"‚úì Train: {len(train_samples):,} | Val: {len(val_samples):,} | Classes: {num_classes:,}")

In [None]:
# Use the GPU when one is available and build the embedding network on it.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CardNet(emb_dim=CONFIG['emb_dim']).to(device)

# Only parameters that still require gradients are handed to the optimizer.
trainable_params = list(filter(lambda p: p.requires_grad, model.parameters()))
n_trainable = sum(p.numel() for p in trainable_params)
print(f"‚úì Trainable parameters: {n_trainable:,}")

# The loss owns the per-class weight matrix, so its parameters are optimised
# together with the model's.
criterion = CosFaceLoss(num_classes, CONFIG['emb_dim']).to(device)
optimizer = torch.optim.AdamW(
    trainable_params + list(criterion.parameters()),
    lr=CONFIG['lr'],
    weight_decay=CONFIG['weight_decay'],
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=CONFIG['epochs'])
scaler = torch.amp.GradScaler('cuda')

accuracy_monitor = AccuracyMonitor(model, train_ds.images, device, n_test=100)

print("‚úì Ready to train!")

## 8️⃣ Training

In [None]:
best_loss = float('inf')
patience_counter = 0
history = {'train': [], 'val': [], 'top1': [], 'top5': []}
RESUME_PATH = f"{CHECKPOINT_DIR}/best_model.pth"

# Autocast is only enabled on CUDA; on a CPU runtime the loop runs in full
# precision instead of requesting CUDA autocast on a machine without it.
use_amp = device.type == 'cuda'
# Every tensor the optimizer updates (trainable model parameters AND the loss's
# class weights), so gradient clipping covers all of them.
optimized_params = [p for group in optimizer.param_groups for p in group['params']]

print("="*70)
print("V6: CONTROLLED ENVIRONMENT TRAINING")
print("   Rotations: 0¬∞, 90¬∞, 180¬∞, 270¬∞")
print("   Augmentation: lighting, blur, color, noise")
print("="*70)

for epoch in range(1, CONFIG['epochs'] + 1):
    # Training
    model.train()
    train_loss = 0
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch}", leave=False):
        images, labels = images.to(device), labels.to(device)
        with torch.amp.autocast(device.type, enabled=use_amp):
            loss = criterion(model(images), labels)
        optimizer.zero_grad()
        scaler.scale(loss).backward()
        # Unscale before clipping so the threshold applies to true gradients.
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(optimized_params, 5.0)
        scaler.step(optimizer)
        scaler.update()
        train_loss += loss.item()
    # max(..., 1): an empty loader (e.g. fewer samples than one full batch
    # with drop_last) must not raise ZeroDivisionError.
    train_loss /= max(len(train_loader), 1)

    # Validation
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            val_loss += criterion(model(images), labels).item()
    val_loss /= max(len(val_loader), 1)

    scheduler.step()
    history['train'].append(train_loss)
    history['val'].append(val_loss)

    # Accuracy check
    if epoch % CONFIG['check_interval'] == 0:
        acc = accuracy_monitor.compute_accuracy()
        history['top1'].append(acc['top1'])
        history['top5'].append(acc['top5'])
        print(f"\nEpoch {epoch}: Loss={train_loss:.2f}/{val_loss:.2f} | "
              f"Top-1: {acc['top1']:.1f}% | Top-5: {acc['top5']:.1f}%")
        accuracy_monitor.print_confusions(n=5)
    else:
        print(f"Epoch {epoch}: Train={train_loss:.4f}, Val={val_loss:.4f}")

    # Save best
    if val_loss < best_loss:
        best_loss = val_loss
        patience_counter = 0
        # The loss's class weights are saved too so training can be resumed;
        # existing readers of 'model_state_dict' are unaffected.
        torch.save({'epoch': epoch, 'model_state_dict': model.state_dict(),
                    'criterion_state_dict': criterion.state_dict(),
                    'val_loss': val_loss, 'num_classes': num_classes}, RESUME_PATH)
        print("  üíæ Saved")
    else:
        patience_counter += 1
        if patience_counter >= CONFIG['patience']:
            print("\n‚ö†Ô∏è Early stop!")
            break

print(f"\n‚úì Done! Best: {best_loss:.4f}")

## 9️⃣ Results

In [None]:
import matplotlib.pyplot as plt

# Two panels: loss curves on the left, periodic retrieval accuracy on the right.
fig, axes = plt.subplots(1, 2, figsize=(14, 4))
loss_ax, acc_ax = axes

for key, label in (('train', 'Train'), ('val', 'Val')):
    loss_ax.plot(history[key], label=label)
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()
loss_ax.set_title('Loss')

if history['top1']:
    # Accuracy is recorded once every `check_interval` epochs, so its x-axis
    # is the list of epochs at which a check ran.
    interval = CONFIG['check_interval']
    x = list(range(interval, len(history['top1']) * interval + 1, interval))
    acc_ax.plot(x, history['top1'], 'o-', label='Top-1')
    acc_ax.plot(x, history['top5'], 's-', label='Top-5')
    acc_ax.set_xlabel('Epoch')
    acc_ax.set_ylabel('Accuracy %')
    acc_ax.legend()
    acc_ax.set_title('Accuracy')

plt.tight_layout()
plt.savefig(f"{CHECKPOINT_DIR}/training.png")
plt.show()

In [None]:
import shutil
# Copy whichever artifacts exist from the local checkpoint folder to Drive.
for fname in ('best_model.pth', 'training.png'):
    src = f"{CHECKPOINT_DIR}/{fname}"
    if not os.path.exists(src):
        continue
    shutil.copy(src, DRIVE_OUTPUT)
    print(f"‚úì Saved {fname} to Drive")

## 🔟 Test with Real Images

In [None]:
print("Building reference embeddings...")
# map_location lets a checkpoint written on a GPU runtime load on whatever
# device this session uses (including CPU-only).
ckpt = torch.load(RESUME_PATH, map_location=device)
model.load_state_dict(ckpt['model_state_dict'])
model.eval()

# Build the deterministic transform once instead of once per image.
ref_transform = get_val_transforms()

ref_embeddings, ref_names = [], []
skipped = []
with torch.no_grad():
    for img_path in tqdm(train_ds.images, desc="Building refs"):
        try:
            with Image.open(img_path) as pil_img:
                img = np.array(pil_img.convert('RGB'))
            emb = model(ref_transform(image=img)['image'].unsqueeze(0).to(device))
        except Exception as err:
            # Still best effort, but remember what was skipped. `Exception`
            # rather than a bare except so the cell can be interrupted.
            skipped.append((img_path.name, err))
            continue
        ref_embeddings.append(emb.cpu())
        ref_names.append(img_path.stem)

if skipped:
    print(f"WARNING: skipped {len(skipped)} image(s) that could not be embedded, "
          f"e.g. {skipped[0][0]}: {skipped[0][1]}")
if not ref_embeddings:
    raise RuntimeError("No reference embeddings could be built - check the image folder")

ref_embeddings = torch.cat(ref_embeddings, dim=0)
print(f"‚úì {len(ref_embeddings):,} reference embeddings")

In [None]:
def identify_card(img_path, top_k=5):
    """Return the ``top_k`` reference cards most similar to an image.

    The image is converted to RGB and passed through the validation
    transforms only; no orientation correction is applied here, so matching
    a rotated card relies on the model having been trained on rotated views.

    Returns:
        list of dicts (best match first) with keys 'name', 'confidence'
        (cosine similarity to the reference embedding), 'set' and 'foil'.
        Cards missing from ``card_lookup`` are reported as 'Unknown'.
    """
    rgb = Image.open(img_path).convert('RGB')
    pixels = np.array(rgb)

    model.eval()
    with torch.no_grad():
        batch = get_val_transforms()(image=pixels)['image'].unsqueeze(0).to(device)
        query_emb = model(batch).cpu()

    sims = F.cosine_similarity(query_emb, ref_embeddings)
    ranked = sims.argsort(descending=True)[:top_k]

    def describe(idx):
        # ref_names holds image file stems, which double as printing ids.
        card = card_lookup.get(ref_names[idx], {})
        return {
            'name': card.get('name', 'Unknown'),
            'confidence': sims[idx].item(),
            'set': card.get('set_id', ''),
            'foil': card.get('foiling', ''),
        }

    return [describe(idx) for idx in ranked]

print("‚úì identify_card() ready (handles any orientation)")

In [None]:
from google.colab import files

# Ask for one or more camera images, identify each one and show it with its
# best match.
print("Upload camera images to test (any orientation):")
uploaded = files.upload()

for filename in uploaded:
    print(f"\n{'='*60}")
    results = identify_card(filename, top_k=5)

    print(f"üé¥ TOP 5 MATCHES:")
    for rank, r in enumerate(results, start=1):
        # Tick only the best match.
        status = "‚úì" if rank == 1 else " "
        print(f"   {status} {rank}. {r['name']} ({r['confidence']*100:.1f}%)")
        print(f"        Set: {r['set']} | Foil: {r['foil']}")

    best = results[0]
    plt.figure(figsize=(6, 8))
    plt.imshow(Image.open(filename))
    plt.title(f"Top: {best['name']} ({best['confidence']*100:.1f}%)")
    plt.axis('off')
    plt.show()

## ✅ V6 Complete!

**Handles all orientations:**
- 0° (upright)
- 90° (horizontal right)
- 180° (upside down)
- 270° (horizontal left)

**Controlled environment augmentation:**
- Minimal geometric distortion
- Lighting, blur, color, noise variations

**No preprocessing at inference!**