# Emoji Vendor Classification

This notebook trains a model to classify emoji images by their visual origin (Apple, Google, Samsung, Facebook, WhatsApp, Messenger, Mozilla).

**Authors**: [Your Team Names]

**Competition**: Computer Vision 2025 Final Project

## 1. Setup and Imports

In [None]:
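# Install dependencies (uncomment if running on Colab/Kaggle).
# `%pip` installs into the environment of the running kernel, which a bare `!pip` does not guarantee.
# %pip install torch torchvision pandas numpy Pillow scikit-learn tqdm matplotlib seaborn timm albumentations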

In [None]:
import copy
import os
import random
import time
from collections import defaultdict
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import timm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import tqdm

# Report the runtime so results can be tied to a PyTorch build and device.
print(f"PyTorch version: {torch.__version__}")
_has_cuda = torch.cuda.is_available()
print(f"CUDA available: {_has_cuda}")
if _has_cuda:
    # Device 0 is the one named here.
    print(f"GPU: {torch.cuda.get_device_name(0)}")

## 2. Configuration

In [None]:
# ============== CONFIGURATION ==============
# Everything a reader is expected to tune lives in this cell.

# Random seed for reproducibility (used for RNG seeding, sampling and the train/val split)
SEED = 42

# Paths - UPDATE THESE FOR YOUR SETUP
DATA_DIR = Path("2-computer-vision-2025-b-sc-aidams-final-proj")
TRAIN_DIR = DATA_DIR / "train"
TEST_DIR = DATA_DIR / "test"
TRAIN_LABELS_PATH = DATA_DIR / "train_labels.csv"

# For Kaggle, use:
# DATA_DIR = Path("/kaggle/input/2-computer-vision-2025-b-sc-aidams-final-proj")

# Class mappings: vendor name <-> contiguous integer class index
LABEL_TO_IDX = {
    'apple': 0, 'facebook': 1, 'google': 2, 'messenger': 3,
    'mozilla': 4, 'samsung': 5, 'whatsapp': 6
}
IDX_TO_LABEL = {v: k for k, v in LABEL_TO_IDX.items()}
# Derived from the mapping so the class count cannot drift out of sync with it.
NUM_CLASSES = len(LABEL_TO_IDX)

# Training settings
IMG_SIZE = 224  # For pretrained models
BATCH_SIZE = 32
NUM_EPOCHS = 25
LEARNING_RATE = 1e-4
VAL_SPLIT = 0.2  # fraction of the labelled data held out for validation

# Device: use the GPU when one is visible, otherwise fall back to the CPU
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {DEVICE}")

In [None]:
def set_seed(seed=SEED):
    """Seed every random number generator the notebook uses.

    Args:
        seed: Value passed to Python's `random`, NumPy, and PyTorch (CPU and
            all CUDA devices). Defaults to the notebook-wide `SEED`.
    """
    # Same seed for each library-level generator.
    for seed_fn in (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    ):
        seed_fn(seed)

    # Disable cuDNN autotuning and request deterministic kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


# Seed once, before any data splitting or model creation.
set_seed(SEED)

## 3. Data Loading and Exploration

In [None]:
# Load training labels and attach the integer class index used by the model.
df = pd.read_csv(TRAIN_LABELS_PATH)
df['label_idx'] = df['Label'].map(LABEL_TO_IDX)

# Series.map leaves NaN for any value missing from the mapping, which would
# silently turn label_idx into floats and only fail later inside the loss.
# Fail here instead, naming the offending labels.
unknown_labels = df.loc[df['label_idx'].isna(), 'Label'].unique()
if len(unknown_labels) > 0:
    raise ValueError(
        f"Labels in {TRAIN_LABELS_PATH} missing from LABEL_TO_IDX: "
        f"{sorted(map(str, unknown_labels))}"
    )

# Ids are left-padded with zeros to five characters; presumably this matches
# the `<Id>.png` file names on disk - confirm against the dataset.
df['Id'] = df['Id'].astype(str).str.zfill(5)

print(f"Total training samples: {len(df)}")
print(f"\nClass distribution:")
print(df['Label'].value_counts())

In [None]:
# Bar chart of how many training images each vendor contributes.
fig, ax = plt.subplots(figsize=(10, 5))
vendor_counts = df['Label'].value_counts()
vendor_counts.plot(kind='bar', ax=ax)
ax.set_title('Class Distribution')
ax.set_xlabel('Vendor')
ax.set_ylabel('Count')
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()

In [None]:
# Two randomly drawn examples per vendor: one vendor per column, one example per row.
fig, axes = plt.subplots(2, 7, figsize=(14, 4))

for idx, label in enumerate(LABEL_TO_IDX):
    # Fixed random_state keeps the displayed examples stable across runs.
    samples = df.loc[df['Label'] == label].sample(2, random_state=SEED)
    for row, img_id in enumerate(samples['Id']):
        cell = axes[row, idx]
        cell.imshow(Image.open(TRAIN_DIR / f"{img_id}.png"))
        cell.axis('off')
        if row == 0:
            # Only the top row carries the vendor name as a column header.
            cell.set_title(label, fontsize=10)

fig.suptitle('Sample Emojis by Vendor', fontsize=12)
fig.tight_layout()
plt.show()

## 4. Dataset and Transforms

In [None]:
class EmojiDataset(Dataset):
    """Map-style dataset that loads emoji PNGs by id from a directory.

    Each item is an `(image, label)` pair. When no labels were supplied the
    label slot is the placeholder `-1`, so labelled and unlabelled data share
    the same item structure.
    """

    def __init__(self, image_ids, labels=None, image_dir=TRAIN_DIR, transform=None):
        """Store the sample index; no image is read until an item is requested.

        Args:
            image_ids: Sequence of image ids; item `i` is read from
                `<image_dir>/<image_ids[i]>.png`.
            labels: Optional sequence of labels aligned with `image_ids`.
                Pass None for unlabelled data (e.g. the test set).
            image_dir: Directory containing the PNG files.
            transform: Optional callable applied to the RGB PIL image.
        """
        self.image_ids = image_ids
        self.labels = labels
        self.image_dir = Path(image_dir)
        self.transform = transform

    def __len__(self):
        """Return the number of image ids."""
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Load one image and return `(image, label)`; label is -1 if unlabelled."""
        img_id = self.image_ids[idx]
        img_path = self.image_dir / f"{img_id}.png"

        # Image.open is lazy and keeps the file handle open; the context
        # manager closes it as soon as the pixel data has been converted.
        # NOTE(review): convert("RGB") discards any alpha channel, so
        # transparent emoji backgrounds become whatever RGB values sit under
        # them - confirm this is the intended input for the model.
        with Image.open(img_path) as img:
            image = img.convert("RGB")

        if self.transform:
            image = self.transform(image)

        if self.labels is not None:
            return image, self.labels[idx]
        return image, -1

In [None]:
# Define transforms.
# Both pipelines resize to IMG_SIZE x IMG_SIZE and finish with the same
# tensor conversion and per-channel normalisation; only training adds
# random augmentation in between.
_resize = T.Resize((IMG_SIZE, IMG_SIZE))
_to_normalized_tensor = [
    T.ToTensor(),
    # Commonly used ImageNet channel statistics, matching the pretrained backbone.
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]

# Random augmentations applied to training images only.
_augmentations = [
    T.RandomHorizontalFlip(p=0.5),
    T.RandomRotation(degrees=15),
    T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
]

train_transform = T.Compose([_resize, *_augmentations, *_to_normalized_tensor])

# Deterministic pipeline for validation and test images.
val_transform = T.Compose([_resize, *_to_normalized_tensor])

In [None]:
# Stratified hold-out split: VAL_SPLIT of the rows go to validation, with the
# split stratified on label_idx. Both frames get a fresh 0..n-1 index.
train_df, val_df = (
    part.reset_index(drop=True)
    for part in train_test_split(
        df,
        test_size=VAL_SPLIT,
        random_state=SEED,
        stratify=df['label_idx'],
    )
)

print(f"Training samples: {len(train_df)}")
print(f"Validation samples: {len(val_df)}")

In [None]:
# Create datasets and dataloaders.
# Both splits read from TRAIN_DIR; they differ only in rows and transform.
train_dataset = EmojiDataset(
    train_df['Id'].tolist(),
    train_df['label_idx'].tolist(),
    TRAIN_DIR,
    train_transform,
)
val_dataset = EmojiDataset(
    val_df['Id'].tolist(),
    val_df['label_idx'].tolist(),
    TRAIN_DIR,
    val_transform,
)

# Options common to both loaders; images are loaded in the main process (num_workers=0).
_loader_opts = dict(batch_size=BATCH_SIZE, num_workers=0, pin_memory=True)

# Only the training loader shuffles; validation keeps a fixed order.
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_opts)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_opts)

## 5. Model Definition

In [None]:
# Create EfficientNet-B0 with pretrained weights and a NUM_CLASSES-way head,
# then move it to the selected device.
model = timm.create_model(
    'efficientnet_b0', pretrained=True, num_classes=NUM_CLASSES, drop_rate=0.3
).to(DEVICE)

# Count parameters in a single pass over the model.
total_params = 0
trainable_params = 0
for param in model.parameters():
    n_values = param.numel()
    total_params += n_values
    if param.requires_grad:
        trainable_params += n_values

print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

## 6. Training

In [None]:
# Calculate class weights to handle imbalanced data
def get_class_weights(df):
    """Return inverse-frequency class weights as a float32 tensor.

    The weight of class `c` is `len(df) / (n_present * count_c)`, where
    `n_present` is the number of distinct classes occurring in `df`.

    Args:
        df: DataFrame with an integer `label_idx` column.

    Returns:
        1-D `torch.float32` tensor with one weight per class present in
        `df`, ordered by ascending `label_idx`. A class that never occurs in
        `df` gets no entry.
    """
    # groupby sorts by key, so counts come out in ascending label_idx order.
    per_class = df.groupby('label_idx').size()
    n_present = len(per_class)
    inv_freq = len(df) / (n_present * per_class)
    return torch.tensor(inv_freq.to_numpy(), dtype=torch.float32)

# Weights come from the training split only.
class_weights = get_class_weights(train_df).to(DEVICE)

print("Class weights (higher = rarer class):")
# Position i is reported as class index i; this assumes every class occurs in train_df.
for idx, weight in enumerate(class_weights.tolist()):
    print(f"  {IDX_TO_LABEL[idx]:12s}: {weight:.3f}")

# Class-weighted cross-entropy, AdamW, and a cosine learning-rate schedule
# whose period (T_max) is the full training run.
criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-4)
scheduler = CosineAnnealingLR(optimizer, T_max=NUM_EPOCHS)

In [None]:
def train_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over `loader`.

    Args:
        model: Network to train; switched to training mode.
        loader: Iterable yielding `(images, labels)` batches.
        criterion: Loss called as `criterion(outputs, labels)`.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        `(loss, accuracy)`: the batch-size-weighted mean of the per-batch
        losses, and the percentage of correctly classified samples.
    """
    model.train()
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    for images, labels in tqdm(loader, desc="Training", leave=False):
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        logits = model(images)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by its size so a short final batch counts proportionally.
        loss_sum += batch_loss.item() * images.size(0)
        predicted = logits.max(dim=1).indices
        n_seen += labels.size(0)
        n_correct += predicted.eq(labels).sum().item()

    return loss_sum / n_seen, 100. * n_correct / n_seen


@torch.no_grad()
def validate(model, loader, criterion, device):
    """Evaluate `model` on `loader` without tracking gradients.

    Args:
        model: Network to evaluate; switched to eval mode.
        loader: Iterable yielding `(images, labels)` batches.
        criterion: Loss called as `criterion(outputs, labels)`.
        device: Device the batches are moved to.

    Returns:
        `(loss, accuracy, class_acc)`: the batch-size-weighted mean of the
        per-batch losses, the overall accuracy in percent, and a dict mapping
        vendor name to accuracy in percent. Only classes that occur in
        `loader` appear in the dict, in ascending class-index order.
    """
    model.eval()
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    # Per-class tallies keyed by integer class index.
    hits_by_class = defaultdict(int)
    seen_by_class = defaultdict(int)

    for images, labels in tqdm(loader, desc="Validating", leave=False):
        images = images.to(device)
        labels = labels.to(device)

        logits = model(images)
        loss_sum += criterion(logits, labels).item() * images.size(0)

        predicted = logits.max(dim=1).indices
        n_seen += labels.size(0)
        n_correct += predicted.eq(labels).sum().item()

        for truth, guess in zip(labels.tolist(), predicted.tolist()):
            seen_by_class[truth] += 1
            hits_by_class[truth] += int(truth == guess)

    class_acc = {
        IDX_TO_LABEL[cls]: 100. * hits_by_class[cls] / seen_by_class[cls]
        for cls in sorted(seen_by_class)
    }

    return loss_sum / n_seen, 100. * n_correct / n_seen, class_acc

In [None]:
# Training loop: one optimisation pass and one validation pass per epoch,
# keeping a snapshot of the weights with the best validation accuracy.
history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}
best_val_acc = 0.0
best_model_state = None

print(f"Training on {DEVICE}")
print("=" * 60)

start_time = time.time()

for epoch in range(NUM_EPOCHS):
    epoch_start = time.time()

    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, DEVICE)
    val_loss, val_acc, class_acc = validate(model, val_loader, criterion, DEVICE)

    # Advance the learning-rate schedule once per epoch, after the optimizer steps.
    scheduler.step()

    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)

    epoch_time = time.time() - epoch_start

    print(f"Epoch {epoch+1}/{NUM_EPOCHS} ({epoch_time:.1f}s)")
    print(f"  Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
    print(f"  Val Loss:   {val_loss:.4f} | Val Acc:   {val_acc:.2f}%")

    # The `is None` check guarantees a snapshot exists even if validation
    # accuracy never rises above the 0.0 starting value.
    if best_model_state is None or val_acc > best_val_acc:
        best_val_acc = val_acc
        # state_dict() returns references to the live parameter tensors and
        # dict.copy() is shallow, so a plain copy would keep changing as
        # training continues and end up holding the last epoch's weights.
        # A deep copy freezes the weights as they are right now.
        best_model_state = copy.deepcopy(model.state_dict())
        print(f"  -> New best model! (Val Acc: {val_acc:.2f}%)")

    print()

total_time = time.time() - start_time
print("=" * 60)
print(f"Training completed in {total_time/60:.1f} minutes")
print(f"Best validation accuracy: {best_val_acc:.2f}%")

In [None]:
# Plot training history: loss on the left panel, accuracy on the right,
# each with one curve for training and one for validation.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

_panels = [
    ('train_loss', 'val_loss', 'Loss', 'Training and Validation Loss'),
    ('train_acc', 'val_acc', 'Accuracy (%)', 'Training and Validation Accuracy'),
]
for panel, (train_key, val_key, y_label, title) in zip(axes, _panels):
    panel.plot(history[train_key], label='Train')
    panel.plot(history[val_key], label='Validation')
    panel.set_xlabel('Epoch')
    panel.set_ylabel(y_label)
    panel.set_title(title)
    panel.legend()

fig.tight_layout()
plt.show()

## 7. Per-Class Analysis

In [None]:
# Restore the weights stored in best_model_state and recompute the
# per-class validation accuracy with them.
model.load_state_dict(best_model_state)
class_acc = validate(model, val_loader, criterion, DEVICE)[2]

print("Per-class accuracy on validation set:")
print("-" * 30)
# Best-performing vendors first.
ranked = sorted(class_acc.items(), key=lambda item: item[1], reverse=True)
for label, acc in ranked:
    print(f"{label:12s}: {acc:5.2f}%")

In [None]:
# Per-class validation accuracy as bars, with the overall best validation
# accuracy drawn as a dashed reference line.
labels = list(class_acc)
accs = [class_acc[name] for name in labels]

fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(labels, accs)
ax.axhline(y=best_val_acc, color='r', linestyle='--', label=f'Overall: {best_val_acc:.1f}%')
ax.set_xlabel('Vendor')
ax.set_ylabel('Accuracy (%)')
ax.set_title('Per-Class Validation Accuracy')
ax.legend()
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()

## 8. Generate Submission

In [None]:
# Get test IDs: every PNG in TEST_DIR, sorted by path so the order is
# reproducible; the id is the file name without its extension.
test_files = sorted(TEST_DIR.glob("*.png"))
test_ids = [png_path.stem for png_path in test_files]
print(f"Test samples: {len(test_ids)}")

# Unlabelled dataset using the deterministic validation transform.
test_dataset = EmojiDataset(test_ids, None, TEST_DIR, val_transform)

# shuffle=False keeps predictions aligned with test_ids.
test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
)

In [None]:
# Make predictions
@torch.no_grad()
def predict(model, loader, device):
    """Return the predicted class index for every sample in `loader`.

    Args:
        model: Network to run; switched to eval mode.
        loader: Iterable yielding `(images, label)` batches; the label slot
            is ignored.
        device: Device the image batches are moved to.

    Returns:
        1-D NumPy array of predicted class indices, in loader order.
    """
    model.eval()
    collected = []

    for batch, _ in tqdm(loader, desc="Predicting"):
        logits = model(batch.to(device))
        # Index of the highest logit per sample.
        collected.extend(logits.max(dim=1).indices.cpu().numpy())

    return np.array(collected)


predictions = predict(model, test_loader, DEVICE)
print(f"Generated {len(predictions)} predictions")

In [None]:
# Create submission file: one row per test image, in the same order as test_ids.
# NOTE(review): the training CSV uses an `Id` column and string vendor names in
# `Label`, while this file writes `ID` and integer class indices - confirm
# which format the competition's sample submission expects.
submission = pd.DataFrame({'ID': test_ids, 'Label': predictions})
submission.to_csv('submission.csv', index=False)

print("Submission saved to submission.csv")
print(f"\nPrediction distribution:")
# Translate indices back to vendor names for a readable sanity check.
predicted_vendors = pd.Series([IDX_TO_LABEL[p] for p in predictions])
print(predicted_vendors.value_counts())

In [None]:
# Preview submission: show the first 10 rows as the cell output
submission.head(10)

## 9. Save Model

In [None]:
# Save the best model: the stored best-epoch state dict plus the metrics
# and settings needed to interpret or rebuild it.
_run_config = {
    'model': 'efficientnet_b0',
    'img_size': IMG_SIZE,
    'num_classes': NUM_CLASSES,
    'batch_size': BATCH_SIZE,
    'learning_rate': LEARNING_RATE,
    'epochs': NUM_EPOCHS
}
checkpoint = {
    'model_state_dict': best_model_state,
    'best_val_acc': best_val_acc,
    'history': history,
    'config': _run_config,
}
torch.save(checkpoint, 'best_model.pth')

print("Model saved to best_model.pth")