# Google ViT 
## Initialization

In [None]:
# Cell 0: environment bootstrap, imports and shared constants.
import sys
from pathlib import Path

# Locate the project root so that the `src` package imported below can be resolved:
# when the kernel's working directory is named "notebooks", its parent is used;
# otherwise the working directory itself is used.
notebook_dir = Path.cwd()
project_root = notebook_dir.parent if notebook_dir.name == 'notebooks' else notebook_dir

# Prepend the project root to the import path once (re-running the cell does not
# add a duplicate entry). This has to happen before any `from src...` import.
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

from transformers import ViTForImageClassification
import random
from torchvision import transforms
import albumentations as A
from src.transforms import base_transform
from src.dataset import FER2013Dataset
from src.config import (
    DEVICE, 
    NUM_LABELS, 
    EMOTION_LABELS,
    DEFAULT_BATCH_SIZE,
    DEFAULT_LEARNING_RATE
)
from tqdm.notebook import tqdm
import torch
from torch.optim import AdamW
from src.train import train_model

print(f"Using device: {DEVICE}")

# Pretrained checkpoint identifier handed to
# ViTForImageClassification.from_pretrained in the training loop.
MODEL_NAME = "google/vit-base-patch16-224-in21k"

### Weights and Biases 

In [None]:
# Cell 1: Weights & Biases login
from src.wandb_utils import login, check_wandb_mode, sync_offline_runs

# Logging mode: "online", "offline", or "disabled".
# If set to "offline", don't forget to sync the runs afterwards.
WANDB_MODE = "online"

print("Initializing Weights & Biases...")
current_mode = login(project="emotion-classifier-vit", mode=WANDB_MODE)

# Report the mode that login() handed back, upper-cased for emphasis.
mode_label = current_mode.upper()
print(f"W&B initialized successfully in {mode_label} mode!")

In [None]:

# Weights and Biases utility commands.
#
# The helpers are reached through a module alias rather than a wildcard import, so
# the notebook namespace is not silently filled (or overwritten) with whatever
# src.wandb_utils happens to export, and every call shows where it comes from.
import src.wandb_utils as wandb_utils

# Check current mode
# wandb_utils.check_wandb_mode()

# Sync offline runs (when you have internet)
wandb_utils.sync_offline_runs()

# List available offline runs
# wandb_utils.list_offline_runs()

# Change mode
# wandb_utils.set_wandb_mode("online")

# Set confirm to False for a dry run
# wandb_utils.clear_offline_runs(confirm=True)


---
##  Fine Tuning Section
Using FER2013 dataset.

### Transformations

In [None]:

# Augmentation presets of increasing strength. Every preset ends with the shared
# base transforms from transforms.py; "none" consists of the base transforms only.
def _with_base(*augmentations):
    """Compose the given augmentations followed by the base transforms."""
    return A.Compose([*augmentations, *base_transform()])


transform_configs = {
    "none": base_transform(),
    "light": _with_base(
        A.HorizontalFlip(p=0.3),
        A.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.3),
        A.Affine(translate_percent=0.05, scale=(0.95, 1.05), rotate=(-10, 10), p=0.3),
    ),
    "medium": _with_base(
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
        A.Affine(translate_percent=0.1, scale=(0.9, 1.1), rotate=(-15, 15), p=0.5),
        A.GaussianBlur(blur_limit=(3, 7), p=0.3),
    ),
    "heavy": _with_base(
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.5),
        A.Affine(translate_percent=0.15, scale=(0.85, 1.15), rotate=(-20, 20), p=0.5),
        A.GaussianBlur(blur_limit=(3, 7), p=0.4),
        A.GridDropout(ratio=0.1, p=0.3),
    ),
}

print("Transformation Configs Loaded")

### Hyperparameter Queue

In [None]:
# Experiment queue: one baseline run per augmentation preset.
EPOCHS = 6

# Only the transform set varies between runs; epochs, learning rate, batch size
# and weight decay are held constant so the runs can be compared directly.
experiment_configs = [
    dict(
        name=f"baseline_{preset}",
        transform_key=preset,
        epochs=EPOCHS,
        learning_rate=DEFAULT_LEARNING_RATE,
        batch_size=DEFAULT_BATCH_SIZE,
        weight_decay=0.01,
    )
    for preset in ("none", "light", "medium", "heavy")
]

print(f"{len(experiment_configs)} Experiment Configs Loaded")

### Training Loop

In [None]:

# Results of every experiment, keyed by experiment name.
all_results = {}

# The validation split and its transform are the same for every experiment, so the
# dataset is built once here instead of once per loop iteration.
valid = FER2013Dataset(
    split="valid",
    transform=base_transform()
)

for i, config in enumerate(tqdm(experiment_configs, desc="Training Experiments")):
    print(f"\n Experiment {i+1}/{len(experiment_configs)}: {config['name']}")
    print(f"   Transform: {config['transform_key']}, LR: {config['learning_rate']}, Epochs: {config['epochs']}")

    # Only the training split receives the experiment's augmentation preset.
    transform = transform_configs[config['transform_key']]

    train = FER2013Dataset(
        split="train",
        transform=transform
    )

    # Start every experiment from the same pretrained weights; the classification
    # head is re-created for NUM_LABELS classes.
    model = ViTForImageClassification.from_pretrained(
        MODEL_NAME,
        num_labels=NUM_LABELS,
        ignore_mismatched_sizes=True
    ).to(DEVICE)

    optimizer = AdamW(
        model.parameters(),
        lr=config['learning_rate'],
        weight_decay=config['weight_decay']
    )

    print(f"Training with {sum(p.numel() for p in model.parameters()):,} parameters")

    model_exp, history_exp, run_folder_exp = train_model(
        model=model,
        optimizer=optimizer,
        train_dataset=train,
        val_dataset=valid,
        num_epochs=config['epochs'],
        batch_size=config['batch_size'],
        device=DEVICE,
        model_name=config['name'],
        use_wandb=True,
        wandb_config={
            "learning_rate": config['learning_rate'],
            "batch_size": config['batch_size'],
            "epochs": config['epochs'],
            "weight_decay": config['weight_decay'],
            # NOTE(review): this logged name omits the "-in21k" suffix of
            # MODEL_NAME — confirm that is intended.
            "model_name": "vit_base_patch16_224",
            "architecture": "ViT",
            "dataset": "FER2013",
            "transform_set": config['transform_key'],
            "experiment_name": config['name']
        }
    )

    all_results[config['name']] = {
        'model': model_exp,
        'history': history_exp,
        'run_folder': run_folder_exp,
        'config': config,
        'best_val_accuracy': max(history_exp['val_acc']),
        'best_val_loss': min(history_exp['val_loss']),
        'final_train_accuracy': history_exp['train_acc'][-1],
        'final_train_loss': history_exp['train_loss'][-1]
    }

    print(f"   Completed: {config['name']}")
    print(f"   Best Val Accuracy: {all_results[config['name']]['best_val_accuracy']:.4f}")
    print(f"   Run folder: {run_folder_exp}")

    # Free accelerator memory before the next experiment. The trained model stays
    # referenced from `all_results` (and from `model`), so deleting a name alone
    # would release nothing; move the weights to the CPU instead. Code that uses a
    # stored model later has to move it back with `.to(DEVICE)`.
    for trained in (model, model_exp):
        if isinstance(trained, torch.nn.Module):
            trained.to("cpu")
    del model_exp, optimizer
    # Checking CUDA availability (rather than comparing str(DEVICE) to 'cuda') also
    # covers device strings such as "cuda:0".
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

---
## Evaluation
### Metrics

In [None]:
# Cell 9: Independent evaluation (can run after kernel restart)
from src.evaluate import evaluate_all_saved_models
from src.dataset import FER2013Dataset
from src.transforms import base_transform
import matplotlib.pyplot as plt

print("🧪 Starting INDEPENDENT evaluation of all saved models...")

# Load test dataset (base transforms only, no augmentation)
test_ds = FER2013Dataset(
    split="test",
    transform=base_transform()
)

print(f"Test dataset size: {len(test_ds)}")

# Evaluate all saved models (no need for all_results in memory)
summary_data = evaluate_all_saved_models(test_ds)

# Report success only when at least one model produced a result, so the success
# banner and the failure message can never be printed together.
if summary_data:
    print("\n✅ All saved models evaluated and summarized!")
    print("📊 Performance plot saved to: experiment_performance_comparison.png")

    # Show best model details.
    # NOTE(review): assumes the summary is ordered best-first — confirm in src.evaluate.
    best_exp = summary_data[0]
    print(f"\n🏆 Best model: {best_exp['experiment']}")
    print(f"   Test Accuracy: {best_exp['test_accuracy']:.4f}")
    print(f"   Transform: {best_exp['transform']}")
    print(f"   Run Folder: {best_exp['run_folder']}")
else:
    print("❌ No models were successfully evaluated")

In [None]:
# Cell 9A: Evaluate specific experiments using your experiment_configs
# (requires `experiment_configs` from the Hyperparameter Queue cell).
from src.evaluate import evaluate_from_experiment_configs
from src.dataset import FER2013Dataset
from src.transforms import base_transform
import matplotlib.pyplot as plt

print("🧪 Evaluating specific experiments from config...")

# Load test dataset (base transforms only, no augmentation)
test_ds = FER2013Dataset(
    split="test",
    transform=base_transform()
)

print(f"Test dataset size: {len(test_ds)}")

# Evaluate using your experiment_configs (finds latest runs automatically)
summary_data = evaluate_from_experiment_configs(experiment_configs, test_ds)

# Report success only when at least one experiment produced a result; an empty
# summary gets an explicit failure message instead of a misleading banner.
if summary_data:
    print("\n✅ Specific experiments evaluated!")
    print("📊 Performance plot saved to: experiment_performance_comparison.png")

    # Show best model details.
    # NOTE(review): assumes the summary is ordered best-first — confirm in src.evaluate.
    best_exp = summary_data[0]
    print(f"\n🏆 Best model: {best_exp['experiment']}")
    print(f"   Run: {best_exp['run_name']}")
    print(f"   Test Accuracy: {best_exp['test_accuracy']:.4f}")
    print(f"   Transform: {best_exp['transform']}")
else:
    print("❌ No models were successfully evaluated")

---
##  Test Predictions
Let's visualize some predictions from the trained model.

In [None]:
# Visualize random predictions from test set
def predict_and_visualize(dataset, index, model, processor):
    """Run the model on one dataset sample, print the prediction and plot the image.

    Args:
        dataset: Indexable dataset; ``dataset[index]`` must yield ``(image, label)``.
        index: Position of the sample to inspect.
        model: Classifier whose output exposes ``.logits``. As a side effect it is
            switched to eval mode and moved to ``DEVICE``.
        processor: Callable invoked as ``processor(images=..., return_tensors="pt")``
            that returns a mapping of tensors to pass to the model.

    Returns:
        Tuple ``(true_label, pred_label, confidence)``: the label as stored in the
        dataset, the predicted class index as an ``int`` and the softmax
        probability of that class as a ``float``.
    """

    img, true_label = dataset[index]
    # NOTE(review): the sample has already been through the dataset's transform and
    # is preprocessed again by `processor` below — confirm that is intended.
    img_pil = transforms.ToPILImage()(img)

    # Run model
    model.eval()
    model.to(DEVICE)
    inputs = processor(images=img_pil, return_tensors="pt")
    inputs = {k: v.to(DEVICE) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)

    # Post-process: probabilities of the single image in the batch
    probs = torch.softmax(outputs.logits, dim=-1)[0]
    pred_label = torch.argmax(probs).item()
    confidence = probs[pred_label].item()

    # Visualize
    print(f"Predicted Label: {EMOTION_LABELS[pred_label]} (Confidence: {confidence:.2%})")
    print(f"True Label:      {EMOTION_LABELS[true_label]}")

    # Show top 3 predictions (fewer if the model has fewer than 3 classes).
    # Values are converted to plain Python numbers so the label lookup works the
    # same way as for `pred_label`, whether EMOTION_LABELS is a list or a dict.
    top_k = min(3, probs.numel())
    top3_probs, top3_idx = torch.topk(probs, top_k)
    print("\nTop 3 Predictions:")
    for rank, (prob, idx) in enumerate(zip(top3_probs.tolist(), top3_idx.tolist()), start=1):
        print(f"  {rank}. {EMOTION_LABELS[idx]}: {prob:.2%}")

    plt.figure(figsize=(6, 6))
    plt.imshow(img_pil, cmap='gray')
    plt.title(f"Predicted: {EMOTION_LABELS[pred_label]}\nTrue: {EMOTION_LABELS[true_label]}")
    plt.axis("off")
    plt.tight_layout()
    plt.show()

    return true_label, pred_label, confidence


print("Testing predictions AFTER training:\n")

# No earlier cell defines `processor`, so the image processor belonging to the
# pretrained checkpoint is loaded here.
from transformers import ViTImageProcessor

processor = ViTImageProcessor.from_pretrained(MODEL_NAME)

# `model` is whichever model an earlier cell bound last (the training loop or the
# resume cell), and `test_ds` comes from the evaluation cells; run those first.
num_samples = 5
for i in range(num_samples):
    print(f"\n{'='*70}")
    print(f"Sample {i+1}/{num_samples}")
    print('='*70)
    idx = random.randint(0, len(test_ds)-1)
    true, pred, conf = predict_and_visualize(test_ds, idx, model, processor)

## Debug 
### Reload Failed Models from Backup Checkpoint

In [None]:
# Resume Training from Last Backup
from src.backup import resume_training
import json
import traceback
from pathlib import Path


# Run folders are looked up relative to the project root computed in Cell 0 instead
# of through a machine-specific absolute path.
# NOTE(review): assumes checkpoints live in <project_root>/checkpoints — confirm.
CHECKPOINTS_DIR = Path(project_root) / "checkpoints"

# Names of the run folders (inside CHECKPOINTS_DIR) to resume.
MODELS_TO_RESUME = [
    "baseline_heavy",
]

for model_folder in MODELS_TO_RESUME:
    print(f"\n{'='*70}")
    print(f"Resuming: {model_folder}")
    print(f"{'='*70}")

    try:
        run_folder = CHECKPOINTS_DIR / model_folder

        # Load training parameters to get original settings
        params_path = run_folder / "training_parameters.json"
        with open(params_path, 'r') as f:
            training_params = json.load(f)

        # Create a fresh model with the same checkpoint, label count and device as
        # the training loop, so the cell also works on machines without CUDA.
        model = ViTForImageClassification.from_pretrained(
            MODEL_NAME,
            num_labels=NUM_LABELS,
            ignore_mismatched_sizes=True
        ).to(DEVICE)

        # Infer the augmentation preset from the folder name; the first matching
        # keyword wins and "none" is used when no keyword is present.
        folder_lower = model_folder.lower()
        transform_key = next(
            (key for key in ("heavy", "medium", "light") if key in folder_lower),
            "none",
        )
        transform = transform_configs[transform_key]

        train_ds = FER2013Dataset(split="train", transform=transform)
        val_ds = FER2013Dataset(split="valid", transform=base_transform())

        optimizer = AdamW(
            model.parameters(),
            lr=training_params['learning_rate'],
            weight_decay=training_params['optimizer_params']['weight_decay']
        )

        # Resume training
        model_resumed, history, new_run_folder = resume_training(
            run_folder=run_folder,
            model=model,
            optimizer=optimizer,
            train_dataset=train_ds,
            val_dataset=val_ds,
            num_epochs=training_params['num_epochs'],
            batch_size=training_params['batch_size'],
            device=DEVICE,
            model_name=f"resumed_{model_folder}",
            use_wandb=False
        )

        print(f"Successfully resumed: {model_folder}")
        print(f"New run folder: {new_run_folder}")

    except Exception as e:
        # Best effort: continue with the next folder, but keep the full traceback
        # so the cause of the failure is not reduced to a one-line message.
        print(f"Failed to resume {model_folder}: {e}")
        traceback.print_exc()