In [None]:
# Check GPU
!nvidia-smi

import torch
print(f"PyTorch: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

In [None]:
# Mount Google Drive at /content/drive.
# Later cells read the dataset from, and write checkpoints to, paths under
# /content/drive/MyDrive, so this cell has to run before them.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Clone your repo (or upload files)
# Option 1: Clone from GitHub
!git clone https://github.com/Surya-Narayan-M/kortex_5th_sem.git
%cd kortex_5th_sem/sign_to_text

# Option 2: If files are in Drive, copy them
# !cp -r /content/drive/MyDrive/kortex_5th_sem .

In [None]:
# Install dependencies
!pip install tqdm pandas numpy scipy

In [None]:
# Setup paths for Colab
import os
import sys
from pathlib import Path

# Add project to path
sys.path.insert(0, '/content/kortex_5th_sem/sign_to_text')

# Link the dataset from Drive into /content.
# You need to upload your preprocessed data to Drive first!
DRIVE_DATA_PATH = "/content/drive/MyDrive/kortex_data"  # Change this to your path


def link_data(src, dst):
    """Make `dst` a symlink to `src`; safe to call repeatedly.

    Args:
        src: Existing file or directory to link to.
        dst: Path at which the symlink is created.

    Returns:
        True when `dst` is present after the call (freshly linked, or already
        there as a real file/directory that was left untouched); False when
        `src` does not exist and nothing was linked.
    """
    src, dst = Path(src), Path(dst)
    if not src.exists():
        # Do not create a dangling link; report what is missing instead.
        print(f"Missing in Drive: {src}")
        return False
    if dst.is_symlink():
        # Drop the old link first. Linking on top of an existing
        # symlink-to-directory (what `ln -sf` does) would place a nested link
        # inside the target directory instead of replacing the link.
        dst.unlink()
    elif dst.exists():
        print(f"{dst} already exists and is not a symlink; leaving it as is")
        return True
    dst.symlink_to(src)
    return True


# Check if data exists
if os.path.exists(DRIVE_DATA_PATH):
    print(f"Data found at {DRIVE_DATA_PATH}")
    for required_name in ("output_preprocessed", "iSign_v1.1.csv"):
        link_data(Path(DRIVE_DATA_PATH) / required_name, Path("/content") / required_name)
else:
    print(f"⚠️ Data not found! Upload to: {DRIVE_DATA_PATH}")
    print("Required files:")
    print("  - output_preprocessed/ (folder with .npy files)")
    print("  - iSign_v1.1.csv")

In [None]:
# Modified training config for Colab
import json
import time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from torch.amp import autocast
from tqdm.notebook import tqdm  # Notebook version of tqdm
import logging

# Setup logging
# basicConfig() does nothing when the root logger already has handlers, which
# notebook front-ends may install before user code runs. force=True replaces
# any existing root handlers so the level and format below actually apply, and
# it also makes re-running this cell take effect.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s | %(message)s',
    datefmt='%H:%M:%S',
    force=True,
)
logger = logging.getLogger(__name__)

class ColabConfig:
    """Paths and hyperparameters for training on a Colab GPU runtime."""

    # ---- Filesystem locations (Colab layout) ----
    data_dir = Path("/content", "output_preprocessed")
    csv_path = Path("/content", "iSign_v1.1.csv")
    vocab_path = Path("/content/kortex_5th_sem/sign_to_text", "vocabulary.pkl")
    # Checkpoints go to the mounted Drive rather than the local disk.
    checkpoint_dir = Path("/content/drive/MyDrive", "kortex_checkpoints")

    # ---- Model architecture ----
    input_dim, hidden_dim, embedding_dim = 414, 384, 256
    encoder_layers, decoder_layers = 3, 2
    num_heads = 4
    dropout = 0.3

    # ---- Optimisation ----
    # batch_size * gradient_accumulation = 16 * 2 = 32 samples per optimizer step.
    batch_size, gradient_accumulation = 16, 2
    epochs = 100
    learning_rate = 0.001
    min_lr = 0.000001
    weight_decay = 0.00001
    warmup_epochs = 5

    # ---- CTC vs. attention loss weighting (start -> end over N epochs) ----
    ctc_weight_start, ctc_weight_end = 0.5, 0.2
    ctc_weight_decay_epochs = 30

    # ---- Label smoothing ----
    label_smoothing = 0.1

    # ---- Teacher-forcing schedule (start -> end over N epochs) ----
    tf_start, tf_end = 1.0, 0.3
    tf_decay_epochs = 40

    # ---- Early stopping ----
    patience = 15
    min_delta = 0.001

    # ---- Device / mixed precision ----
    device = "cuda"
    use_amp = True

    # ---- DataLoader settings (kept small: Colab has limited CPU) ----
    num_workers = 2
    pin_memory = True
    prefetch_factor = 2
    persistent_workers = True

    # ---- Sequence length limits ----
    max_src_len, max_tgt_len = 500, 100

    # ---- Train/validation split ----
    val_ratio = 0.1
    seed = 42

# Report the per-step batch, the accumulation factor and their product.
print("Colab config loaded!")
per_step, accum_steps = ColabConfig.batch_size, ColabConfig.gradient_accumulation
print(f"Batch size: {per_step} x {accum_steps} = {per_step * accum_steps}")

In [None]:
# Import model and training components from your repo
from model_hybrid import HybridCTCAttentionModel

# Vocabulary size used for this smoke test only; it sizes both the model and
# the random target ids below, so it is defined once.
# NOTE(review): presumably matches the vocabulary shipped in the repo — confirm.
VOCAB_SIZE = 73

# Test model creation
# Dimensions and device come from ColabConfig so this check exercises the same
# shapes the training run is configured with.
model = HybridCTCAttentionModel(
    input_dim=ColabConfig.input_dim,
    hidden_dim=ColabConfig.hidden_dim,
    vocab_size=VOCAB_SIZE,
).to(ColabConfig.device)

print(f"Model parameters: {model.get_num_params():,}")
print(f"Model size: {model.get_model_size_mb():.2f} MB")

# Quick forward test
# Batch of 4 random sequences of 100 frames, with per-sample lengths and
# 20 random target ids each; gradients are not needed for a shape check.
with torch.no_grad():
    x = torch.randn(4, 100, ColabConfig.input_dim).to(ColabConfig.device)
    lens = torch.tensor([100, 90, 80, 70]).to(ColabConfig.device)
    tgt = torch.randint(0, VOCAB_SIZE, (4, 20)).to(ColabConfig.device)
    ctc_out, attn_out = model(x, lens, tgt)
    print(f"CTC output: {ctc_out.shape}")
    print(f"Attn output: {attn_out.shape}")
print("✅ Model works!")

In [None]:
# Import training components
from train_hybrid import (
    SignLanguageDataset, 
    collate_fn, 
    HybridLoss,
    Trainer
)

# Trainer variant that is always constructed with the Colab configuration
class ColabTrainer(Trainer):
    """Trainer that needs no arguments: it builds and passes a ColabConfig."""

    def __init__(self):
        colab_config = ColabConfig()
        super().__init__(colab_config)


print("Training components imported!")

In [None]:
# Build the trainer and report how much data it loaded
trainer = ColabTrainer()

# Attributes are looked up one at a time, in the order they are printed.
for size_label, trainer_attr in (
    ("Train samples", "train_dataset"),
    ("Val samples", "val_dataset"),
    ("Batches per epoch", "train_loader"),
):
    print(f"{size_label}: {len(getattr(trainer, trainer_attr))}")

In [None]:
# Start training!
# Calls train() on the ColabTrainer created in the previous cell; the method
# itself is inherited from Trainer (imported from train_hybrid).
# To resume from checkpoint: trainer.train(resume_from="/content/drive/MyDrive/kortex_checkpoints/latest.pth")
# NOTE(review): the resume path assumes the trainer writes latest.pth into
# ColabConfig.checkpoint_dir — confirm against train_hybrid.
trainer.train()

In [None]:
# Visualize training (run after training or during breaks)
import matplotlib.pyplot as plt

history = trainer.history

# One entry per subplot, in row-major order:
# (title, ((history key, legend label), ...), plot values multiplied by 100?)
panels = (
    ('Total Loss', (('train_loss', 'Train'), ('val_loss', 'Val')), False),
    ('Accuracy (%)', (('train_acc', 'Train'), ('val_acc', 'Val')), True),
    ('CTC Loss', (('train_ctc_loss', 'Train CTC'), ('val_ctc_loss', 'Val CTC')), False),
    ('Attention Loss', (('train_attn_loss', 'Train Attn'), ('val_attn_loss', 'Val Attn')), False),
)

fig, axes = plt.subplots(2, 2, figsize=(12, 10))

for ax, (title, series, as_percent) in zip(axes.flat, panels):
    for key, label in series:
        values = history[key]
        if as_percent:
            # Accuracy is scaled by 100 to match the "(%)" in the panel title.
            values = [v * 100 for v in values]
        ax.plot(values, label=label)
    ax.set_title(title)
    ax.legend()

plt.tight_layout()
# Save next to the checkpoints; taken from the config instead of repeating the
# Drive path as a string literal.
curves_path = ColabConfig.checkpoint_dir / 'training_curves.png'
plt.savefig(curves_path)
plt.show()

# min()/max() raise ValueError on an empty sequence, which is what the history
# holds if this cell is run before any validation result has been recorded.
if len(history['val_loss']) > 0 and len(history['val_acc']) > 0:
    print(f"Best Val Loss: {min(history['val_loss']):.4f}")
    print(f"Best Val Acc: {max(history['val_acc'])*100:.2f}%")
else:
    print("No validation metrics recorded yet.")

In [None]:
# Save final model to Drive
import shutil
from pathlib import Path


def copy_checkpoints(src_dir, dst_dir):
    """Copy the regular, non-hidden files directly inside `src_dir` to `dst_dir`.

    Mirrors `cp src_dir/* dst_dir/`: sub-directories and dot-files are skipped.

    Args:
        src_dir: Directory holding the checkpoint files.
        dst_dir: Directory to copy into; created if files are found and it
            does not exist yet.

    Returns:
        Number of files copied; 0 when `src_dir` is missing or has no files.
    """
    src_dir, dst_dir = Path(src_dir), Path(dst_dir)
    if not src_dir.is_dir():
        return 0
    checkpoint_files = sorted(
        p for p in src_dir.iterdir()
        if p.is_file() and not p.name.startswith('.')
    )
    if not checkpoint_files:
        return 0
    dst_dir.mkdir(parents=True, exist_ok=True)
    for checkpoint_file in checkpoint_files:
        shutil.copy(checkpoint_file, dst_dir / checkpoint_file.name)
    return len(checkpoint_files)


LOCAL_CKPT_DIR = Path('/content/kortex_5th_sem/sign_to_text/checkpoints_hybrid')
DRIVE_CKPT_DIR = Path('/content/drive/MyDrive/kortex_checkpoints')

# Only report success when something was actually copied; a failed copy must
# not be followed by a "saved" message.
if not DRIVE_CKPT_DIR.parent.is_dir():
    print(f"Google Drive is not mounted at {DRIVE_CKPT_DIR.parent}; nothing copied.")
else:
    n_copied = copy_checkpoints(LOCAL_CKPT_DIR, DRIVE_CKPT_DIR)
    if n_copied:
        print("Checkpoints saved to Google Drive!")
    else:
        print(f"No checkpoint files found in {LOCAL_CKPT_DIR}; nothing copied.")