In [1]:
# Cell 1: Imports & Configuration
import os, gc, warnings, psutil
import numpy as np, pandas as pd
from PIL import Image
from os.path import exists
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
import torch, torch.nn as nn, torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
import torchvision.transforms as T, torchvision.models as models
from tqdm.auto import tqdm
import librosa
import matplotlib.pyplot as plt
import seaborn as sns

warnings.filterwarnings("ignore")

CONFIG = {
    "data_path": "/kaggle/input/processed-media/processed_data",
    "existing_models_path": "/kaggle/input/3-models-trained",  # Add your saved models dataset here
    "models_path": "/kaggle/working/models",
    "batch_size": 24,  # Slightly increased for faster training
    "video_batch": 6,   # Increased for efficiency
    "epochs": 12,       # Reduced epochs with optimizations
    "lr": 1.5e-3,       # Slightly higher learning rate
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    "img_size": (224, 224),
    "seed": 42,
    "sr": 16000,
    "n_mels": 128,
    "audio_dur": 2.5    # Slightly reduced audio duration for faster processing
}

os.makedirs(CONFIG["models_path"], exist_ok=True)
torch.manual_seed(CONFIG["seed"])
np.random.seed(CONFIG["seed"])
print("Device:", CONFIG["device"])
print("Available GPU memory:", torch.cuda.get_device_properties(0).total_memory // 1024**3, "GB" if torch.cuda.is_available() else "N/A")
print("Processed files:", os.listdir(CONFIG["data_path"]))


Device: cuda
Available GPU memory: 14 GB
Processed files: ['ff_c23_processed.pkl', 'asvspoof_21_cqt_processed.csv', 'celebdf_v2_processed.csv', 'processing_summary.csv', 'in_the_wild_audio_processed.csv', 'dfdc_faces_processed.csv']


In [2]:
# Cell 2: Enhanced Utilities with Memory Optimization
def free_mem():
    """Aggressive memory cleanup"""
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()
    gc.collect()

def show_mem(tag=""):
    """Enhanced memory monitoring"""
    m = psutil.virtual_memory().percent
    if torch.cuda.is_available():
        g_allocated = torch.cuda.memory_allocated() / 1e9
        g_reserved = torch.cuda.memory_reserved() / 1e9
        print(f"{tag} CPU {m}% | GPU Allocated {g_allocated:.2f}GB | Reserved {g_reserved:.2f}GB")
    else:
        print(f"{tag} CPU {m}%")

def load_and_filter_csv(path, key):
    """Load and filter CSV with detailed logging"""
    df = pd.read_csv(path)
    before = len(df)
    df = df[df[key].apply(exists)]
    remaining = len(df)
    print(f"{os.path.basename(path)}: {before} → {remaining} samples ({before-remaining} missing)")
    
    if remaining > 0:
        print(f"  Label distribution: {dict(df['label'].value_counts())}")
    
    return df.reset_index(drop=True)

def audio_to_mel(path):
    """Convert audio file to log-scaled mel-spectrogram."""
    try:
        # Load a fixed-length audio segment
        y, sr = librosa.load(path, sr=CONFIG["sr"], duration=CONFIG["audio_dur"])
        target_len = int(sr * CONFIG["audio_dur"])
        if len(y) > target_len:
            y = y[:target_len]
        else:
            y = np.pad(y, (0, target_len - len(y)), mode='constant')
        
        # Correct call: positional y, sr= sampling rate
        S = librosa.feature.melspectrogram(
            y=y,
            sr=sr,
            n_mels=CONFIG["n_mels"],
            n_fft=2048,
            hop_length=512,
            power=2.0
        )
        return librosa.power_to_db(S, ref=np.max)
    
    except Exception as e:
        print(f"Audio processing error: {e}")
        # Fallback silent spectrogram
        shape = (CONFIG["n_mels"], int(CONFIG["audio_dur"] * CONFIG["sr"] / 512) + 1)
        return np.zeros(shape)

def check_existing_models():
    """Check which models are already trained"""
    existing = set()
    
    # Check existing models from previous training
    if os.path.exists(CONFIG["existing_models_path"]):
        for f in os.listdir(CONFIG["existing_models_path"]):
            if f.endswith("_best.pth"):
                model_key = f.replace("_best.pth", "")
                existing.add(model_key)
                print(f"Found existing model: {model_key}")
    
    # Check current working directory
    if os.path.exists(CONFIG["models_path"]):
        for f in os.listdir(CONFIG["models_path"]):
            if f.endswith("_best.pth"):
                model_key = f.replace("_best.pth", "")
                existing.add(model_key)
                print(f"Found current model: {model_key}")
    
    return existing


In [3]:
# Cell 3: Optimized Dataset Classes
class ImageDataset(Dataset):
    """Optimized image dataset with caching"""
    def __init__(self, df, transform, cache_size=1000):
        self.df = df
        self.transform = transform
        self.cache = {}
        self.cache_size = cache_size
    
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = row["image_path"]
        
        # Try cache first for frequently accessed images
        if img_path in self.cache:
            img = self.cache[img_path]
        else:
            try:
                img = Image.open(img_path).convert("RGB")
                # Cache if we have space
                if len(self.cache) < self.cache_size:
                    self.cache[img_path] = img
            except Exception as e:
                print(f"Error loading {img_path}: {e}")
                img = Image.new("RGB", CONFIG["img_size"], 0)
        
        return self.transform(img), row["label"]

class VideoDataset(Dataset):
    """Optimized video dataset"""
    def __init__(self, df, transform):
        self.df = df
        self.transform = transform
    
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        try:
            frames = row["frames"]
            # Limit frames to prevent memory issues
            if len(frames) > 8:
                step = len(frames) // 8
                frames = frames[::step][:8]
            
            processed = []
            for frame in frames:
                if isinstance(frame, np.ndarray):
                    frame_img = Image.fromarray(frame.astype(np.uint8))
                    processed.append(self.transform(frame_img))
            
            if processed:
                return torch.stack(processed), row["label"]
            else:
                # Fallback dummy frames
                return torch.zeros(5, 3, *CONFIG["img_size"]), row["label"]
                
        except Exception as e:
            print(f"Video processing error: {e}")
            return torch.zeros(5, 3, *CONFIG["img_size"]), row["label"]

class AudioDataset(Dataset):
    """Optimized audio dataset"""
    def __init__(self, df):
        self.df = df
    
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        try:
            mel = audio_to_mel(row["audio_path"])
            return torch.tensor(mel).unsqueeze(0).float(), row["label"]
        except Exception as e:
            print(f"Audio error: {e}")
            # Silent spectrogram fallback
            silent = np.zeros((CONFIG["n_mels"], int(CONFIG["audio_dur"] * CONFIG["sr"] // 512) + 1))
            return torch.tensor(silent).unsqueeze(0).float(), row["label"]


In [4]:
# Cell 4: Enhanced Model Definitions with Better Error Handling
def safe_efficientnet(pretrained=True):
    """Safe EfficientNet loading with multiple fallbacks"""
    try:
        print("Attempting to load EfficientNet with pretrained weights...")
        return models.efficientnet_b0(pretrained=pretrained)
    except Exception as e:
        print(f"Pretrained loading failed ({e}), using random initialization...")
        return models.efficientnet_b0(pretrained=False)

class ImgNet(nn.Module):
    """Enhanced image network"""
    def __init__(self):
        super().__init__()
        backbone = safe_efficientnet(True)
        # Enhanced classifier with batch normalization
        backbone.classifier = nn.Sequential(
            nn.Dropout(0.3),
            nn.Linear(1280, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, 2)
        )
        self.net = backbone
    
    def forward(self, x):
        return self.net(x)

class VidNet(nn.Module):
    """Enhanced video network with gradient clipping"""
    def __init__(self):
        super().__init__()
        backbone = safe_efficientnet(False)
        backbone.classifier = nn.Identity()
        self.backbone = backbone
        
        # Optimized LSTM
        self.lstm = nn.LSTM(
            input_size=1280, 
            hidden_size=256, 
            num_layers=2, 
            batch_first=True, 
            dropout=0.3,
            bidirectional=False
        )
        
        # Enhanced classifier
        self.classifier = nn.Sequential(
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 2)
        )
    
    def forward(self, x):
        batch_size, num_frames, c, h, w = x.shape
        
        # Process frames
        x = x.view(batch_size * num_frames, c, h, w)
        features = self.backbone(x)
        features = features.view(batch_size, num_frames, -1)
        
        # LSTM processing
        lstm_out, (hidden, _) = self.lstm(features)
        
        # Use last hidden state
        return self.classifier(hidden[-1])

class AudNet(nn.Module):
    """Enhanced audio network"""
    def __init__(self):
        super().__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.BatchNorm2d(32),
            
            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.BatchNorm2d(64),
            
            nn.Conv2d(64, 128, 3, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.BatchNorm2d(128)
        )
        
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 2)
        )
    
    def forward(self, x):
        x = self.conv_layers(x)
        return self.classifier(x)


In [5]:
# Cell 5: Smart Data Loading - Only Load Remaining Datasets
def prepare_remaining_dataloaders():
    """Prepare dataloaders only for datasets that need training"""
    
    existing_models = check_existing_models()
    dataloaders = {}
    
    print(f"\nExisting models: {existing_models}")
    print("Preparing dataloaders for remaining datasets...\n")
    
    # 1. FF-C23 (video) - Skip if already trained
    if "ff_c23_video" not in existing_models:
        pkl_path = os.path.join(CONFIG["data_path"], "ff_c23_processed.pkl")
        if os.path.exists(pkl_path):
            print("Loading ff_c23 video dataset...")
            df = pd.read_pickle(pkl_path)
            df = df[df["frames"].apply(lambda x: isinstance(x, list) and len(x) > 0)]
            
            if len(df) > 0:
                train_df, val_df = train_test_split(
                    df, test_size=0.2, stratify=df["label"], random_state=CONFIG["seed"]
                )
                
                video_transform = T.Compose([
                    T.Resize(CONFIG["img_size"]),
                    T.ToTensor(),
                    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                ])
                
                dataloaders["ff_c23"] = {
                    "train": DataLoader(VideoDataset(train_df, video_transform), 
                                      CONFIG["video_batch"], shuffle=True, 
                                      num_workers=2, drop_last=True, pin_memory=True),
                    "val": DataLoader(VideoDataset(val_df, video_transform), 
                                    CONFIG["video_batch"], shuffle=False, 
                                    num_workers=2, pin_memory=True),
                    "type": "video"
                }
                print(f"  ff_c23 prepared: {len(train_df)} train, {len(val_df)} val")
    
    # 2. CelebDF-v2 (image) - Skip if already trained  
    if "celebdf_v2_image" not in existing_models:
        csv_path = os.path.join(CONFIG["data_path"], "celebdf_v2_processed.csv")
        if os.path.exists(csv_path):
            print("Loading celebdf_v2 image dataset...")
            df = load_and_filter_csv(csv_path, "image_path")
            
            if len(df) > 0:
                train_df, val_df = train_test_split(
                    df, test_size=0.2, stratify=df["label"], random_state=CONFIG["seed"]
                )
                
                train_transform = T.Compose([
                    T.Resize(256),
                    T.RandomCrop(CONFIG["img_size"]),
                    T.RandomHorizontalFlip(0.5),
                    T.ColorJitter(brightness=0.2, contrast=0.2),
                    T.ToTensor(),
                    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                ])
                
                val_transform = T.Compose([
                    T.Resize(CONFIG["img_size"]),
                    T.ToTensor(),
                    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                ])
                
                dataloaders["celebdf_v2"] = {
                    "train": DataLoader(ImageDataset(train_df, train_transform), 
                                      CONFIG["batch_size"], shuffle=True, 
                                      num_workers=2, pin_memory=True),
                    "val": DataLoader(ImageDataset(val_df, val_transform), 
                                    CONFIG["batch_size"], shuffle=False, 
                                    num_workers=2, pin_memory=True),
                    "type": "image"
                }
                print(f"  celebdf_v2 prepared: {len(train_df)} train, {len(val_df)} val")
    
    # 3. DFDC Faces (image) - Skip if already trained
    if "dfdc_faces_image" not in existing_models:
        csv_path = os.path.join(CONFIG["data_path"], "dfdc_faces_processed.csv")
        if os.path.exists(csv_path):
            print("Loading dfdc_faces image dataset...")
            df = load_and_filter_csv(csv_path, "image_path")
            
            if len(df) > 0:
                train_df, val_df = train_test_split(
                    df, test_size=0.2, stratify=df["label"], random_state=CONFIG["seed"]
                )
                
                transform = T.Compose([
                    T.Resize(CONFIG["img_size"]),
                    T.ToTensor(),
                    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                ])
                
                dataloaders["dfdc_faces"] = {
                    "train": DataLoader(ImageDataset(train_df, transform), 
                                      CONFIG["batch_size"], shuffle=True, 
                                      num_workers=2, pin_memory=True),
                    "val": DataLoader(ImageDataset(val_df, transform), 
                                    CONFIG["batch_size"], shuffle=False, 
                                    num_workers=2, pin_memory=True),
                    "type": "image"
                }
                print(f"  dfdc_faces prepared: {len(train_df)} train, {len(val_df)} val")

    # Image: asvspoof_21_cqt
    if "asvspoof_21_cqt_image" not in existing_models:
        csv_path = os.path.join(CONFIG["data_path"], "asvspoof_21_cqt_processed.csv")
        if os.path.exists(csv_path):
            print("Loading asvspoof_21_cqt image dataset...")
            df = load_and_filter_csv(csv_path, "image_path")
            if len(df):
                train_df, val_df = train_test_split(
                    df, test_size=0.2, stratify=df["label"], random_state=CONFIG["seed"]
                )
                train_tf = T.Compose([
                    T.Resize(256),
                    T.RandomCrop(CONFIG["img_size"]),
                    T.RandomHorizontalFlip(0.5),
                    T.ColorJitter(0.3, 0.3, 0.3, 0.1),
                    T.ToTensor(),
                    T.Normalize([0.485, 0.456, 0.406],
                                [0.229, 0.224, 0.225])
                ])
                val_tf = T.Compose([
                    T.Resize(CONFIG["img_size"]),
                    T.ToTensor(),
                    T.Normalize([0.485, 0.456, 0.406],
                                [0.229, 0.224, 0.225])
                ])
                weights = compute_class_weight(
                    "balanced",
                    classes=np.unique(train_df["label"]),
                    y=train_df["label"]
                )
                sampler = WeightedRandomSampler(
                    [weights[label] for label in train_df["label"]],
                    len(train_df)
                )
                dataloaders["asvspoof_21_cqt_image"] = {
                    "train": DataLoader(ImageDataset(train_df, train_tf),
                                        batch_size=CONFIG["batch_size"],
                                        shuffle=False,   # must be False with sampler
                                        sampler=sampler,
                                        num_workers=2,
                                        pin_memory=True,
                                        drop_last=True),
                    "val": DataLoader(ImageDataset(val_df, val_tf),
                                      batch_size=CONFIG["batch_size"],
                                      shuffle=False,
                                      num_workers=2,
                                      pin_memory=True),
                    "type": "image"
                }
                print(f"  asvspoof_21_cqt_image prepared: {len(train_df)} train, {len(val_df)} val")
    
    # 4. In-the-Wild Audio (audio) - Skip if already trained
    if "in_the_wild_audio_audio" not in existing_models:
        csv_path = os.path.join(CONFIG["data_path"], "in_the_wild_audio_processed.csv")
        if os.path.exists(csv_path):
            print("Loading in_the_wild_audio dataset...")
            df = load_and_filter_csv(csv_path, "audio_path")
            
            if len(df) > 0:
                train_df, val_df = train_test_split(
                    df, test_size=0.2, stratify=df["label"], random_state=CONFIG["seed"]
                )
                
                dataloaders["in_the_wild_audio"] = {
                    "train": DataLoader(AudioDataset(train_df), 
                                      CONFIG["batch_size"], shuffle=True, 
                                      num_workers=2, pin_memory=True),
                    "val": DataLoader(AudioDataset(val_df), 
                                    CONFIG["batch_size"], shuffle=False, 
                                    num_workers=2, pin_memory=True),
                    "type": "audio"
                }
                print(f"  in_the_wild_audio prepared: {len(train_df)} train, {len(val_df)} val")
    
    print(f"\nDataloaders prepared for: {list(dataloaders.keys())}")
    print(f"Skipped {len(existing_models)} already trained models")
    
    return dataloaders

# Prepare dataloaders
dataloaders = prepare_remaining_dataloaders()


Found existing model: ff_c23_video_detector
Found existing model: dfdc_faces_image
Found existing model: celebdf_v2_image

Existing models: {'ff_c23_video_detector', 'celebdf_v2_image', 'dfdc_faces_image'}
Preparing dataloaders for remaining datasets...

Loading ff_c23 video dataset...
  ff_c23 prepared: 4000 train, 1000 val
Loading asvspoof_21_cqt image dataset...
asvspoof_21_cqt_processed.csv: 611828 → 611828 samples (0 missing)
  Label distribution: {1: 589211, 0: 22617}
  asvspoof_21_cqt_image prepared: 489462 train, 122366 val
Loading in_the_wild_audio dataset...
in_the_wild_audio_processed.csv: 31779 → 31779 samples (0 missing)
  Label distribution: {0: 19963, 1: 11816}
  in_the_wild_audio prepared: 25423 train, 6356 val

Dataloaders prepared for: ['ff_c23', 'asvspoof_21_cqt_image', 'in_the_wild_audio']
Skipped 3 already trained models


In [6]:
# Cell 6: Optimized Training Function with Better Monitoring
def plot_history(train_losses, val_losses, train_accs, val_accs, name):
    """Plot training history"""
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
    
    ax1.plot(train_losses, label="Train", color='blue')
    ax1.plot(val_losses, label="Validation", color='red')
    ax1.set_title(f"{name} - Loss")
    ax1.set_xlabel("Epoch")
    ax1.set_ylabel("Loss")
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    
    ax2.plot(train_accs, label="Train", color='blue')
    ax2.plot(val_accs, label="Validation", color='red')
    ax2.set_title(f"{name} - Accuracy")
    ax2.set_xlabel("Epoch")
    ax2.set_ylabel("Accuracy")
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig(os.path.join(CONFIG["models_path"], f"{name}_history.png"), dpi=150, bbox_inches='tight')
    plt.close()  # Important: close to free memory

def train_model_optimized(model, dataloaders_dict, name):
    """Optimized training function"""
    device = CONFIG["device"]
    model.to(device)
    
    # Adaptive loss function
    if "asvspoof" in name:
        # Heavy weighting for imbalanced asvspoof dataset
        weights = torch.tensor([1.0, 0.05]).to(device)
        criterion = nn.CrossEntropyLoss(weight=weights)
    else:
        criterion = nn.CrossEntropyLoss(label_smoothing=0.1)  # Label smoothing for regularization
    
    # Adaptive learning rate and optimizer
    if dataloaders_dict["type"] == "video":
        lr = CONFIG["lr"] * 0.5  # Lower LR for video
        optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-4)
    else:
        optimizer = optim.AdamW(model.parameters(), lr=CONFIG["lr"], weight_decay=1e-4)
    
    # Enhanced scheduler
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='max', factor=0.5, patience=3, verbose=True
    )
    
    # Training tracking
    train_losses, val_losses = [], []
    train_accs, val_accs = [], []
    best_val_acc = 0.0
    patience_counter = 0
    max_patience = 5
    
    print(f"\nStarting optimized training for {name}")
    print(f"Train samples: {len(dataloaders_dict['train'].dataset)}")
    print(f"Val samples: {len(dataloaders_dict['val'].dataset)}")
    print(f"Batch size: {dataloaders_dict['train'].batch_size}")
    
    for epoch in range(CONFIG["epochs"]):
        epoch_start_time = time.time()
        print(f"\nEpoch {epoch + 1}/{CONFIG['epochs']}")
        print("-" * 50)
        
        # Training phase
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        
        train_bar = tqdm(dataloaders_dict["train"], desc=f"Training {name}", leave=False)
        
        for batch_idx, (inputs, labels) in enumerate(train_bar):
            inputs, labels = inputs.to(device, non_blocking=True), labels.to(device, non_blocking=True)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            
            # Gradient clipping for stability
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            
            optimizer.step()
            
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            # Update progress bar
            current_acc = 100 * correct / total
            train_bar.set_postfix({
                'Loss': f'{loss.item():.4f}',
                'Acc': f'{current_acc:.2f}%',
                'LR': f'{optimizer.param_groups[0]["lr"]:.2e}'
            })
            
            # Memory cleanup every 100 batches
            if batch_idx % 100 == 0:
                free_mem()
        
        epoch_loss = running_loss / len(dataloaders_dict["train"])
        epoch_acc = 100 * correct / total
        
        train_losses.append(epoch_loss)
        train_accs.append(epoch_acc)
        
        # Validation phase
        model.eval()
        val_running_loss = 0.0
        val_correct = 0
        val_total = 0
        
        with torch.no_grad():
            val_bar = tqdm(dataloaders_dict["val"], desc="Validation", leave=False)
            for inputs, labels in val_bar:
                inputs, labels = inputs.to(device, non_blocking=True), labels.to(device, non_blocking=True)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                
                val_running_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()
                
                val_bar.set_postfix({'Val Loss': f'{loss.item():.4f}'})
        
        val_epoch_loss = val_running_loss / len(dataloaders_dict["val"])
        val_epoch_acc = 100 * val_correct / val_total
        
        val_losses.append(val_epoch_loss)
        val_accs.append(val_epoch_acc)
        
        # Update scheduler
        scheduler.step(val_epoch_acc)
        
        # Epoch summary
        epoch_time = time.time() - epoch_start_time
        show_mem(f"Epoch {epoch+1}")
        print(f"Train - Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.2f}%")
        print(f"Val   - Loss: {val_epoch_loss:.4f}, Acc: {val_epoch_acc:.2f}%")
        print(f"Time: {epoch_time:.1f}s")
        
        # Save best model
        if val_epoch_acc > best_val_acc:
            best_val_acc = val_epoch_acc
            patience_counter = 0
            
            # Save comprehensive checkpoint
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),
                'best_val_acc': best_val_acc,
                'train_losses': train_losses,
                'val_losses': val_losses,
                'train_accs': train_accs,
                'val_accs': val_accs,
                'config': CONFIG
            }, os.path.join(CONFIG["models_path"], f"{name}_best.pth"))
            
            print(f"🎉 New best model saved! Val Acc: {best_val_acc:.2f}%")
        else:
            patience_counter += 1
        
        # Early stopping check
        if patience_counter >= max_patience:
            print(f"Early stopping triggered after {patience_counter} epochs without improvement")
            break
        
        # Aggressive memory cleanup
        free_mem()
    
    # Final summary
    print(f"\n✅ Training completed for {name}!")
    print(f"Best validation accuracy: {best_val_acc:.2f}%")
    
    # Plot and save training history
    plot_history(train_losses, val_losses, train_accs, val_accs, name)
    
    return best_val_acc

# Add time import
import time


In [7]:
# Cell 7: Execute Training for Remaining Datasets
results = {}
total_start_time = time.time()

print(f"Starting training session at {time.strftime('%Y-%m-%d %H:%M:%S')}")
print(f"Target completion time: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time() + 10*3600))}")  # 10 hours from now

for dataset_name, dataloader_info in dataloaders.items():
    dataset_start_time = time.time()
    print(f"\n{'='*70}")
    print(f"🚀 Training {dataset_name.upper()} ({dataloader_info['type']})")
    print(f"{'='*70}")
    
    try:
        # Initialize appropriate model
        if dataloader_info["type"] == "image":
            model = ImgNet()
            model_name = f"{dataset_name}_image"
        elif dataloader_info["type"] == "video":
            model = VidNet()
            model_name = f"{dataset_name}_video"
        elif dataloader_info["type"] == "audio":
            model = AudNet()
            model_name = f"{dataset_name}_audio"
        else:
            print(f"❌ Unknown data type: {dataloader_info['type']}")
            continue
        
        # Count parameters
        total_params = sum(p.numel() for p in model.parameters())
        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        print(f"Model parameters: {total_params:,} total, {trainable_params:,} trainable")
        
        # Train model
        best_acc = train_model_optimized(model, dataloader_info, model_name)
        results[model_name] = best_acc
        
        # Cleanup
        del model
        free_mem()
        
        dataset_time = time.time() - dataset_start_time
        print(f"✅ {model_name} completed in {dataset_time/60:.1f} minutes")
        print(f"   Best accuracy: {best_acc:.2f}%")
        
    except Exception as e:
        print(f"❌ Error training {dataset_name}: {str(e)}")
        import traceback
        traceback.print_exc()
        
        # Cleanup on error
        free_mem()
        continue

total_time = time.time() - total_start_time
print(f"\n{'='*80}")
print("🎯 TRAINING SUMMARY")
print(f"{'='*80}")
print(f"Total time: {total_time/3600:.2f} hours ({total_time/60:.1f} minutes)")
print(f"Completed at: {time.strftime('%Y-%m-%d %H:%M:%S')}")

for model_name, accuracy in results.items():
    print(f"📊 {model_name}: {accuracy:.2f}% accuracy")

print(f"\n🎉 Successfully trained {len(results)} models!")


Starting training session at 2025-07-19 17:01:08
Target completion time: 2025-07-20 03:01:08

🚀 Training FF_C23 (video)
Attempting to load EfficientNet with pretrained weights...
Model parameters: 6,142,206 total, 6,142,206 trainable

Starting optimized training for ff_c23_video
Train samples: 4000
Val samples: 1000
Batch size: 6

Epoch 1/12
--------------------------------------------------


Training ff_c23_video:   0%|          | 0/666 [00:00<?, ?it/s]

Validation:   0%|          | 0/167 [00:00<?, ?it/s]

Epoch 1 CPU 22.0% | GPU Allocated 0.13GB | Reserved 3.15GB
Train - Loss: 0.5797, Acc: 79.58%
Val   - Loss: 0.5478, Acc: 80.00%
Time: 106.1s
🎉 New best model saved! Val Acc: 80.00%

Epoch 2/12
--------------------------------------------------


Training ff_c23_video:   0%|          | 0/666 [00:00<?, ?it/s]

Validation:   0%|          | 0/167 [00:00<?, ?it/s]

Epoch 2 CPU 22.9% | GPU Allocated 0.13GB | Reserved 3.15GB
Train - Loss: 0.5707, Acc: 80.01%
Val   - Loss: 0.5408, Acc: 80.00%
Time: 122.2s

Epoch 3/12
--------------------------------------------------


Training ff_c23_video:   0%|          | 0/666 [00:00<?, ?it/s]

Validation:   0%|          | 0/167 [00:00<?, ?it/s]

Epoch 3 CPU 22.9% | GPU Allocated 0.13GB | Reserved 3.15GB
Train - Loss: 0.5662, Acc: 79.95%
Val   - Loss: 0.5481, Acc: 80.00%
Time: 122.7s

Epoch 4/12
--------------------------------------------------


Training ff_c23_video:   0%|          | 0/666 [00:00<?, ?it/s]

Validation:   0%|          | 0/167 [00:00<?, ?it/s]

Epoch 4 CPU 22.8% | GPU Allocated 0.13GB | Reserved 3.15GB
Train - Loss: 0.5615, Acc: 80.01%
Val   - Loss: 0.5595, Acc: 80.00%
Time: 123.0s

Epoch 5/12
--------------------------------------------------


Training ff_c23_video:   0%|          | 0/666 [00:00<?, ?it/s]

Validation:   0%|          | 0/167 [00:00<?, ?it/s]

Epoch 5 CPU 22.9% | GPU Allocated 0.13GB | Reserved 3.15GB
Train - Loss: 0.5619, Acc: 80.01%
Val   - Loss: 0.5398, Acc: 80.00%
Time: 122.8s

Epoch 6/12
--------------------------------------------------


Training ff_c23_video:   0%|          | 0/666 [00:00<?, ?it/s]

Validation:   0%|          | 0/167 [00:00<?, ?it/s]

Epoch 6 CPU 22.8% | GPU Allocated 0.13GB | Reserved 3.15GB
Train - Loss: 0.5516, Acc: 79.98%
Val   - Loss: 0.5470, Acc: 80.00%
Time: 122.8s
Early stopping triggered after 5 epochs without improvement

✅ Training completed for ff_c23_video!
Best validation accuracy: 80.00%
✅ ff_c23_video completed in 13.2 minutes
   Best accuracy: 80.00%

🚀 Training ASVSPOOF_21_CQT_IMAGE (image)
Attempting to load EfficientNet with pretrained weights...


Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 210MB/s]


Model parameters: 4,665,470 total, 4,665,470 trainable

Starting optimized training for asvspoof_21_cqt_image_image
Train samples: 489462
Val samples: 122366
Batch size: 24

Epoch 1/12
--------------------------------------------------


Training asvspoof_21_cqt_image_image:   0%|          | 0/20394 [00:00<?, ?it/s]

Validation:   0%|          | 0/5099 [00:00<?, ?it/s]

Epoch 1 CPU 23.0% | GPU Allocated 0.10GB | Reserved 2.56GB
Train - Loss: 0.0604, Acc: 86.17%
Val   - Loss: 0.3391, Acc: 84.58%
Time: 3790.4s
🎉 New best model saved! Val Acc: 84.58%

Epoch 2/12
--------------------------------------------------


Training asvspoof_21_cqt_image_image:   0%|          | 0/20394 [00:00<?, ?it/s]

Validation:   0%|          | 0/5099 [00:00<?, ?it/s]

Epoch 2 CPU 24.2% | GPU Allocated 0.10GB | Reserved 2.58GB
Train - Loss: 0.0286, Acc: 94.27%
Val   - Loss: 0.6431, Acc: 78.32%
Time: 3439.8s

Epoch 3/12
--------------------------------------------------


Training asvspoof_21_cqt_image_image:   0%|          | 0/20394 [00:00<?, ?it/s]

Validation:   0%|          | 0/5099 [00:00<?, ?it/s]

Epoch 3 CPU 24.2% | GPU Allocated 0.10GB | Reserved 2.58GB
Train - Loss: 0.0214, Acc: 95.86%
Val   - Loss: 0.4167, Acc: 84.55%
Time: 3367.5s

Epoch 4/12
--------------------------------------------------


Training asvspoof_21_cqt_image_image:   0%|          | 0/20394 [00:00<?, ?it/s]

Validation:   0%|          | 0/5099 [00:00<?, ?it/s]

Epoch 4 CPU 24.2% | GPU Allocated 0.10GB | Reserved 2.58GB
Train - Loss: 0.0171, Acc: 96.70%
Val   - Loss: 0.1494, Acc: 93.25%
Time: 3332.4s
🎉 New best model saved! Val Acc: 93.25%

Epoch 5/12
--------------------------------------------------


Training asvspoof_21_cqt_image_image:   0%|          | 0/20394 [00:00<?, ?it/s]

Validation:   0%|          | 0/5099 [00:00<?, ?it/s]

Epoch 5 CPU 24.2% | GPU Allocated 0.10GB | Reserved 2.58GB
Train - Loss: 0.0146, Acc: 97.29%
Val   - Loss: 0.3166, Acc: 87.66%
Time: 3454.5s

Epoch 6/12
--------------------------------------------------


Training asvspoof_21_cqt_image_image:   0%|          | 0/20394 [00:00<?, ?it/s]

Validation:   0%|          | 0/5099 [00:00<?, ?it/s]

Epoch 6 CPU 24.2% | GPU Allocated 0.10GB | Reserved 2.58GB
Train - Loss: 0.0131, Acc: 97.55%
Val   - Loss: 0.1794, Acc: 91.44%
Time: 3537.3s

Epoch 7/12
--------------------------------------------------


Training asvspoof_21_cqt_image_image:   0%|          | 0/20394 [00:00<?, ?it/s]

Validation:   0%|          | 0/5099 [00:00<?, ?it/s]

Epoch 7 CPU 24.3% | GPU Allocated 0.10GB | Reserved 2.58GB
Train - Loss: 0.0116, Acc: 97.84%
Val   - Loss: 0.3746, Acc: 86.52%
Time: 3336.0s

Epoch 8/12
--------------------------------------------------


Training asvspoof_21_cqt_image_image:   0%|          | 0/20394 [00:00<?, ?it/s]

Validation:   0%|          | 0/5099 [00:00<?, ?it/s]

Epoch 8 CPU 24.2% | GPU Allocated 0.10GB | Reserved 2.58GB
Train - Loss: 0.0105, Acc: 98.06%
Val   - Loss: 0.1680, Acc: 92.44%
Time: 3340.4s

Epoch 9/12
--------------------------------------------------


Training asvspoof_21_cqt_image_image:   0%|          | 0/20394 [00:00<?, ?it/s]

Validation:   0%|          | 0/5099 [00:00<?, ?it/s]

Epoch 9 CPU 24.3% | GPU Allocated 0.10GB | Reserved 2.58GB
Train - Loss: 0.0055, Acc: 99.05%
Val   - Loss: 0.2500, Acc: 92.02%
Time: 3301.6s
Early stopping triggered after 5 epochs without improvement

✅ Training completed for asvspoof_21_cqt_image_image!
Best validation accuracy: 93.25%
✅ asvspoof_21_cqt_image_image completed in 516.7 minutes
   Best accuracy: 93.25%

🚀 Training IN_THE_WILD_AUDIO (audio)
Model parameters: 101,506 total, 101,506 trainable

Starting optimized training for in_the_wild_audio_audio
Train samples: 25423
Val samples: 6356
Batch size: 24

Epoch 1/12
--------------------------------------------------


Training in_the_wild_audio_audio:   0%|          | 0/1060 [00:00<?, ?it/s]

Validation:   0%|          | 0/265 [00:00<?, ?it/s]

Epoch 1 CPU 24.8% | GPU Allocated 0.02GB | Reserved 0.33GB
Train - Loss: 0.2663, Acc: 97.18%
Val   - Loss: 0.3603, Acc: 89.18%
Time: 343.8s
🎉 New best model saved! Val Acc: 89.18%

Epoch 2/12
--------------------------------------------------


Training in_the_wild_audio_audio:   0%|          | 0/1060 [00:00<?, ?it/s]

Validation:   0%|          | 0/265 [00:00<?, ?it/s]

Epoch 2 CPU 25.3% | GPU Allocated 0.02GB | Reserved 0.33GB
Train - Loss: 0.2337, Acc: 98.91%
Val   - Loss: 0.2818, Acc: 94.70%
Time: 236.2s
🎉 New best model saved! Val Acc: 94.70%

Epoch 3/12
--------------------------------------------------


Training in_the_wild_audio_audio:   0%|          | 0/1060 [00:00<?, ?it/s]

Validation:   0%|          | 0/265 [00:00<?, ?it/s]

Epoch 3 CPU 25.3% | GPU Allocated 0.02GB | Reserved 0.33GB
Train - Loss: 0.2286, Acc: 99.13%
Val   - Loss: 0.6768, Acc: 65.62%
Time: 149.2s

Epoch 4/12
--------------------------------------------------


Training in_the_wild_audio_audio:   0%|          | 0/1060 [00:00<?, ?it/s]

Validation:   0%|          | 0/265 [00:00<?, ?it/s]

Epoch 4 CPU 25.3% | GPU Allocated 0.02GB | Reserved 0.33GB
Train - Loss: 0.2254, Acc: 99.13%
Val   - Loss: 0.2110, Acc: 99.62%
Time: 145.5s
🎉 New best model saved! Val Acc: 99.62%

Epoch 5/12
--------------------------------------------------


Training in_the_wild_audio_audio:   0%|          | 0/1060 [00:00<?, ?it/s]

Validation:   0%|          | 0/265 [00:00<?, ?it/s]

Epoch 5 CPU 25.3% | GPU Allocated 0.02GB | Reserved 0.33GB
Train - Loss: 0.2214, Acc: 99.35%
Val   - Loss: 0.2120, Acc: 99.65%
Time: 146.7s
🎉 New best model saved! Val Acc: 99.65%

Epoch 6/12
--------------------------------------------------


Training in_the_wild_audio_audio:   0%|          | 0/1060 [00:00<?, ?it/s]

Validation:   0%|          | 0/265 [00:00<?, ?it/s]

Epoch 6 CPU 25.4% | GPU Allocated 0.02GB | Reserved 0.33GB
Train - Loss: 0.2192, Acc: 99.46%
Val   - Loss: 0.2372, Acc: 99.43%
Time: 145.6s

Epoch 7/12
--------------------------------------------------


Training in_the_wild_audio_audio:   0%|          | 0/1060 [00:00<?, ?it/s]

Validation:   0%|          | 0/265 [00:00<?, ?it/s]

Epoch 7 CPU 25.4% | GPU Allocated 0.02GB | Reserved 0.33GB
Train - Loss: 0.2168, Acc: 99.61%
Val   - Loss: 0.2091, Acc: 99.72%
Time: 147.9s
🎉 New best model saved! Val Acc: 99.72%

Epoch 8/12
--------------------------------------------------


Training in_the_wild_audio_audio:   0%|          | 0/1060 [00:00<?, ?it/s]

Validation:   0%|          | 0/265 [00:00<?, ?it/s]

Epoch 8 CPU 25.4% | GPU Allocated 0.02GB | Reserved 0.33GB
Train - Loss: 0.2145, Acc: 99.70%
Val   - Loss: 0.2216, Acc: 99.15%
Time: 144.7s

Epoch 9/12
--------------------------------------------------


Training in_the_wild_audio_audio:   0%|          | 0/1060 [00:00<?, ?it/s]

Validation:   0%|          | 0/265 [00:00<?, ?it/s]

Epoch 9 CPU 25.4% | GPU Allocated 0.02GB | Reserved 0.33GB
Train - Loss: 0.2143, Acc: 99.67%
Val   - Loss: 0.2106, Acc: 99.65%
Time: 145.9s

Epoch 10/12
--------------------------------------------------


Training in_the_wild_audio_audio:   0%|          | 0/1060 [00:00<?, ?it/s]

Validation:   0%|          | 0/265 [00:00<?, ?it/s]

Epoch 10 CPU 25.5% | GPU Allocated 0.02GB | Reserved 0.33GB
Train - Loss: 0.2137, Acc: 99.73%
Val   - Loss: 0.2111, Acc: 99.62%
Time: 210.0s

Epoch 11/12
--------------------------------------------------


Training in_the_wild_audio_audio:   0%|          | 0/1060 [00:00<?, ?it/s]

Validation:   0%|          | 0/265 [00:00<?, ?it/s]

Epoch 11 CPU 25.5% | GPU Allocated 0.02GB | Reserved 0.33GB
Train - Loss: 0.2130, Acc: 99.73%
Val   - Loss: 0.2048, Acc: 99.80%
Time: 149.4s
🎉 New best model saved! Val Acc: 99.80%

Epoch 12/12
--------------------------------------------------


Training in_the_wild_audio_audio:   0%|          | 0/1060 [00:00<?, ?it/s]

Validation:   0%|          | 0/265 [00:00<?, ?it/s]

Epoch 12 CPU 25.3% | GPU Allocated 0.02GB | Reserved 0.33GB
Train - Loss: 0.2124, Acc: 99.76%
Val   - Loss: 0.2040, Acc: 99.83%
Time: 148.1s
🎉 New best model saved! Val Acc: 99.83%

✅ Training completed for in_the_wild_audio_audio!
Best validation accuracy: 99.83%
✅ in_the_wild_audio_audio completed in 37.5 minutes
   Best accuracy: 99.83%

🎯 TRAINING SUMMARY
Total time: 9.46 hours (567.4 minutes)
Completed at: 2025-07-20 02:28:34
📊 ff_c23_video: 80.00% accuracy
📊 asvspoof_21_cqt_image_image: 93.25% accuracy
📊 in_the_wild_audio_audio: 99.83% accuracy

🎉 Successfully trained 3 models!


In [8]:
# Cell 8: Verification and Final Output
print("📁 Generated model files and visualizations:")
print("-" * 50)

all_files = []
if os.path.exists(CONFIG["models_path"]):
    all_files = sorted(os.listdir(CONFIG["models_path"]))
    
    best_models = [f for f in all_files if f.endswith('_best.pth')]
    history_plots = [f for f in all_files if f.endswith('_history.png')]
    
    print(f"🏆 Best model checkpoints ({len(best_models)}):")
    for f in best_models:
        file_size = os.path.getsize(os.path.join(CONFIG["models_path"], f)) / (1024*1024)
        print(f"   ✓ {f} ({file_size:.1f} MB)")
    
    print(f"\n📈 Training history plots ({len(history_plots)}):")
    for f in history_plots:
        print(f"   ✓ {f}")
    
    print(f"\n📋 All files ({len(all_files)}):")
    for f in all_files:
        print(f"   • {f}")

else:
    print("⚠️ Models directory not found")

# Memory final cleanup
show_mem("Final")
free_mem()

print(f"\n✅ Resume training notebook completed successfully!")
print(f"💾 Save your /kaggle/working/models folder as a Kaggle Dataset to preserve your trained models")


📁 Generated model files and visualizations:
--------------------------------------------------
🏆 Best model checkpoints (3):
   ✓ asvspoof_21_cqt_image_image_best.pth (53.9 MB)
   ✓ ff_c23_video_best.pth (70.8 MB)
   ✓ in_the_wild_audio_audio_best.pth (1.2 MB)

📈 Training history plots (3):
   ✓ asvspoof_21_cqt_image_image_history.png
   ✓ ff_c23_video_history.png
   ✓ in_the_wild_audio_audio_history.png

📋 All files (6):
   • asvspoof_21_cqt_image_image_best.pth
   • asvspoof_21_cqt_image_image_history.png
   • ff_c23_video_best.pth
   • ff_c23_video_history.png
   • in_the_wild_audio_audio_best.pth
   • in_the_wild_audio_audio_history.png
Final CPU 22.7% | GPU Allocated 0.02GB | Reserved 0.03GB

✅ Resume training notebook completed successfully!
💾 Save your /kaggle/working/models folder as a Kaggle Dataset to preserve your trained models
