In [1]:
import os
import glob
import gc
import numpy as np
import pandas as pd
from PIL import Image
from tqdm.auto import tqdm
from collections import defaultdict
from scipy.ndimage import gaussian_filter1d
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Pick the compute device once; later cells move models and batches to DEVICE.
_cuda_available = torch.cuda.is_available()
DEVICE = torch.device('cuda' if _cuda_available else 'cpu')
print(f"Device: {DEVICE}")
if _cuda_available:
    # Report the first visible GPU and its total memory in decimal gigabytes.
    _gpu_total_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {_gpu_total_bytes/1e9:.1f} GB")

Device: cuda
GPU: Tesla T4
Memory: 15.8 GB


In [2]:
# Paths
# The dataset root is taken to be the first entry found under BASE_PATH.
# NOTE(review): os.listdir() returns entries in arbitrary order, so this is
# only well-defined if BASE_PATH contains exactly one entry -- confirm.
BASE_PATH = "/kaggle/input/pixel-play-26"
DATA_ROOT = os.path.join(BASE_PATH, os.listdir(BASE_PATH)[0])
AVENUE_PATH = os.path.join(DATA_ROOT, "Avenue_Corrupted", "Dataset")
TRAIN_VIDEOS = os.path.join(AVENUE_PATH, "training_videos")
TEST_VIDEOS = os.path.join(AVENUE_PATH, "testing_videos")
OUTPUT_DIR = "/kaggle/working"

# Multi-scale configuration
# One entry per autoencoder scale:
#   size       -- (H, W) every frame is resized to before training/scoring
#   latent     -- bottleneck width passed to the model constructor
#   batch_size -- used for both training and scoring at that scale
SCALES = {
    'small': {'size': (64, 64), 'latent': 64, 'batch_size': 64},
    'medium': {'size': (128, 128), 'latent': 128, 'batch_size': 32},
    'large': {'size': (256, 256), 'latent': 256, 'batch_size': 8},  # smallest batch for the largest frames
}

# Training hyper-parameters shared by every scale.
#   num_epochs    -- upper bound on epochs per model
#   learning_rate -- Adam learning rate
#   patience      -- epochs without improvement of the training loss before stopping
#   seeds         -- one model is trained per seed and their scores are averaged
CONFIG = {
    'num_epochs': 25,
    'learning_rate': 2e-4,
    'patience': 5,
    'seeds': [42, 123],  # 2 seeds per scale
}

print("Memory-optimized configuration ready")

Memory-optimized configuration ready


In [3]:
def discover_frames(video_dir):
    """Index the JPEG frames below ``video_dir`` by numeric video id.

    Expected layout: ``video_dir/<video id>/<frame>.jpg`` where the directory
    name parses as an integer and the frame file stem is either an integer or
    ``frame_<integer>``. Entries that do not match are skipped.

    Args:
        video_dir: Directory containing one sub-directory per video.

    Returns:
        A plain ``dict`` mapping ``int`` video id to a list of
        ``(frame_number, path)`` tuples sorted by frame number. A video
        directory without any usable frame maps to an empty list. If
        ``video_dir`` does not exist, an empty ``dict`` is returned.
    """
    # Always hand back a plain dict: a defaultdict would silently create
    # entries when a caller looks up a video id that was never discovered.
    if not os.path.exists(video_dir):
        return {}

    prefix = 'frame_'
    frames = defaultdict(list)
    for video_path in sorted(glob.glob(os.path.join(video_dir, '*'))):
        if not os.path.isdir(video_path):
            continue
        try:
            vid = int(os.path.basename(video_path))
        except ValueError:
            # Directory name is not a video id.
            continue

        entries = frames[vid]  # registers the video even if no frame parses
        for frame_path in sorted(glob.glob(os.path.join(video_path, '*.jpg'))):
            stem = os.path.splitext(os.path.basename(frame_path))[0]
            # Strip only a leading "frame_"; a blanket replace() would also
            # delete later occurrences and fuse unrelated digits together.
            if stem.startswith(prefix):
                stem = stem[len(prefix):]
            try:
                entries.append((int(stem), frame_path))
            except ValueError:
                # File stem is not a frame number.
                continue
        # Stable sort: frames with equal numbers keep their path order.
        entries.sort(key=lambda item: item[0])
    return dict(frames)

# Index both splits once; later cells reuse these dictionaries at every scale.
train_frames = discover_frames(TRAIN_VIDEOS)
test_frames = discover_frames(TEST_VIDEOS)
print(f"Train: {len(train_frames)} videos, {sum(len(v) for v in train_frames.values())} frames")
print(f"Test: {len(test_frames)} videos, {sum(len(v) for v in test_frames.values())} frames")

# Flat (video id, frame number) listing in ascending video order, plus the
# matching "<video>_<frame>" ids used as keys for every score dictionary.
test_frame_info = [
    (vid, fnum)
    for vid in sorted(test_frames.keys())
    for fnum, _ in test_frames[vid]
]
test_frame_ids = [f"{vid}_{fnum}" for vid, fnum in test_frame_info]

# Per-video list of unique frame numbers in ascending order (used for smoothing).
test_video_fnums = defaultdict(list)
for vid, fnum in test_frame_info:
    test_video_fnums[vid].append(fnum)
for vid in list(test_video_fnums):
    test_video_fnums[vid] = sorted(set(test_video_fnums[vid]))

print(f"Test IDs: {len(test_frame_ids)}")

Train: 16 videos, 9204 frames
Test: 21 videos, 11706 frames
Test IDs: 11706


In [4]:
def load_to_cpu(frames_dict, image_size):
    """Decode every frame into one float32 tensor held in host memory.

    Args:
        frames_dict: Mapping of video id to a list of ``(frame_number, path)``.
        image_size: ``(H, W)`` every frame is resized to (bilinear).

    Returns:
        ``(tensors, info)`` where ``tensors`` has shape ``(N, 3, H, W)`` with
        pixel values mapped through ``x / 127.5 - 1.0``, and ``info`` lists the
        ``(video id, frame_number)`` of each row. Videos are visited in
        ascending id order, frames in the order stored in ``frames_dict``.
    """
    height, width = image_size
    n_frames = sum(len(entries) for entries in frames_dict.values())
    # Allocated up front on the CPU; batches are moved to the GPU later.
    tensors = torch.zeros(n_frames, 3, height, width, dtype=torch.float32)
    info = []

    progress = tqdm(total=n_frames, desc=f"Loading {height}x{width} to CPU")
    for vid in sorted(frames_dict.keys()):
        for fnum, path in frames_dict[vid]:
            # PIL's resize takes (width, height), the reverse of image_size.
            resized = Image.open(path).convert('RGB').resize((width, height), Image.BILINEAR)
            pixels = np.array(resized, dtype=np.float32) / 127.5 - 1.0
            # HWC -> CHW; the next free row is the number of frames stored so far.
            tensors[len(info)] = torch.from_numpy(pixels).permute(2, 0, 1)
            info.append((vid, fnum))
            progress.update(1)
    progress.close()
    return tensors, info

# Status line so the cell output shows that load_to_cpu has been defined.
print("CPU loading function ready")

CPU loading function ready


In [5]:
class SmallAE(nn.Module):
    """Convolutional autoencoder for 64x64 RGB input.

    Five stride-2 convolutions shrink the frame to a 512-channel 2x2 map, a
    linear layer compresses it to ``latent_dim``, and a mirrored stack of
    transposed convolutions rebuilds a 3-channel image ending in Tanh.
    """

    def __init__(self, latent_dim=64):
        super().__init__()
        enc_channels = (3, 32, 64, 128, 256, 512)
        dec_channels = (512, 256, 128, 64, 32)
        flat_features = 512 * 2 * 2

        # Encoder: Conv(4, stride 2, pad 1) -> BatchNorm -> LeakyReLU per stage.
        down = []
        for c_in, c_out in zip(enc_channels[:-1], enc_channels[1:]):
            down.extend([
                nn.Conv2d(c_in, c_out, kernel_size=4, stride=2, padding=1),
                nn.BatchNorm2d(c_out),
                nn.LeakyReLU(0.2, inplace=True),
            ])
        self.encoder = nn.Sequential(*down)

        self.fc_enc = nn.Sequential(nn.Flatten(), nn.Linear(flat_features, latent_dim))
        self.fc_dec = nn.Sequential(
            nn.Linear(latent_dim, flat_features),
            nn.LeakyReLU(0.2, inplace=True),
        )

        # Decoder: ConvTranspose -> BatchNorm -> ReLU per stage, then the
        # output layer back to 3 channels squashed by Tanh.
        up = []
        for c_in, c_out in zip(dec_channels[:-1], dec_channels[1:]):
            up.extend([
                nn.ConvTranspose2d(c_in, c_out, kernel_size=4, stride=2, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
            ])
        up.extend([
            nn.ConvTranspose2d(32, 3, kernel_size=4, stride=2, padding=1),
            nn.Tanh(),
        ])
        self.decoder = nn.Sequential(*up)

    def forward(self, x):
        """Encode ``x`` to the latent vector and return its reconstruction."""
        latent = self.fc_enc(self.encoder(x))
        feature_map = self.fc_dec(latent).view(-1, 512, 2, 2)
        return self.decoder(feature_map)


class MediumAE(nn.Module):
    """Convolutional autoencoder for 128x128 RGB input.

    Five stride-2 convolutions shrink the frame to a 512-channel 4x4 map, a
    linear layer compresses it to ``latent_dim``, and a mirrored stack of
    transposed convolutions rebuilds a 3-channel image ending in Tanh.
    """

    def __init__(self, latent_dim=128):
        super().__init__()
        enc_channels = (3, 32, 64, 128, 256, 512)
        dec_channels = (512, 256, 128, 64, 32)
        flat_features = 512 * 4 * 4

        # Encoder: Conv(4, stride 2, pad 1) -> BatchNorm -> LeakyReLU per stage.
        down = []
        for c_in, c_out in zip(enc_channels[:-1], enc_channels[1:]):
            down.extend([
                nn.Conv2d(c_in, c_out, kernel_size=4, stride=2, padding=1),
                nn.BatchNorm2d(c_out),
                nn.LeakyReLU(0.2, inplace=True),
            ])
        self.encoder = nn.Sequential(*down)

        self.fc_enc = nn.Sequential(nn.Flatten(), nn.Linear(flat_features, latent_dim))
        self.fc_dec = nn.Sequential(
            nn.Linear(latent_dim, flat_features),
            nn.LeakyReLU(0.2, inplace=True),
        )

        # Decoder: ConvTranspose -> BatchNorm -> ReLU per stage, then the
        # output layer back to 3 channels squashed by Tanh.
        up = []
        for c_in, c_out in zip(dec_channels[:-1], dec_channels[1:]):
            up.extend([
                nn.ConvTranspose2d(c_in, c_out, kernel_size=4, stride=2, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
            ])
        up.extend([
            nn.ConvTranspose2d(32, 3, kernel_size=4, stride=2, padding=1),
            nn.Tanh(),
        ])
        self.decoder = nn.Sequential(*up)

    def forward(self, x):
        """Encode ``x`` to the latent vector and return its reconstruction."""
        latent = self.fc_enc(self.encoder(x))
        feature_map = self.fc_dec(latent).view(-1, 512, 4, 4)
        return self.decoder(feature_map)


class LargeAE(nn.Module):
    """Convolutional autoencoder for 256x256 RGB input.

    Five stride-2 convolutions shrink the frame to a 512-channel 8x8 map, a
    linear layer compresses it to ``latent_dim``, and a mirrored stack of
    transposed convolutions rebuilds a 3-channel image ending in Tanh.
    """

    def __init__(self, latent_dim=256):
        super().__init__()
        enc_channels = (3, 32, 64, 128, 256, 512)
        dec_channels = (512, 256, 128, 64, 32)
        flat_features = 512 * 8 * 8

        # Encoder: Conv(4, stride 2, pad 1) -> BatchNorm -> LeakyReLU per stage.
        down = []
        for c_in, c_out in zip(enc_channels[:-1], enc_channels[1:]):
            down.extend([
                nn.Conv2d(c_in, c_out, kernel_size=4, stride=2, padding=1),
                nn.BatchNorm2d(c_out),
                nn.LeakyReLU(0.2, inplace=True),
            ])
        self.encoder = nn.Sequential(*down)

        self.fc_enc = nn.Sequential(nn.Flatten(), nn.Linear(flat_features, latent_dim))
        self.fc_dec = nn.Sequential(
            nn.Linear(latent_dim, flat_features),
            nn.LeakyReLU(0.2, inplace=True),
        )

        # Decoder: ConvTranspose -> BatchNorm -> ReLU per stage, then the
        # output layer back to 3 channels squashed by Tanh.
        up = []
        for c_in, c_out in zip(dec_channels[:-1], dec_channels[1:]):
            up.extend([
                nn.ConvTranspose2d(c_in, c_out, kernel_size=4, stride=2, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
            ])
        up.extend([
            nn.ConvTranspose2d(32, 3, kernel_size=4, stride=2, padding=1),
            nn.Tanh(),
        ])
        self.decoder = nn.Sequential(*up)

    def forward(self, x):
        """Encode ``x`` to the latent vector and return its reconstruction."""
        latent = self.fc_enc(self.encoder(x))
        feature_map = self.fc_dec(latent).view(-1, 512, 8, 8)
        return self.decoder(feature_map)


# Scale name -> autoencoder class built for that input resolution.
# The keys mirror the keys of SCALES so both can be indexed by the same name.
MODEL_CLASSES = {'small': SmallAE, 'medium': MediumAE, 'large': LargeAE}
print("Models defined")

Models defined


In [6]:
def train_model_cpu_data(ModelClass, latent_dim, train_tensors_cpu, seed, batch_size, config, device):
    """Train one autoencoder on CPU-resident frames, moving one batch at a time to ``device``.

    Args:
        ModelClass: ``nn.Module`` subclass constructed as ``ModelClass(latent_dim)``.
        latent_dim: Bottleneck size forwarded to the model constructor.
        train_tensors_cpu: Training tensor kept in host memory; the first
            dimension indexes samples.
        seed: Seed applied to torch (CPU and CUDA) and NumPy before the model
            is created, so initialisation and shuffling are repeatable.
        batch_size: Mini-batch size.
        config: Mapping with ``'num_epochs'``, ``'learning_rate'`` and ``'patience'``.
        device: Target device (``torch.device`` or device string).

    Returns:
        ``(model, best_loss)``. The model carries the weights of the epoch with
        the lowest mean training reconstruction loss; ``best_loss`` is that
        value, or ``inf`` if no epoch improved (in which case the weights from
        the final epoch are kept).

    Note:
        Early stopping watches the training loss itself; there is no
        validation split in this function.
    """
    torch.manual_seed(seed)
    np.random.seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Page-locked host memory only speeds up host->CUDA copies. Requesting it
    # for a CPU target is useless and makes DataLoader warn on CUDA-less hosts.
    pin_batches = torch.device(device).type == 'cuda'

    model = ModelClass(latent_dim).to(device)
    optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])
    criterion = nn.MSELoss()

    loader = DataLoader(
        TensorDataset(train_tensors_cpu),
        batch_size=batch_size,
        shuffle=True,
        pin_memory=pin_batches,
        num_workers=0,
    )

    best_loss = float('inf')
    stale_epochs = 0
    best_state = None

    for _epoch in range(config['num_epochs']):
        model.train()
        running_loss = 0.0

        for (batch_cpu,) in loader:
            batch = batch_cpu.to(device, non_blocking=True)

            optimizer.zero_grad(set_to_none=True)
            loss = criterion(model(batch), batch)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            del batch  # drop the device copy before the next batch is moved

        # Mean of the per-batch means (a shorter last batch counts as one batch).
        avg_loss = running_loss / len(loader)

        # Require an improvement of more than 1e-5 to reset the patience counter.
        if avg_loss < best_loss - 1e-5:
            best_loss = avg_loss
            stale_epochs = 0
            # Snapshot on the CPU so the checkpoint does not occupy device memory.
            best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
        else:
            stale_epochs += 1

        if stale_epochs >= config['patience']:
            break

    if best_state is not None:
        model.load_state_dict({k: v.to(device) for k, v in best_state.items()})

    return model, best_loss


@torch.no_grad()
def get_max_scores_cpu_data(model, tensors_cpu, frame_info, batch_size, device):
    """Return the peak squared reconstruction error of every frame.

    The model is switched to eval mode and fed ``tensors_cpu`` in slices of
    ``batch_size`` moved to ``device``. For each frame the score is the largest
    squared difference between input and reconstruction over all channels and
    pixels, stored under the key ``"<video>_<frame>"`` taken from the matching
    entry of ``frame_info``.
    """
    model.eval()
    n_total = len(tensors_cpu)
    scores = {}

    for offset in range(0, n_total, batch_size):
        stop = min(offset + batch_size, n_total)
        batch = tensors_cpu[offset:stop].to(device, non_blocking=True)
        recon = model(batch)
        squared_error = (batch - recon) ** 2
        # Reduce over channel, height and width: one value per frame.
        peaks = torch.amax(squared_error, dim=(1, 2, 3)).cpu().numpy()

        for row, peak in enumerate(peaks):
            vid, fnum = frame_info[offset + row]
            scores[f"{vid}_{fnum}"] = float(peak)

        # Release the device tensors before the next slice is transferred.
        del batch, recon, squared_error

    return scores

# Status line so the cell output shows that the training/scoring helpers are defined.
print("Memory-optimized training functions ready")

Memory-optimized training functions ready


In [7]:
def normalize(scores, clip_pct=99):
    """Clip scores at a percentile and rescale them to the range [0, 1].

    Args:
        scores: Mapping of key to numeric score.
        clip_pct: Percentile (0-100) used as the upper clipping bound;
            ``100`` disables upper clipping. Values below 0 are clipped to 0.

    Returns:
        A new dict with the same keys in the same order and Python ``float``
        values: min-max scaled after clipping, or ``0.5`` for every key when
        all clipped values are equal. An empty input yields an empty dict.
    """
    # np.percentile / min / max are undefined on an empty array.
    if not scores:
        return {}

    keys = list(scores.keys())
    vals = np.array(list(scores.values()))

    upper = np.percentile(vals, clip_pct)
    vals = np.clip(vals, 0, upper)

    lo, hi = vals.min(), vals.max()
    if hi > lo:
        scaled = (vals - lo) / (hi - lo)
    else:
        # Constant input carries no ranking information; use the midpoint.
        scaled = np.ones_like(vals) * 0.5
    return {key: float(value) for key, value in zip(keys, scaled)}

def ensemble_mean(score_list):
    """Average several score dicts key by key.

    The key set and order come from the first dict in ``score_list``; every
    other dict must contain those keys. Values are ``np.mean`` results.
    """
    reference = score_list[0]
    averaged = {}
    for key in reference:
        averaged[key] = np.mean([member[key] for member in score_list])
    return averaged

# Per-scale ensemble scores, keyed by scale name ('small', 'medium', 'large').
# Filled in by the per-scale processing cells; re-running this cell empties it.
scale_scores = {}

In [8]:
# Process SMALL scale
# Pipeline for this cell: load frames at this scale into host memory, train one
# autoencoder per seed, score the test frames with each, average the per-seed
# scores and store the result in scale_scores['small'].
print("=" * 60)
print("PROCESSING SMALL SCALE (64x64)")
print("=" * 60)

scale_cfg = SCALES['small']
ModelClass = MODEL_CLASSES['small']

# Load data
# train_info is returned by the loader but not used further in this cell.
train_cpu, train_info = load_to_cpu(train_frames, scale_cfg['size'])
test_cpu, test_info = load_to_cpu(test_frames, scale_cfg['size'])
print(f"Data loaded: train={train_cpu.shape}, test={test_cpu.shape}")

# Train models
all_scores = []
for seed in CONFIG['seeds']:
    print(f"  Training seed {seed}...", end=" ", flush=True)
    model, loss = train_model_cpu_data(
        ModelClass, scale_cfg['latent'], train_cpu, seed, 
        scale_cfg['batch_size'], CONFIG, DEVICE
    )
    print(f"loss={loss:.5f}")
    
    # Score
    # Per-seed scores are normalized with the default 99th-percentile clip
    # before averaging, so each seed contributes on the same [0, 1] scale.
    scores = get_max_scores_cpu_data(model, test_cpu, test_info, scale_cfg['batch_size'], DEVICE)
    all_scores.append(normalize(scores))
    
    # Drop the model before the next seed so its GPU memory can be reused.
    del model
    torch.cuda.empty_cache()

# Average over seeds, then rescale to [0, 1] without further clipping.
scale_scores['small'] = normalize(ensemble_mean(all_scores), clip_pct=100)
# Release the host tensors before the next scale is loaded.
del train_cpu, test_cpu
gc.collect()
torch.cuda.empty_cache()
print("Small scale done!")

PROCESSING SMALL SCALE (64x64)


Loading 64x64 to CPU:   0%|          | 0/9204 [00:00<?, ?it/s]

Loading 64x64 to CPU:   0%|          | 0/11706 [00:00<?, ?it/s]

Data loaded: train=torch.Size([9204, 3, 64, 64]), test=torch.Size([11706, 3, 64, 64])
  Training seed 42... loss=0.00157
  Training seed 123... loss=0.00157
Small scale done!


In [9]:
# Process MEDIUM scale
# Pipeline for this cell: load frames at this scale into host memory, train one
# autoencoder per seed, score the test frames with each, average the per-seed
# scores and store the result in scale_scores['medium'].
print("=" * 60)
print("PROCESSING MEDIUM SCALE (128x128)")
print("=" * 60)

scale_cfg = SCALES['medium']
ModelClass = MODEL_CLASSES['medium']

# train_info is returned by the loader but not used further in this cell.
train_cpu, train_info = load_to_cpu(train_frames, scale_cfg['size'])
test_cpu, test_info = load_to_cpu(test_frames, scale_cfg['size'])
print(f"Data loaded: train={train_cpu.shape}, test={test_cpu.shape}")

all_scores = []
for seed in CONFIG['seeds']:
    print(f"  Training seed {seed}...", end=" ", flush=True)
    model, loss = train_model_cpu_data(
        ModelClass, scale_cfg['latent'], train_cpu, seed, 
        scale_cfg['batch_size'], CONFIG, DEVICE
    )
    print(f"loss={loss:.5f}")
    
    # Per-seed scores are normalized with the default 99th-percentile clip
    # before averaging, so each seed contributes on the same [0, 1] scale.
    scores = get_max_scores_cpu_data(model, test_cpu, test_info, scale_cfg['batch_size'], DEVICE)
    all_scores.append(normalize(scores))
    
    # Drop the model before the next seed so its GPU memory can be reused.
    del model
    torch.cuda.empty_cache()

# Average over seeds, then rescale to [0, 1] without further clipping.
scale_scores['medium'] = normalize(ensemble_mean(all_scores), clip_pct=100)
# Release the host tensors before the next scale is loaded.
del train_cpu, test_cpu
gc.collect()
torch.cuda.empty_cache()
print("Medium scale done!")

PROCESSING MEDIUM SCALE (128x128)


Loading 128x128 to CPU:   0%|          | 0/9204 [00:00<?, ?it/s]

Loading 128x128 to CPU:   0%|          | 0/11706 [00:00<?, ?it/s]

Data loaded: train=torch.Size([9204, 3, 128, 128]), test=torch.Size([11706, 3, 128, 128])
  Training seed 42... loss=0.00159
  Training seed 123... loss=0.00161
Medium scale done!


In [10]:
# Process LARGE scale
# Pipeline for this cell: load frames at this scale into host memory, train one
# autoencoder per seed, score the test frames with each, average the per-seed
# scores and store the result in scale_scores['large'].
print("=" * 60)
print("PROCESSING LARGE SCALE (256x256)")
print("=" * 60)

scale_cfg = SCALES['large']
ModelClass = MODEL_CLASSES['large']

# NOTE(review): with the frame counts of the recorded run (9204 train + 11706
# test) these two float32 tensors need roughly 16 GB of host RAM -- confirm the
# runtime has that much available.
# train_info is returned by the loader but not used further in this cell.
train_cpu, train_info = load_to_cpu(train_frames, scale_cfg['size'])
test_cpu, test_info = load_to_cpu(test_frames, scale_cfg['size'])
print(f"Data loaded: train={train_cpu.shape}, test={test_cpu.shape}")

all_scores = []
for seed in CONFIG['seeds']:
    print(f"  Training seed {seed}...", end=" ", flush=True)
    model, loss = train_model_cpu_data(
        ModelClass, scale_cfg['latent'], train_cpu, seed, 
        scale_cfg['batch_size'], CONFIG, DEVICE
    )
    print(f"loss={loss:.5f}")
    
    # Per-seed scores are normalized with the default 99th-percentile clip
    # before averaging, so each seed contributes on the same [0, 1] scale.
    scores = get_max_scores_cpu_data(model, test_cpu, test_info, scale_cfg['batch_size'], DEVICE)
    all_scores.append(normalize(scores))
    
    # Drop the model before the next seed so its GPU memory can be reused.
    del model
    torch.cuda.empty_cache()

# Average over seeds, then rescale to [0, 1] without further clipping.
scale_scores['large'] = normalize(ensemble_mean(all_scores), clip_pct=100)
# Release the host tensors now that the last scale has been scored.
del train_cpu, test_cpu
gc.collect()
torch.cuda.empty_cache()
print("Large scale done!")

print("\n" + "=" * 60)
print("ALL SCALES PROCESSED!")
print("=" * 60)

PROCESSING LARGE SCALE (256x256)


Loading 256x256 to CPU:   0%|          | 0/9204 [00:00<?, ?it/s]

Loading 256x256 to CPU:   0%|          | 0/11706 [00:00<?, ?it/s]

Data loaded: train=torch.Size([9204, 3, 256, 256]), test=torch.Size([11706, 3, 256, 256])
  Training seed 42... loss=0.00157
  Training seed 123... loss=0.00158
Large scale done!

ALL SCALES PROCESSED!


In [11]:
def gaussian_smooth(scores, test_video_fnums, sigma):
    """Smooth frame scores along time, independently for every video.

    Args:
        scores: Mapping of ``"<video>_<frame>"`` to score.
        test_video_fnums: Mapping of video id to its ordered frame numbers.
        sigma: Standard deviation of the 1-D Gaussian kernel, in frames.
            ``0`` returns a shallow copy of ``scores`` unchanged.

    Returns:
        A dict with one float per ``(video, frame)`` listed in
        ``test_video_fnums``. Frames missing from ``scores`` enter the filter
        as 0; videos with a single frame are passed through unfiltered.
    """
    if sigma == 0:
        return scores.copy()

    smoothed = {}
    for vid, fnums in test_video_fnums.items():
        frame_keys = [f"{vid}_{fn}" for fn in fnums]
        series = np.array([scores.get(key, 0) for key in frame_keys])
        if len(series) > 1:
            series = gaussian_filter1d(series, sigma=sigma)
        for key, value in zip(frame_keys, series):
            smoothed[key] = float(value)
    return smoothed

def fuse_scales(scale_scores, weights):
    """Combine per-scale scores as a weighted sum and rescale to [0, 1].

    ``weights`` maps scale name to its weight; only the scales named there are
    used. The result has one entry per id in the module-level
    ``test_frame_ids``; an id missing from a scale contributes 0 for that scale.
    """
    combined = {}
    for fid in test_frame_ids:
        total = 0
        for scale_name, weight in weights.items():
            total += weight * scale_scores[scale_name].get(fid, 0)
        combined[fid] = total
    return normalize(combined, clip_pct=100)

def max_across_scales(scale_scores):
    """Take the highest score any scale gives a frame, then rescale to [0, 1].

    The result has one entry per id in the module-level ``test_frame_ids``;
    an id missing from a scale counts as 0 for that scale.
    """
    per_scale_tables = [scale_scores[name] for name in scale_scores.keys()]
    fused = {
        fid: max(table.get(fid, 0) for table in per_scale_tables)
        for fid in test_frame_ids
    }
    return normalize(fused, clip_pct=100)

In [12]:
# Build every submission variant: temporal smoothing (sigma=3) followed by a
# final rescale to [0, 1] is applied to each of them.
submissions = {}

# Single-scale baselines
for scale_name in SCALES.keys():
    smoothed = gaussian_smooth(scale_scores[scale_name], test_video_fnums, sigma=3)
    submissions[f'{scale_name}_only'] = normalize(smoothed, clip_pct=100)
    print(f"Created: {scale_name}_only")

# Weighted fusions: equal weights, then one that favours the medium scale
weight_presets = {
    'multiscale_equal': {'small': 1/3, 'medium': 1/3, 'large': 1/3},
    'multiscale_medium_heavy': {'small': 0.2, 'medium': 0.6, 'large': 0.2},
}
for preset_name, preset_weights in weight_presets.items():
    fused = fuse_scales(scale_scores, preset_weights)
    smoothed = gaussian_smooth(fused, test_video_fnums, sigma=3)
    submissions[preset_name] = normalize(smoothed, clip_pct=100)
    print(f"Created: {preset_name}")

# Per-frame maximum over the scales
fused = max_across_scales(scale_scores)
smoothed = gaussian_smooth(fused, test_video_fnums, sigma=3)
submissions['multiscale_max'] = normalize(smoothed, clip_pct=100)
print("Created: multiscale_max")

print(f"\nTotal: {len(submissions)} submissions")

Created: small_only
Created: medium_only
Created: large_only
Created: multiscale_equal
Created: multiscale_medium_heavy
Created: multiscale_max

Total: 6 submissions


In [13]:
def save_sub(scores, filename):
    """Write one submission CSV with columns ``Id`` and ``Predicted``.

    Rows follow the order of the module-level ``test_frame_ids``; an id missing
    from ``scores`` is written with a prediction of 0. The file is created in
    ``OUTPUT_DIR`` under ``filename``.
    """
    rows = []
    for fid in test_frame_ids:
        rows.append({'Id': fid, 'Predicted': scores.get(fid, 0)})
    target = os.path.join(OUTPUT_DIR, filename)
    pd.DataFrame(rows).to_csv(target, index=False)
    print(f"Saved: {filename}")

# Write one CSV per submission variant, named sub_<variant>.csv.
print("\nSaving submissions...")
for name, scores in submissions.items():
    save_sub(scores, f'sub_{name}.csv')


Saving submissions...
Saved: sub_small_only.csv
Saved: sub_medium_only.csv
Saved: sub_large_only.csv
Saved: sub_multiscale_equal.csv
Saved: sub_multiscale_medium_heavy.csv
Saved: sub_multiscale_max.csv
