In [1]:
import os
import glob
import gc
import numpy as np
import pandas as pd
from PIL import Image
from tqdm.auto import tqdm
from collections import defaultdict
from scipy.ndimage import gaussian_filter1d, median_filter
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {DEVICE}")

Device: cuda


In [3]:
# Paths
BASE_PATH = "/kaggle/input/pixel-play-26"
DATA_ROOT = os.path.join(BASE_PATH, os.listdir(BASE_PATH)[0])
AVENUE_PATH = os.path.join(DATA_ROOT, "Avenue_Corrupted", "Dataset")
TRAIN_VIDEOS = os.path.join(AVENUE_PATH, "training_videos")
TEST_VIDEOS = os.path.join(AVENUE_PATH, "testing_videos")
OUTPUT_DIR = "/kaggle/working"

CONFIG = {
    'image_size': (128, 128),
    'latent_dim': 128,
    'batch_size': 64,
    'num_epochs': 25,
    'learning_rate': 2e-4,
    'patience': 5,
    'seeds': [42, 123, 456, 789],
}
print("Config ready")

Config ready


In [4]:
def discover_frames(video_dir):
    frames = defaultdict(list)
    if not os.path.exists(video_dir):
        return frames
    for vf in sorted(glob.glob(os.path.join(video_dir, '*'))):
        if not os.path.isdir(vf):
            continue
        try:
            vid = int(os.path.basename(vf))
        except:
            continue
        for ff in sorted(glob.glob(os.path.join(vf, '*.jpg'))):
            fname = os.path.splitext(os.path.basename(ff))[0]
            if fname.startswith('frame_'):
                fname = fname.replace('frame_', '')
            try:
                fnum = int(fname)
                frames[vid].append((fnum, ff))
            except:
                continue
        frames[vid].sort(key=lambda x: x[0])
    return dict(frames)

train_frames = discover_frames(TRAIN_VIDEOS)
test_frames = discover_frames(TEST_VIDEOS)
print(f"Train: {sum(len(v) for v in train_frames.values())} frames")
print(f"Test: {sum(len(v) for v in test_frames.values())} frames")

Train: 9204 frames
Test: 11706 frames


In [5]:
test_frame_ids = []
test_frame_info = []
for vid in sorted(test_frames.keys()):
    for fnum, _ in test_frames[vid]:
        test_frame_ids.append(f"{vid}_{fnum}")
        test_frame_info.append((vid, fnum))

test_video_fnums = defaultdict(list)
for vid, fnum in test_frame_info:
    test_video_fnums[vid].append(fnum)
for vid in test_video_fnums:
    test_video_fnums[vid] = sorted(set(test_video_fnums[vid]))

print(f"Test IDs: {len(test_frame_ids)}")
print(f"Test videos: {len(test_video_fnums)}")

Test IDs: 11706
Test videos: 21


In [6]:
def load_to_gpu(frames_dict, image_size, device):
    total = sum(len(v) for v in frames_dict.values())
    H, W = image_size
    tensors = torch.zeros(total, 3, H, W, dtype=torch.float32, device=device)
    info = []
    idx = 0
    pbar = tqdm(total=total, desc="Loading")
    for vid in sorted(frames_dict.keys()):
        for fnum, path in frames_dict[vid]:
            img = Image.open(path).convert('RGB').resize((W, H), Image.BILINEAR)
            arr = np.array(img, dtype=np.float32) / 127.5 - 1.0
            tensors[idx] = torch.from_numpy(arr).permute(2, 0, 1)
            info.append((vid, fnum))
            idx += 1
            pbar.update(1)
    pbar.close()
    return tensors, info

print("Loading data...")
train_tensors, train_info = load_to_gpu(train_frames, CONFIG['image_size'], DEVICE)
test_tensors, test_info = load_to_gpu(test_frames, CONFIG['image_size'], DEVICE)
print(f"GPU: {torch.cuda.memory_allocated()/1e9:.2f} GB")

Loading data...


Loading:   0%|          | 0/9204 [00:00<?, ?it/s]

Loading:   0%|          | 0/11706 [00:00<?, ?it/s]

GPU: 4.11 GB


In [7]:
class SimpleAE(nn.Module):
    def __init__(self, latent_dim=128):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 32, 4, 2, 1), nn.BatchNorm2d(32), nn.LeakyReLU(0.2, True),
            nn.Conv2d(32, 64, 4, 2, 1), nn.BatchNorm2d(64), nn.LeakyReLU(0.2, True),
            nn.Conv2d(64, 128, 4, 2, 1), nn.BatchNorm2d(128), nn.LeakyReLU(0.2, True),
            nn.Conv2d(128, 256, 4, 2, 1), nn.BatchNorm2d(256), nn.LeakyReLU(0.2, True),
            nn.Conv2d(256, 512, 4, 2, 1), nn.BatchNorm2d(512), nn.LeakyReLU(0.2, True),
        )
        self.fc_enc = nn.Sequential(nn.Flatten(), nn.Linear(512*4*4, latent_dim))
        self.fc_dec = nn.Sequential(nn.Linear(latent_dim, 512*4*4), nn.LeakyReLU(0.2, True))
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(512, 256, 4, 2, 1), nn.BatchNorm2d(256), nn.ReLU(True),
            nn.ConvTranspose2d(256, 128, 4, 2, 1), nn.BatchNorm2d(128), nn.ReLU(True),
            nn.ConvTranspose2d(128, 64, 4, 2, 1), nn.BatchNorm2d(64), nn.ReLU(True),
            nn.ConvTranspose2d(64, 32, 4, 2, 1), nn.BatchNorm2d(32), nn.ReLU(True),
            nn.ConvTranspose2d(32, 3, 4, 2, 1), nn.Tanh(),
        )
    
    def forward(self, x):
        z = self.fc_enc(self.encoder(x))
        return self.decoder(self.fc_dec(z).view(-1, 512, 4, 4))

def train_model(seed, train_tensors, config, device):
    torch.manual_seed(seed)
    np.random.seed(seed)
    torch.cuda.manual_seed_all(seed)
    
    model = SimpleAE(config['latent_dim']).to(device)
    optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])
    criterion = nn.MSELoss()
    loader = DataLoader(TensorDataset(train_tensors), batch_size=config['batch_size'], shuffle=True)
    
    best_loss, patience, best_state = float('inf'), 0, None
    for epoch in range(config['num_epochs']):
        model.train()
        total_loss = sum(criterion(model(b[0]), b[0]).item() for b in loader)
        avg_loss = total_loss / len(loader)
        
        # Proper training step
        for (batch,) in loader:
            optimizer.zero_grad(set_to_none=True)
            loss = criterion(model(batch), batch)
            loss.backward()
            optimizer.step()
        
        if avg_loss < best_loss - 1e-5:
            best_loss, patience = avg_loss, 0
            best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
        else:
            patience += 1
        if patience >= config['patience']:
            break
    
    if best_state:
        model.load_state_dict({k: v.to(device) for k, v in best_state.items()})
    return model, best_loss

In [8]:
print("Training ensemble...")
models = []
for seed in CONFIG['seeds']:
    print(f"  Seed {seed}...", end=" ")
    model, loss = train_model(seed, train_tensors, CONFIG, DEVICE)
    models.append(model)
    print(f"loss={loss:.5f}")
print(f"Trained {len(models)} models")

Training ensemble...
  Seed 42... loss=0.00212
  Seed 123... loss=0.00219
  Seed 456... loss=0.00223
  Seed 789... loss=0.00219
Trained 4 models


In [9]:
@torch.no_grad()
def get_max_scores(model, tensors, frame_info, batch_size=128):
    model.eval()
    scores = {}
    for start in range(0, len(tensors), batch_size):
        end = min(start + batch_size, len(tensors))
        batch = tensors[start:end]
        recon = model(batch)
        max_err = torch.amax((batch - recon) ** 2, dim=(1, 2, 3))
        for i, s in enumerate(max_err.cpu().numpy()):
            vid, fnum = frame_info[start + i]
            scores[f"{vid}_{fnum}"] = float(s)
    return scores

print("Computing raw scores...")
all_raw_scores = []
for i, model in enumerate(models):
    scores = get_max_scores(model, test_tensors, test_info)
    all_raw_scores.append(scores)
    print(f"  Model {i+1} done")

# Average raw scores (before any normalization)
raw_ensemble = {}
for fid in test_frame_ids:
    raw_ensemble[fid] = np.mean([s[fid] for s in all_raw_scores])
print(f"Raw ensemble computed")

Computing raw scores...
  Model 1 done
  Model 2 done
  Model 3 done
  Model 4 done
Raw ensemble computed


In [10]:
def global_normalize(scores, clip_pct=99):
    """Standard global normalization (what we've been using)."""
    vals = np.array(list(scores.values()))
    keys = list(scores.keys())
    clip_val = np.percentile(vals, clip_pct)
    vals = np.clip(vals, 0, clip_val)
    v_min, v_max = vals.min(), vals.max()
    if v_max > v_min:
        norm = (vals - v_min) / (v_max - v_min)
    else:
        norm = np.ones_like(vals) * 0.5
    return {k: float(norm[i]) for i, k in enumerate(keys)}

def per_video_normalize(scores, test_video_fnums, clip_pct=99):
    """Normalize scores within each video independently."""
    normalized = {}
    
    for vid, fnums in test_video_fnums.items():
        # Get scores for this video
        vid_scores = [scores.get(f"{vid}_{fn}", 0) for fn in fnums]
        vals = np.array(vid_scores)
        
        # Normalize within video
        clip_val = np.percentile(vals, clip_pct)
        vals = np.clip(vals, 0, clip_val)
        v_min, v_max = vals.min(), vals.max()
        
        if v_max > v_min:
            norm = (vals - v_min) / (v_max - v_min)
        else:
            norm = np.ones_like(vals) * 0.5
        
        for i, fn in enumerate(fnums):
            normalized[f"{vid}_{fn}"] = float(norm[i])
    
    return normalized

def hybrid_normalize(scores, test_video_fnums, global_weight=0.5, clip_pct=99):
    """Combine global and per-video normalization."""
    global_norm = global_normalize(scores, clip_pct)
    per_video_norm = per_video_normalize(scores, test_video_fnums, clip_pct)
    
    hybrid = {}
    for fid in scores.keys():
        hybrid[fid] = global_weight * global_norm[fid] + (1 - global_weight) * per_video_norm[fid]
    
    return global_normalize(hybrid, clip_pct=100)

print("Normalization functions defined")

Normalization functions defined


In [11]:
def gaussian_smooth(scores, test_video_fnums, sigma):
    if sigma == 0:
        return scores.copy()
    smoothed = {}
    for vid, fnums in test_video_fnums.items():
        vals = np.array([scores.get(f"{vid}_{fn}", 0) for fn in fnums])
        if len(vals) > 1:
            vals = gaussian_filter1d(vals, sigma=sigma)
        for i, fn in enumerate(fnums):
            smoothed[f"{vid}_{fn}"] = float(vals[i])
    return smoothed

def median_smooth(scores, test_video_fnums, window):
    if window <= 1:
        return scores.copy()
    smoothed = {}
    for vid, fnums in test_video_fnums.items():
        vals = np.array([scores.get(f"{vid}_{fn}", 0) for fn in fnums])
        if len(vals) > window:
            vals = median_filter(vals, size=window)
        for i, fn in enumerate(fnums):
            smoothed[f"{vid}_{fn}"] = float(vals[i])
    return smoothed

def combined_smooth_gm(scores, test_video_fnums, gauss_sigma, median_window):
    """Gaussian first, then Median."""
    temp = gaussian_smooth(scores, test_video_fnums, gauss_sigma)
    return median_smooth(temp, test_video_fnums, median_window)

def combined_smooth_mg(scores, test_video_fnums, median_window, gauss_sigma):
    """Median first, then Gaussian."""
    temp = median_smooth(scores, test_video_fnums, median_window)
    return gaussian_smooth(temp, test_video_fnums, gauss_sigma)

print("Smoothing functions defined")

Smoothing functions defined


In [12]:
submissions = {}

# 1. Per-video normalization + Gaussian σ=3 (our best smoothing)
print("1. Per-video norm + Gaussian σ=3")
per_vid_norm = per_video_normalize(raw_ensemble, test_video_fnums)
per_vid_smooth = gaussian_smooth(per_vid_norm, test_video_fnums, sigma=3)
submissions['pervid_gauss3'] = global_normalize(per_vid_smooth, clip_pct=100)

# 2. Hybrid normalization (50% global, 50% per-video) + Gaussian σ=3
print("2. Hybrid norm + Gaussian σ=3")
hybrid_norm = hybrid_normalize(raw_ensemble, test_video_fnums, global_weight=0.5)
hybrid_smooth = gaussian_smooth(hybrid_norm, test_video_fnums, sigma=3)
submissions['hybrid_gauss3'] = global_normalize(hybrid_smooth, clip_pct=100)

# 3. Combined smoothing: Gaussian σ=3 then Median w=5
print("3. Combined: Gaussian→Median")
global_norm = global_normalize(raw_ensemble)
combined_gm = combined_smooth_gm(global_norm, test_video_fnums, gauss_sigma=3, median_window=5)
submissions['gauss3_median5'] = global_normalize(combined_gm, clip_pct=100)

# 4. Combined smoothing: Median w=5 then Gaussian σ=3
print("4. Combined: Median→Gaussian")
combined_mg = combined_smooth_mg(global_norm, test_video_fnums, median_window=5, gauss_sigma=3)
submissions['median5_gauss3'] = global_normalize(combined_mg, clip_pct=100)

# 5. Per-video norm + Combined smoothing (best of both ideas)
print("5. Per-video + Combined smoothing")
per_vid_combined = combined_smooth_gm(per_vid_norm, test_video_fnums, gauss_sigma=3, median_window=5)
submissions['pervid_combined'] = global_normalize(per_vid_combined, clip_pct=100)

print(f"\nCreated {len(submissions)} submissions")

1. Per-video norm + Gaussian σ=3
2. Hybrid norm + Gaussian σ=3
3. Combined: Gaussian→Median
4. Combined: Median→Gaussian
5. Per-video + Combined smoothing

Created 5 submissions


In [13]:
def save_sub(scores, filename):
    df = pd.DataFrame([{'Id': fid, 'Predicted': scores.get(fid, 0)} for fid in test_frame_ids])
    df.to_csv(os.path.join(OUTPUT_DIR, filename), index=False)
    print(f"Saved: {filename}")

print("\nSaving submissions...")
for name, scores in submissions.items():
    save_sub(scores, f'sub_{name}.csv')


Saving submissions...
Saved: sub_pervid_gauss3.csv
Saved: sub_hybrid_gauss3.csv
Saved: sub_gauss3_median5.csv
Saved: sub_median5_gauss3.csv
Saved: sub_pervid_combined.csv


In [14]:
# Cleanup
del train_tensors, test_tensors, models
gc.collect()
torch.cuda.empty_cache()
print("Done!")

Done!
