In [1]:
import os
import glob
import gc
import time
import numpy as np
import pandas as pd
from PIL import Image
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
from collections import defaultdict

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')
print(f"Device: {DEVICE}")

Device: cuda


In [2]:
# Paths
BASE_PATH = "/kaggle/input/pixel-play-26"
# os.listdir() returns entries in arbitrary order, so sort before taking the
# first one; otherwise the chosen dataset root can differ between runs when
# BASE_PATH holds more than one entry.
DATA_ROOT = os.path.join(BASE_PATH, sorted(os.listdir(BASE_PATH))[0])
AVENUE_PATH = os.path.join(DATA_ROOT, "Avenue_Corrupted", "Dataset")
TRAIN_VIDEOS = os.path.join(AVENUE_PATH, "training_videos")
TEST_VIDEOS = os.path.join(AVENUE_PATH, "testing_videos")
OUTPUT_DIR = "/kaggle/working"

# Config
CONFIG = {
    'image_size': (128, 128),   # (H, W) every frame is resized to
    'latent_dim': 128,          # size of the autoencoder bottleneck vector
    'batch_size': 64,
    'num_epochs': 25,           # upper bound; early stopping may end sooner
    'learning_rate': 2e-4,
    'patience': 5,              # epochs without sufficient improvement before stopping
    'seeds': [42, 123, 456],  # Multiple seeds for ensemble
}

print("Config ready")

Config ready


In [3]:
def discover_frames(video_dir):
    """Index the ``.jpg`` frames stored under ``video_dir``.

    Expected layout: ``video_dir/<video id>/<frame>.jpg`` where the folder
    name parses as an integer and the file stem is either ``<int>`` or
    ``frame_<int>``. Folders and files that do not follow this naming are
    skipped.

    Args:
        video_dir: Directory that contains one sub-directory per video.

    Returns:
        A plain ``dict`` mapping the integer video id to a list of
        ``(frame_number, path)`` tuples sorted by frame number. A numeric
        folder without any usable frame maps to an empty list. An empty
        dict is returned when ``video_dir`` does not exist.
    """
    frames = defaultdict(list)
    if not os.path.exists(video_dir):
        # Return the same type as the normal path (a plain dict) so that a
        # later lookup of an unknown id raises KeyError instead of silently
        # creating an empty entry.
        return dict(frames)

    prefix = 'frame_'
    for video_path in sorted(glob.glob(os.path.join(video_dir, '*'))):
        if not os.path.isdir(video_path):
            continue
        try:
            vid = int(os.path.basename(video_path))
        except ValueError:
            # Non-numeric folder name: not a video folder.
            continue
        for frame_path in sorted(glob.glob(os.path.join(video_path, '*.jpg'))):
            stem = os.path.splitext(os.path.basename(frame_path))[0]
            if stem.startswith(prefix):
                # Strip only the leading prefix, not every occurrence.
                stem = stem[len(prefix):]
            try:
                fnum = int(stem)
            except ValueError:
                # File stem is not a frame number; ignore the file.
                continue
            frames[vid].append((fnum, frame_path))
        # Paths sort lexicographically ("10" < "2"); order by the parsed number.
        frames[vid].sort(key=lambda item: item[0])
    return dict(frames)

# Index the training split first, then the testing split.
train_frames, test_frames = (
    discover_frames(split_dir) for split_dir in (TRAIN_VIDEOS, TEST_VIDEOS)
)
print(f"Train: {sum(len(v) for v in train_frames.values())} frames")
print(f"Test: {sum(len(v) for v in test_frames.values())} frames")

Train: 9204 frames
Test: 11706 frames


In [4]:
# Build test frame IDs
# Walk the videos in ascending id order and keep each video's frames in the
# order stored in test_frames; the string id is "<video id>_<frame number>".
test_frame_info = [
    (vid, fnum)
    for vid in sorted(test_frames.keys())
    for fnum, _ in test_frames[vid]
]
test_frame_ids = [f"{vid}_{fnum}" for vid, fnum in test_frame_info]
print(f"Test IDs: {len(test_frame_ids)}")

Test IDs: 11706


In [5]:
def load_frames_to_gpu(frames_dict, image_size, device):
    """Decode every frame into one preallocated float32 tensor on ``device``.

    Args:
        frames_dict: Mapping of video id to a list of ``(frame_number, path)``.
        image_size: ``(H, W)`` that each frame is resized to.
        device: Torch device on which the output tensor is allocated.

    Returns:
        ``(all_frames, frame_info)`` where ``all_frames`` has shape
        ``(N, 3, H, W)`` with pixel values mapped from 0..255 to -1..1, and
        ``frame_info[k]`` is the ``(video id, frame number)`` of row ``k``.
        Rows follow ascending video id, then the per-video list order.
    """
    height, width = image_size

    # Flatten the mapping up front so the row index is simply the position.
    ordered = [
        (vid, fnum, path)
        for vid in sorted(frames_dict.keys())
        for fnum, path in frames_dict[vid]
    ]

    all_frames = torch.zeros(
        len(ordered), 3, height, width, dtype=torch.float32, device=device
    )
    frame_info = []
    for slot, (vid, fnum, path) in enumerate(ordered):
        rgb = Image.open(path).convert('RGB')
        # PIL takes the target size as (width, height).
        rgb = rgb.resize((width, height), Image.BILINEAR)
        pixels = np.array(rgb, dtype=np.float32) / 127.5 - 1.0
        # HWC -> CHW before writing into the preallocated row.
        all_frames[slot] = torch.from_numpy(pixels).permute(2, 0, 1)
        frame_info.append((vid, fnum))
    return all_frames, frame_info

# Decode the train and test frames once and keep the resulting tensors on
# DEVICE, so later cells slice them directly instead of re-reading images.
print("Loading data to GPU...")
train_tensors, train_info = load_frames_to_gpu(train_frames, CONFIG['image_size'], DEVICE)
test_tensors, test_info = load_frames_to_gpu(test_frames, CONFIG['image_size'], DEVICE)
# memory_allocated() is in bytes; divide by 1e9 to report GB.
print(f"Loaded. GPU: {torch.cuda.memory_allocated()/1e9:.2f} GB")

Loading data to GPU...
Loaded. GPU: 4.11 GB


In [6]:
class GPUDataset(Dataset):
    """Dataset view over a tensor that is already held in memory.

    Each item is one slice along the first dimension of ``tensors``; no
    copy, transform or device transfer happens here.
    """

    def __init__(self, tensors):
        """Keep a reference to ``tensors`` (the data is not copied)."""
        self.tensors = tensors

    def __len__(self):
        """Number of items, i.e. the length of the first dimension."""
        return len(self.tensors)

    def __getitem__(self, idx):
        """Return the item stored at position ``idx``."""
        return self.tensors[idx]

train_dataset = GPUDataset(train_tensors)
train_loader = DataLoader(
    dataset=train_dataset,
    shuffle=True,
    # Batches are assembled in the main process; presumably required because
    # the samples are device-resident tensors - confirm before raising this.
    num_workers=0,
    batch_size=CONFIG['batch_size'],
)
print(f"Train loader: {len(train_loader)} batches")

Train loader: 144 batches


In [7]:
class SimpleAutoencoder(nn.Module):
    """Convolutional autoencoder with a fully connected bottleneck.

    The encoder halves the spatial size five times (channels 3 -> 512) and
    the bottleneck layers assume a 512x4x4 feature map, so inputs are
    expected to be 3x128x128. The decoder mirrors the encoder and ends in
    ``Tanh``, so reconstructions lie in [-1, 1].
    """

    @staticmethod
    def _down(c_in, c_out):
        """One encoder stage: stride-2 conv, batch norm, LeakyReLU(0.2)."""
        return [
            nn.Conv2d(c_in, c_out, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(c_out),
            nn.LeakyReLU(0.2, inplace=True),
        ]

    @staticmethod
    def _up(c_in, c_out):
        """One decoder stage: stride-2 transposed conv, batch norm, ReLU."""
        return [
            nn.ConvTranspose2d(c_in, c_out, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(c_out),
            nn.ReLU(inplace=True),
        ]

    def __init__(self, latent_dim=128):
        super().__init__()
        # Layers are created in the same order as a flat listing would, so
        # seeded initialisation and state_dict keys are unaffected.
        widths = [3, 32, 64, 128, 256, 512]

        enc_layers = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            enc_layers.extend(self._down(c_in, c_out))
        self.encoder = nn.Sequential(*enc_layers)

        self.fc_enc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(512 * 4 * 4, latent_dim),
        )
        self.fc_dec = nn.Sequential(
            nn.Linear(latent_dim, 512 * 4 * 4),
            nn.LeakyReLU(0.2, inplace=True),
        )

        dec_layers = []
        for c_in, c_out in zip([512, 256, 128, 64], [256, 128, 64, 32]):
            dec_layers.extend(self._up(c_in, c_out))
        # Final stage maps back to 3 channels and squashes to [-1, 1].
        dec_layers.append(nn.ConvTranspose2d(32, 3, kernel_size=4, stride=2, padding=1))
        dec_layers.append(nn.Tanh())
        self.decoder = nn.Sequential(*dec_layers)

    def forward(self, x):
        """Encode ``x`` to the latent vector and decode it back to an image."""
        latent = self.fc_enc(self.encoder(x))
        feature_map = self.fc_dec(latent)
        feature_map = feature_map.view(-1, 512, 4, 4)
        return self.decoder(feature_map)

In [8]:
def train_model(seed, train_loader, config, device):
    """Train a single model with given seed.

    Args:
        seed: Seed applied to the torch and numpy global RNGs before the
            model is built.
        train_loader: Iterable of image batches; each batch is used as both
            input and reconstruction target.
        config: Mapping with 'latent_dim', 'learning_rate', 'num_epochs'
            and 'patience'.
        device: Device the model is moved to.

    Returns:
        ``(model, best_loss)``: the model carrying the weights of the epoch
        that produced ``best_loss``. An epoch only counts as a new best when
        it beats the previous best by more than 1e-4, so the restored epoch
        can be earlier than the last one. If no epoch qualified (e.g.
        ``num_epochs`` is 0 or the loss is NaN) the model is returned as-is
        with ``best_loss == inf``.
    """
    torch.manual_seed(seed)
    np.random.seed(seed)

    model = SimpleAutoencoder(config['latent_dim']).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])

    best_loss = float('inf')
    best_state = None  # stays None until the first qualifying epoch
    patience_counter = 0

    for epoch in range(config['num_epochs']):
        model.train()
        epoch_loss = 0.0

        for batch in train_loader:
            optimizer.zero_grad(set_to_none=True)
            recon = model(batch)
            loss = criterion(recon, batch)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

        avg_loss = epoch_loss / len(train_loader)

        if avg_loss < best_loss - 1e-4:
            best_loss = avg_loss
            patience_counter = 0
            # state_dict() hands back references to the live parameters and
            # buffers; a shallow dict copy would keep changing with further
            # optimizer steps. Clone every tensor to get a real snapshot.
            best_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
        else:
            patience_counter += 1

        if (epoch + 1) % 5 == 0:
            print(f"  Epoch {epoch+1}: loss={avg_loss:.5f}")

        if patience_counter >= config['patience']:
            print(f"  Early stop at epoch {epoch+1}")
            break

    if best_state is not None:
        model.load_state_dict(best_state)
    return model, best_loss

In [9]:
# Train ensemble of models
# One autoencoder is trained per entry of CONFIG['seeds'], in list order; the
# trained networks are collected in `models` for the scoring cells below.
print("=" * 60)
print("TRAINING ENSEMBLE")
print("=" * 60)

models = []
for i, seed in enumerate(CONFIG['seeds']):
    print(f"\nModel {i+1}/{len(CONFIG['seeds'])} (seed={seed})")
    # Every member gets the same loader, config and device; only the seed differs.
    model, loss = train_model(seed, train_loader, CONFIG, DEVICE)
    models.append(model)
    print(f"  Best loss: {loss:.5f}")

print(f"\nTrained {len(models)} models")

TRAINING ENSEMBLE

Model 1/3 (seed=42)
  Epoch 5: loss=0.00623
  Epoch 10: loss=0.00381
  Epoch 15: loss=0.00289
  Epoch 20: loss=0.00242
  Epoch 25: loss=0.00213
  Best loss: 0.00217

Model 2/3 (seed=123)
  Epoch 5: loss=0.00646
  Epoch 10: loss=0.00393
  Epoch 15: loss=0.00297
  Epoch 20: loss=0.00246
  Epoch 25: loss=0.00214
  Best loss: 0.00214

Model 3/3 (seed=456)
  Epoch 5: loss=0.00635
  Epoch 10: loss=0.00391
  Epoch 15: loss=0.00301
  Epoch 20: loss=0.00250
  Epoch 25: loss=0.00222
  Best loss: 0.00231

Trained 3 models


In [10]:
@torch.no_grad()
def compute_scores(model, tensors, frame_info, batch_size=128):
    """Compute mean and max reconstruction error.

    The model is switched to eval mode and run over ``tensors`` in chunks of
    ``batch_size``. For every frame the squared difference between input and
    reconstruction is reduced over all non-batch dimensions.

    Args:
        model: Module mapping a batch to a reconstruction of the same shape.
        tensors: Frames stacked along dimension 0 (4-D batches are assumed
            by the reductions below).
        frame_info: One ``(video id, frame number)`` pair per row of
            ``tensors``, in the same order.
        batch_size: Number of frames per forward pass.

    Returns:
        ``(scores_mean, scores_max)``: two dicts keyed by
        ``"<video id>_<frame number>"`` holding Python floats.

    Raises:
        ValueError: If ``frame_info`` and ``tensors`` differ in length, which
            would attach scores to the wrong frame ids.
    """
    if len(frame_info) != len(tensors):
        raise ValueError(
            f"frame_info has {len(frame_info)} entries but tensors has {len(tensors)} rows"
        )

    model.eval()
    scores_mean = {}
    scores_max = {}

    for start in range(0, len(tensors), batch_size):
        stop = start + batch_size  # slicing past the end is clamped
        batch = tensors[start:stop]
        recon = model(batch)

        pixel_err = (batch - recon) ** 2
        # One host transfer per batch instead of two per frame.
        mean_err = torch.mean(pixel_err, dim=(1, 2, 3)).tolist()
        max_err = torch.amax(pixel_err, dim=(1, 2, 3)).tolist()

        for (vid, fnum), frame_mean, frame_max in zip(frame_info[start:stop], mean_err, max_err):
            fid = f"{vid}_{fnum}"
            scores_mean[fid] = frame_mean
            scores_max[fid] = frame_max

    return scores_mean, scores_max

In [11]:
# Compute scores from all models
# Each ensemble member scores the full test set; the per-model dicts are
# stored in the same order as `models`.
print("Computing scores from all models...")

all_mean_scores, all_max_scores = [], []

for i, model in enumerate(models):
    print(f"Model {i+1}...")
    mean_s, max_s = compute_scores(model, test_tensors, test_info)
    all_mean_scores += [mean_s]
    all_max_scores += [max_s]

print("Done!")

Computing scores from all models...
Model 1...
Model 2...
Model 3...
Done!


In [12]:
def normalize(scores, clip_pct=99):
    """Clip and min-max scale a score mapping to the range [0, 1].

    Values are clipped to ``[0, p]`` where ``p`` is the ``clip_pct``-th
    percentile of the input, then rescaled so the smallest clipped value
    becomes 0 and the largest becomes 1.

    Args:
        scores: Mapping of id to numeric score.
        clip_pct: Percentile (0-100) used as the upper clipping bound;
            100 disables upper clipping.

    Returns:
        A new dict with the same keys, in the same order, and Python float
        values. If all clipped values are (almost) equal every entry is 0.5.
        An empty input yields an empty dict.
    """
    if not scores:
        # np.percentile / min / max are undefined on empty input.
        return {}

    keys = list(scores.keys())
    vals = np.asarray(list(scores.values()), dtype=float)

    clip_val = np.percentile(vals, clip_pct)
    vals = np.clip(vals, 0, clip_val)

    v_min, v_max = vals.min(), vals.max()
    span = v_max - v_min
    if span > 1e-8:
        norm = (vals - v_min) / span
    else:
        # Degenerate range: there is no ordering to preserve, use the midpoint.
        norm = np.full(vals.shape, 0.5)
    return dict(zip(keys, norm.tolist()))

def ensemble_scores(score_list):
    """Average scores from multiple models.

    The ids (and their order) are taken from the first mapping in
    ``score_list``; every other mapping must contain those ids as well.
    Each output value is the ``np.mean`` of that id's scores across models.
    """
    reference = score_list[0]
    return {
        fid: np.mean([per_model[fid] for per_model in score_list])
        for fid in reference
    }

# Ensemble the scores
# Average the per-model dicts, mean-error first and max-error second.
print("Ensembling scores from all models...")
mean_ensemble, max_ensemble = (
    ensemble_scores(per_model_scores)
    for per_model_scores in (all_mean_scores, all_max_scores)
)

# Normalize
# Both score families are brought to [0, 1] so they can be blended later.
mean_norm, max_norm = normalize(mean_ensemble), normalize(max_ensemble)

print(f"Ensembled {len(models)} models")

Ensembling scores from all models...
Ensembled 3 models


In [13]:
# Try different mean/max ratios
# Each entry maps a submission tag to (mean_weight, max_weight).
RATIOS = dict(
    mean100=(1.0, 0.0),   # Pure mean
    mean80=(0.8, 0.2),
    mean70=(0.7, 0.3),
    mean60=(0.6, 0.4),    # Previous best
    mean50=(0.5, 0.5),
    mean40=(0.4, 0.6),
    max100=(0.0, 1.0),    # Pure max
)

def create_fused(mean_scores, max_scores, mean_weight, max_weight):
    """Blend two score mappings and rescale the result.

    For every id in ``mean_scores`` the fused value is
    ``mean_weight * mean + max_weight * max``; ``max_scores`` must contain
    the same ids. The blend is passed through ``normalize`` with
    ``clip_pct=100``.
    """
    blended = {
        fid: mean_weight * mean_val + max_weight * max_scores[fid]
        for fid, mean_val in mean_scores.items()
    }
    return normalize(blended, clip_pct=100)

# Build one fused score dict per weighting, then report what was created.
fused_scores = {
    name: create_fused(mean_norm, max_norm, mw, xw)
    for name, (mw, xw) in RATIOS.items()
}
for name, (mw, xw) in RATIOS.items():
    print(f"Created: {name} (mean={mw}, max={xw})")

Created: mean100 (mean=1.0, max=0.0)
Created: mean80 (mean=0.8, max=0.2)
Created: mean70 (mean=0.7, max=0.3)
Created: mean60 (mean=0.6, max=0.4)
Created: mean50 (mean=0.5, max=0.5)
Created: mean40 (mean=0.4, max=0.6)
Created: max100 (mean=0.0, max=1.0)


In [14]:
def save_sub(scores, filename):
    """Write a submission CSV with columns ``Id`` and ``Predicted``.

    Rows follow the order of ``test_frame_ids``; an id missing from
    ``scores`` gets the value 0. The file is written to ``OUTPUT_DIR``
    without an index column, and the DataFrame is returned.
    """
    records = []
    for fid in test_frame_ids:
        records.append({'Id': fid, 'Predicted': scores.get(fid, 0)})
    df = pd.DataFrame(records)

    out_path = os.path.join(OUTPUT_DIR, filename)
    df.to_csv(out_path, index=False)
    print(f"Saved: {filename}")
    return df

# Save all ratio variations
# One CSV per weighting, named after its tag in fused_scores.
for name in fused_scores:
    save_sub(fused_scores[name], f'sub_ensemble_{name}.csv')

print(f"\nGenerated {len(fused_scores)} submissions")

Saved: sub_ensemble_mean100.csv
Saved: sub_ensemble_mean80.csv
Saved: sub_ensemble_mean70.csv
Saved: sub_ensemble_mean60.csv
Saved: sub_ensemble_mean50.csv
Saved: sub_ensemble_mean40.csv
Saved: sub_ensemble_max100.csv

Generated 7 submissions


In [15]:
# Sometimes a single model beats the ensemble, so every ensemble member is
# also exported on its own, using the 60/40 mean/max blend.
print("\nSaving individual model scores (60/40 ratio)...")

for i, per_model in enumerate(zip(all_mean_scores, all_max_scores)):
    # per_model is (mean-error dict, max-error dict) for one network.
    mean_n, max_n = (normalize(raw) for raw in per_model)
    fused = create_fused(mean_n, max_n, 0.6, 0.4)
    save_sub(fused, f'sub_model{i+1}_mean60.csv')


Saving individual model scores (60/40 ratio)...
Saved: sub_model1_mean60.csv
Saved: sub_model2_mean60.csv
Saved: sub_model3_mean60.csv


In [16]:
# Cleanup
# `del` only unbinds a name; the underlying tensor is released once nothing
# references it any more. train_dataset (and train_loader through it) still
# holds the training tensor, so those names have to go as well.
del train_loader, train_dataset, train_tensors, test_tensors, models
# The loop variable left over from the training/scoring cells still points at
# one network; rebinding (rather than `del`) is safe even if it was never set.
model = None
gc.collect()
# Hand cached, now-unused CUDA blocks back to the driver.
torch.cuda.empty_cache()
print("Done!")

Done!
