In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm


In [2]:
# --- Dataset locations ------------------------------------------------------
BASE_PATH = "/kaggle/input/pixel-play-26"
# The first entry returned by listdir is used as the data root.
# NOTE(review): presumably the input folder holds exactly one wrapper
# directory; listdir order is arbitrary if there are several -- confirm.
DATA_ROOT = os.path.join(BASE_PATH, os.listdir(BASE_PATH)[0])
AVENUE_PATH = os.path.join(DATA_ROOT, "Avenue_Corrupted", "Dataset")
TRAIN_VIDEOS, TEST_VIDEOS = (
    os.path.join(AVENUE_PATH, split_dir)
    for split_dir in ("training_videos", "testing_videos")
)

# --- Runtime ----------------------------------------------------------------
# Fall back to the CPU when no CUDA device is visible.
DEVICE = "cpu"
if torch.cuda.is_available():
    DEVICE = "cuda"

# --- Hyper-parameters -------------------------------------------------------
IMG_SIZE = 128    # frames are resized to IMG_SIZE x IMG_SIZE
BATCH_SIZE = 32
EPOCHS = 8        # do NOT increase yet


In [3]:
class FrameDiffDataset(Dataset):
    """Absolute grayscale difference between consecutive frames of each clip.

    ``root_dir`` is scanned once at construction time. Every sub-directory is
    treated as one clip, and each pair of adjacent frames inside a clip
    becomes one sample, so a clip with ``n`` frames yields ``n - 1`` samples
    (clips with fewer than two frames yield none).

    Each item is a ``float32`` tensor of shape ``(1, IMG_SIZE, IMG_SIZE)``
    holding ``|curr - prev| / 255``.

    Raises:
        FileNotFoundError: from ``__getitem__`` when a frame cannot be
            decoded by OpenCV (missing or unreadable file).
    """

    def __init__(self, root_dir):
        self.samples = []

        for clip in sorted(os.listdir(root_dir)):
            clip_path = os.path.join(root_dir, clip)
            if not os.path.isdir(clip_path):
                # Stray files next to the clip folders would make the inner
                # listdir raise NotADirectoryError; ignore them instead.
                continue

            # Order frames by their trailing number so that adjacent list
            # entries are temporally adjacent even when the numbering is not
            # zero-padded (plain string order puts "frame_10" before "frame_2").
            frames = sorted(os.listdir(clip_path), key=self._frame_sort_key)

            for prev_name, curr_name in zip(frames, frames[1:]):
                self.samples.append((
                    os.path.join(clip_path, prev_name),
                    os.path.join(clip_path, curr_name)
                ))

    @staticmethod
    def _frame_sort_key(name):
        """Sort key: (text prefix, trailing integer, full name)."""
        stem = os.path.splitext(name)[0]
        prefix = stem.rstrip("0123456789")
        digits = stem[len(prefix):]
        # Names without a trailing number sort first within their prefix; the
        # full name is a deterministic tie-breaker.
        return (prefix, int(digits) if digits else -1, name)

    @staticmethod
    def _load_gray(path):
        """Read ``path`` as grayscale, resize to IMG_SIZE and return float32."""
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError(f"Could not read image: {path}")
        return cv2.resize(img, (IMG_SIZE, IMG_SIZE)).astype(np.float32)

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        prev_path, curr_path = self.samples[idx]

        prev = self._load_gray(prev_path)
        curr = self._load_gray(curr_path)

        # Absolute per-pixel change, scaled from [0, 255] to [0, 1].
        diff = np.abs(curr - prev) / 255.0

        # from_numpy wraps the freshly created array without another copy.
        return torch.from_numpy(diff).unsqueeze(0)  # (1, H, W)


In [4]:
# Frame-difference samples from every training clip.
train_dataset = FrameDiffDataset(TRAIN_VIDEOS)

# Shuffled mini-batches for training. Page-locked host memory only helps
# host-to-GPU copies, so it is requested only when training on CUDA; asking
# for it on a CPU-only runtime just produces a warning.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
    pin_memory=(DEVICE == "cuda")
)


In [5]:
class ConvAutoencoder(nn.Module):
    """Small convolutional autoencoder for single-channel images.

    The encoder applies three stride-2 3x3 convolutions (1 -> 16 -> 32 -> 64
    channels), each followed by ReLU, halving the spatial size every time.
    The decoder mirrors it with three stride-2 transposed convolutions
    (64 -> 32 -> 16 -> 1); the last one is followed by a sigmoid instead of a
    ReLU, so outputs lie in [0, 1].
    """

    def __init__(self):
        super().__init__()

        channels = (1, 16, 32, 64)

        # Encoder: Conv2d + ReLU per stage, in increasing channel width.
        encoder_layers = []
        for c_in, c_out in zip(channels, channels[1:]):
            encoder_layers.append(
                nn.Conv2d(c_in, c_out, kernel_size=3, stride=2, padding=1)
            )
            encoder_layers.append(nn.ReLU())
        self.encoder = nn.Sequential(*encoder_layers)

        # Decoder: the same widths walked backwards; output_padding=1 makes
        # every stage exactly double the spatial size.
        reversed_channels = channels[::-1]
        final_stage = len(reversed_channels) - 2
        decoder_layers = []
        for stage, (c_in, c_out) in enumerate(
            zip(reversed_channels, reversed_channels[1:])
        ):
            decoder_layers.append(
                nn.ConvTranspose2d(
                    c_in, c_out, kernel_size=3,
                    stride=2, padding=1, output_padding=1,
                )
            )
            decoder_layers.append(
                nn.Sigmoid() if stage == final_stage else nn.ReLU()
            )
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` and return its reconstruction."""
        latent = self.encoder(x)
        return self.decoder(latent)


# Build the autoencoder, then move its parameters onto the selected device.
model = ConvAutoencoder()
model = model.to(DEVICE)


In [6]:
# Reconstruction objective: the network is trained to reproduce its own input.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch_idx in range(EPOCHS):
    model.train()
    running_loss = 0

    for diff_batch in tqdm(train_loader):
        diff_batch = diff_batch.to(DEVICE)

        # Clear gradients left over from the previous step before the new
        # forward/backward pass.
        optimizer.zero_grad()

        reconstruction = model(diff_batch)
        batch_loss = criterion(reconstruction, diff_batch)

        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    # Average of the per-batch mean losses over the epoch.
    mean_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch_idx+1}/{EPOCHS} | Loss: {mean_loss:.6f}")


100%|██████████| 288/288 [00:59<00:00,  4.82it/s]


Epoch 1/8 | Loss: 0.028406


100%|██████████| 288/288 [00:25<00:00, 11.37it/s]


Epoch 2/8 | Loss: 0.000908


100%|██████████| 288/288 [00:24<00:00, 11.83it/s]


Epoch 3/8 | Loss: 0.000481


100%|██████████| 288/288 [00:24<00:00, 11.57it/s]


Epoch 4/8 | Loss: 0.000429


100%|██████████| 288/288 [00:24<00:00, 11.95it/s]


Epoch 5/8 | Loss: 0.000402


100%|██████████| 288/288 [00:24<00:00, 11.68it/s]


Epoch 6/8 | Loss: 0.000378


100%|██████████| 288/288 [00:24<00:00, 11.62it/s]


Epoch 7/8 | Loss: 0.000363


100%|██████████| 288/288 [00:24<00:00, 11.78it/s]

Epoch 8/8 | Loss: 0.000352





In [7]:
def patch_error_map(recon, target, patch=16, topk=3):
    """Score each sample by its worst-reconstructed patches.

    The squared error between ``recon`` and ``target`` is averaged over
    non-overlapping ``patch`` x ``patch`` windows, and the mean of the
    ``topk`` largest patch errors is returned for every sample.

    Args:
        recon: Reconstructed batch; indexed as a 4-D ``(N, C, H, W)`` tensor
            by the pooling call.
        target: Reference batch with the same shape as ``recon``.
        patch: Side length (and stride) of the averaging window.
        topk: Number of highest-error patches to average. If the input has
            fewer patches than ``topk``, all of them are used instead of
            raising.

    Returns:
        1-D tensor of length ``N`` with one score per sample.
    """
    err = (recon - target).pow(2)
    err = F.avg_pool2d(err, kernel_size=patch, stride=patch)

    # One row of patch errors per sample (channels are flattened in as well).
    err = err.flatten(start_dim=1)

    # torch.topk raises when k exceeds the dimension size, so clamp it to the
    # number of patches actually available.
    k = min(topk, err.size(1))
    topk_vals, _ = torch.topk(err, k=k, dim=1)
    return topk_vals.mean(dim=1)


In [8]:
model.eval()
anomaly_scores = []


def _frame_number(name):
    """Return the integer frame id encoded in a ``frame_<n>.jpg`` file name."""
    return int(name.replace("frame_", "").replace(".jpg", ""))


def _load_gray_frame(path):
    """Read ``path`` as grayscale, resize to IMG_SIZE and return float32."""
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread returns None instead of raising on a bad/missing file.
        raise FileNotFoundError(f"Could not read image: {path}")
    return cv2.resize(img, (IMG_SIZE, IMG_SIZE)).astype(np.float32)


with torch.no_grad():
    for clip in tqdm(sorted(os.listdir(TEST_VIDEOS))):
        clip_path = os.path.join(TEST_VIDEOS, clip)

        # Sort by the numeric frame id (the same value used in the output Id)
        # so consecutive entries are consecutive frames even if the numbering
        # is not zero-padded.
        frames = sorted(os.listdir(clip_path), key=_frame_number)
        if not frames:
            continue

        # The first frame has no predecessor, so it only serves as the
        # reference for the second one and receives no score in this loop.
        prev = _load_gray_frame(os.path.join(clip_path, frames[0]))

        for name in frames[1:]:
            curr = _load_gray_frame(os.path.join(clip_path, name))

            # Same preprocessing as training: |curr - prev| scaled to [0, 1].
            diff = np.abs(curr - prev) / 255.0
            diff = torch.from_numpy(diff).unsqueeze(0).unsqueeze(0).to(DEVICE)

            recon = model(diff)
            score = patch_error_map(recon, diff).item()

            anomaly_scores.append({
                "Id": f"{int(clip)}_{_frame_number(name)}",
                "raw_score": score
            })

            # Reuse the decoded frame as the next reference instead of
            # reading and resizing it from disk a second time.
            prev = curr


100%|██████████| 21/21 [02:15<00:00,  6.45s/it]


In [9]:
# Collect the per-frame scores into a table.
df = pd.DataFrame(anomaly_scores)

# "Id" is "<video>_<frame>"; split it into two integer columns for sorting
# and grouping.
id_parts = df["Id"].str.split("_", expand=True).astype(int)
df["vid"] = id_parts[0]
df["fid"] = id_parts[1]

df = df.sort_values(by=["vid", "fid"], ignore_index=True)


In [15]:
# For every video, duplicate the row with the lowest frame id as a new row
# one frame earlier, carrying over the same score.
# NOTE: running this cell more than once keeps prepending earlier frames.
first_scored_idx = df.groupby("vid")["fid"].idxmin()
leading_rows = df.loc[first_scored_idx].copy()

leading_rows["fid"] = leading_rows["fid"] - 1
leading_rows["Id"] = (
    leading_rows["vid"].astype(str) + "_" + leading_rows["fid"].astype(str)
)

df = pd.concat([df, leading_rows], ignore_index=True)
df = df.sort_values(["vid", "fid"]).reset_index(drop=True)


In [16]:
# Every row should carry a distinct Id, so the two numbers should match.
row_count = len(df)
unique_id_count = df["Id"].nunique()

print("Rows:", row_count)
print("Unique Ids:", unique_id_count)


Rows: 11706
Unique Ids: 11706


In [17]:
# Per-video z-score (CRITICAL)
def _standardise(scores):
    """Centre one video's scores on their mean and scale by their std."""
    # The small epsilon keeps the division finite when a video's scores are
    # all identical (std == 0).
    return (scores - scores.mean()) / (scores.std() + 1e-6)


raw_by_video = df.groupby("vid")["raw_score"]
df["zscore"] = raw_by_video.transform(_standardise)


In [18]:
# Convert to AUC-safe ranking: replace each z-score by its percentile rank
# across all rows (ties share their average rank), which keeps the ordering
# of the scores.
zscore_percentile = df["zscore"].rank(method="average", pct=True)
df["Predicted"] = zscore_percentile


In [19]:
# Build submission: an independent copy holding only the two output columns.
submission_columns = ["Id", "Predicted"]
submission = df.loc[:, submission_columns].copy()


In [20]:
# Sanity checks before writing: missing values per column, then row count
# next to the number of distinct Ids.
missing_per_column = submission.isna().sum()
print(missing_per_column)

total_rows = len(submission)
distinct_ids = submission["Id"].nunique()
print(total_rows, distinct_ids)

# Preview of the first rows (rendered as the cell output).
submission.head()



Id           0
Predicted    0
dtype: int64
11706 11706


Unnamed: 0,Id,Predicted
0,1_939,0.681146
1,1_940,0.681146
2,1_941,0.753716
3,1_942,0.777379
4,1_943,0.785495


In [21]:
# Write the submission without the DataFrame index column.
submission_path = "/kaggle/working/submission.csv"
submission.to_csv(submission_path, index=False)
