In [None]:
import tensorflow as tf
import numpy as np
import cv2
import os
import glob
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

# List the GPUs TensorFlow can see; an empty list means no GPU device was detected.
print("GPU Available:", tf.config.list_physical_devices(device_type='GPU'))

# Root folder of the corrupted Avenue dataset (contains "training_videos" and
# "testing_videos"). Assembled with os.path.join instead of a backslash-separated
# literal: a plain string containing "\A" / "\D" depends on invalid escape sequences
# being kept verbatim, and a backslash path does not resolve on Linux/macOS.
INPUT_ROOT = os.path.join(
    "Avenue_Corrupted-20251221T112159Z-3-001", "Avenue_Corrupted", "Dataset"
)

In [None]:
# --- CONFIGURATION ---

# Directory that receives the cleaned copy of the dataset. The cleaning cell writes
# "training_videos" and "testing_videos" beneath it; the training generator and the
# scoring cell read their frames back from those sub-folders.
OUTPUT_ROOT_DATASET = "cleaned_data"


def _mean_squared_diff(image_a, image_b):
    """Return the mean squared difference of two same-shaped images as a float.

    The inputs are cast to float32 first: subtracting or squaring uint8 arrays
    directly wraps around modulo 256 and yields a meaningless error value.
    """
    delta = image_a.astype(np.float32) - image_b.astype(np.float32)
    return float(np.mean(delta * delta))


def fix_dataset(input_dir, output_dir, is_testing=False):
    """Clean every clip folder under ``input_dir`` and mirror it into ``output_dir``.

    Every sub-directory of ``input_dir`` is treated as one clip. Its ``*.jpg`` frames
    are processed in sorted-filename order and written, under the same file name, to
    the matching sub-directory of ``output_dir``. Entries that are not directories and
    frames OpenCV cannot read are skipped.

    Two repairs are applied:

    1. Orientation (only when ``is_testing`` is True): each frame is compared, after a
       strong median blur, with the previously accepted frame. If the vertically
       flipped version is closer (lower mean squared error), the frame is flipped.
       The first frame of a clip has nothing to compare against, so it is accepted
       as-is, except for clips whose folder name ends in "13" or "19", whose first
       frame is flipped unconditionally.
    2. Noise (always): ``cv2.fastNlMeansDenoisingColored`` is run on every frame.

    Args:
        input_dir: Directory containing one sub-directory per clip.
        output_dir: Destination root; created (including parents) if missing.
        is_testing: Enables the orientation repair and is reflected in the log line.

    Returns:
        None. Results are written to disk.
    """
    os.makedirs(output_dir, exist_ok=True)

    folders = sorted(os.listdir(input_dir))

    print(f"Cleaning {len(folders)} folders in {'TESTING' if is_testing else 'TRAINING'} mode...")

    for folder in folders:
        in_path = os.path.join(input_dir, folder)
        if not os.path.isdir(in_path):
            continue

        out_path = os.path.join(output_dir, folder)
        os.makedirs(out_path, exist_ok=True)
        frames = sorted(glob.glob(os.path.join(in_path, "*.jpg")))

        # Blurred copy of the most recently accepted (correctly oriented) frame.
        last_valid_blur = None

        # Clips 13 and 19 start with an upside-down frame. endswith() also covers the
        # exact names '13' / '19', so prefixed folder names are matched as well.
        is_bad_start = is_testing and folder.endswith(('13', '19'))

        for frame_path in tqdm(frames, desc=f"Folder {folder}", leave=False):
            img = cv2.imread(frame_path)
            if img is None:
                continue
            filename = os.path.basename(frame_path)

            # --- 1. FIX UPSIDE DOWN FRAMES (Test Set Only) ---
            if is_testing:
                # Heavy blur so only large shapes drive the comparison, not noise.
                curr_blur = cv2.medianBlur(img, 11)

                if last_valid_blur is None:
                    # No reference yet: trust the first frame unless this is a
                    # known bad-start clip, in which case flip frame and reference.
                    if is_bad_start:
                        img = cv2.flip(img, 0)
                        curr_blur = cv2.flip(curr_blur, 0)
                    last_valid_blur = curr_blur
                else:
                    flipped_blur = cv2.flip(curr_blur, 0)

                    # Which orientation is closer to the last accepted frame?
                    diff_normal = _mean_squared_diff(curr_blur, last_valid_blur)
                    diff_flipped = _mean_squared_diff(flipped_blur, last_valid_blur)

                    # Flip only when the flipped version is strictly closer.
                    if diff_flipped < diff_normal:
                        img = cv2.flip(img, 0)
                        last_valid_blur = flipped_blur
                    else:
                        last_valid_blur = curr_blur

            # --- 2. CLEAN THE NOISE (All Sets) ---
            # Non-local-means denoising removes the coloured speckles.
            img = cv2.fastNlMeansDenoisingColored(img, None, 10, 10, 7, 21)

            cv2.imwrite(os.path.join(out_path, filename), img)

# RUN THE CLEANER
# Training split: denoise only, no orientation repair.
fix_dataset(
    input_dir=os.path.join(INPUT_ROOT, "training_videos"),
    output_dir=os.path.join(OUTPUT_ROOT_DATASET, "training_videos"),
    is_testing=False,
)

# Testing split: orientation repair followed by denoising.
fix_dataset(
    input_dir=os.path.join(INPUT_ROOT, "testing_videos"),
    output_dir=os.path.join(OUTPUT_ROOT_DATASET, "testing_videos"),
    is_testing=True,
)

# Location of the cleaned test clips, consumed by the scoring cell.
TEST_DIR = os.path.join(OUTPUT_ROOT_DATASET, "testing_videos")

print("ALL DATA CLEANED!")

In [None]:
from tensorflow.keras import layers, models, Input

def build_unet():
    """Build the frame-prediction U-Net.

    Input:  (256, 256, 12) -- four history frames stacked along the channel axis.
    Output: (256, 256, 3)  -- one predicted frame, squashed to [-1, 1] by ``tanh``.

    The network has three encoder stages (64/128/256 filters, each followed by 2x
    max-pooling), a 512-filter bottleneck, and three decoder stages that upsample
    with a stride-2 transposed convolution and concatenate the matching encoder
    output as a skip connection.

    Returns:
        An uncompiled ``keras.Model``.
    """
    def conv_pair(tensor, filters):
        # Two consecutive 3x3 'same' ReLU convolutions with the same width.
        for _ in range(2):
            tensor = layers.Conv2D(filters, 3, padding='same', activation='relu')(tensor)
        return tensor

    def upsample_and_merge(tensor, skip, filters):
        # Double the resolution, join the encoder features, then refine.
        upsampled = layers.Conv2DTranspose(filters, 2, strides=(2, 2), padding='same')(tensor)
        merged = layers.concatenate([upsampled, skip])
        return conv_pair(merged, filters)

    inputs = Input(shape=(256, 256, 12))

    # --- ENCODER: remember each stage's output before pooling it away ---
    skip_stack = []
    features = inputs
    for width in (64, 128, 256):
        features = conv_pair(features, width)
        skip_stack.append(features)
        features = layers.MaxPooling2D()(features)

    # --- BOTTLENECK ---
    features = conv_pair(features, 512)

    # --- DECODER: consume the skips deepest-first ---
    for width in (256, 128, 64):
        features = upsample_and_merge(features, skip_stack.pop(), width)

    # Final prediction: one 3-channel frame.
    outputs = layers.Conv2D(3, 3, padding='same', activation='tanh')(features)

    return models.Model(inputs, outputs)

# Instantiate the frame-prediction generator once; train_step() and the scoring
# cell both use this module-level object directly.
ai_brain = build_unet()
print("Brain built!")

In [None]:
def build_feature_discriminator(input_shape=(256, 256, 3)):
    """Build a convolutional discriminator that also exposes intermediate features.

    Four stride-2 downsampling stages (64, 128, 256, 512 filters) are followed by a
    single-channel 4x4 convolution producing a map of real/fake scores.

    Args:
        input_shape: Shape of one input frame, (height, width, channels).

    Returns:
        A ``keras.Model`` named "Feature_Discriminator" whose outputs are
        ``[score_map, f1, f2]``: the score map plus the activations of the second
        and third downsampling stages, used for the feature-matching loss.
    """
    weight_init = tf.random_normal_initializer(0., 0.02)

    def downsample(filters, size, apply_batchnorm=True):
        # Conv (stride 2) -> optional BatchNorm -> LeakyReLU, wrapped as one unit.
        stage_layers = [
            tf.keras.layers.Conv2D(filters, size, strides=2, padding='same',
                                   kernel_initializer=weight_init, use_bias=False)
        ]
        if apply_batchnorm:
            stage_layers.append(tf.keras.layers.BatchNormalization())
        stage_layers.append(tf.keras.layers.LeakyReLU())

        stage = tf.keras.Sequential()
        for layer in stage_layers:
            stage.add(layer)
        return stage

    inputs = tf.keras.Input(shape=input_shape)

    # Spatial sizes below assume the default 256x256 input.
    hidden = downsample(64, 4, apply_batchnorm=False)(inputs)   # (128, 128, 64)
    stage_outputs = []
    for width in (128, 256, 512):                               # 64x64, 32x32, 16x16
        hidden = downsample(width, 4)(hidden)
        stage_outputs.append(hidden)
    f1, f2, f3 = stage_outputs

    score_map = tf.keras.layers.Conv2D(1, 4, strides=1, padding='same',
                                       kernel_initializer=weight_init)(f3)

    # Expose the decision map together with the two feature layers.
    return tf.keras.Model(inputs=inputs, outputs=[score_map, f1, f2], name="Feature_Discriminator")

# Instantiate the discriminator once with the default 256x256x3 input; the optimizer
# initialisation and train_step() below use this module-level object.
ai_discriminator = build_feature_discriminator()
print("Feature Matching Discriminator Built!")

In [None]:
import tensorflow as tf
import numpy as np
import os
import glob
import cv2

# --- 1. LOSS FUNCTIONS (Feature Matching + Motion) ---

def ohem_loss(predicted, target, hard_fraction=0.10):
    """Online Hard Example Mining (OHEM) L1 loss.

    Instead of averaging the absolute error over every value, only the largest
    ``hard_fraction`` of the errors is averaged, so the optimisation concentrates
    on the worst-reconstructed regions.

    The selection is made over the flattened tensor, i.e. across the whole batch
    and all channels at once, not per image.

    Args:
        predicted: Predicted frames.
        target: Ground-truth frames, same shape as ``predicted``.
        hard_fraction: Fraction of values to keep (default 0.10 = worst 10%).

    Returns:
        Scalar tensor: mean absolute error of the selected values.
    """
    abs_error = tf.abs(predicted - target)

    # Flatten so top_k ranks every value in the batch together.
    flat_errors = tf.reshape(abs_error, [-1])
    num_values = tf.shape(flat_errors)[0]

    k = tf.cast(tf.cast(num_values, tf.float32) * hard_fraction, tf.int32)
    # The truncating cast gives k == 0 for small tensors, which would make the
    # result the mean of an empty tensor (NaN). Keep at least one value, but never
    # ask for more values than exist.
    k = tf.minimum(tf.maximum(k, 1), num_values)

    worst_errors, _ = tf.math.top_k(flat_errors, k=k)

    return tf.reduce_mean(worst_errors)

def feature_matching_loss(real_feats, fake_feats):
    """Sum of mean absolute differences between paired discriminator feature maps.

    ``real_feats`` and ``fake_feats`` are walked in parallel; iteration stops at the
    shorter of the two. With no pairs the result is the plain integer 0.
    """
    layer_gaps = [
        tf.reduce_mean(tf.abs(real_map - fake_map))
        for real_map, fake_map in zip(real_feats, fake_feats)
    ]
    return sum(layer_gaps, 0)

def ssim_loss(predicted, target):
    """Return ``1 - mean SSIM`` between predicted and target frames.

    Both inputs are clipped to [-1, 1] before the comparison (the guard exists to
    avoid NaNs), and ``max_val=2.0`` is the width of that range.
    """
    lower, upper = -1.0, 1.0
    clipped_pred = tf.clip_by_value(predicted, lower, upper)
    clipped_target = tf.clip_by_value(target, lower, upper)
    similarity = tf.image.ssim(clipped_pred, clipped_target, max_val=2.0)
    return 1.0 - tf.reduce_mean(similarity)


def get_total_generator_loss(predicted, target, d_fake_out, real_feats, fake_feats, last_history):
    """Combine all generator loss terms into one weighted total.

    Terms: OHEM intensity loss, SSIM loss, gradient-difference loss, least-squares
    adversarial loss (generator wants the discriminator to output 1 for fakes) and
    feature-matching loss.

    Args:
        predicted: Generated frames.
        target: Ground-truth frames.
        d_fake_out: Discriminator score map for the generated frames.
        real_feats: Discriminator feature maps for the real frames.
        fake_feats: Discriminator feature maps for the generated frames.
        last_history: Accepted for interface compatibility; not used here.

    Returns:
        ``(total_loss, l_int, l_ssim, l_feat)``.
    """
    # Per-term weights.
    weight_int, weight_ssim, weight_grad = 1.0, 1.0, 1.0
    weight_adv = 0.05
    weight_feat = 1.0

    l_int = ohem_loss(predicted, target)
    l_ssim = ssim_loss(predicted, target)

    # Gradient-difference loss: compare edge magnitudes along x and y.
    pred_dy, pred_dx = tf.image.image_gradients(predicted)
    true_dy, true_dx = tf.image.image_gradients(target)
    horizontal_gap = tf.abs(tf.abs(pred_dx) - tf.abs(true_dx))
    vertical_gap = tf.abs(tf.abs(pred_dy) - tf.abs(true_dy))
    l_grad = tf.reduce_mean(horizontal_gap + vertical_gap)

    l_adv = 0.5 * tf.reduce_mean(tf.square(d_fake_out - 1.0))
    l_feat = feature_matching_loss(real_feats, fake_feats)

    total_loss = (
        (weight_int * l_int)
        + (weight_ssim * l_ssim)
        + (weight_grad * l_grad)
        + (weight_adv * l_adv)
        + (weight_feat * l_feat)
    )

    return total_loss, l_int, l_ssim, l_feat

# --- 2. DATA GENERATOR (Standard) ---

def _load_training_frame(path, flip_h):
    """Read one frame as a 256x256 image scaled to [-1, 1]; None if unreadable.

    When ``flip_h`` is True the frame is mirrored horizontally (augmentation).
    """
    img = cv2.imread(path)
    if img is None:
        return None
    img = cv2.resize(img, (256, 256))
    if flip_h:
        img = cv2.flip(img, 1)
    return (img / 127.5) - 1.0


def train_generator():
    """Endlessly yield random training samples from the cleaned training clips.

    Each sample is built from five consecutive frames of one randomly chosen clip,
    all mirrored horizontally together with probability 0.5.

    Yields:
        ``(history_stack, target, last_history_frame)`` where ``history_stack`` is
        the first four frames concatenated along the channel axis (256, 256, 12),
        ``target`` is the fifth frame (256, 256, 3) and ``last_history_frame`` is
        the fourth frame (256, 256, 3). Values are scaled to [-1, 1].

    If no clip with at least five ``*.jpg`` frames exists, an error is printed and
    the generator ends without yielding.
    """
    train_path = os.path.join(OUTPUT_ROOT_DATASET, "training_videos")
    folders = glob.glob(os.path.join(train_path, "*"))

    # List every clip once up front instead of re-globbing per sample, and keep
    # only clips long enough for a 4+1 frame window. Without this filter the loop
    # below would spin forever when no clip is usable.
    clips = []
    for folder in folders:
        files = sorted(glob.glob(os.path.join(folder, "*.jpg")))
        if len(files) >= 5:
            clips.append(files)

    if not clips:
        print("CRITICAL ERROR: No data found.")
        return

    while True:
        files = clips[np.random.randint(len(clips))]

        # randint's upper bound is exclusive, so the valid window starts
        # 0 .. len(files) - 5 require an upper bound of len(files) - 4.
        idx = np.random.randint(0, len(files) - 4)

        # Random Flip (Augmentation), applied consistently to the whole window.
        flip_h = np.random.random() > 0.5

        window = [_load_training_frame(path, flip_h) for path in files[idx:idx + 5]]
        if any(frame is None for frame in window):
            # At least one frame could not be decoded: draw another sample.
            continue

        history, target = window[:4], window[4]
        history_stack = np.concatenate(history, axis=2)

        yield (history_stack, target, history[-1])

# --- 3. TRAINING SETUP ---

# Wrap the Python generator as a tf.data pipeline. Each element is
# (history_stack, target, last_history_frame); elements are grouped into batches of 4
# and prefetched so data loading overlaps with training.
dataset = tf.data.Dataset.from_generator(
    train_generator, 
    output_signature=(
        tf.TensorSpec(shape=(256, 256, 12), dtype=tf.float32),
        tf.TensorSpec(shape=(256, 256, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(256, 256, 3), dtype=tf.float32)
    )
).batch(4).prefetch(tf.data.AUTOTUNE)

# Learning Rate Schedule
# Starts at 2e-4 and is multiplied by 0.9 every 5000 steps (staircase=True makes the
# decay happen in discrete jumps rather than continuously).
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=2e-4, decay_steps=5000, decay_rate=0.9, staircase=True
)

# Optimizers with Clipnorm (Stability)
# Both optimizers share the same schedule object and settings.
gen_optimizer = tf.keras.optimizers.Adam(lr_schedule, beta_1=0.5, clipnorm=1.0)
disc_optimizer = tf.keras.optimizers.Adam(lr_schedule, beta_1=0.5, clipnorm=1.0)

# Init Optimizers
# Apply one all-zero gradient update to the discriminator optimizer before training.
# NOTE(review): presumably this forces the optimizer to create its internal variables
# eagerly, because train_step() only touches disc_optimizer when use_gan=True, which
# first happens after the warm-up on a later tf.function trace -- TODO confirm.
# NOTE(review): this likely also advances disc_optimizer's step counter by one.
print("Initializing optimizer variables...")
if len(ai_discriminator.trainable_variables) > 0:
    dummy_grads = [tf.zeros_like(w) for w in ai_discriminator.trainable_variables]
    disc_optimizer.apply_gradients(zip(dummy_grads, ai_discriminator.trainable_variables))
print("Optimizer Ready!")

@tf.function
def train_step(history_batch, target_batch, last_hist_batch, use_gan=True):
    """Run one optimisation step for the generator and, optionally, the discriminator.

    Args:
        history_batch: Stacked history frames fed to the generator.
        target_batch: Ground-truth next frames.
        last_hist_batch: Last history frame of each sample; forwarded to the
            generator loss.
        use_gan: When False (warm-up) the adversarial term is subtracted back out
            of the generator loss and the discriminator is not updated. The call
            site passes a plain Python bool.

    Returns:
        ``(total_loss, d_loss, l_int, l_ssim, l_feat)``.
    """
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        fake_frame = ai_brain(history_batch, training=True)

        # The discriminator yields [score map, feature 1, feature 2].
        real_score, real_feat_1, real_feat_2 = ai_discriminator(target_batch, training=True)
        fake_score, fake_feat_1, fake_feat_2 = ai_discriminator(fake_frame, training=True)

        # Least-squares discriminator loss: real -> 1, fake -> 0.
        real_term = tf.reduce_mean(tf.square(real_score - 1.0))
        fake_term = tf.reduce_mean(tf.square(fake_score))
        d_loss = 0.5 * (real_term + fake_term)

        total_loss, l_int, l_ssim, l_feat = get_total_generator_loss(
            predicted=fake_frame,
            target=target_batch,
            d_fake_out=fake_score,
            real_feats=[real_feat_1, real_feat_2],
            fake_feats=[fake_feat_1, fake_feat_2],
            last_history=last_hist_batch,
        )

        if not use_gan:
            # Warm-up: cancel the weighted adversarial term (0.05 * l_adv) that the
            # total already contains.
            adversarial_part = 0.05 * 0.5 * tf.reduce_mean(tf.square(fake_score - 1.0))
            total_loss = total_loss - adversarial_part

    generator_vars = ai_brain.trainable_variables
    generator_grads = gen_tape.gradient(total_loss, generator_vars)
    gen_optimizer.apply_gradients(zip(generator_grads, generator_vars))

    if use_gan:
        discriminator_vars = ai_discriminator.trainable_variables
        discriminator_grads = disc_tape.gradient(d_loss, discriminator_vars)
        disc_optimizer.apply_gradients(zip(discriminator_grads, discriminator_vars))

    return total_loss, d_loss, l_int, l_ssim, l_feat

# --- 4. EXECUTION ---
print("Starting Feature Matching Training...")
MAX_STEPS = 40000     # total number of generator updates
WARMUP_STEPS = 2000   # leading updates run without the adversarial game

for step, (x_batch, y_batch, last_batch) in enumerate(dataset):
    # `step` is zero-based: stopping once step == MAX_STEPS performs exactly
    # MAX_STEPS updates (a `>` comparison would run one extra).
    if step >= MAX_STEPS: break
    # Steps 0 .. WARMUP_STEPS - 1 are warm-up, i.e. exactly WARMUP_STEPS of them.
    use_gan_now = (step >= WARMUP_STEPS)
    g_loss, d_loss, val_int, val_ssim, val_feat = train_step(x_batch, y_batch, last_batch, use_gan_now)

    if step % 100 == 0:
        status = "WARMUP" if not use_gan_now else "GAN"
        # Evaluate the schedule at the loop step for display only.
        curr_lr = lr_schedule(step).numpy()
        print(f"Step {step} [{status}] LR:{curr_lr:.6f} | G_Loss:{g_loss:.4f} | Feat_Loss:{val_feat:.4f} | SSIM:{val_ssim:.4f}")

print("Training Complete!")

In [None]:
import pandas as pd
import numpy as np
import glob
import os
import cv2
from scipy.ndimage import gaussian_filter1d
from tqdm.notebook import tqdm

# File the final per-frame anomaly scores are written to (columns "Id", "Predicted").
OUTPUT_CSV = "submission.csv"
# Number of preceding frames stacked as model input; 4 frames x 3 channels matches the
# 12-channel input declared in build_unet().
NUM_HIS = 4

def get_optical_flow_mask(prev_frame, curr_frame):
    """Return a binary 3-channel motion mask between two BGR frames.

    Dense Farneback optical flow is computed on the grayscale versions of the
    frames. The flow magnitude is min-max normalised to [0, 1] for this frame pair,
    and values above 0.2 become 1.0, all others 0.0. Because the normalisation is
    per pair, the threshold is relative to the fastest-moving pixel of that pair.

    Args:
        prev_frame: Earlier frame, BGR.
        curr_frame: Later frame, BGR, same size as ``prev_frame``.

    Returns:
        Array of shape (H, W, 3) holding the mask repeated across three channels.
    """
    gray_prev, gray_curr = (
        cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) for frame in (prev_frame, curr_frame)
    )

    # Positional arguments after the (unused) output buffer: pyr_scale, levels,
    # winsize, iterations, poly_n, poly_sigma, flags.
    flow = cv2.calcOpticalFlowFarneback(gray_prev, gray_curr, None,
                                        0.5, 3, 15, 3, 5, 1.2, 0)

    # Per-pixel speed = length of the (dx, dy) flow vector; the angle is discarded.
    speed = cv2.cartToPolar(flow[..., 0], flow[..., 1])[0]

    # Rescale so the slowest pixel maps to 0 and the fastest to 1.
    speed = cv2.normalize(speed, None, 0, 1, cv2.NORM_MINMAX)

    # Keep only significant movement.
    motion = cv2.threshold(speed, 0.2, 1.0, cv2.THRESH_BINARY)[1]

    # Replicate the single-channel mask so it lines up with 3-channel frames.
    return np.repeat(motion[..., np.newaxis], 3, axis=-1)

def calculate_smart_psnr(pred, target, mask):
    """Motion-weighted PSNR between a predicted and a ground-truth frame.

    Both frames are mapped from [-1, 1] to [0, 1]. Each squared error is multiplied
    by ``mask + 0.05``, so a pixel with mask 1 gets weight 1.05 and a pixel with
    mask 0 gets weight 0.05. The weighted errors are averaged with a plain mean
    (not divided by the total weight), and the PSNR uses a peak value of 1.0.

    Args:
        pred: Predicted frame in [-1, 1].
        target: Ground-truth frame in [-1, 1].
        mask: Motion mask broadcastable to the frames.

    Returns:
        PSNR in dB, or 100.0 when the weighted error is exactly zero.
    """
    unit_pred = (pred + 1.0) / 2.0
    unit_target = (target + 1.0) / 2.0

    pixel_weights = mask + 0.05
    weighted_sq_error = np.square(unit_target - unit_pred) * pixel_weights

    mse = np.mean(weighted_sq_error)
    if mse != 0:
        return 10 * np.log10(1.0 / mse)
    # Identical frames: report a fixed ceiling instead of an infinite value.
    return 100.0

def _read_resized_frame(path):
    """Read a frame and resize it to 256x256 (BGR); None if OpenCV cannot decode it."""
    frame = cv2.imread(path)
    if frame is None:
        return None
    return cv2.resize(frame, (256, 256))


results = []          # not used in this cell; kept in case other cells reference it
all_pred_scores = []  # smoothed PSNR per frame, in the same order as all_ids
all_ids = []          # "<folder_id>_<frame_id>" per frame

video_folders = sorted(os.listdir(TEST_DIR))
print("Generating Smart Flow Predictions...")

for folder_name in tqdm(video_folders):
    folder_path = os.path.join(TEST_DIR, folder_name)
    if not os.path.isdir(folder_path): continue
    # Only folders with a purely numeric name are scored.
    try: folder_id = int(folder_name)
    except ValueError: continue

    frame_files = sorted(glob.glob(os.path.join(folder_path, "*.jpg")))
    if not frame_files:
        # Nothing to score and no ids to emit; skip before the smoothing filter
        # is handed an empty sequence.
        continue

    video_psnrs = []
    # Sliding window holding the resized frames i-NUM_HIS .. i, so every frame is
    # decoded once instead of once per window it appears in.
    recent_frames = []

    for i, frame_path in enumerate(frame_files):
        # ID Construction: all digits of the file name, falling back to the index.
        filename = os.path.basename(frame_path)
        try: frame_id = int(''.join(filter(str.isdigit, filename)))
        except ValueError: frame_id = i
        all_ids.append(f"{folder_id}_{frame_id}")

        recent_frames.append(_read_resized_frame(frame_path))
        del recent_frames[:-(NUM_HIS + 1)]

        if i < NUM_HIS:
            # Not enough history yet to predict this frame.
            video_psnrs.append(None)
            continue

        if any(frame is None for frame in recent_frames):
            # An unreadable frame in the window: leave this frame unscored (it is
            # filled in below) instead of crashing in the resize/flow code.
            video_psnrs.append(None)
            continue

        # Flow mask from the LAST history frame and the CURRENT target (raw pixels).
        prev_raw, curr_raw = recent_frames[-2], recent_frames[-1]
        flow_mask = get_optical_flow_mask(prev_raw, curr_raw)

        # Input Prep: scale the history frames to [-1, 1] and stack the channels.
        history = [(frame / 127.5) - 1.0 for frame in recent_frames[:NUM_HIS]]
        inp = np.concatenate(history, axis=2)
        inp = np.expand_dims(inp, axis=0)

        # TTA (Double Predict): average the prediction with the un-mirrored
        # prediction made from the horizontally mirrored input.
        pred_normal = ai_brain(inp, training=False)[0].numpy()
        inp_flipped = inp[:, :, ::-1, :]
        pred_flipped = ai_brain(inp_flipped, training=False)[0].numpy()[:, ::-1, :]
        pred_avg = (pred_normal + pred_flipped) / 2.0

        # Smart Scoring
        gt = (curr_raw.astype(np.float32) / 127.5) - 1.0
        psnr = calculate_smart_psnr(pred_avg, gt, flow_mask)
        video_psnrs.append(psnr)

    # Fill unscored frames with the clip's best PSNR (i.e. least anomalous), or 0.0
    # when nothing in the clip could be scored, then smooth along time.
    valid = [x for x in video_psnrs if x is not None]
    if not valid: filled = [0.0]*len(video_psnrs)
    else:
        mx = max(valid)
        filled = [x if x is not None else mx for x in video_psnrs]

    smoothed = gaussian_filter1d(filled, sigma=4.0)
    all_pred_scores.extend(smoothed)

# Global Norm: rescale the smoothed PSNRs of all clips together to [0, 1], then invert
# so that a low PSNR (poor prediction) becomes a high anomaly score.
all_scores_arr = np.array(all_pred_scores)
if all_scores_arr.size == 0:
    # No frames were scored: write a header-only file instead of failing on min().
    reg_scores = all_scores_arr
else:
    score_min = all_scores_arr.min()
    score_range = all_scores_arr.max() - score_min
    if score_range > 0:
        reg_scores = (all_scores_arr - score_min) / score_range
    else:
        # Every frame has the same score; dividing by zero would give NaN. Treat all
        # frames as equally normal (anomaly score 0).
        reg_scores = np.ones_like(all_scores_arr)
anomaly_scores = 1.0 - reg_scores

df = pd.DataFrame({"Id": all_ids, "Predicted": anomaly_scores})
df.to_csv(OUTPUT_CSV, index=False)
print(f"Saved Smart Flow Predictions to {OUTPUT_CSV}")