In [None]:
import cv2
import os
import numpy as np
import torch
import torch.nn.functional as F
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
import time
import networkx as nx

# ===============================
# GPU SSIM Implementation (Torch - Gaussian Kernel)
# ===============================
# Precompute 1D Gaussian kernel
def gaussian_kernel(size, sigma, channels):
    """Build a normalized 2D Gaussian window replicated once per channel.

    Args:
        size: Side length of the square window.
        sigma: Standard deviation of the Gaussian profile.
        channels: Number of copies of the window to stack along dim 0.

    Returns:
        A contiguous float tensor of shape ``(channels, 1, size, size)``.
        Every channel holds the same window, whose entries sum to 1 up to
        floating-point rounding.
    """
    # Sample positions centred on zero, e.g. -5 .. 5 for size=11.
    offsets = torch.arange(size).float() - size / 2 + 0.5
    profile = torch.exp(-(offsets**2) / (2 * sigma**2))
    profile = profile / profile.sum()

    # The 2D window is the outer product of the 1D profile with itself.
    column = profile.unsqueeze(1)
    window = torch.matmul(column, column.t())

    stacked = window.unsqueeze(0).unsqueeze(0).expand(channels, 1, size, size)
    return stacked.contiguous()

# Global SSIM setup (11x11 window is standard)
# These values are evaluated once, when the module/cell is executed.
SSIM_SIZE = 11  # side length of the square Gaussian window
SSIM_SIGMA = 1.5  # standard deviation of the Gaussian window
SSIM_CHANNELS = 3  # number of per-channel copies of the window
SSIM_PAD = SSIM_SIZE // 2  # half the window size (integer division)
# Shape (SSIM_CHANNELS, 1, SSIM_SIZE, SSIM_SIZE); created by gaussian_kernel
# without an explicit device.
SSIM_KERNEL = gaussian_kernel(SSIM_SIZE, SSIM_SIGMA, SSIM_CHANNELS)

def ssim_torch_gaussian(img1, img2, kernel=SSIM_KERNEL):
    """Return the mean SSIM between two image tensors using a Gaussian window.

    The grouped-convolution geometry (number of groups and padding) is derived
    from ``kernel`` itself, so a custom kernel with a different window size or
    channel count is applied consistently instead of silently using the
    module-level defaults.

    Args:
        img1: First image tensor. Assumed to be (N, C, H, W) with values in
            [0, 1] -- the stabilising constants below correspond to a dynamic
            range of 1. TODO confirm against callers.
        img2: Second image tensor, same shape/dtype/device as ``img1``.
        kernel: Window of shape (C, 1, k, k), one filter per channel.

    Returns:
        The SSIM map averaged over every element, as a Python float.
    """
    # Match the kernel to the images so conv2d does not fail on a
    # device or dtype mismatch.
    kernel = kernel.to(device=img1.device, dtype=img1.dtype)
    groups = kernel.shape[0]       # one filter per channel (depthwise conv)
    pad = kernel.shape[-1] // 2    # "same" padding for an odd window size

    C1, C2 = 0.01**2, 0.03**2

    # Gaussian-weighted local average, applied independently per channel.
    def conv_gauss(img):
        return F.conv2d(img, kernel, groups=groups, padding=pad)

    mu1 = conv_gauss(img1)
    mu2 = conv_gauss(img2)

    mu1_sq, mu2_sq, mu1_mu2 = mu1**2, mu2**2, mu1 * mu2

    # Local (co)variances via E[xy] - E[x]E[y].
    sigma1_sq = conv_gauss(img1 * img1) - mu1_sq
    sigma2_sq = conv_gauss(img2 * img2) - mu2_sq
    sigma12 = conv_gauss(img1 * img2) - mu1_mu2

    # SSIM formula
    numerator = (2 * mu1_mu2 + C1) * (2 * sigma12 + C2)
    denominator = (mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2)
    ssim_map = numerator / denominator

    # Mean across all dimensions
    return ssim_map.mean().item()

# ===============================
# Utility Functions
# ===============================

def extract_frames(video_path, output_dir="frames", resize=(640,360)):
    """Decode a video and write each frame to ``output_dir`` as a JPEG.

    Frames are written as ``frame_000.jpg``, ``frame_001.jpg``, ... in decode
    order (the number is zero-padded to at least three digits).

    Args:
        video_path: Path of the video file to read.
        output_dir: Directory that receives the JPEG files; created if missing.
        resize: Size passed to ``cv2.resize`` for every frame, or a falsy
            value to keep the decoded size.

    Returns:
        ``(count, fps)``: number of frames written and the video's frame rate
        truncated to an int. ``(0, 30)`` is returned when the file is missing
        or cannot be opened; 30 is also the fallback when the container does
        not report a usable frame rate.
    """
    if not os.path.exists(video_path):
        print(f"❌ Video file '{video_path}' not found.")
        return 0, 30
    os.makedirs(output_dir, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    count = 0
    try:
        if not cap.isOpened():
            # Report the failure explicitly instead of "Extracted 0 frames".
            print(f"❌ Could not open video file '{video_path}'.")
            return 0, 30

        # Query once; a rate below 1 would truncate to 0, so fall back to 30.
        reported_fps = cap.get(cv2.CAP_PROP_FPS)
        fps = int(reported_fps) if reported_fps >= 1 else 30

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if resize:
                frame = cv2.resize(frame, resize)
            cv2.imwrite(f"{output_dir}/frame_{count:03d}.jpg", frame)
            count += 1
    finally:
        # Always free the decoder, even if resize/imwrite raises.
        cap.release()
    print(f"✅ Extracted {count} frames.")
    return count, fps

def to_tensor(frame, device):
    """Convert an H x W x C NumPy frame into a 1 x C x H x W float tensor.

    Args:
        frame: NumPy image array with channels last, or ``None``.
        device: Torch device the tensor is moved to.

    Returns:
        ``None`` when ``frame`` is ``None``; otherwise a float tensor on
        ``device`` holding the frame's values divided by 255.
    """
    if frame is None:
        return None
    channels_first = torch.from_numpy(frame).permute(2, 0, 1)
    batched = channels_first.unsqueeze(0).float().to(device)
    return batched / 255.0

# ===============================
# Robust Combined Similarity
# ===============================

def combined_similarity_robust(p, frames, tensors, device, use_ssim=True):
    """Score how similar two frames are by blending histogram, flow and SSIM.

    Args:
        p: Pair ``(i, j)`` of indices into ``frames`` / ``tensors``.
        frames: Sequence of BGR images as read by OpenCV.
        tensors: Sequence of torch tensors parallel to ``frames`` (entries may
            be ``None``); only read when ``use_ssim`` is true.
        device: Not used by this function; kept for the caller's signature.
        use_ssim: When false, SSIM is skipped and its weight is redistributed
            to the histogram and flow terms.

    Returns:
        ``(i, j, final_score)`` where ``final_score`` is the weighted sum of
        the three component scores.
    """
    i, j = p
    first, second = frames[i], frames[j]

    # --- 1. Colour histogram correlation, shifted and halved: (c + 1) / 2 ---
    hist_a = cv2.calcHist([first], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
    hist_b = cv2.calcHist([second], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
    correlation = cv2.compareHist(hist_a, hist_b, cv2.HISTCMP_CORREL)
    hist_score = (correlation + 1) / 2

    # --- 2. Dense optical flow: less average motion gives a higher score ---
    gray_a = cv2.cvtColor(first, cv2.COLOR_BGR2GRAY)
    gray_b = cv2.cvtColor(second, cv2.COLOR_BGR2GRAY)
    flow = cv2.calcOpticalFlowFarneback(gray_a, gray_b, None, 0.5, 3, 15, 3, 5, 1.2, 0)
    dx, dy = flow[..., 0], flow[..., 1]
    flow_magnitude = np.mean(np.sqrt(dx**2 + dy**2))
    flow_score = np.clip(1 / (1 + flow_magnitude), 0, 1)

    # --- 3. SSIM on the tensors; stays 0.0 if disabled or a tensor is missing ---
    if use_ssim and tensors[i] is not None and tensors[j] is not None:
        with torch.no_grad():
            raw_ssim = ssim_torch_gaussian(tensors[i], tensors[j])
        ssim_score = np.clip(raw_ssim, 0, 1)
    else:
        ssim_score = 0.0

    # --- 4. Weighted combination; weights depend on whether SSIM is enabled ---
    if use_ssim:
        weight_ssim, weight_hist, weight_flow = 0.5, 0.45, 0.05
    else:
        weight_ssim, weight_hist, weight_flow = 0.0, 0.7, 0.3
    final_score = weight_ssim * ssim_score + weight_hist * hist_score + weight_flow * flow_score

    return i, j, final_score

# ===============================
# Build Similarity Matrix (Full Parallel)
# ===============================
def build_similarity_matrix_full(frames, tensors, device):
    """Compute the symmetric N x N similarity matrix over all frame pairs.

    Each unordered pair ``(i, j)`` with ``i < j`` is scored once by
    ``combined_similarity_robust`` on a thread pool and mirrored into both
    halves of the matrix. The diagonal is left at zero. SSIM is only enabled
    when ``device`` is not the CPU.

    Args:
        frames: Sequence of frames.
        tensors: Sequence of tensors parallel to ``frames``.
        device: Torch device; its ``type`` decides whether SSIM is used.

    Returns:
        A ``(len(frames), len(frames))`` NumPy array of similarity scores.
    """
    n = len(frames)
    similarity = np.zeros((n, n))
    pairs = [(a, b) for a in range(n) for b in range(a + 1, n)]

    use_ssim = device.type != 'cpu'
    if not use_ssim:
        print("⚠️ Running on CPU: SSIM is disabled to save time. Using Hist+Flow only.")

    def score_pair(pair):
        # Bind the shared arguments so the executor only has to pass the pair.
        return combined_similarity_robust(pair, frames, tensors, device, use_ssim)

    print("⚙️ Computing full similarity matrix (parallel)...")
    with ThreadPoolExecutor() as executor:
        scored = executor.map(score_pair, pairs)
        results = list(tqdm(scored, total=len(pairs)))

    for a, b, score in results:
        similarity[a][b] = score
        similarity[b][a] = score

    return similarity

# ===============================
# Traveling Salesperson Problem (TSP) Approximation
# ===============================
def reconstruct_order_tsp(similarity):
    """Order frames with a greedy nearest-neighbour walk over the matrix.

    Starting from index 0, repeatedly move to the unvisited index with the
    lowest cost ``1 - similarity``; ties go to the lowest index. This is a
    heuristic for a short Hamiltonian path, not an exact TSP solution.

    The walk reads the matrix directly rather than building an intermediate
    graph, and an empty matrix yields an empty order instead of ``[0]``.

    Args:
        similarity: Square, indexable matrix (``similarity[i][j]``) of
            pairwise scores. Only the upper triangle (``i < j``) is read.

    Returns:
        A list containing every index in ``range(len(similarity))`` exactly
        once, in visiting order.
    """
    n = len(similarity)
    if n == 0:
        return []

    order = [0]
    visited = {0}
    current = 0

    for _ in range(n - 1):
        best_neighbor = -1
        min_cost = float('inf')

        for candidate in range(n):
            if candidate in visited:
                continue
            # Always read the upper-triangle entry for the pair.
            if current < candidate:
                cost = 1.0 - similarity[current][candidate]
            else:
                cost = 1.0 - similarity[candidate][current]
            # Strict '<' keeps the lowest index on ties and skips NaN costs.
            if cost < min_cost:
                min_cost = cost
                best_neighbor = candidate

        if best_neighbor == -1:
            # No comparable cost (all NaN/inf): take the lowest unvisited index.
            best_neighbor = next(i for i in range(n) if i not in visited)

        order.append(best_neighbor)
        visited.add(best_neighbor)
        current = best_neighbor

    return order

# ===============================
# Save Reconstructed Video
# ===============================
def rebuild_video(order, frame_dir="frames", output_file="reconstructed_forward_motion.mp4", fps=30):
    """Write the frames named by ``order`` into a single video file.

    The output size is taken from ``frame_000.jpg`` in ``frame_dir``. The
    'mp4v' codec is tried first and 'XVID' second. Frames whose file cannot
    be read are skipped.

    Args:
        order: Sequence of frame numbers, in playback order.
        frame_dir: Directory containing ``frame_NNN.jpg`` files.
        output_file: Path of the video to create.
        fps: Frame rate of the output video.

    Returns:
        None. Problems are reported with a printed message.
    """
    if len(order) == 0:
        print("⚠️ No frames to rebuild video.")
        return

    sample = cv2.imread(f"{frame_dir}/frame_000.jpg")
    if sample is None:
        print(f"❌ Frame file '{frame_dir}/frame_000.jpg' not found.")
        return
    height, width, _ = sample.shape
    frame_size = (width, height)

    # Try the primary codec, then the fallback; give up if neither opens.
    for codec in ('mp4v', 'XVID'):
        out = cv2.VideoWriter(output_file, cv2.VideoWriter_fourcc(*codec), fps, frame_size)
        if out.isOpened():
            break
    else:
        print("❌ Failed to initialize video writer with MP4V or XVID.")
        return

    for frame_number in order:
        image = cv2.imread(f"{frame_dir}/frame_{frame_number:03d}.jpg")
        if image is None:
            continue
        out.write(image)
    out.release()
    print(f"🎬 Reconstructed video saved as {output_file}")
    
# ===============================
# MAIN FUNCTION (GPU/CPU with Automatic Direction Fix)
# ===============================
def main():
    """Reconstruct the frame order of ``jumbled_video.mp4`` and save the result.

    Pipeline: extract frames to ``frames/``, load them back, build the full
    pairwise similarity matrix, derive a greedy ordering, reverse it, and
    write ``reconstructed_FORWARD_motion.mp4``. Returns early (with a message
    from the failing step) if no frames are available.
    """
    start = time.time()
    # Assume the jumbled video is named this by the user
    video_path = "jumbled_video.mp4"

    # Choose device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"🚀 Using device: {device}")

    # --- 1. Frame Extraction (Resize for faster processing) ---
    total, fps_extracted = extract_frames(video_path, resize=(640,360))
    if total == 0:
        return
    fps = fps_extracted

    # Remember each loaded frame's on-disk number. Dropping unreadable frames
    # compacts the list, so matrix indices can differ from file numbers.
    frame_ids = []
    frames = []
    for i in range(total):
        frame = cv2.imread(f"frames/frame_{i:03d}.jpg")
        if frame is not None:
            frame_ids.append(i)
            frames.append(frame)
    if len(frames) == 0:
        print("❌ No valid frames loaded. Exiting.")
        return

    # --- 2. Tensor Conversion (Needed for SSIM) ---
    tensors = [to_tensor(f, device) for f in frames]

    # --- 3. Similarity Matrix (Full N x N) ---
    similarity = build_similarity_matrix_full(frames, tensors, device)

    # --- 4. Order Reconstruction (TSP/Greedy) ---
    order = reconstruct_order_tsp(similarity)
    # Translate matrix indices back to frame file numbers for rebuild_video.
    order = [frame_ids[k] for k in order]

    # --- AUTOMATIC DIRECTION FIX ---
    # The greedy ordering has no notion of time direction. The original author
    # observed that it is often the reverse of the desired motion, so the list
    # is always reversed here. This is a heuristic, not a guarantee.
    print("🏃 Reversing reconstructed order to guarantee forward motion.")
    order.reverse()
    # -------------------------------

    # --- 5. Video Rebuild and Cleanup ---
    rebuild_video(order, output_file="reconstructed_FORWARD_motion.mp4", fps=fps)

    end = time.time()
    print(f"⏱️ Total execution time (Fixed Full Matrix): {end-start:.2f} seconds")

if __name__=="__main__":
    main()

🚀 Using device: cpu
✅ Extracted 359 frames.
⚠️ Running on CPU: SSIM is disabled to save time. Using Hist+Flow only.
⚙️ Computing full similarity matrix (parallel)...


100%|██████████| 64261/64261 [09:43<00:00, 110.04it/s]


🎬 Reconstructed video saved as reconstructed_robust.mp4
⏱️ Total execution time (Fixed Full Matrix): 591.29 seconds


In [None]:
# Notebook shell commands: install the packages listed in requirements.txt,
# then install opencv-python via sudo.
# NOTE(review): the recorded output reports that sudo is disabled on this
# machine -- confirm whether a plain `pip install opencv-python` is intended.
!pip install -r requirements.txt
!sudo pip3 install opencv-python


Defaulting to user installation because normal site-packages is not writeable


Sudo is disabled on this machine. To enable it, go to the Developer Settings page in the Settings app


In [1]:
import cv2
import os
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from scipy.ndimage import uniform_filter # Used for fast CPU SSIM
from tqdm import tqdm
import time
import networkx as nx

# --- Configuration ---
# Target (width, height) that frames are resized to before processing,
# to save CPU/RAM. It is the default size for the frame-reading helpers
# defined below in this cell.
# (1280, 720) is 720p; (640, 360) is 360p.
TARGET_RESIZE = (640, 360)
# ---------------------

# ===============================
# CPU SSIM Implementation (NumPy/SciPy - Fast)
# ===============================

def ssim_numpy(img1, img2, L=1.0, k1=0.01, k2=0.03, filter_size=11):
    """Calculate the mean SSIM of two images using NumPy/SciPy (CPU-only).

    Local statistics use a box (uniform) window instead of a Gaussian one.
    The window covers only the two leading (spatial) axes; any trailing axes,
    such as the colour channels of an H x W x 3 image, are filtered
    independently so channels are not averaged into each other.

    Args:
        img1: First image array; values are divided by 255 before use.
        img2: Second image array, same shape as ``img1``.
        L: Dynamic range of the scaled pixel values.
        k1: Constant for the luminance stabiliser ``C1 = (k1 * L) ** 2``.
        k2: Constant for the contrast stabiliser ``C2 = (k2 * L) ** 2``.
        filter_size: Side length of the square averaging window.

    Returns:
        The SSIM map averaged over all pixels (and channels) as a NumPy float.
    """
    img1 = img1.astype(np.float64) / 255.0
    img2 = img2.astype(np.float64) / 255.0

    C1 = (k1 * L) ** 2
    C2 = (k2 * L) ** 2

    # A scalar size would make uniform_filter average along every axis,
    # including the channel axis. Size 1 on trailing axes disables that.
    if img1.ndim >= 2:
        window = (filter_size, filter_size) + (1,) * (img1.ndim - 2)
    else:
        window = filter_size

    # uniform_filter is a fast, CPU-friendly alternative to Gaussian blur
    def local_mean(arr):
        return uniform_filter(arr, size=window)

    mu1 = local_mean(img1)
    mu2 = local_mean(img2)

    mu1_sq, mu2_sq, mu1_mu2 = mu1 * mu1, mu2 * mu2, mu1 * mu2

    # Local (co)variances via E[xy] - E[x]E[y].
    sigma1_sq = local_mean(img1 * img1) - mu1_sq
    sigma2_sq = local_mean(img2 * img2) - mu2_sq
    sigma12 = local_mean(img1 * img2) - mu1_mu2

    # SSIM formula
    numerator = (2 * mu1_mu2 + C1) * (2 * sigma12 + C2)
    denominator = (mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2)

    # Entries with a zero denominator are left at 0 rather than dividing.
    ssim_map = np.divide(numerator, denominator, out=np.zeros_like(numerator), where=denominator != 0)

    return ssim_map.mean()

# ===============================
# VIDEO JUMBLING UTILITY (Memory Fix Included)
# ===============================

def jumble_and_save(clean_video_path, output_jumbled_path="test_jumbled_video.mp4", target_resize=TARGET_RESIZE):
    """Read a video, shuffle its frames at random and save the shuffled video.

    All frames are held in memory, so each one is resized as soon as it is
    decoded to keep memory use down.

    Args:
        clean_video_path: Path of the source video.
        output_jumbled_path: Path of the shuffled video to write.
        target_resize: Size passed to ``cv2.resize`` for every frame, or
            ``None`` to keep the decoded size.

    Returns:
        ``(True, output_jumbled_path, fps)`` on success, where ``fps`` is the
        source frame rate truncated to an int (30 if it is not reported).
        ``(False, None, 0)`` if the source is missing, cannot be opened, has
        no readable frames, or the output writer cannot be opened.
    """
    if not os.path.exists(clean_video_path):
        print(f"❌ Error: Clean video file not found at '{clean_video_path}'.")
        return False, None, 0

    print(f"🎬 Reading frames from: {clean_video_path}")
    cap = cv2.VideoCapture(clean_video_path)
    frames = []
    try:
        if not cap.isOpened():
            print("❌ Error: Could not open the video file.")
            return False, None, 0

        # Retrieve original FPS for accurate output. A rate below 1 would
        # truncate to 0, so fall back to 30 in that case too.
        reported_fps = cap.get(cv2.CAP_PROP_FPS)
        fps = int(reported_fps) if reported_fps >= 1 else 30

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # --- MEMORY FIX: Resize the frame before storing ---
            if target_resize is not None:
                frame = cv2.resize(frame, target_resize)
            # --------------------------------------------------

            frames.append(frame)
    finally:
        # Release the decoder on every path, including early returns.
        cap.release()

    num_frames = len(frames)
    if num_frames == 0:
        print("❌ Error: Video contains no readable frames.")
        return False, None, 0

    # Jumble the Order
    jumbled_indices = np.arange(num_frames)
    np.random.shuffle(jumbled_indices)

    # Rebuild Video in Jumbled Order
    h, w, _ = frames[0].shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_jumbled_path, fourcc, fps, (w, h))
    if not out.isOpened():
        # Do not report success if the writer never opened.
        print(f"❌ Error: Could not open video writer for '{output_jumbled_path}'.")
        return False, None, 0

    try:
        for idx in jumbled_indices:
            out.write(frames[idx])
    finally:
        out.release()

    print(f"✅ Success! Jumbled video saved as: {output_jumbled_path}")
    return True, output_jumbled_path, fps

# ===============================
# Reconstruction Utilities
# ===============================

def extract_frames(video_path, output_dir="frames", resize=TARGET_RESIZE):
    """Decode a video and write each frame to ``output_dir`` as a JPEG.

    Frames are written as ``frame_000.jpg``, ``frame_001.jpg``, ... in decode
    order (the number is zero-padded to at least three digits).

    Args:
        video_path: Path of the video file to read.
        output_dir: Directory that receives the JPEG files; created if missing.
        resize: Size passed to ``cv2.resize`` for every frame, or a falsy
            value to keep the decoded size.

    Returns:
        ``(count, fps)``: number of frames written and the video's frame rate
        truncated to an int. ``(0, 30)`` is returned when the file is missing
        or cannot be opened; 30 is also the fallback when the container does
        not report a usable frame rate.
    """
    if not os.path.exists(video_path):
        print(f"❌ Video file '{video_path}' not found.")
        return 0, 30
    os.makedirs(output_dir, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    count = 0
    try:
        if not cap.isOpened():
            # Report the failure explicitly instead of "Extracted 0 frames".
            print(f"❌ Could not open video file '{video_path}'.")
            return 0, 30

        # Query once; a rate below 1 would truncate to 0, so fall back to 30.
        reported_fps = cap.get(cv2.CAP_PROP_FPS)
        fps = int(reported_fps) if reported_fps >= 1 else 30

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if resize:
                frame = cv2.resize(frame, resize)
            cv2.imwrite(f"{output_dir}/frame_{count:03d}.jpg", frame)
            count += 1
    finally:
        # Always free the decoder, even if resize/imwrite raises.
        cap.release()
    print(f"✅ Extracted {count} frames.")
    return count, fps


def combined_similarity_robust_cpu(p, frames):
    """Score the similarity of frame pair ``p = (i, j)`` using CPU-only metrics.

    Blends colour-histogram correlation, a dense optical-flow score and
    ``ssim_numpy`` with fixed weights 0.5 / 0.1 / 0.4 respectively.

    Args:
        p: Pair ``(i, j)`` of indices into ``frames``.
        frames: Sequence of BGR images as read by OpenCV.

    Returns:
        ``(i, j, final_score)``.
    """
    i, j = p
    first, second = frames[i], frames[j]

    # --- 1. Colour histogram correlation, shifted and halved: (c + 1) / 2 ---
    hist_a = cv2.calcHist([first], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
    hist_b = cv2.calcHist([second], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
    correlation = cv2.compareHist(hist_a, hist_b, cv2.HISTCMP_CORREL)
    hist_score = (correlation + 1) / 2

    # --- 2. Dense optical flow: less average motion gives a higher score ---
    gray_a = cv2.cvtColor(first, cv2.COLOR_BGR2GRAY)
    gray_b = cv2.cvtColor(second, cv2.COLOR_BGR2GRAY)
    flow = cv2.calcOpticalFlowFarneback(gray_a, gray_b, None, 0.5, 3, 15, 3, 5, 1.2, 0)
    dx, dy = flow[..., 0], flow[..., 1]
    flow_magnitude = np.mean(np.sqrt(dx**2 + dy**2))
    flow_score = np.clip(1 / (1 + flow_magnitude), 0, 1)

    # --- 3. Structural similarity, clamped to [0, 1] ---
    ssim_score = np.clip(ssim_numpy(first, second), 0, 1)

    # --- 4. Weighted combination (CPU optimized weights) ---
    weight_ssim, weight_hist, weight_flow = 0.4, 0.5, 0.1
    final_score = weight_ssim * ssim_score + weight_hist * hist_score + weight_flow * flow_score

    return i, j, final_score


def build_similarity_matrix_full(frames):
    """Compute the symmetric N x N similarity matrix over all frame pairs.

    Each unordered pair ``(i, j)`` with ``i < j`` is scored once by
    ``combined_similarity_robust_cpu`` on a thread pool and mirrored into both
    halves of the matrix. The diagonal is left at zero.

    Args:
        frames: Sequence of frames.

    Returns:
        A ``(len(frames), len(frames))`` NumPy array of similarity scores.
    """
    n = len(frames)
    similarity = np.zeros((n, n))
    pairs = [(a, b) for a in range(n) for b in range(a + 1, n)]

    def score_pair(pair):
        # Bind the shared frame list so the executor only has to pass the pair.
        return combined_similarity_robust_cpu(pair, frames)

    print("⚙️ Computing full similarity matrix (parallel, CPU optimized)...")
    with ThreadPoolExecutor() as executor:
        scored = executor.map(score_pair, pairs)
        results = list(tqdm(scored, total=len(pairs)))

    for a, b, score in results:
        similarity[a][b] = score
        similarity[b][a] = score

    return similarity


def reconstruct_order_tsp(similarity):
    """Order frames with a greedy nearest-neighbour walk over the matrix.

    Starting from index 0, repeatedly move to the unvisited index with the
    lowest cost ``1 - similarity``; ties go to the lowest index. This is a
    heuristic for a short Hamiltonian path, not an exact TSP solution.

    The walk reads the matrix directly rather than building an intermediate
    graph, and an empty matrix yields an empty order instead of ``[0]``.

    Args:
        similarity: Square, indexable matrix (``similarity[i][j]``) of
            pairwise scores. Only the upper triangle (``i < j``) is read.

    Returns:
        A list containing every index in ``range(len(similarity))`` exactly
        once, in visiting order.
    """
    n = len(similarity)
    if n == 0:
        return []

    order = [0]
    visited = {0}
    current = 0

    for _ in range(n - 1):
        best_neighbor = -1
        min_cost = float('inf')

        for candidate in range(n):
            if candidate in visited:
                continue
            # Always read the upper-triangle entry for the pair.
            if current < candidate:
                cost = 1.0 - similarity[current][candidate]
            else:
                cost = 1.0 - similarity[candidate][current]
            # Strict '<' keeps the lowest index on ties and skips NaN costs.
            if cost < min_cost:
                min_cost = cost
                best_neighbor = candidate

        if best_neighbor == -1:
            # No comparable cost (all NaN/inf): take the lowest unvisited index.
            best_neighbor = next(i for i in range(n) if i not in visited)

        order.append(best_neighbor)
        visited.add(best_neighbor)
        current = best_neighbor

    return order


def rebuild_video(order, frame_dir="frames", output_file="reconstructed_cpu_optimized_FORWARD.mp4", fps=30):
    """Write the frames named by ``order`` into a single video file.

    The output size is taken from ``frame_000.jpg`` in ``frame_dir``. The
    'mp4v' codec is tried first and 'XVID' second. Frames whose file cannot
    be read are skipped.

    Args:
        order: Sequence of frame numbers, in playback order.
        frame_dir: Directory containing ``frame_NNN.jpg`` files.
        output_file: Path of the video to create.
        fps: Frame rate of the output video.

    Returns:
        None. Problems are reported with a printed message.
    """
    if len(order) == 0:
        print("⚠️ No frames to rebuild video.")
        return

    sample = cv2.imread(f"{frame_dir}/frame_000.jpg")
    if sample is None:
        print(f"❌ Frame file '{frame_dir}/frame_000.jpg' not found.")
        return
    height, width, _ = sample.shape
    frame_size = (width, height)

    # Try the primary codec, then the fallback; give up if neither opens.
    for codec in ('mp4v', 'XVID'):
        out = cv2.VideoWriter(output_file, cv2.VideoWriter_fourcc(*codec), fps, frame_size)
        if out.isOpened():
            break
    else:
        print("❌ Failed to initialize video writer.")
        return

    for frame_number in order:
        image = cv2.imread(f"{frame_dir}/frame_{frame_number:03d}.jpg")
        if image is None:
            continue
        out.write(image)
    out.release()
    print(f"🎬 Reconstructed video saved as {output_file}")


# ===============================
# MAIN FUNCTION (CPU)
# ===============================
def main():
    """Interactive CPU pipeline: optionally jumble a video, then reconstruct it.

    Mode '1' first creates ``test_jumbled_video.mp4`` from a clean source and
    reconstructs that; any other answer reconstructs ``jumbled_video.mp4``.
    The result is written as a FORWARD or REVERSED video depending on the
    user's answer, and a short summary is written to
    ``execution_log_cpu.txt``.
    """
    # NOTE: the timer starts before the prompts, so the reported total
    # includes time spent waiting for user input.
    start = time.time()
    video_path = "jumbled_video.mp4"
    fps = 30

    print(f"--- CPU Video Processor (Resize: {TARGET_RESIZE}) ---")

    # --- MODE SELECTION ---
    mode = input("Select mode: (1) Jumble new video OR (2) Reconstruct 'jumbled_video.mp4': ").strip()

    if mode == '1':
        clean_path = input("Enter path to your CLEAN source video (e.g., 'clean.mp4'): ").strip()
        jumbled_path = "test_jumbled_video.mp4"
        success, video_path, fps = jumble_and_save(clean_path, jumbled_path)
        if not success:
            return
        print(f"Ready to reconstruct: {video_path}")

    # --- 1. Frame Extraction ---
    total, fps_extracted = extract_frames(video_path, resize=TARGET_RESIZE)
    if total == 0:
        return
    # Use the FPS extracted from the video for reconstruction
    fps = fps_extracted

    # Remember each loaded frame's on-disk number. Dropping unreadable frames
    # compacts the list, so matrix indices can differ from file numbers.
    frame_ids = []
    frames = []
    for i in range(total):
        frame = cv2.imread(f"frames/frame_{i:03d}.jpg")
        if frame is not None:
            frame_ids.append(i)
            frames.append(frame)
    if len(frames) == 0:
        print("❌ No valid frames loaded. Exiting.")
        return

    # --- 2. Similarity Matrix (Full N x N) ---
    similarity = build_similarity_matrix_full(frames)

    # --- 3. Order Reconstruction (TSP/Greedy) ---
    order = reconstruct_order_tsp(similarity)
    # Translate matrix indices back to frame file numbers for rebuild_video.
    order = [frame_ids[k] for k in order]

    # --- 4. Apply Direction Choice ---
    print("\n--- Motion Direction Check ---")
    direction_choice = input("Enter desired output motion (forward / reverse): ").strip().lower()
    reverse_motion = (direction_choice == "reverse")

    if reverse_motion:
        print("🏃 Reversing the frame order based on user choice.")
        order.reverse()
        output_file_name = "reconstructed_cpu_optimized_REVERSED.mp4"
    else:
        output_file_name = "reconstructed_cpu_optimized_FORWARD.mp4"

    # --- 5. Video Rebuild and Execution Log ---
    rebuild_video(order, output_file=output_file_name, fps=fps)

    end = time.time()
    total_time = end - start
    print(f"⏱️ Total execution time (CPU Optimized): {total_time:.2f} seconds")

    # Log results for the challenge deliverable
    with open("execution_log_cpu.txt", "w") as log:
        log.write(f"Device: CPU-Optimized (NumPy SSIM)\n")
        log.write(f"Source Video: {video_path}\n")
        log.write(f"Resize Resolution: {TARGET_RESIZE}\n")
        log.write(f"Total Execution Time: {total_time:.2f} sec\n")
        log.write(f"Output File: {output_file_name}\n")
    print("📄 Logged execution time in execution_log_cpu.txt")

if __name__=="__main__":
    main()

--- CPU Video Processor (Resize: (640, 360)) ---
🎬 Reading frames from: my_clean_source.mp4
✅ Success! Jumbled video saved as: test_jumbled_video.mp4
Ready to reconstruct: test_jumbled_video.mp4
✅ Extracted 359 frames.
⚙️ Computing full similarity matrix (parallel, CPU optimized)...


  1%|          | 539/64261 [00:28<56:57, 18.65it/s]  


KeyboardInterrupt: 