## Project Structure
```
2DIP_exercise/
│-- data/             # Contains images & videos
│   │-- input/        # 1 image and 1 video for each phase respectively
│   │-- output/       # All output images/videos must be stored here
│-- notebooks/        # Jupyter Notebooks for each phase
│   │-- part1.ipynb   # Image processing & feature extraction
│   │-- part2.ipynb   # Optical flow, object detection and tracking 
│-- README.md         # Project instructions
```

In [1]:
# imports
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Project root is taken to be the parent of the current working directory
# (assumes the notebook is launched from notebooks/ -- confirm if run elsewhere).
base_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
# Both data folders live under <root>/data.
inputs, outputs = (
    os.path.join(base_path, 'data', leaf) for leaf in ('input', 'output')
)

## Supplementary Code for Visualization

In [3]:
def display_images(image):
    """
    Show a single BGR image inline with matplotlib.

    Args:
        image: Image array in OpenCV's BGR channel order.

    Raises:
        ValueError: If `image` is None (e.g. a failed read or a detection
            routine that found nothing).
    """
    if image is None:
        # Fail with a readable message instead of an opaque OpenCV assertion.
        raise ValueError("display_images() received None instead of an image")

    # matplotlib expects RGB, OpenCV delivers BGR
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    plt.figure(figsize=(8, 6))
    plt.imshow(image_rgb)
    plt.axis('off')
    plt.show()

In [4]:
def get_frames(video_path):
    """
    Load every frame of a video into memory.

    Args:
        video_path: Path of the video file to read.

    Returns:
        list: The frames in playback order, each converted from BGR to RGB.
        The list is empty when the video cannot be opened or has no frames.
    """
    capture = cv2.VideoCapture(video_path)
    rgb_frames = []

    while capture.isOpened():
        ok, bgr = capture.read()
        if ok:
            # matplotlib wants RGB channel order
            rgb_frames.append(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
            continue
        # read() failed: end of stream
        break

    capture.release()
    return rgb_frames

In [15]:
import matplotlib.animation as animation
from IPython.display import HTML

import matplotlib as mpl
# Raise matplotlib's cap on the size of animations embedded as HTML so that
# to_jshtml() output of longer videos is not cut off.
# NOTE(review): the unit is MB per the matplotlib rcParams docs -- confirm 100 is enough for your videos.
mpl.rcParams['animation.embed_limit'] = 100

def display_video(video_path, interval=50):
    """
    Build a matplotlib animation that plays a video inside the notebook.

    Args:
        video_path (str): Path to the video file to display.
        interval (int): Delay between frames in milliseconds. Defaults to 50.

    Returns:
        matplotlib.animation.FuncAnimation: Animation over all frames of the
        video; render it with e.g. ``HTML(ani.to_jshtml())``.

    Raises:
        ValueError: If no frame could be read from `video_path`.
    """
    frames = get_frames(video_path)
    if not frames:
        # Without this guard the failure surfaces as a bare IndexError on frames[0].
        raise ValueError(f"No frames could be read from video: {video_path}")

    fig, ax = plt.subplots()
    # Let the image fill the whole figure (no margins)
    plt.subplots_adjust(left=0, right=1, top=1, bottom=0)
    # Start from a blank image with the same shape/dtype as the video frames
    im = ax.imshow(np.zeros_like(frames[0]))
    ax.axis('off')

    def update(frame):
        im.set_array(frame)
        return [im]

    ani = animation.FuncAnimation(
        fig, update, frames=frames, interval=interval, blit=True, repeat=False
    )

    # Close the figure so only the returned animation is shown, not the blank figure itself
    plt.close(fig)

    return ani

## Task 1 : Analyze movement patterns in a video sequence. **(6)**

a) Compute dense optical flow for each frame in a video of a moving crowd. **(2)**

b) Visualize the movement patterns in 2 different ways. **(2+2)**

In [5]:
def hsv_color_coded_visualization(flow, frame):
    """
    Visualizes optical flow using HSV color coding: hue encodes the flow direction,
    value encodes the (min-max normalized) flow magnitude, saturation is fixed at 255.
    Args:
        flow: Optical flow array of shape (height, width, 2).
        frame: The original frame; it provides the shape/dtype of the HSV image
            and is blended with the flow colors.
    Returns:
        tuple: (flow_color, color_vis) where flow_color is the color-coded flow
        converted to BGR and color_vis is frame and flow_color blended 0.6 / 0.4.
    """
    # split the (u, v) components and convert to polar form (angle in radians)
    u, v = flow[..., 0], flow[..., 1]
    mag, ang = cv2.cartToPolar(u, v)

    hsv_img = np.zeros_like(frame)
    hsv_img[..., 0] = ang * 180 / np.pi / 2  # radians -> degrees, halved to fit OpenCV's hue scale
    hsv_img[..., 1] = 255
    hsv_img[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)

    flow_color = cv2.cvtColor(hsv_img, cv2.COLOR_HSV2BGR)
    color_vis = cv2.addWeighted(frame, 0.6, flow_color, 0.4, 0)
    return flow_color, color_vis

In [11]:
def arrow_visualization(flow, frame, step=16):
    """
    Visualizes optical flow as green arrows drawn on a copy of the frame.
    One arrow is drawn per grid point; the grid starts at step // 2 and advances
    by `step` pixels in both directions.
    Args:
        flow: Optical flow array of shape (height, width, 2).
        frame: The original frame to overlay the flow visualization (not modified).
        step: Step size for arrow placement.
    Returns:
        arrow_vis: Copy of the frame with arrows representing optical flow.
    """
    rows, cols = flow.shape[:2]
    offset = step // 2
    canvas = frame.copy()

    for row in range(offset, rows, step):
        for col in range(offset, cols, step):
            dx, dy = flow[row, col]
            # float() keeps the addition in double precision before truncating to a pixel
            tip = (int(col + float(dx)), int(row + float(dy)))
            cv2.arrowedLine(
                canvas, (col, row), tip, (0, 255, 0), thickness=1, tipLength=0.3
            )

    return canvas

In [12]:
def optical_flow(video_path, output_path1, output_path2, output_path3):
    """
    Compute dense (Farneback) optical flow between consecutive frames of a video
    and save three visualizations of it.

    The first frame only serves as the initial reference, so each output video
    contains one frame fewer than the input.

    Args:
        video_path (str): Path to input video file
        output_path1 (str): Path to save the HSV flow colors blended over the original frames
        output_path2 (str): Path to save the arrow visualization
        output_path3 (str): Path to save the HSV color-coded flow on its own

    Returns:
        None. If the first frame cannot be read, "Error reading video" is printed
        and no output file is created.
    """
    cap = cv2.VideoCapture(video_path)
    writers = []
    try:
        # Read first frame before creating any writer so a bad input leaves no empty files
        ret, old_frame = cap.read()
        if not ret:
            print("Error reading video")
            return

        # VideoWriter requires written frames to match the size given here,
        # so take it from an actual frame.
        frame_height, frame_width = old_frame.shape[:2]
        # Keep fractional frame rates (e.g. 29.97) instead of truncating to int
        fps = cap.get(cv2.CAP_PROP_FPS)

        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        for path in (output_path1, output_path2, output_path3):
            writers.append(
                cv2.VideoWriter(path, fourcc, fps, (frame_width, frame_height))
            )
        out1, out2, out3 = writers

        old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Dense flow from the previous frame to the current one
            flow = cv2.calcOpticalFlowFarneback(
                old_gray,
                frame_gray,
                None,
                pyr_scale=0.5,
                levels=5,
                winsize=15,
                iterations=5,
                poly_n=5,
                poly_sigma=1.2,
                flags=0,
            )

            flow_color, color_vis = hsv_color_coded_visualization(flow, frame)
            arrow_vis = arrow_visualization(flow, frame)

            out1.write(color_vis)
            out2.write(arrow_vis)
            out3.write(flow_color)

            # Current frame becomes the reference for the next iteration
            old_gray = frame_gray
    finally:
        # Always release the capture and finalize the output files, even on errors
        cap.release()
        for writer in writers:
            writer.release()

In [13]:
# Input video and the three output videos written by optical_flow()
video_path = os.path.join(inputs, 'part2.mp4')  # Replace with your input video path
output_path1 = os.path.join(outputs, 'optical_flow_1.mp4')  # HSV flow colors blended over the original frames
output_path2 = os.path.join(outputs, 'optical_flow_2.mp4')  # Arrow overlay on the original frames
output_path3 = os.path.join(outputs, 'optical_flow_3.mp4')  # HSV color-coded flow only

# Results are written to the paths above; nothing is returned
optical_flow(video_path, output_path1, output_path2, output_path3)

In [None]:
# Play the blended HSV overlay video inline; the trailing expression is what the cell displays
ani = display_video(output_path1)
HTML(ani.to_jshtml())

In [None]:
# Play the arrow visualization video inline; the trailing expression is what the cell displays
ani = display_video(output_path2)
HTML(ani.to_jshtml())

In [None]:
# Play the pure HSV flow-color video inline; the trailing expression is what the cell displays
ani = display_video(output_path3)
HTML(ani.to_jshtml())

## Task 2 : Identify and track a moving object in a video sequence. **(9)**

a) Detect an object using template matching. The output should be the first frame in which the object appears, with a bounding box drawn around the detected object. **(2)**

In [8]:
def locate_object(video_path, template_path, output_path, threshold=0.8):
    """
    Detect first occurrence of a template object in a video using template matching.

    Frames are read in order, converted to grayscale and matched against the
    grayscale template with cv2.TM_CCOEFF_NORMED. The first frame whose best
    match score reaches `threshold` is annotated with a green bounding box and
    its 1-based frame number, saved to `output_path`, and returned.

    Args:
        video_path (str): Path to input video file
        template_path (str): Path to template image file
        output_path (str): Path to save output frame with detection
        threshold (float): Minimum match score accepted as a detection (default 0.8)

    Returns:
        numpy.ndarray or None: The annotated BGR frame of the first detection,
        or None if no frame reaches the threshold or the video yields no frames.

    Raises:
        FileNotFoundError: If the template image cannot be read.
    """
    template = cv2.imread(template_path)
    if template is None:
        # cv2.imread reports failure by returning None rather than raising
        raise FileNotFoundError(f"Could not read template image: {template_path}")
    template_gray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
    h, w = template_gray.shape[:2]

    cap = cv2.VideoCapture(video_path)
    try:
        frame_count = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1

            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            result = cv2.matchTemplate(frame_gray, template_gray, cv2.TM_CCOEFF_NORMED)

            # The best score alone decides whether anything exceeds the threshold
            _, max_val, _, max_loc = cv2.minMaxLoc(result)
            if max_val < threshold:
                continue

            # max_loc is the (x, y) top-left corner of the best match
            top_left = max_loc
            bottom_right = (top_left[0] + w, top_left[1] + h)
            cv2.rectangle(frame, top_left, bottom_right, (0, 255, 0), 2)

            text = f"Frame: {frame_count}"
            cv2.putText(frame, text, (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            cv2.imwrite(output_path, frame)
            return frame

        return None
    finally:
        # Release the capture on every exit path, including exceptions
        cap.release()

In [None]:
video_path = os.path.join(inputs, 'part2.mp4')  # Replace with your input video path
template_path = os.path.join(inputs, 'template.png')  # Replace with your template image path
output_path = os.path.join(outputs, 'detected_object.jpg')  # Output image path (annotated frame)

image = locate_object(video_path, template_path, output_path)
# locate_object returns None when no frame matches; do not hand None to OpenCV
if image is None:
    print("Object not found in the video")
else:
    display_images(image)

b) Implement a Kalman filter to predict the object's position in subsequent frames. **(5)**

In [None]:
def track(video_path, template_path, output_path):
    """
    Track the template object through the video using a Kalman filter.

    Not implemented yet: this is the placeholder for exercise part b).

    Args:
        video_path (str): Path to input video file
        template_path (str): Path to template image file
        output_path (str): Path where the tracking result is expected to be saved

    Raises:
        NotImplementedError: Always, until the exercise is implemented.
    """
    # TODO
    # A body with only a comment is a syntax error; fail explicitly instead.
    raise NotImplementedError("track() has not been implemented yet")

In [None]:
# Same input video and template as the detection step; the output path is handed to track()
video_path = os.path.join(inputs, 'part2.mp4')  # Replace with your input video path
template_path = os.path.join(inputs, 'template.png')  # Replace with your template image path
output_path = os.path.join(outputs, 'tracked_object.mp4')  # Output video path

track(video_path, template_path, output_path)

In [None]:
# Play the video at output_path inline; the trailing expression is what the cell displays
ani = display_video(output_path)
HTML(ani.to_jshtml())

c) Compare Bayesian filtering and Kalman filtering (theoretically). **(2)**

In [None]:
#TODO c):