## Project Structure
```
2DIP_exercise/
│-- data/             # Contains images & videos
│   │-- input/        # 1 image and 1 video for each phase respectively
│   │-- output/       # All output images/videos must be stored here
│-- notebooks/        # Jupyter Notebooks for each phase
│   │-- part1.ipynb   # Image processing & feature extraction
│   │-- part2.ipynb   # Optical flow, object detection and tracking 
│-- README.md         # Project instructions
```

In [None]:
# imports
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Resolve the project root (the parent of the current working directory)
# and the input/output data folders underneath it.
base_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
inputs, outputs = (
    os.path.join(base_path, 'data', leaf) for leaf in ('input', 'output')
)

## Supplementary Code for Visualization

In [3]:
def display_images(image):
    """Show a single BGR image inline with matplotlib.

    Args:
        image: Image array in OpenCV's BGR channel order.
    """
    # matplotlib expects RGB, so swap the channel order before plotting.
    rgb_view = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    plt.figure(figsize=(8, 6))
    plt.imshow(rgb_view)
    plt.axis("off")
    plt.show()

In [None]:
def get_frames(video_path):
    """Read every frame of a video file into memory as RGB arrays.

    Args:
        video_path: Path of the video file to decode.

    Returns:
        A list of frames in RGB channel order; empty if nothing could be read.
    """
    capture = cv2.VideoCapture(video_path)
    rgb_frames = []

    while capture.isOpened():
        grabbed, bgr = capture.read()
        if grabbed:
            # OpenCV decodes to BGR; matplotlib wants RGB.
            rgb_frames.append(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
        else:
            # End of stream (or a read failure): stop decoding.
            break

    capture.release()
    return rgb_frames

In [5]:
import matplotlib.animation as animation
from IPython.display import HTML

import matplotlib as mpl
# Raise matplotlib's cap on the size of animations embedded via to_jshtml()
# (the value is documented by matplotlib as megabytes) so that longer videos
# are not cut off when rendered inline.
mpl.rcParams['animation.embed_limit'] = 100

def display_video(video_path):
    """Build a matplotlib animation that plays back a video file.

    Args:
        video_path: Path of the video to display.

    Returns:
        A ``matplotlib.animation.FuncAnimation`` with one step per frame;
        render it in a notebook with ``HTML(ani.to_jshtml())``.

    Raises:
        OSError: If no frame could be read from ``video_path`` (missing,
            empty or undecodable file).
    """
    frames = get_frames(video_path)
    if not frames:
        # Without this guard the failure surfaces as a bare IndexError on
        # frames[0], which hides the real cause (an unreadable video).
        raise OSError(f"No frames could be read from video: {video_path}")

    fig, ax = plt.subplots()
    # Let the image fill the whole figure (no margins around the video).
    plt.subplots_adjust(left=0, right=1, top=1, bottom=0)
    # Start from a black placeholder with the same shape/dtype as the frames.
    im = ax.imshow(np.zeros_like(frames[0]))
    ax.axis('off')

    def update(frame):
        # Swap the pixel data in place; return the artist for blitting.
        im.set_array(frame)
        return [im]

    ani = animation.FuncAnimation(
        fig, update, frames=frames, interval=50, blit=True, repeat=False
    )

    # Close the figure so the notebook does not also render a static plot.
    plt.close(fig)

    return ani

## Task 1 : Analyze movement patterns in a video sequence. **(6)**

a) Compute dense optical flow for each frame in a video of a moving crowd. **(2)**

b) Visualize the movement patterns in 2 different ways. **(2+2)**

In [None]:
def optical_flow(video_path, hsv_output, arrow_output):
    """Compute dense optical flow over a video and save two visualizations.

    Flow is computed between each pair of consecutive frames, so both output
    videos contain one frame fewer than the input.

    Args:
        video_path: Path of the input video.
        hsv_output: Destination path of the HSV colour-coded flow video.
        arrow_output: Destination path of the video with flow arrows drawn
            on top of the original frames.

    Raises:
        OSError: If no frame could be read from ``video_path``.
    """
    cap = cv2.VideoCapture(video_path)

    # Validate the first frame before creating any output files.
    ok, prev = cap.read()
    if not ok:
        cap.release()
        raise OSError(f"Could not read any frame from video: {video_path}")

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    fps = cap.get(cv2.CAP_PROP_FPS)
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    hsv_writer = cv2.VideoWriter(hsv_output, fourcc, fps, (w, h))
    arrow_writer = cv2.VideoWriter(arrow_output, fourcc, fps, (w, h))

    try:
        prev_g = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY)

        while True:
            ok, frame = cap.read()
            if not ok:
                break
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            flow = _dense_flow(prev_g, gray)
            hsv_writer.write(_flow_to_hsv(flow))
            arrow_writer.write(_draw_arrows(frame, flow))
            # The current frame becomes the reference for the next pair.
            prev_g = gray
    finally:
        # Always release, even on error, so the output files are finalized
        # and the capture handle is not leaked.
        for handle in (cap, hsv_writer, arrow_writer):
            handle.release()

def _dense_flow(prev_g, curr_g):
    """Return the Farneback dense optical flow from ``prev_g`` to ``curr_g``.

    Args:
        prev_g: Previous frame (grayscale).
        curr_g: Current frame (grayscale).

    Returns:
        The flow field produced by ``cv2.calcOpticalFlowFarneback``.
    """
    farneback_params = dict(
        pyr_scale=0.3,
        levels=3,
        winsize=15,
        iterations=3,
        poly_n=5,
        poly_sigma=1.2,
        flags=0,
    )
    # ``None`` lets OpenCV allocate the output flow array itself.
    return cv2.calcOpticalFlowFarneback(prev_g, curr_g, None, **farneback_params)

def _flow_to_hsv(flow):
    """Render a flow field as a colour image (hue = direction, value = speed).

    Args:
        flow: Flow array whose last axis holds the x and y displacement.

    Returns:
        A BGR ``uint8`` image of the same height and width as ``flow``.
    """
    flow_x, flow_y = flow[..., 0], flow[..., 1]
    magnitude, angle = cv2.cartToPolar(flow_x, flow_y)

    canvas = np.zeros(magnitude.shape + (3,), dtype=np.uint8)
    # Full saturation everywhere; only hue and value carry information.
    canvas[..., 1] = 255
    # Angle in radians -> half-degrees, which is the hue scale used here.
    canvas[..., 0] = angle * 90 / np.pi
    # Brightness is the magnitude min-max scaled to 0..255 for this frame.
    canvas[..., 2] = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX)

    return cv2.cvtColor(canvas, cv2.COLOR_HSV2BGR)

def _draw_arrows(frame, flow, step = 16, color = (0,255,0)):
    """Overlay flow vectors as arrows on a copy of ``frame``.

    Args:
        frame: Image to draw on; it is copied, not modified.
        flow: Flow array indexed as ``flow[y, x] -> (dx, dy)``.
        step: Spacing in pixels between sampled grid points.
        color: Arrow colour passed to ``cv2.arrowedLine``.

    Returns:
        A copy of ``frame`` with one arrow per sampled grid point.
    """
    canvas = frame.copy()
    rows, cols = frame.shape[:2]

    for row in range(0, rows, step):
        for col in range(0, cols, step):
            # Displacement at this grid point, truncated to whole pixels.
            offset = flow[row, col].astype(int)
            tip = (col + offset[0], row + offset[1])
            cv2.arrowedLine(canvas, (col, row), tip, color, 1, tipLength=0.3)

    return canvas

In [None]:
# Run Task 1: compute dense optical flow on the input clip and write both
# visualizations into the output folder.
video_path = os.path.join(inputs, 'part2.mp4')  # Replace with your input video path
output_path1 = os.path.join(outputs, 'optical_flow_1.mp4')  # Passed as hsv_output: HSV colour-coded flow video
output_path2 = os.path.join(outputs, 'optical_flow_2.mp4')  # Passed as arrow_output: arrows drawn over the frames

optical_flow(video_path, output_path1, output_path2)

In [None]:
# Play back the first optical-flow output (output_path1) inline as a JS/HTML animation.
ani = display_video(output_path1)
HTML(ani.to_jshtml())

In [None]:
# Play back the second optical-flow output (output_path2) inline as a JS/HTML animation.
ani = display_video(output_path2)
HTML(ani.to_jshtml())

## Task 2 : Identify and track a moving object in a video sequence. **(9)**

a) Detect an object using template matching. The output should be the first frame in which the object appears, with a bounding box drawn around the detected object. **(2)**

In [None]:
def locate_object(video_path, template_path, output_path):
    """Find the first video frame that contains the template and save it.

    Each frame is converted to grayscale and compared against the template
    with normalized cross-correlation (``cv2.TM_CCOEFF_NORMED``). The first
    frame whose best score reaches the threshold (0.9) gets a green bounding
    box drawn on it and is written to ``output_path``.

    Args:
        video_path: Path of the video to search.
        template_path: Path of the template image (loaded as grayscale).
        output_path: Where the annotated frame is saved.

    Returns:
        The annotated BGR frame, or ``None`` if no frame reached the
        threshold (in which case nothing is written).

    Raises:
        OSError: If the template image cannot be read.
    """
    template = cv2.imread(template_path, 0)
    if template is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f"Could not read template image: {template_path}")
    # shape is (rows, cols); reversed it yields (width, height).
    w, h = template.shape[::-1]

    method = cv2.TM_CCOEFF_NORMED
    threshold = 0.9
    frame_count = 0
    detected_frame = None

    cap = cv2.VideoCapture(video_path)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            scores = cv2.matchTemplate(frame_gray, template, method)
            # minMaxLoc returns (min_val, max_val, min_loc, max_loc).
            _, max_val, _, max_loc = cv2.minMaxLoc(scores)

            if max_val >= threshold:
                top_left = max_loc
                bottom_right = (top_left[0] + w, top_left[1] + h)
                cv2.rectangle(frame, top_left, bottom_right, (0, 255, 0), 3)
                print(f"Object detected in the frame {frame_count}")
                cv2.imwrite(output_path, frame)
                detected_frame = frame
                break

            frame_count += 1
    finally:
        # Release the capture even if matching raised (e.g. the template is
        # larger than the frame). No GUI windows are opened here, so there
        # is nothing for cv2.destroyAllWindows() to close.
        cap.release()

    return detected_frame

In [None]:
# Run Task 2a: find the first frame containing the template and show it.
video_path = os.path.join(inputs, 'part2.mp4')  # Replace with your input video path
template_path = os.path.join(inputs, 'template.png')  # Replace with your template image path
output_path = os.path.join(outputs, 'detected_object.jpg')  # Output image path (annotated frame)

image = locate_object(video_path, template_path, output_path)
# locate_object returns None when no frame reaches the match threshold;
# display_images cannot handle None, so only display an actual detection.
if image is None:
    print("Object was not detected in any frame")
else:
    display_images(image)

b) Implement a Kalman filter to predict the object's position in subsequent frames. **(5)**

In [None]:
def track(video_path, template_path, output_path):
    """Track the template through a video with a Kalman filter.

    For every frame the template is located by normalized cross-correlation;
    the centre of the best match is used as the measurement for a
    constant-velocity Kalman filter with state ``[x, y, vx, vy]``. Each
    output frame shows the matched region as a green box and the filter's
    prediction for that frame (made before seeing the measurement) as a
    red dot.

    Args:
        video_path: Path of the input video.
        template_path: Path of the template image (loaded as grayscale).
        output_path: Destination path of the annotated video.

    Returns:
        ``output_path``.

    Raises:
        OSError: If the template image or the first video frame cannot be
            read.
    """
    template = cv2.imread(template_path, 0)
    if template is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f"Could not read template image: {template_path}")
    # shape is (rows, cols); reversed it yields (width, height).
    w, h = template.shape[::-1]
    method = cv2.TM_CCOEFF_NORMED

    cap = cv2.VideoCapture(video_path)
    ret, frame = cap.read()
    if not ret:
        cap.release()
        raise OSError(f"Could not read any frame from video: {video_path}")

    frame_height, frame_width = frame.shape[:2]
    print('W :', frame_width, 'H :', frame_height)
    fps = cap.get(cv2.CAP_PROP_FPS)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

    kalman = cv2.KalmanFilter(4, 2)
    # Constant-velocity model with a time step of one frame:
    # x += vx, y += vy, velocities unchanged.
    kalman.transitionMatrix = np.array(
        [[1, 0, 1, 0],
         [0, 1, 0, 1],
         [0, 0, 1, 0],
         [0, 0, 0, 1]], np.float32)
    # Only the position (x, y) is observed.
    kalman.measurementMatrix = np.array(
        [[1, 0, 0, 0],
         [0, 1, 0, 0]], np.float32)
    kalman.processNoiseCov = np.eye(4, dtype=np.float32) * 0.8

    initialized = False
    try:
        # The frame read above for sizing is processed too, so the first
        # frame of the video is tracked and written like every other one.
        while ret:
            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            scores = cv2.matchTemplate(frame_gray, template, method)
            # minMaxLoc returns (min_val, max_val, min_loc, max_loc).
            _, _, _, max_loc = cv2.minMaxLoc(scores)

            # Measurement: centre of the best-matching region.
            mx, my = max_loc[0] + w // 2, max_loc[1] + h // 2

            if not initialized:
                # predict() derives statePre from statePost, so the initial
                # state has to be seeded through statePost; writing statePre
                # would be overwritten and the filter would start at (0, 0).
                kalman.statePost = np.array([[mx], [my], [0], [0]], np.float32)
                initialized = True

            pred = kalman.predict()
            kalman.correct(np.array([[mx], [my]], np.float32))

            cv2.rectangle(frame, max_loc, (max_loc[0] + w, max_loc[1] + h), (0, 255, 0), 2)
            # pred is a 4x1 column vector; index scalars explicitly.
            cv2.circle(frame, (int(pred[0, 0]), int(pred[1, 0])), 6, (0, 0, 255), -1)

            out.write(frame)
            ret, frame = cap.read()
    finally:
        # The writer must be released for the output file to be finalized.
        cap.release()
        out.release()

    return output_path

In [None]:
# Run Task 2b: track the template through the video and save the annotated result.
video_path = os.path.join(inputs, 'part2.mp4')  # Replace with your input video path
template_path = os.path.join(inputs, 'template.png')  # Replace with your template image path
output_path = os.path.join(outputs, 'tracked_object.mp4')  # Output video path

track(video_path, template_path, output_path)

In [None]:
# Play back the tracking output (output_path) inline as a JS/HTML animation.
ani = display_video(output_path)
HTML(ani.to_jshtml())

c) Compare Bayesian filtering and Kalman filtering (theoretically). **(2)**

In [None]:
#TODO c): 
# Bayesian filters are a broad class of algorithms that recursively compute the probability distribution of a system's state given noisy measurements,
# while the Kalman filter is a specific Bayesian filter that tracks only a point estimate (the mean) and its covariance rather than an arbitrary distribution.
# Because a general Bayesian filter estimates the full probability distribution of the state, it is computationally more expensive than the Kalman filter.
# The Kalman filter is accurate and efficient only when the system is linear and the noise is Gaussian, whereas a general Bayesian filter works for any noise distribution and for both linear and non-linear systems.
# This makes Bayesian filtering more flexible across applications, while the Kalman filter is better suited to specific applications that satisfy those assumptions.