## Project Structure
```
2DIP_exercise/
│-- data/             # Contains images & videos
│   │-- input/        # 1 image and 1 video for each phase respectively
│   │-- output/       # All output images/videos must be stored here
│-- notebooks/        # Jupyter Notebooks for each phase
│   │-- part1.ipynb   # Image processing & feature extraction
│   │-- part2.ipynb   # Optical flow, object detection and tracking 
│-- README.md         # Project instructions
```

In [None]:
# imports
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# define paths
# base_path is the parent of the current working directory.
# NOTE(review): this presumably expects the notebook to be run from notebooks/,
# so that base_path is the project root shown in the structure above - confirm.
base_path = os.path.abspath(os.path.join(os.getcwd(), ".."))
# Directory the input image/video are read from.
inputs = os.path.join(base_path, 'data','input')
# Directory all generated images/videos are written to.
outputs = os.path.join(base_path, 'data','output')

## Supplementary Code for Visualization

In [3]:
def display_images(image):
    """
    Show a single OpenCV image inline with matplotlib.

    The image is converted from BGR to RGB channel order before plotting,
    and the axes are hidden.
    """
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)   # matplotlib expects RGB order
    figure = plt.figure(figsize=(8, 6))
    axes = figure.gca()
    axes.imshow(rgb)
    axes.axis('off')
    plt.show()

In [None]:
def get_frames(video_path):
    """
    Read every frame of the video at `video_path` and return them as a list
    of RGB images (converted from OpenCV's BGR order for matplotlib).

    The list is empty when the video cannot be opened or holds no frames.
    """
    capture = cv2.VideoCapture(video_path)
    rgb_frames = []

    while capture.isOpened():
        grabbed, bgr = capture.read()
        if grabbed:
            # Swap channel order so the frame displays correctly in matplotlib
            rgb_frames.append(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
            continue
        # A failed read marks the end of the stream
        break

    capture.release()
    return rgb_frames

In [5]:
import matplotlib.animation as animation
from IPython.display import HTML

import matplotlib as mpl
# Raise matplotlib's size limit for animations embedded as HTML (the value is
# in MB per the matplotlib rcParams documentation), so that the videos
# rendered through to_jshtml() below are not cut off.
mpl.rcParams['animation.embed_limit'] = 100

def display_video(video_path):
    """
    Build a matplotlib animation that plays the video at `video_path`.

    Parameters:
        video_path: path of the video file to load with get_frames().

    Returns:
        A matplotlib FuncAnimation showing one video frame per animation
        step (50 ms interval, no repeat).

    Raises:
        ValueError: if no frame could be read from the video.
    """
    frames = get_frames(video_path)
    if not frames:
        # Without this guard frames[0] below fails with a bare IndexError
        raise ValueError(f"No frames could be read from {video_path}")

    fig, ax = plt.subplots()
    plt.subplots_adjust(left=0, right=1, top=1, bottom=0)   # let the image fill the figure
    im = ax.imshow(np.zeros_like(frames[0]))                # blank placeholder with the frame's shape
    ax.axis('off')

    def update(frame):
        # Swap the displayed pixels; return the artist list required by blit=True
        im.set_array(frame)
        return [im]

    ani = animation.FuncAnimation(fig, update, frames=frames, interval=50, blit=True, repeat=False)

    # Close the figure; presumably so the notebook does not also render it
    # as a static image next to the animation.
    plt.close(fig)

    return ani

## Task 1 : Analyze movement patterns in a video sequence. **(6)**

a) Compute dense optical flow for each frame in a video of a moving crowd. **(2)**

b) Visualize the movement patterns in 2 different ways. **(2+2)**

In [None]:
def optical_flow(video_path, hsv_output, arrow_output):
    """
    Dense Farnebäck optical flow with two visualisations:
    1) HSV color-wheel encoding
    2) Arrow Overlay

    Parameters:
        video_path:   path of the input video.
        hsv_output:   path of the video that receives the HSV color-wheel frames.
        arrow_output: path of the video that receives the arrow-overlay frames.

    Raises:
        IOError:    if the input video cannot be opened.
        ValueError: if the first frame cannot be read.

    Flow is computed between consecutive frames, so each output video holds
    one frame fewer than the frames read from the input.
    """

    # cap feeds us the frames; if the file does not exist or cannot be opened,
    # raise an I/O error.
    cap = cv2.VideoCapture(video_path)                      # opens the input file
    if not cap.isOpened():                                  # checks whether the file opened
        raise IOError(f"Cannot Open {video_path}")          # Exception Error

    writers = []                                            # writers created so far, for cleanup
    try:
        # Copy the FPS/size from the source so the output matches the input.
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')            # .mp4 output
        fps = cap.get(cv2.CAP_PROP_FPS)                     # frame rate of the video
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))          # frame width
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))         # frame height

        # Two separate files: one holds the HSV frames, the other the arrow overlays.
        hsv_writer = cv2.VideoWriter(hsv_output, fourcc, fps, (w, h))
        writers.append(hsv_writer)
        arrow_writer = cv2.VideoWriter(arrow_output, fourcc, fps, (w, h))
        writers.append(arrow_writer)

        # Optical flow always compares "previous vs current" gray frames.
        ok, prev = cap.read()                               # grabs frame 0
        if not ok:                                          # no first frame available
            raise ValueError("Is the video empty?")

        prev_g = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY)     # converts to grayscale

        # Standard OpenCV read loop: stops when a read fails.
        while True:
            ok, frame = cap.read()                          # read the next frame
            if not ok:                                      # end of stream
                break

            # Compute the dense Farneback flow between the two gray images.
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # grayscale version
            flow = _dense_flow(prev_g, gray)                # (u,v) field

            # Turn the flow into two human-friendly pictures
            hsv_img = _flow_to_hsv(flow)                    # color-wheel visual
            arrow_img = _draw_arrows(frame, flow)           # arrow overlay visual

            hsv_writer.write(hsv_img)                       # append frame to hsv video
            arrow_writer.write(arrow_img)                   # append frame to arrow video

            # Advance the "previous" frame pointer for the next loop
            prev_g = gray                                   # current -> previous
    finally:
        # Always release the capture and writers, even when an error is
        # raised above, so the files are closed and flushed.
        for handle in (cap, *writers):
            handle.release()

# Thin wrapper around OpenCV's built-in dense optical flow.
def _dense_flow(prev_g, curr_g):
    """
    Return the Farnebäck dense optical-flow field (u,v) computed from the
    previous gray frame `prev_g` to the current gray frame `curr_g`.
    """
    # Fixed Farnebäck settings used for every frame pair
    farneback_params = dict(
        pyr_scale=0.3,
        levels=3,
        winsize=15,
        iterations=3,
        poly_n=5,
        poly_sigma=1.2,
        flags=0,
    )
    return cv2.calcOpticalFlowFarneback(prev_g, curr_g, None, **farneback_params)

# Encodes each pixel's motion vector as hue (direction) and brightness (speed).
def _flow_to_hsv(flow):
    """
    Map flow -> HSV color wheel image (BGR for VideoWriter).

    Parameters:
        flow: flow field whose last axis holds the (u, v) components.

    Returns:
        A uint8 BGR image in which hue encodes the flow direction and
        brightness encodes the flow magnitude, min-max normalised to 0-255
        within this frame.
    """
    mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])  # speed & angle (radians)
    hsv = np.zeros((*mag.shape, 3), dtype=np.uint8)         # empty hsv image
    hsv[..., 0] = ang * 90 / np.pi                          # hue = direction: radians -> degrees / 2 (0-180 range)
    hsv[..., 1] = 255                                       # full saturation (vivid colors)
    hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)  # value = relative speed
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)             # back to BGR for saving

# Produces an easy-to-read quiver plot by subsampling the dense field.
def _draw_arrows(frame, flow, step = 16, color = (0,255,0)):
    """
    Return a copy of `frame` with one arrow drawn every `step` pixels,
    each pointing along the (integer-truncated) flow vector at that pixel.
    """
    canvas = frame.copy()                       # leave the caller's frame untouched
    rows, cols = frame.shape[:2]
    for row in range(0, rows, step):
        for col in range(0, cols, step):
            u, v = flow[row, col].astype(int)   # flow vector at this grid point
            tip = (col + u, row + v)            # arrow head = start + displacement
            cv2.arrowedLine(canvas, (col, row), tip, color, 1, tipLength = 0.3)
    return canvas

In [None]:
# Input video and the two output files for Task 1.
video_path = os.path.join(inputs, 'part2.mp4')  # Replace with your input video path
output_path1 = os.path.join(outputs, 'optical_flow_1.mp4')  # HSV color-wheel visualization (passed as hsv_output)
output_path2 = os.path.join(outputs, 'optical_flow_2.mp4')  # Arrow-overlay visualization (passed as arrow_output)

# Compute the dense flow and write both visualization videos.
optical_flow(video_path, output_path1, output_path2)

In [None]:
# Play back the video written to output_path1 (the hsv_output argument of
# optical_flow) inline as an HTML/JavaScript animation.
ani = display_video(output_path1)
HTML(ani.to_jshtml())

In [None]:
# Play back the video written to output_path2 (the arrow_output argument of
# optical_flow) inline as an HTML/JavaScript animation.
ani = display_video(output_path2)
HTML(ani.to_jshtml())

## Task 2 : Identify and track a moving object in a video sequence. **(9)**

a) Detect an object using template matching. The output would be the first frame where it appears, with a bounding box around the detected object. **(2)**

In [None]:
def locate_object(video_path, template_path, output_path, threshold=0.9):
    """
    Find the first video frame in which the template is matched.

    Each frame is converted to grayscale and compared against the template
    with normalised correlation-coefficient matching. The first frame whose
    best score reaches `threshold` gets a green bounding box drawn around
    the match, is saved to `output_path`, and is returned.

    Parameters:
        video_path:    path of the input video.
        template_path: path of the template image (loaded as grayscale).
        output_path:   path the annotated frame is written to.
        threshold:     minimum best-match score that counts as a detection.

    Returns:
        The annotated BGR frame, or None if no frame reaches the threshold.

    Raises:
        IOError: if the template image or the video cannot be opened.
    """
    template = cv2.imread(template_path, 0)      # 0 -> load as grayscale
    if template is None:                         # imread returns None on failure
        raise IOError(f"Cannot Open {template_path}")
    w, h = template.shape[::-1]                  # shape is (rows, cols) -> (width, height)
    method = cv2.TM_CCOEFF_NORMED                # normalised correlation coefficient

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise IOError(f"Cannot Open {video_path}")

    detected_frame = None                        # stays None when nothing is found
    frame_count = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:                          # end of stream
                break

            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            scores = cv2.matchTemplate(frame_gray, template, method)
            # minMaxLoc returns (min_val, max_val, min_loc, max_loc);
            # for this method the maximum is the best match.
            _, max_val, _, max_loc = cv2.minMaxLoc(scores)

            if max_val >= threshold:
                top_left = max_loc
                bottom_right = (top_left[0] + w, top_left[1] + h)
                cv2.rectangle(frame, top_left, bottom_right, (0, 255, 0), 3)
                print(f"Object detected in the frame {frame_count}")
                cv2.imwrite(output_path, frame)
                detected_frame = frame.copy()
                break

            frame_count += 1
    finally:
        # Release the capture even if matching or writing raised
        cap.release()

    return detected_frame

In [None]:
video_path = os.path.join(inputs, 'part2.mp4')  # Replace with your input video path
template_path = os.path.join(inputs, 'template.png')  # Replace with your template image path
output_path = os.path.join(outputs, 'detected_object.jpg')  # Output image path (annotated frame)

image = locate_object(video_path, template_path, output_path)
# locate_object returns None when no frame reaches the match threshold;
# passing None on to display_images would fail inside cv2.cvtColor.
if image is None:
    print("No frame matched the template; nothing to display.")
else:
    display_images(image)

b) Implement a Kalman filter to predict the object's position in subsequent frames. **(5)**

In [None]:
def track(video_path, template_path, output_path):
    """
    Track the template through the video with template matching plus a
    constant-velocity Kalman filter, and write an annotated video.

    For every frame the best template match (normalised correlation
    coefficient) is taken as the measurement of the object's centre. The
    Kalman filter first predicts the centre, then is corrected with the
    measurement. The match is drawn as a green rectangle and the prediction
    as a filled red circle.

    Parameters:
        video_path:    path of the input video.
        template_path: path of the template image (loaded as grayscale).
        output_path:   path of the annotated output video (.mp4).

    Returns:
        output_path, unchanged.

    Raises:
        IOError:    if the template image or the video cannot be opened.
        ValueError: if the first frame cannot be read.
    """
    template = cv2.imread(template_path, 0)      # 0 -> load as grayscale
    if template is None:                         # imread returns None on failure
        raise IOError(f"Cannot Open {template_path}")
    w, h = template.shape[::-1]                  # shape is (rows, cols) -> (width, height)
    method = cv2.TM_CCOEFF_NORMED                # normalised correlation coefficient

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise IOError(f"Cannot Open {video_path}")

    out = None
    try:
        # The first frame provides the output size and is tracked like any other frame.
        ret, frame = cap.read()
        if not ret:
            raise ValueError("Is the video empty?")

        frame_height, frame_width = frame.shape[:2]
        print('W :', frame_width, 'H :', frame_height)
        fps = cap.get(cv2.CAP_PROP_FPS)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

        # State is (x, y, vx, vy); the measurement is (x, y).
        kalman = cv2.KalmanFilter(4, 2)
        # Constant-velocity model with a time step of one frame:
        # x += vx, y += vy, velocities carried over.
        kalman.transitionMatrix = np.array(
            [[1, 0, 1, 0],
             [0, 1, 0, 1],
             [0, 0, 1, 0],
             [0, 0, 0, 1]], np.float32)
        # Only the position part of the state is observed.
        kalman.measurementMatrix = np.array(
            [[1, 0, 0, 0],
             [0, 1, 0, 0]], np.float32)
        kalman.processNoiseCov = np.eye(4, dtype=np.float32) * 0.8

        initialized = False
        while ret:
            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            tmpm = cv2.matchTemplate(frame_gray, template, method)
            print('Score of the frames based on Correlation Coefficient method:', tmpm)
            # minMaxLoc returns (min_val, max_val, min_loc, max_loc)
            _, max_val, _, max_loc = cv2.minMaxLoc(tmpm)
            print('Max score in array is : ', max_val)

            # Measured object centre = top-left of the best match + half the template size
            mx, my = max_loc[0] + w // 2, max_loc[1] + h // 2

            if not initialized:
                # predict() derives statePre from statePost, so the initial
                # guess must be stored in statePost to take effect.
                initial_state = np.array([[mx], [my], [0], [0]], np.float32)
                kalman.statePre = initial_state.copy()
                kalman.statePost = initial_state.copy()
                initialized = True

            pred = kalman.predict()                                  # prediction from the motion model
            kalman.correct(np.array([[mx], [my]], np.float32))       # update with the measurement

            # Draw the template match (green box) and the predicted centre (red dot)
            cv2.rectangle(frame, max_loc, (max_loc[0] + w, max_loc[1] + h), (0, 255, 0), 2)
            cv2.circle(frame, (int(pred[0, 0]), int(pred[1, 0])), 6, (0, 0, 255), -1)

            out.write(frame)
            ret, frame = cap.read()
    finally:
        # Release the capture and the writer even on error so the output
        # file is closed.
        cap.release()
        if out is not None:
            out.release()

    return output_path

In [None]:
# Input video, template image and output file for the Kalman-filter tracking task.
video_path = os.path.join(inputs, 'part2.mp4')  # Replace with your input video path
template_path = os.path.join(inputs, 'template.png')  # Replace with your template image path
output_path = os.path.join(outputs, 'tracked_object.mp4')  # Output video path

# Run the tracker; it writes the annotated video to output_path.
track(video_path, template_path, output_path)

In [None]:
# Play back the video at output_path inline as an HTML/JavaScript animation.
ani = display_video(output_path)
HTML(ani.to_jshtml())

c) Compare Bayesian filtering and Kalman filtering (theoretically). **(2)**

In [None]:
#TODO c): 
# Bayesian filters are a broad class of algorithms that recursively compute the probability distribution of the state given noisy measurements,
# while the Kalman filter is one particular type of Bayesian filter that computes only a point estimate (the mean) and its covariance, not an arbitrary distribution.
# Because a general Bayesian filter estimates the full probability distribution of the state, it is computationally more expensive than the Kalman filter.
# The Kalman filter works well and efficiently only when the system is linear and the noise is Gaussian (in that case the mean and covariance fully describe the distribution), while a general Bayesian filter works for any type of noise and for both linear and non-linear systems.
# This makes Bayesian filtering more flexible to implement across various applications, while the Kalman filter is better suited to specific applications that satisfy these assumptions.