# Exercise 1
* Use the biker.png template from Exercise materials to do mean shift tracking in the traffic video in Exercise materials. (Hint: use OpenCV's calcBackProject() function to produce a similarity image for mean shift - see this mean shift tutorial for more pointers)
* Note that for mean shift tracking you need to provide an initial tracking window manually, and the biker only shows up from frame 114, so wait until then to start tracking.
* What happens when the biker disappears over the horizon? Why?

In [1]:
# Importing libraries & packages
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
import time
import glob
import sys

# Define data path + output folder
data = "../Data/"
output = "output/"

In [8]:
# Initialize video capture object
cap = cv.VideoCapture(data+"slow_traffic_small.mp4")

frames = []
while(1):
    ret, frame = cap.read()
    frames.append(frame)
    
    cv.imshow("Frames",frame)
    k = cv.waitKey(30) & 0xff
    if k == 27:
        break
    elif len(frames) >= 114:
        break

# Save frame 114
cv.imwrite(output+"frame114.png",frames[113])    
cv.destroyAllWindows()   
print("Successfully exited!")

Successfully exited!


In [None]:
# Initialize video capture object
cap = cv.VideoCapture(data+"slow_traffic_small.mp4")

# Get height and width from data
img = cv.imread(data+"biker.png")
h,w,_ = img.shape

# setup initial location of window
x, y, w, h = 591, 180, w, h
track_window = (x, y, 20, 25)

# set up the ROI for tracking
frame = cv.imread(output+"frame114.png")
roi = frame[y:y+h, x:x+w]
hsv_roi = cv.cvtColor(roi, cv.COLOR_BGR2HSV)
mask = cv.inRange(hsv_roi, np.array((0., 60.,32.)), np.array((180.,255.,255.)))
roi_hist = cv.calcHist([hsv_roi],[0],mask,[180],[0,180])
cv.normalize(roi_hist,roi_hist,0,255,cv.NORM_MINMAX)

# Setup the termination criteria, either 10 iteration or move by at least 1 pt
term_crit = ( cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 1 )

count = 0
while(1):
    ret, frame = cap.read()
    
    if ret == True:
        if count >= 114:
            hsv = cv.cvtColor(frame, cv.COLOR_BGR2HSV)
            dst = cv.calcBackProject([hsv],[0],roi_hist,[0,180],1)
            
            # apply meanshift to get the new location
            ret, track_window = cv.meanShift(dst, track_window, term_crit)
            
            # Draw it on image
            x,y,w,h = track_window
            img2 = cv.rectangle(frame, (x,y), (x+w,y+h), 255,2)
            cv.imshow('img2',img2)
            k = cv.waitKey(30) & 0xff
            if k == 27:
                break
        else:
            count += 1
            cv.imshow('img2',frame)
            k = cv.waitKey(30) & 0xff
            if k == 27:
                break
    else:
        break
    
cv.destroyAllWindows()   
print("Successfully exited!")

# Exercise 2

* Use the biker.png template from Exercise materials to do Kalman filter tracking in the traffic video in Exercise materials. 
Hints: See this Python implementation for pointers. You can define the state as the position and velocity of the biker, and use the output of mean shift or cam shift (or a detection method of your choice) for the measurement update (note: we only measure position!). You will need to define a measurement matrix, a state transition matrix (motion model), as well as covariance matrices for the measurement and process (model) noise. You can start with unit matrices, and experiment with the parameters.
* What happens if you skip the measurement step for certain frames?
Extra: Visualize the position uncertainty (errorCovPost attribute in OpenCV) as an ellipse. Plot the measured vs. Kalman filtered position over time and compare.


In [2]:
# Initialize kalman filter with 4 state variables and 2 measurement variables
kalman = cv.KalmanFilter(4,2)
kalman.measurementMatrix = np.array([[1,0,0,0],
                                     [0,1,0,0]],np.float32) # Setup a 2x4 measurement matrix, maps x and y coordinates to our 4-dimensional state vector

kalman.transitionMatrix = np.array([[1,0,1,0],
                                    [0,1,0,1],
                                    [0,0,1,0],
                                    [0,0,0,1]], np.float32) # Setup a 4x4 transition matrix, defines how state vectors evolve from time step 't' to 't+1'
                                                     # based on a simple linear motion model, where objects move linearly at constant velocity.
                                                     # First two rows map position estimates onto future position estimates. 
                                                     # Last two rows maintain unchanged predictions about velocities.
                                                     
kalman.processNoiseCov = np.array([[1, 0, 0, 0],
                                   [0, 1, 0, 0],
                                   [0, 0, 1, 0],
                                   [0, 0, 0, 1]], np.float32) * 0.03 # Setup a 4x4 noise process covariance matrix, represent the uncertainty in the motion model.
                                                              # Affects how the kalman filter predicts the next state.
                                                              # We setup diagonal matrix scaled by 0.03, meaning we add small errors to each of our 4 variables.


In [3]:
def predict(coord_x,coord_y):
    measured = np.array([[np.float32(coord_x)],[np.float32(coord_y)]]) # Create a measurement vector based on object position (x,y)
    kalman.correct(measured) # We update the kalman filter's state estimate with the measurement data
    predicted = kalman.predict() # We predict coordinates (px,py)
    px,py = int(predicted[0]),int(predicted[1]) # We extract the prediction coordinates
    return px,py

In [22]:
# Initialize video capture object
cap = cv.VideoCapture(data+"slow_traffic_small.mp4")

# Get height and width from data
img = cv.imread(data+"biker.png")
h,w,_ = img.shape

# Setup initial location of window
x, y, w, h = 591, 180, w, h
track_window = (x, y, 20, 25)

# set up the ROI for tracking
frame = cv.imread(output+"frame114.png")
roi = frame[y:y+h, x:x+w]
hsv_roi = cv.cvtColor(roi, cv.COLOR_BGR2HSV)
#mask = cv.inRange(hsv_roi, np.array((0., 60.,32.)), np.array((180.,255.,255.))) # Original mask
mask = cv.inRange(hsv_roi, np.array((10., 36., 68.)), np.array((180.,255.,255.))) # Tried to change some params
roi_hist = cv.calcHist([hsv_roi],[0],mask,[180],[0,180])
cv.normalize(roi_hist,roi_hist,0,255,cv.NORM_MINMAX)

# Setup the termination criteria, either 10 iteration or move by at least 1 pt
term_crit = ( cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 1 )

count = 0
while(1):
    ret, frame = cap.read()
    
    if ret == True:
        if count >= 114:
            hsv = cv.cvtColor(frame, cv.COLOR_BGR2HSV)
            dst = cv.calcBackProject([hsv],[0],roi_hist,[0,180],1)
            
            # Apply meanshift to get the new location
            ret, track_window = cv.meanShift(dst,track_window, term_crit)
            
            # Draw observation on image - in green
            x,y,w,h = track_window
            img2 = cv.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            print(f"x: {x}, y: {y}")
            
            # Predict coordinates
            prediction = predict(x,y)
            
            # draw predicton on image - in red
            img2 = cv.rectangle(frame,(prediction[0],prediction[1]), (prediction[0] + w, prediction[1] + h), (0,0,255), 1)
            print(f"prediction: {prediction}")
            
            cv.imshow('img2',img2)
            k = cv.waitKey(30) & 0xff
            if k == 27:
                break
        else:
            count += 1
            cv.imshow('img2',frame)
            k = cv.waitKey(30) & 0xff
            if k == 27:
                break
    else:
        break
    
cv.destroyAllWindows()   
print("Successfully exited!")

x: 591, y: 184
prediction: (517, 157)
x: 586, y: 181
prediction: (579, 178)
x: 582, y: 179
prediction: (611, 188)
x: 579, y: 178
prediction: (623, 192)
x: 577, y: 179
prediction: (622, 193)


  px,py = int(predicted[0]),int(predicted[1]) # We extract the prediction coordinates


x: 577, y: 178
prediction: (616, 191)
x: 575, y: 178
prediction: (607, 188)
x: 574, y: 178
prediction: (597, 185)
x: 574, y: 179
prediction: (589, 183)
x: 584, y: 182
prediction: (588, 183)
x: 584, y: 182
prediction: (587, 183)
x: 585, y: 181
prediction: (587, 182)
x: 585, y: 181
prediction: (586, 182)
x: 585, y: 181
prediction: (586, 181)
x: 582, y: 182
prediction: (584, 181)
x: 578, y: 181
prediction: (580, 181)
x: 576, y: 180
prediction: (576, 180)
x: 575, y: 181
prediction: (574, 180)
x: 575, y: 181
prediction: (572, 180)
x: 575, y: 180
prediction: (572, 180)
x: 575, y: 179
prediction: (572, 179)
x: 569, y: 176
prediction: (569, 177)
x: 568, y: 177
prediction: (567, 176)
x: 568, y: 178
prediction: (565, 176)
x: 568, y: 178
prediction: (565, 176)
x: 563, y: 177
prediction: (562, 176)
x: 563, y: 177
prediction: (561, 176)
x: 558, y: 176
prediction: (557, 175)
x: 556, y: 175
prediction: (554, 175)
x: 555, y: 174
prediction: (552, 173)
x: 555, y: 176
prediction: (551, 174)
x: 555, y: 1