### Q1 - Pixel Transform:


Select an image of your choice to demonstrate performing the following pixel transforms. For each transform show the original image next to the transformed image.

Importing modules and selecting an Image:

In [2]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

# cv2.imread does not raise when the file is missing or unreadable; it
# returns None, which would otherwise only surface as an opaque cvtColor error.
img = cv2.imread('../sample_data/uv.JPG')
if img is None:
    raise FileNotFoundError("Could not read image '../sample_data/uv.JPG'")
# OpenCV loads images in BGR channel order; convert to RGB for matplotlib.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

In [None]:
def para_plots(img1, img2):
    """Show two images side by side for visual comparison.

    Args:
        img1: Image drawn in the left panel, titled 'Original Image'.
        img2: Image drawn in the right panel, titled 'Modified Image'.
    """
    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 10))
    panels = (('Original Image', img1), ('Modified Image', img2))
    for axis, (title, image) in zip(ax, panels):
        axis.set_title(title)
        axis.imshow(image)
    # Run the layout pass last so the titles and images are taken into account.
    fig.tight_layout()
    

  1. **Translation:**

In [None]:
# Image dimensions: the first two entries of shape are (height, width).
img_shape = img.shape
height, width = img_shape[:2]

# Start from a 3x3 identity matrix and fill in the translation column.
identity_matrix = np.eye(3)
identity_matrix[:2, 2] = (200, 250)  # (x shift, y shift)

imageWarped = cv2.warpPerspective(img, identity_matrix, (width, height))
para_plots(img1=img, img2=imageWarped)




2. **Rotation:**

In [None]:
# Rotation angle: 10 degrees expressed in radians.
theta = 10.0*(np.pi/180.0)

# Offsets placed in the last column of the matrix.
x_axis_rotation_point = 200  # shifts the image right or left
y_axis_rotation_point = 0    # shifts the image up or down
# Bottom row of the 3x3 matrix (no perspective terms).
translation = [0.0, 0.0, 1.0]

cos_t, sin_t = np.cos(theta), np.sin(theta)
M = np.array([
    [cos_t, -sin_t, x_axis_rotation_point],
    [sin_t, cos_t, y_axis_rotation_point],
    translation,
])

imageWarped = cv2.warpPerspective(img, M, (width, height))
para_plots(img1=img, img2=imageWarped)

3. **Scaling:**

In [None]:
# Entries of the matrix [[a, -b, tx], [b, a, ty], translation]:
#   a      - resizes the image
#   b      - rotates the image (0.0 here, so no rotation)
#   tx, ty - move the image along the x- and y-axis (both 0 here)
a, b = 0.6, 0.0
tx, ty = 0, 0

M = np.array([
    [a, -b, tx],
    [b, a, ty],
    translation,
])
x = cv2.warpPerspective(img, M, (width, height))
para_plots(img1=img, img2=x)

4. **Rotation, scaling, $\&$ Translation:**

In [None]:
# Combined transform: the diagonal term resizes, the off-diagonal term
# rotates, and the last column moves the image.
scale, rotate = 0.6, 0.1
tx, ty = 250, 500  # movement along the x- and y-axis

M = np.array([
    [scale, -rotate, tx],
    [rotate, scale, ty],
    translation,
])
x = cv2.warpPerspective(img, M, (width, height))
para_plots(img1=img, img2=x)

5. **Affine:**

In [None]:
# 2x2 linear part of the affine map.
a00, a01 = 0.9, 0.1
a10, a11 = -0.4, 0.6
# Translation column.
tx, ty = 0, 800

M = np.array([
    [a00, a01, tx],
    [a10, a11, ty],
    [0.0, 0.0, 1.0],
])
print(M)

imageWarped = cv2.warpPerspective(img, M, (width, height))
para_plots(img1=img, img2=imageWarped)

6. **Projective**

In [None]:
# Upper-left 2x2 part is the identity (no rotation, scaling or shear).
a00, a01 = 1.0, 0.0
a10, a11 = 0.0, 1.0
# Translation column.
tx, ty = 250, 400
# Bottom row: non-zero a20/a21 make the transform projective rather than affine.
a20, a21, a22 = 0.0001, 0.0001, 1.0

M = np.array([
    [a00, a01, tx],
    [a10, a11, ty],
    [a20, a21, a22],
])
print(M)

imageWarped = cv2.warpPerspective(img, M, (width, height))
para_plots(img1=img, img2=imageWarped)

### Q2: Perspective Transform.

1. Determine the best projective transform that restores the image to a picture that is centered with the optical axis and does not contain any rotation, changes to aspect ratio, skew, or keystone distortion. You are welcome to use OpenCV and other tools to help with this.

In [None]:
# Import the image.
# cv2.imread returns None instead of raising when the file cannot be read,
# so fail early with a clear message.
grid_img = cv2.imread('../sample_data/perspective_transform.jpg')
if grid_img is None:
    raise FileNotFoundError(
        "Could not read image '../sample_data/perspective_transform.jpg'"
    )
# OpenCV loads images in BGR channel order; convert to RGB for matplotlib.
grid_img = cv2.cvtColor(grid_img, cv2.COLOR_BGR2RGB)

# Draw circles on the corners
def draw_circle(img, coordinates, color):
    """Draw a filled circle of radius 30 on ``img`` at every given point.

    The circles are drawn directly onto ``img``; nothing is returned.

    Args:
        img: Image array to draw on.
        coordinates: Iterable of (x, y) centre points.
        color: Colour passed through to ``cv2.circle``.
    """
    for center in coordinates:
        # thickness=-1 asks OpenCV for a filled circle.
        cv2.circle(img, center=center, radius=30, color=color, thickness=-1)
# Preview the loaded (RGB-converted) image before marking any corners.
plt.imshow(grid_img)

I don't want to get a projection of the whole image; I only want the projection of the box. Therefore, I specify four points (the corners of the Godiva box).

In [None]:
# Marker colours (the image was converted to RGB when it was loaded).
red = (255, 0, 0)
blue = (0, 0, 255)

# Hand-picked (x, y) pixel positions of the four box corners.
top_left_corner = (120, 930)
top_right_corner = (2910, 820)
bottom_left_corner = (110, 3050)
bottom_right_corner = (2990, 3030)
src_coordinates = [
    top_left_corner,
    top_right_corner,
    bottom_left_corner,
    bottom_right_corner,
]

# Mark the corners on a copy so grid_img itself stays unmodified.
img_cp = grid_img.copy()
draw_circle(img=img_cp, coordinates=src_coordinates, color=red)
plt.imshow(img_cp)

In [None]:

# Manual mapping between source and destination corners: each destination
# coordinate is a source coordinate scaled by 1/4 or 3/4 and truncated to an
# integer; the bottom-right corner is given directly.
tl_x, tl_y = top_left_corner
tr_x, tr_y = top_right_corner
bl_x, bl_y = bottom_left_corner

top_left_corner_dest = (int(tl_x / 4), int(tl_y / 4))
top_right_corner_dest = (int(3 * tr_x / 4), int(tr_y / 4))
bottom_left_corner_dest = (int(3 * bl_x / 4), int(3 * bl_y / 4))
bottom_right_corner_dest = (2182, 2287)

dest_coordinates = [
    top_left_corner_dest,
    top_right_corner_dest,
    bottom_left_corner_dest,
    bottom_right_corner_dest,
]
# Add the destination corners in blue to the same annotated copy.
draw_circle(img=img_cp, coordinates=dest_coordinates, color=blue)
plt.imshow(img_cp)

In [None]:
# getPerspectiveTransform expects float32 point arrays; the source and
# destination lists must list the corners in the same order.
src_points = np.float32(src_coordinates)
dest_points = np.float32(dest_coordinates)

# Size the output canvas so the right-most and bottom-most destination
# corners fit: width from the x of the right corners, height from the y of
# the bottom corners.
output_width = max(top_right_corner_dest[0], bottom_right_corner_dest[0])
output_height = max(bottom_left_corner_dest[1], bottom_right_corner_dest[1])

perspective_matrix = cv2.getPerspectiveTransform(src_points, dest_points)
results = cv2.warpPerspective(
    grid_img, perspective_matrix, (output_width, output_height)
)

2. State the projective transform matrix (3 x 3).

In [None]:
print(perspective_matrix)

3. Describe how you determined the projective transform and provide a justification for any design decisions you made (e.g., selection of any parameter values).

In my case, I chose an image with a box in it. The box is what I wanted to project from the world to the output. Therefore, my source points are the four corners of the box (top-left, top-right, bottom-left, and bottom-right). Each corner is marked with a red circle. To map these four corners into the output image, I scaled the input points. I wanted to apply the L1 or L2 norm, but the output image did not come out correctly. I adjusted the scaled values manually and, after several rounds of trial and error, I think the output image projects the box from the input image appropriately.

4. Original and restored images:

In [None]:
# Left panel: the original photo; right panel: the perspective-warped result.
para_plots(img1=grid_img, img2=results)

**Please ignore the following: a failed method that uses the L1 norm to get the projected points on the other side of the plane.**

In [None]:

# width_top_corners = np.sqrt(((top_left_corner[0] - top_right_corner[0]) ** 2) +
#                             ((top_left_corner[1] - top_right_corner[1]) ** 2)
#                             )

# width_bottom_corners = np.sqrt(((bottom_left_corner[0] - bottom_right_corner[0]) ** 2) +
#                                ((bottom_left_corner[1] - bottom_right_corner[1]) ** 2)
#                                )

# max_width = max(int(width_top_corners), int(width_bottom_corners))

# height_left_corners = np.sqrt(((top_left_corner[0] - bottom_left_corner[0]) ** 2) +
#                              ((top_left_corner[1] - bottom_left_corner[1]) ** 2)
#                              )

# height_right_corners = np.sqrt(((top_right_corner[0] - bottom_right_corner[0]) ** 2) +
#                                 ((top_right_corner[1] - bottom_right_corner[1]) ** 2)
#                                 )
# max_height = max(int(height_left_corners), int(height_right_corners))

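# NOTE(review): src_coordinates is ordered top-left, top-right, bottom-left,
# bottom-right, while the destination points below are ordered top-left,
# bottom-left, bottom-right, top-right. This mismatched corner order is a
# likely reason the warp came out wrong.
# src_points = np.float32(src_coordinates)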
# dest_points = np.float32([[0,0],
#                           [0, max_height],
#                           [max_width, max_height],
#                           [max_width - 1,0]])
# perspective_matrix = cv2.getPerspectiveTransform(src_points, dest_points)
# results = cv2.warpPerspective(grid_img, perspective_matrix,(width, height), flags=cv2.INTER_LINEAR)
# plt.imshow(results)

### Q3: Object Tracking

1. **Explain why you chose this video and this object for tracking.**

    - The video is easy to process since it has the same background over the duration of the video.
    - The vacuum moves in one direction (vertically), so it is easy to track.
    - Upon completion of this homework, I realized that tracking an object vertically is actually harder than I thought. The reason is that the object's size changes
    as it moves away from the camera.

2. In the first frame of the video, draw an overlayed boundary (e.g., a boundary box) around the object you plan to track

In [1]:
def display_first_frame(frame, coords):
    """Draw the initial bounding box on ``frame`` and show it with matplotlib.

    The rectangle is drawn directly onto ``frame``, so the caller's array is
    modified.

    Args:
        frame: Video frame as read by OpenCV (BGR channel order).
        coords: Sequence whose first four items are x, y, width and height
            of the box, in pixels.
    """
    x, y, w, h = coords[:4]
    cv2.rectangle(frame, pt1=(x, y), pt2=(x + w, y + h), color=(255, 0, 0), thickness=5)
    # plt.imshow takes the image as its first argument (unlike cv2.imshow,
    # which takes a window name first), and matplotlib expects RGB order.
    plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    plt.title('first frame')
    

3. Develop some method for tracking the object over the duration of the video. During each frame of the video, indicate where the object is with an overlayed boundary. You are welcome to use any method you like, whether it's something you developed or something someone else developed. Describe your object tracking algorithm, and why you chose it.

In [2]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

capture = cv2.VideoCapture("../sample_data/object_tracking_2.mp4")
if not capture.isOpened():
    raise FileNotFoundError(
        "Could not open video '../sample_data/object_tracking_2.mp4'"
    )

first_frame = True
ret, frame = capture.read()
if not ret:
    capture.release()
    raise RuntimeError(
        "Video '../sample_data/object_tracking_2.mp4' has no readable frames"
    )

# Keep the source frame rate as a float: truncating e.g. 29.97 to 29 would
# change the playback speed of the written video.
fps = capture.get(cv2.CAP_PROP_FPS)
width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
output = cv2.VideoWriter("Frame.mp4", cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

# Manually chosen coordinates and dimensions of the vacuum in the first frame.
x, y, w, h = 570, 1880, 500, 610
coords = [x, y, w, h]
# Region of Interest (ROI). Copied so that boxes drawn on the frame later do
# not show up inside it.
roi = frame[y:y + h, x:x + w].copy()

try:
    # Display the video frame by frame. The frame read above is processed
    # first so that the real first frame is annotated and written too.
    while ret:
        if first_frame:
            display_first_frame(frame=frame, coords=coords)
            first_frame = False

        # Convert to another colour space so the object's colour range can be
        # masked. cvtColor returns a new array, so `frame` is left untouched.
        img_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2XYZ)

        # Range of colours to mask: the vacuum is black with minimal
        # variation, so keep black and values slightly off black.
        mask = cv2.inRange(img_frame, lowerb=(0, 0, 0), upperb=(70, 60, 80))

        # Grab the contours of the masked regions.
        contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

        # Box only contours with a large enough area; this limits the
        # rectangles drawn on the space surrounding the object.
        for contour in contours:
            if cv2.contourArea(contour) > 1e3:
                bx, by, bw, bh = cv2.boundingRect(contour)
                cv2.rectangle(frame, pt1=(bx, by), pt2=(bx + bw, by + bh), color=(255, 0, 0), thickness=3)

        output.write(frame)
        cv2.imshow('frame', frame)
        # Mask to the low byte so the key comparison also works on platforms
        # where waitKey returns extra high bits.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        ret, frame = capture.read()
finally:
    # Release the writer as well as the capture, even when an error occurs,
    # so the output file is finalized.
    capture.release()
    output.release()
    cv2.destroyAllWindows()



4. Describe how well the tracking algorithm performed. Did it meet all of your expectations, or did it fail to track at some points?

5. Upload the modified video with the bounding box.

## Sources used for this HW:
    1- 