# Task 1

In [13]:
'''
Notes:
1. All of your implementation for task 1 should be in this file. 
2. Please Read the instructions and do not modify the input and output formats of function detect_faces().
3. If you want to show an image for debugging, please use show_image() function in helper.py.
4. Please do NOT save any intermediate files in your final submission.
'''

import cv2
import numpy as np
import argparse
import json
import os
import sys
import math


from typing import Dict, List
from utils import show_image


'''
Please do NOT add any imports. The allowed libraries are already imported for you.
'''

def detect_faces(img: np.ndarray) -> List[List[float]]:
    """
    Locate faces in an RGB image with OpenCV's frontal-face Haar cascade.

    Args:
        img : np.ndarray of shape H x W x 3 holding the input image. H is the image height, W the image
            width, and the last axis is ordered [R, G, B] (NOT [B, G, R]!).

    Returns:
        detection_results: a python nested list with one entry per detected face (there may be several
            faces in one image). Each entry is a python list of 4 floats,
            [topleft-x, topleft-y, box-width, box-height], in pixels.
    """
    # Build the classifier from the cascade XML located under cv2.data.haarcascades.
    cascade_file = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
    classifier = cv2.CascadeClassifier(cascade_file)

    # The cascade is run on a single-channel image; the input is RGB, hence COLOR_RGB2GRAY.
    grayscale = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    boxes = classifier.detectMultiScale(grayscale, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

    # Turn every (x, y, w, h) box into plain Python floats, as the output format requires.
    detection_results: List[List[float]] = []
    for left, top, box_w, box_h in boxes:
        detection_results.append([float(left), float(top), float(box_w), float(box_h)])

    return detection_results


def parse_args(argv=None):
    """
    Parse the command-line options of the task 1 face-detection script.

    Args:
        argv: optional list of argument strings to parse. When None (the default) argparse
            falls back to the process arguments, exactly as a call without arguments does.

    Returns:
        argparse.Namespace with the attributes `input_path` (folder holding the images) and
        `output` (path of the JSON file the detections are written to).
    """
    parser = argparse.ArgumentParser(description="cse 573 homework 4.")
    parser.add_argument(
        "--input_path", type=str, default="data/validation_folder/images",
        help="path to validation or test folder")
    parser.add_argument(
        "--output", type=str, default="./result_task1.json",
        help="path to the json file the detection results are written to")

    args = parser.parse_args(argv)
    return args

def save_results(result_dict, filename):
    """Write `result_dict` to `filename` as JSON with a 4-space indent, overwriting the file."""
    with open(filename, "w") as out_file:
        json.dump(result_dict, out_file, indent=4)

def check_output_format(faces, img, img_name):
    """
    Validate the structure of the detections returned for one image.

    Args:
        faces: the detection result to check; expected to be a list of
            [topleft-x, topleft-y, box-width, box-height] lists of floats.
        img: the image the detections belong to; only `img.shape[0]` (height) and
            `img.shape[1]` (width) are read.
        img_name: name used in the printed diagnostics.

    Returns:
        False as soon as a type or length problem is found (a message is printed), True otherwise.
        A box that leaves the image only triggers a printed warning; it does not fail the check.
    """
    if not isinstance(faces, list):
        print('Wrong output type for image %s! Should be a %s, but you get %s.' % (img_name, list, type(faces)))
        return False

    img_h, img_w = img.shape[0], img.shape[1]
    for i, face in enumerate(faces):
        if not isinstance(face, list):
            print('Wrong bounding box type in image %s the %dth face! Should be a %s, but you get %s.' % (img_name, i, list, type(face)))
            return False
        if not len(face) == 4:
            print('Wrong bounding box format in image %s the %dth face! The length should be %s , but you get %s.' % (img_name, i, 4, len(face)))
            return False
        for num in face:
            if not isinstance(num, float):
                print('Wrong bounding box type in image %s the %dth face! Should be a list of %s, but you get a list of %s.' % (img_name, i, float, type(num)))
                return False

        left, top, box_w, box_h = face
        # A box starting at `left` with width `box_w` covers columns left .. left + box_w - 1, so it
        # only leaves the image when left + box_w is strictly greater than the width (same for rows).
        # The top-left corner itself must lie on a valid pixel, hence `>=` for it.
        if left >= img_w or top >= img_h or left + box_w > img_w or top + box_h > img_h:
            print('Warning: Wrong bounding box in image %s the %dth face exceeds the image size!' % (img_name, i))
            print('One possible reason of this is incorrect bounding box format. The format should be [topleft-x, topleft-y, box-width, box-height] in pixels.')
    return True


def batch_detection(img_dir):
    """
    Run detect_faces() on every image of a folder.

    Args:
        img_dir: folder whose entries are read (in sorted name order) with cv2.imread.

    Returns:
        dict mapping each image file name to the list of boxes returned by detect_faces().
        Entries OpenCV cannot decode are skipped and do not appear in the dict.

    The process exits with status 2 if detect_faces() returns a malformed result.
    """
    res = {}
    for img_name in sorted(os.listdir(img_dir)):
        img_path = os.path.join(img_dir, img_name)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals an unreadable entry (sub-folder, hidden file, non-image) by
            # returning None instead of raising; cvtColor would crash on it.
            print('Skipping %s: not a readable image.' % img_path)
            continue
        # imread yields BGR channel order, detect_faces() expects RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        faces = detect_faces(img)
        if not check_output_format(faces, img, img_name):
            print('Wrong output format!')
            sys.exit(2)
        res[img_name] = faces
    return res

# def main():

#     args = parse_args()
#     path, filename = os.path.split(args.output)
#     os.makedirs(path, exist_ok=True)
#     result_list = batch_detection(args.input_path)
#     save_results(result_list, args.output)

# if __name__ == "__main__":
#     main()

    

In [15]:
# Run face detection over every image of the validation folder and store the
# per-image boxes as JSON in result_task1_val.json.
result_list = batch_detection('data/validation_folder/images/')
save_results(result_list, 'result_task1_val.json')

In [None]:
# Single-image check: read one validation image, convert it from BGR to RGB
# (the channel order detect_faces() documents) and run the detector on it.
img = cv2.imread('data/validation_folder/images/img_1.jpg')
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
faces = detect_faces(img)

In [16]:
! python3 ComputeFBeta/ComputeFBeta.py --preds result_task1_val.json --groundtruth data/validation_folder/ground-truth.json

F1 score: 0.9281


In [20]:
# Scale the F1 score printed above by 1.25, cap the result at 1 and multiply by 40.
# NOTE(review): presumably the points awarded for this task - confirm against the grading rubric.
np.min([1, 1.25*0.9281])*40

40.0

In [21]:
! python3 task1.py --input_path data/validation_folder/images --output ./result_task1_val.json

In [22]:
! python3 ComputeFBeta/ComputeFBeta.py --preds result_task1_val.json --groundtruth data/validation_folder/ground-truth.json

F1 score: 0.9281


In [23]:
# Same computation for the task1.py run: 1.25 * F1, capped at 1, times 40.
# NOTE(review): presumably the points awarded for this task - confirm against the grading rubric.
np.min([1, 1.25*0.9281])*40

40.0

# Task 2

In [87]:
def stitch_images(img1, img2, descriptor1, descriptor2, min_match_threshold=10):
    """
    Warp img1 with a RANSAC homography towards img2 and paste img2 over the top-left corner.

    NOTE(review): this cell relies on a match_features() that accepts a `min_match_threshold`
    keyword and returns objects with `queryIdx` / `trainIdx` attributes, and on
    `descriptor1` / `descriptor2` elements exposing `.pt` (i.e. keypoint-like objects).
    Confirm which helper version this cell is meant to run against.

    Args:
        img1, img2: the two images to combine.
        descriptor1, descriptor2: per-image sequences indexed by the match indices (see note).
        min_match_threshold: forwarded to match_features() and also the number of matches
            that must be exceeded before stitching is attempted.

    Returns:
        The stitched image, or None when there are too few matches or no homography is found.
    """
    # Forward the caller's threshold instead of a fixed 10 so the argument is honoured.
    matches = match_features(descriptor1, descriptor2, min_match_threshold=min_match_threshold)

    if len(matches) <= min_match_threshold:
        print("Not enough matches found - {}/{}".format(len(matches), min_match_threshold))
        return None

    # Get the matching keypoints for each of the images
    img1_pts = np.float32([descriptor1[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
    img2_pts = np.float32([descriptor2[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)

    # Compute the homography matrix (maps img1 points onto img2 points)
    M, _ = cv2.findHomography(img1_pts, img2_pts, cv2.RANSAC, 5.0)
    if M is None:
        # findHomography returns no matrix when estimation fails; warpPerspective cannot use that.
        print("Homography estimation failed")
        return None

    # Get the dimensions of the two images
    h1, w1 = img1.shape[:2]
    h2, w2 = img2.shape[:2]

    # Warp img1 onto a canvas wide enough for both images, then overwrite the
    # top-left h2 x w2 region with img2.
    warp_img1 = cv2.warpPerspective(img1, M, (w1 + w2, max(h1, h2)))
    warp_img1[0:h2, 0:w2] = img2

    return warp_img1

In [257]:
# 1. Only add your code inside the function (including newly imported packages).
# 2. You can design a new function and call the new function in the given functions.
# 3. Not following the project guidelines will result in a 10% reduction in grades.
# 4. If you want to show an image for debugging, please use show_image() function in helper.py.
# 5. Please do NOT save any intermediate files in your final submission.

import cv2
import numpy as np
import matplotlib.pyplot as plt
import argparse
import json
import array as arr


def parse_args():
    """
    Parse the command-line options of the task 2 panorama script.

    Returns:
        argparse.Namespace with `input_path`, `output_overlap` and `output_panaroma`,
        all strings, each falling back to the default listed below.
    """
    # (flag, default value, help text) for every supported option, in declaration order.
    option_specs = (
        ("--input_path", "data/images_panaroma", "path to images for panaroma construction"),
        ("--output_overlap", "./task2_overlap.txt", "path to the overlap result"),
        ("--output_panaroma", "./task2_result.png", "path to final panaroma image "),
    )

    parser = argparse.ArgumentParser(description="cse 573 homework 4.")
    for flag, default_value, help_text in option_specs:
        parser.add_argument(flag, type=str, default=default_value, help=help_text)

    return parser.parse_args()

def extract_features(image):
    """
    Run SIFT on an image.

    Args:
        image: array-like image exposing `.dtype`; anything that is not uint8 is first
            passed through cv2.convertScaleAbs.

    Returns:
        (keypoints, descriptors) exactly as produced by SIFT's detectAndCompute.
    """
    sift_input = image if image.dtype == np.uint8 else cv2.convertScaleAbs(image)

    detector = cv2.SIFT_create()
    keypoints, features = detector.detectAndCompute(sift_input, None)
    return keypoints, features

def match_features(descriptors1, descriptors2, min_distance_threshold):
    """
    Match every descriptor of the first set to its nearest neighbour in the second set.

    Args:
        descriptors1: (n1, d) array of descriptors, or None when the image produced none.
        descriptors2: (n2, d) array of descriptors, or None when the image produced none.
        min_distance_threshold: a nearest neighbour is kept only if its Euclidean distance
            is less than or equal to this value.

    Returns:
        List of [index_in_descriptors1, index_in_descriptors2] pairs (plain ints), ordered by
        the first index. Empty when either descriptor set is None or has no rows.
    """
    # A set without descriptors cannot be matched; report "no matches" instead of crashing.
    if descriptors1 is None or descriptors2 is None:
        return []
    if len(descriptors1) == 0 or len(descriptors2) == 0:
        return []

    d1 = np.asarray(descriptors1)
    d2 = np.asarray(descriptors2)
    # Integer descriptors (unsigned ones in particular) would wrap around on subtraction.
    if not np.issubdtype(d1.dtype, np.floating):
        d1 = d1.astype(np.float64)
    if not np.issubdtype(d2.dtype, np.floating):
        d2 = d2.astype(np.float64)

    # Broadcast to an (n1, n2, d) difference tensor and reduce it to an (n1, n2) distance table.
    # NOTE: memory grows with n1 * n2 * d.
    distances = np.linalg.norm(d1[:, np.newaxis, :] - d2[np.newaxis, :, :], axis=2)

    closest_indices = np.argmin(distances, axis=1)
    min_distances = distances[np.arange(len(d1)), closest_indices]

    kept_rows = np.flatnonzero(min_distances <= min_distance_threshold)
    return [[int(row), int(closest_indices[row])] for row in kept_rows]


def find_image_pairs(overlap_matrix):
    """
    List the index pairs (i, j), i < j, whose entry in the overlap matrix equals 1.

    Only the part above the diagonal is inspected; pairs come out in row-major order.
    """
    size = len(overlap_matrix)
    return [
        (row, col)
        for row in range(size)
        for col in range(row + 1, size)
        if overlap_matrix[row][col] == 1
    ]



def get_translation_matrix(img, H):
    """
    Compute the shift and canvas size needed to show `img` warped by `H` without clipping.

    Args:
        img: image whose four corner pixels are projected; only `img.shape[:2]` is read.
        H: 3x3 homography applied to the corners.

    Returns:
        (M, new_H, new_W) where M is a 3x3 float translation matrix that moves the projected
        corners to non-negative coordinates, and new_H / new_W are the height / width (ints)
        of a canvas that contains the translated, warped image.
    """
    # Corner pixels in homogeneous coordinates (x, y, 1).
    height, width = img.shape[:2]
    corners = np.array([
        [0, 0, 1],                               # Top-left
        [0, height - 1, 1],                      # Bottom-left
        [width - 1, 0, 1],                       # Top-right
        [width - 1, height - 1, 1]               # Bottom-right
    ], dtype=float)

    # Project the corners and divide by the homogeneous coordinate.
    projected = np.matmul(H, corners.T).T
    projected = projected[:, :2] / projected[:, 2:3]

    min_x, min_y = projected.min(axis=0)
    max_x, max_y = projected.max(axis=0)

    # Round the shift up so that a fractional negative offset is not left partly off-canvas.
    translation_x = int(np.ceil(max(-min_x, 0)))
    translation_y = int(np.ceil(max(-min_y, 0)))

    # The corners are pixel indices (0 .. size - 1), so the canvas needs max index + 1 pixels.
    new_H = int(np.ceil(max(max_y, 0))) + 1 + translation_y
    new_W = int(np.ceil(max(max_x, 0))) + 1 + translation_x

    # Create translation matrix
    M = np.array([[1, 0, translation_x], [0, 1, translation_y], [0, 0, 1]], dtype=float)

    return M, new_H, new_W


def stitch_images(img1, img2, key_pointers1, key_pointers2, matches):
    """
    Stitch img2 onto img1 using matched keypoints.

    Args:
        img1: reference image; it is only translated, never warped.
        img2: image warped into img1's frame.
        key_pointers1, key_pointers2: keypoints of img1 / img2 (objects exposing `.pt`).
        matches: list of [index_into_key_pointers1, index_into_key_pointers2] pairs.

    Returns:
        The combined image: img1's pixels wherever they are non-zero, img2's warped pixels elsewhere.

    Raises:
        ValueError: when fewer than 4 matches are supplied or no homography can be estimated.
    """
    # A homography has 8 degrees of freedom, so at least 4 point pairs are required.
    if len(matches) < 4:
        raise ValueError("at least 4 matches are needed to estimate a homography, got %d" % len(matches))

    img1_pts = np.array([key_pointers1[m[0]].pt for m in matches])
    img2_pts = np.array([key_pointers2[m[1]].pt for m in matches])

    # Homography mapping img2 coordinates onto img1 coordinates.
    H, _ = cv2.findHomography(img2_pts, img1_pts, cv2.RANSAC, 5.0)
    if H is None:
        raise ValueError("could not estimate a homography from the supplied matches")

    # H is applied to img2, so it is img2's corners that decide how far the result must be shifted.
    translation_matrix, new_H, new_W = get_translation_matrix(img2, H)

    # img1 is placed at the same shift; make sure the canvas also covers all of it.
    shift_x = int(translation_matrix[0, 2])
    shift_y = int(translation_matrix[1, 2])
    new_W = max(new_W, img1.shape[1] + shift_x)
    new_H = max(new_H, img1.shape[0] + shift_y)

    H_img2 = np.matmul(translation_matrix, H)
    H_img1 = np.matmul(translation_matrix, np.identity(3))

    warpim2 = cv2.warpPerspective(img2, H_img2, (new_W, new_H))
    warpim1 = cv2.warpPerspective(img1, H_img1, (new_W, new_H))

    # Element-wise: keep img1 where it has a non-zero value, otherwise fall back to img2.
    blended_img = np.where(warpim1 > 0, warpim1, warpim2)
    return blended_img


def stitch(inp_path, imgmark, N=4, savepath=''): 
    "The output image should be saved in the savepath."
    "The intermediate overlap relation should be returned as NxN a one-hot(only contains 0 or 1) array."
    "Do NOT modify the code provided."
    imgpath = [f'{inp_path}/{imgmark}_{n}.png' for n in range(1,N+1)]
    imgs = []
    for ipath in imgpath:
        img = cv2.imread(ipath)
        imgs.append(img)
    "Start you code here"
    # Overview: every ordered pair (i, j) of images is matched with SIFT descriptors. A pair with
    # more than `min_match_threshold` matches is marked as overlapping. The first overlapping
    # pair seeds the panorama; afterwards an image j that is not part of the panorama yet is
    # matched against the current panorama and stitched onto it.
    # Returns the N x N overlap array (diagonal is 1). The panorama, if any, is written to
    # `savepath`; no other files are written.
    max_descriptor_distance = 50   # Value to tune
    min_match_threshold = 10       # Value to tune

    overlap_arr = np.eye(len(imgs), dtype=int)
    feature_cache = {}
    stitched_image = None
    processed_set = set()

    def features_for(index):
        # SIFT keypoints / descriptors per input image, computed once on first use.
        if index not in feature_cache:
            feature_cache[index] = extract_features(imgs[index])
        return feature_cache[index]

    def matches_between(desc_a, desc_b):
        # extract_features yields no descriptors (None) for an image without keypoints.
        if desc_a is None or desc_b is None:
            return []
        return match_features(desc_a, desc_b, max_descriptor_distance)

    for i in range(len(imgs)):
        for j in range(len(imgs)):
            if i == j:
                continue

            keys_i, descs_i = features_for(i)
            keys_j, descs_j = features_for(j)
            matches = matches_between(descs_i, descs_j)
            if len(matches) <= min_match_threshold:
                continue

            overlap_arr[i, j] = 1
            if stitched_image is None:
                stitched_image = stitch_images(imgs[i], imgs[j], keys_i, keys_j, matches)
                processed_set.update((i, j))
            elif j not in processed_set:
                # Image j overlaps image i but is matched against the whole panorama built so far.
                keys_pano, descs_pano = extract_features(stitched_image)
                pano_matches = matches_between(descs_pano, descs_j)
                if len(pano_matches) <= min_match_threshold:
                    # Too few correspondences with the panorama; leave j for a later pair.
                    continue
                stitched_image = stitch_images(stitched_image, imgs[j], keys_pano, keys_j, pano_matches)
                processed_set.add(j)

    # Nothing to write when no pair overlapped or when no output path was given.
    if stitched_image is not None and savepath:
        cv2.imwrite(savepath, stitched_image)

    return overlap_arr
    
# if __name__ == "__main__":
#     #task2
#     args = parse_args()
#     overlap_arr = stitch(args.input_path, 't2', N=4, savepath=f'{args.output_panaroma}')
#     with open(f'{args.output_overlap}', 'w') as outfile:
#         json.dump(overlap_arr.tolist(), outfile)
    


In [258]:
# Build the panorama from t2_1.png .. t2_4.png and display the returned overlap matrix.
# NOTE(review): savepath='.' is a directory, not an image file name, and stitch() hands it
# straight to cv2.imwrite - confirm where the panorama is supposed to be written.
stitch('data/images_panaroma/', 't2', N=4, savepath='.')

array([[1, 1, 0, 1],
       [1, 1, 1, 1],
       [0, 1, 1, 0],
       [1, 1, 0, 1]])

In [221]:
# Load the first and fourth panorama images for manual experiments.
img1 = cv2.imread('data/images_panaroma/t2_1.png')
img4 = cv2.imread('data/images_panaroma/t2_4.png')

# NOTE(review): extract_features() as defined in this file returns a (keypoints, descriptors)
# tuple, so with that definition these two names hold tuples rather than descriptor arrays.
descriptors1 = extract_features(img1)
descriptors2 = extract_features(img4)

In [222]:
def get_homogeneous_coordinate_corners(img):
    """
    Return the four corner pixels of `img` as rows of homogeneous coordinates (x, y, 1).

    Row order: top-left, bottom-left, top-right, bottom-right.
    """
    rows, cols = img.shape[:2]
    last_x, last_y = cols - 1, rows - 1
    # x varies slowest, y fastest, which gives exactly the row order documented above.
    return np.array([[x, y, 1] for x in (0, last_x) for y in (0, last_y)])

In [225]:
# Display the homogeneous corner coordinates of img1.
get_homogeneous_coordinate_corners(img1)

array([[  0,   0,   1],
       [  0, 370,   1],
       [542,   0,   1],
       [542, 370,   1]])

In [42]:
# Euclidean distance between the first element of each descriptor set.
np.linalg.norm(descriptors1[0]-descriptors2[0])

471.8114

In [20]:
# Inspect the first match.
# NOTE(review): `matches` is not assigned in any visible cell - it comes from an earlier session state.
matches[0]

< cv2.DMatch 0x131f398f0>

In [115]:
# Print the entries referenced by the first match only (the loop breaks after one iteration).
# NOTE(review): this expects match objects with queryIdx / trainIdx attributes, unlike the
# [i, j] lists returned by the match_features() defined in this file.
for m in matches:
    print(descriptors1[m.queryIdx].reshape(-1, 1, 2))
    print(descriptors2[m.trainIdx])
    break

[[[  6.   0.]]

 [[  0.   0.]]

 [[  0.   0.]]

 [[  0.   4.]]

 [[ 29.   0.]]

 [[  0.   0.]]

 [[  0.   0.]]

 [[  0.  34.]]

 [[ 81.   0.]]

 [[  0.   0.]]

 [[  0.   0.]]

 [[  0. 134.]]

 [[ 78.   0.]]

 [[  0.   0.]]

 [[  0.   0.]]

 [[  0. 109.]]

 [[  6.   0.]]

 [[  0.   0.]]

 [[  0.   0.]]

 [[  0.  13.]]

 [[112.   2.]]

 [[  0.   0.]]

 [[  0.   0.]]

 [[  0.  35.]]

 [[134.  23.]]

 [[  2.   1.]]

 [[  0.   0.]]

 [[  0.  95.]]

 [[ 48.  12.]]

 [[ 37.  35.]]

 [[  0.   0.]]

 [[  0.  33.]]

 [[  6.   0.]]

 [[  0.   0.]]

 [[  0.   0.]]

 [[  0.  18.]]

 [[ 93.  11.]]

 [[ 11.   2.]]

 [[  0.   0.]]

 [[  0.  23.]]

 [[134.  81.]]

 [[132.  51.]]

 [[  0.   0.]]

 [[  0.   4.]]

 [[ 11.  16.]]

 [[134. 134.]]

 [[  0.   0.]]

 [[  0.   0.]]

 [[  2.   1.]]

 [[ 18.  25.]]

 [[  0.   0.]]

 [[  1.  10.]]

 [[ 10.   8.]]

 [[ 97.  89.]]

 [[  0.   0.]]

 [[  0.   4.]]

 [[  4.  12.]]

 [[134. 134.]]

 [[  0.   0.]]

 [[  0.   0.]]

 [[  0.   0.]]

 [[101. 121.]]

 [[  1. 

In [26]:
# Same Euclidean distance written out by hand: square root of the summed squared differences.
np.sqrt(np.sum(np.square(descriptors1[0] - descriptors2[0])))

471.8114

In [None]:
def imageStitching_noClip(im1, im2, H2to1):
    '''
    Returns a panorama of im1 and im2 using the given 
    homography matrix without cliping.
    
    INPUTS
        im1 and im2 - images to be stitched.
        H2to1- the homography matrix (applied to im2 when warping it onto the canvas).
    OUTPUT
        img_pano - the panorama image: a float array obtained by blending the two
                   warped images (each divided by 255) with distance-based weights.
    ''' 
    # Imported inside the function because the module does not import it at the top.
    from scipy.ndimage import distance_transform_edt

    def createmask(image):
        # Weight mask: 0 on the one-pixel border, growing with the distance from the
        # border, normalised so that the largest weight is 1.
        mask = np.zeros((image.shape[0], image.shape[1]))
        mask[1:image.shape[0]-1,1:image.shape[1]-1] = 1
        mask = distance_transform_edt(mask)
        mask = mask / np.max(mask)
        return mask
        
    # Corners of im2 in homogeneous coordinates (x, y, 1).
    left_top = np.array([0, 0, 1])
    left_bottom = np.array([0, im2.shape[0] - 1, 1])
    right_top = np.array([im2.shape[1] - 1, 0, 1])
    right_bottom = np.array([im2.shape[1] - 1, im2.shape[0] - 1, 1])
    
    # Project the corners with the homography ...
    left_top_est = H2to1 @ left_top
    left_bottom_est = H2to1 @ left_bottom
    right_top_est = H2to1 @ right_top
    right_bottom_est = H2to1 @ right_bottom
    
    # ... and divide by the homogeneous coordinate.
    left_top_est = left_top_est/left_top_est[2]
    left_bottom_est = left_bottom_est/left_bottom_est[2]
    right_top_est = right_top_est/right_top_est[2]
    right_bottom_est = right_bottom_est/right_bottom_est[2]
    
    # Shift needed to bring the projected left / top corners to non-negative coordinates.
    # NOTE(review): only the two left corners are used for x and the two top corners for y.
    transx = int(max(-left_top_est[0], -left_bottom_est[0], 0))
    transy = int(max(-left_top_est[1], -right_top_est[1], 0))
    
    # Canvas size derived from the projected bottom / right corners of im2.
    # NOTE(review): im1's own extent is not taken into account here - confirm it always fits.
    imH = max(left_bottom_est[1].astype(int) , right_bottom_est[1].astype(int)) + transy
    imW = max(right_top_est[0].astype(int) , right_bottom_est[0].astype(int)) + transx
    
    M = np.array([[1, 0, transx], 
                  [0 , 1, transy], 
                  [0, 0, 1]]).astype(float)
    print(M)
    
    # im2 is warped by the shifted homography, im1 is only shifted.
    warpim2 = cv2.warpPerspective(im2, M@H2to1, (imW, imH))
    warpim1 = cv2.warpPerspective(im1, M@np.identity(3), (imW, imH))
    warpim2 = warpim2/255
    warpim1 = warpim1/255
    
    maskim1 = createmask(im1)
    maskim2 = createmask(im2)
    
    # Warp the weight masks exactly like their images.
    warpmask1 = cv2.warpPerspective(maskim1, M@np.identity(3), (imW, imH))
    warpmask2 = cv2.warpPerspective(maskim2, M@H2to1, (imW, imH))
    summask = warpmask1 + warpmask2
    
    # Normalise the weights so they sum to 1 where at least one image contributes;
    # pixels covered by neither image keep weight 0.
    warpmask1 = np.divide(warpmask1, summask, out=np.zeros_like(warpmask1), where=summask!=0)
    warpmask2 = np.divide(warpmask2, summask, out=np.zeros_like(warpmask2), where=summask!=0)
    
    # Repeat each weight map over three channels.
    warpmask1 = np.expand_dims(warpmask1, axis = 2)
    warpmask1 = np.tile(warpmask1, (1,1,3))
    
    warpmask2 = np.expand_dims(warpmask2, axis = 2)
    warpmask2 = np.tile(warpmask2, (1,1,3))

    img_pano = warpim1*warpmask1 + warpim2*warpmask2
    
    return img_pano

In [186]:
# Scratch experiment: two empty dicts and a function that returns both of them.
a = {}
b = {}

def fun():
    # Returns the two module-level dict objects themselves, not copies.
    return a, b

# Stores dict `a` under a[1] and dict `b` under b[2], so each dict ends up containing itself.
a[1], b[2] = fun()

In [187]:
# Display `a`.
a

{1: {...}}

In [188]:
# Display `b`.
b

{2: {...}}

In [259]:
! python3 task2.py --input_path data/images_panaroma --output_overlap ./task2_overlap.txt --output_panaroma ./task2_result.png