# MA - INF 2201 - Computer Vision WS 22/23

### Exercise Sheet 01


In [None]:
import cv2
import numpy as np
import os
import time

DATA_DIR = '../data'
SIM_THRESHOLD = 0.5 # similarity threshold for template matching. Can be adapted.


### 1. Fourier Transform 

In this task, we will show a useful property of the Fourier Transform, which is the convolution property. It tells us that convolution in the spatial domain corresponds to multiplication in the frequency domain. The input images that you will be operating on are orange.jpeg and celeb.jpeg.

- Load the two images orange.jpeg and celeb.jpeg. Remember to convert to grayscale.
- Create a 7 ×7 Gaussian kernel with sigma=1.
- Blur the images by convolving the image with the Gaussian kernel directly in the spatial domain. You may use the library function (cv2.filter2D).
- Now we are going to blur the images in the frequency domain using Fourier Transform. We multiply the kernel function and the frequency image instead of applying convolution. To get the final result, we transform back to the image space. You may use functions included in the package numpy.fft to apply the transform and its inverse.
- Visualise the results for both images and report the mean absolute difference between the two blurring methods and the time taken by each of them.
    
(3 Points)

### 2. Template Matching 
In this task, we will implement template matching using Sum Square Difference and normalized cross-correlation similarity measures. The input image is RidingBike.jpeg and the template image (what we want to find in the larger input image) is RidingBikeTemplate.jpeg

- Implement Sum Square Difference.
- Implement template matching using your implementation of Sum Square Difference.
- Implement Normalized Cross-correlation.
- Implement template matching using Normalized Cross-correlation.
- Draw rectangles on the image where similarity ≥0.5 for both methods. You may experiment with other threshold values to determine the matching.

### 3. Template matching multi-scale 

In this task, we will build the Gaussian pyramid to make template matching work at different resolutions. Read the image DogGray.jpeg and the template DogTemplate.jpeg
- Build a 5-level Gaussian pyramid by downsampling the input image yourself.
- Now create a 5 level Gaussian pyramid using cv2.pyrDown. Compare it with your implementation by printing the mean absolute difference at each level.
- Perform template matching by using your implementation of normalized cross-correlation. Report the time taken by this method.
- Show the template matching using normalized cross correlation at the different Pyramid levels of both the template and input images (you can use the pyramid obtained by pyrDown).
- As you observed, implementing template matching naively is not efficient. Now we will rely on the pyramid technique while constraining the search space in order to make it faster. Follow the procedure described in the lecture slides: search only in regions with high similarity in the previous pyramid level. Compare the times taken by this method and the naive implementation.
- Visualise the template matching results.

(6 Points)

### 4. Pyramids for image blending 

In this task, we will stitch two images using pyramids. Without pyramids, blending does not look natural because of discontinuities between the pixel values. We will blend the images dog.jpeg and moon.jpeg.
- Load the two images dog.jpeg and moon.jpeg.
- Create the Gaussian Pyramids of the two images, and find their Laplacian Pyramids LA and LB (remember that a Laplacian Pyramid is the difference between two levels in the Gaussian Pyramid as explained in the lecture, i.e. $L_i = G_i - \mathrm{expand}(G_{i+1})$). Set the number of levels to 5.
- Blend the image dog.jpeg with the image moon.jpeg: create a Gaussian pyramid GR for the region of interest in the given mask mask.jpeg (first transform the mask to grayscale).
- Combine the Laplacian pyramids using GR as weights for the blending, i.e. $LS_l(i,j) = GR_l(i,j) \cdot LA_l(i,j) + (1 - GR_l(i,j)) \cdot LB_l(i,j)$.
- Collapse the LS pyramid to obtain the final composite image: $LS_l = LS_l + \mathrm{expand}(LS_{l+1})$. Apply the blending operation on the images inside task4 (the results will be funny).

(6 Points)

### 5. Edges 

In this task, we will detect edges in images using the derivative of a Gaussian kernel. Read the image einstein.jpeg.

- Compute the weights of the derivative (in x) of a 5x5 Gaussian kernel with σ = 0.6.
- Compute the weights of the derivative (in y) of a 5x5 Gaussian kernel with σ = 0.6.
- To get the edges, convolve the image with the kernels computed in previous steps. You can use cv2.filter2D.
- Compute the edge magnitude and the edge direction (you can use numpy.arctan2). Visualise the magnitude and direction.

(2 Points)


In [None]:
def blur_im_spatial(image, kernel_size):
    """Blur ``image`` in the spatial domain using convolution (exercise stub).

    ``kernel_size`` is the value task1 passes for the Gaussian kernel size
    (7 in this file).  Nothing is computed yet, so the function returns
    ``None``.
    """
    # TODO: convolve the image with the Gaussian kernel (the sheet allows cv2.filter2D)
    return None
    

def blur_im_freq(image, kernel):
    """Blur ``image`` in the frequency domain (exercise stub).

    Per the exercise sheet, the kernel and the frequency image are to be
    multiplied and the product transformed back to image space.  Nothing is
    computed yet, so the function returns ``None``.
    """
    # TODO: apply the Fourier transform, multiply, and invert (the sheet allows numpy.fft)
    return None
 
def calc_sum_square_difference(image, template):
    """Sum square difference (SQD) similarity of ``template`` over ``image``.

    Exercise stub: not implemented yet, returns ``None``.
    """
    # TODO: implement the sum square difference (SQD) similarity
    return None
       
def calc_normalized_cross_correlation(image, template):
    """Normalized cross correlation (NCC) similarity of ``template`` over ``image``.

    Exercise stub: not implemented yet, returns ``None``.
    """
    # TODO: implement the normalized cross correlation (NCC) similarity
    return None

def draw_rectangles(input_im, similarity_im):
    """Mark matching regions of ``input_im`` with rectangles (exercise stub).

    Rectangles are meant to be drawn in regions where ``similarity_im`` is
    larger than the module-level ``SIM_THRESHOLD``.  Not implemented yet, so
    the function returns ``None``.
    """
    # TODO: draw rectangles on the input image where the similarity exceeds SIM_THRESHOLD
    return None

def pyramid_down(image, dstSize, scale_factor=None):
    """Produce the next, smaller pyramid level of ``image`` (exercise stub).

    The image may be resized either to the new dimensions ``dstSize`` or by
    the scaling factor ``scale_factor``; which one is used is left to the
    implementer.  Not implemented yet, so the function returns ``None``.
    """
    # TODO: blur and downsample the image
    return None
def create_gaussian_pyramid(image, pyram_func, num_levels):
    """Create a pyramid of ``image`` using ``pyram_func`` (exercise stub).

    ``pyram_func`` can either be ``cv2.pyrDown`` or your own implementation.
    Not implemented yet, so the function returns ``None``.
    """
    # TODO: in a loop, create a pyramid of downsampled blurred images
    # using the Gaussian kernel
    return None
def calc_derivative_gaussian_kernel(size, sigma):
    """Derivative-of-Gaussian kernel weights in x and y (exercise stub).

    task5 unpacks the result into ``kernel_x, kernel_y``, so an
    implementation is expected to return a pair.  Not implemented yet, so
    the function returns ``None``.
    """
    # TODO: implement
    return None

def create_laplacian_pyramid(image, num_levels=5):
    """Create the Laplacian pyramid of ``image`` (exercise stub).

    The Gaussian pyramid is requested from ``create_gaussian_pyramid`` with
    OpenCV's ``pyrDown`` as the downsampling function.  The Laplacian levels
    themselves are still to be implemented, so the function currently
    returns ``None``.

    Args:
        image: input image to decompose.
        num_levels: number of pyramid levels (the sheet asks for 5).
    """
    # NOTE: the OpenCV function is spelled ``pyrDown`` (capital D);
    # ``cv2.pyrdown`` does not exist and raises AttributeError.
    gaussian_pyramid = create_gaussian_pyramid(image, cv2.pyrDown, num_levels)
    # TODO: complete as described in the exercise sheet,
    # i.e. L_i = G_i - expand(G_{i+1})
    return None
def collapse_pyramid(laplacian_pyramid):
    """Collapse the final weighted pyramid into one image (exercise stub).

    The intended behaviour is to sum up the images at each level with the
    upscaled previous level.  The accumulation step is not implemented yet,
    so the first pyramid entry is returned unchanged.
    """
    collapsed = laplacian_pyramid[0]
    level_count = len(laplacian_pyramid)
    for level in range(1, level_count):
        # TODO complete code
        pass
    return collapsed
#Fourier Transform

def task1(input_im_file):
    """Task 1: blur an image in the spatial and in the frequency domain.

    Loads ``input_im_file`` from ``DATA_DIR`` as grayscale, runs the spatial
    blur while timing it, prints the elapsed time, and then runs the
    frequency-domain blur.  The kernel creation, the second timing and the
    comparison are still open TODOs.
    """
    image = cv2.imread(os.path.join(DATA_DIR, input_im_file), cv2.IMREAD_GRAYSCALE)
    kernel_size = 7
    kernel = None  # TODO: create kernel

    # time the spatial-domain blur
    t_begin = time.time()
    conv_result = blur_im_spatial(image, kernel_size)
    t_finish = time.time()
    elapsed_spatial = t_finish - t_begin
    print('time taken to apply blur in the spatial domain', elapsed_spatial)

    # TODO: measure the timing here too
    fft_result = blur_im_freq(image, kernel)

    # TODO: compare results in terms of run time and mean absolute difference
    # (the exercise sheet asks for the mean absolute difference)




def task2(input_im_file, template_im_file):
    """Task 2: single-scale template matching with SQD and NCC.

    Both files are loaded from ``DATA_DIR`` as grayscale images, the two
    similarity maps are computed, and rectangles are drawn at matching
    regions for each of them.

    Args:
        input_im_file: file name of the image to search in.
        template_im_file: file name of the template to look for.
    """
    full_path_im = os.path.join(DATA_DIR, input_im_file)
    full_path_template = os.path.join(DATA_DIR, template_im_file)
    in_im = cv2.imread(full_path_im, cv2.IMREAD_GRAYSCALE)
    # Read the template via its DATA_DIR path; the bare file name would be
    # resolved against the working directory and the read would fail there.
    template = cv2.imread(full_path_template, cv2.IMREAD_GRAYSCALE)
    result_sqd = calc_sum_square_difference(in_im, template)
    result_ncc = calc_normalized_cross_correlation(in_im, template)

    # draw rectangles at matching regions
    vis_sqd = draw_rectangles(in_im, result_sqd)
    vis_ncc = draw_rectangles(in_im, result_ncc)
    


def task3(input_im_file, template_im_file):
    """Task 3: multi-scale template matching (exercise stub).

    Not implemented yet; the function returns ``None``.
    """
    # TODO: calculate the time needed for template matching with the pyramid

    # TODO: show the template matching results using the pyramid
    return None



def task4(input_im_file1, input_im_file2, interest_region_file, num_pyr_levels=5):
    """Task 4: image blending with pyramids (exercise stub).

    The blended image is meant to be returned; until the steps from the
    exercise sheet are implemented the result is ``None``.
    """
    # TODO you can use the steps described in the exercise sheet to help
    # guide you through the solution
    blended = None
    return blended

def task5(input_im, kernel_size=5, sigma=0.5):
    """Task 5: edge detection with derivative-of-Gaussian kernels.

    Loads ``input_im`` from ``DATA_DIR`` as grayscale and requests the x/y
    derivative kernels.  Convolution, magnitude, direction and the
    visualisation are still open TODOs.

    Args:
        input_im: file name of the image inside ``DATA_DIR``.
        kernel_size: side length of the derivative kernels.
        sigma: standard deviation of the Gaussian.
            NOTE(review): the exercise sheet asks for sigma = 0.6 while the
            default here is 0.5 - confirm which value is intended.
    """
    # Build the path from the argument rather than hard-coding the file, so
    # the function honours the image it is asked to process.
    image = cv2.imread(os.path.join(DATA_DIR, input_im), cv2.IMREAD_GRAYSCALE)

    kernel_x, kernel_y = calc_derivative_gaussian_kernel(kernel_size, sigma)

    edges_x = None  # TODO: convolve with kernel_x
    edges_y = None  # TODO: convolve with kernel_y

    magnitude = None  # TODO: compute edge magnitude
    direction = None  # TODO: compute edge direction

    # TODO visualise the results



# Script entry point: run every task of the exercise sheet in order.
if __name__ == "__main__":
    task1('orange.jpeg')
    task1('celeb.jpeg')
    task2('RidingBike.jpeg', 'RidingBikeTemplate.jpeg')
    task3('DogGray.jpeg', 'DogTemplate.jpeg')
    task4('dog.jpeg', 'moon.jpeg', 'mask.jpeg')
    # just for fun, blend these images as well
    # ``range[1,2,10]`` subscripts the range type and raises TypeError;
    # iterate over the three image indices explicitly instead.
    for i in [1, 2, 10]:
        ind = str(i).zfill(2)
        # Source, target and mask all live in the task4 folder (the sheet
        # refers to "the images inside task4").
        blended_im = task4(
            'task4/source_%s.jpg' % ind,
            'task4/target_%s.jpg' % ind,
            'task4/mask_%s.jpg' % ind,
        )
        # TODO: visualise the blended image

    # The exercise sheet specifies a 5x5 kernel with sigma = 0.6.
    task5('einstein.jpeg', sigma=0.6)

# MA - INF 2201 - Computer Vision WS 22/23

### Exercise Sheet 01
