# Lab 5: Warping and Image Segmentation

In [12]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
import os
import math

imagesDir = 'images' # Change this, according to your images' directory path

## 1. Warping

In [3]:
# Read image (cv2.imread returns None instead of raising when the file cannot be read)
img_path = os.path.join(imagesDir, 'giraffe.jpg') # Change this, according to your image's path
img = cv2.imread(img_path)
if img is None:
    raise FileNotFoundError(f"Could not read image: {img_path}")

# Resize image to facilitate visualization
img = cv2.resize(img, (0, 0), fx = 0.4, fy = 0.4)

# Show image (any key press closes the window)
cv2.imshow('Image', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

Affine Transformation

In [11]:
# Image size: shape[0] is the height (rows), shape[1] is the width (columns)
rows, cols = img.shape[:2]

# Select original coordinates of three points of the original image
# (top-left, top-right and bottom-left corners, as (x, y) pairs)
ori_coord = np.array([[0, 0], [cols - 1, 0], [0, rows - 1]]).astype(np.float32)

# Select target coordinates (where the points will move to).
# x values are fractions of the width and y values are fractions of the height;
# the first point's y previously used the width, which is only right for square images.
tar_coord = np.array([[0, rows*0.33], [cols*0.85, rows*0.25], [cols*0.15, rows*0.7]]).astype(np.float32)

# Get affine transformation matrix (2x3) that maps the three source points onto the targets
warp_mat = cv2.getAffineTransform(ori_coord, tar_coord)

# Apply transformation to the image, keeping the original canvas size (width, height)
warp_dst = cv2.warpAffine(img, warp_mat, (cols, rows))

# Show Image
cv2.imshow('Warped Image', warp_dst)
cv2.waitKey(0)
cv2.destroyAllWindows()

Exercise 1.1: Rotate an image by [defining the rotation matrix](https://docs.opencv.org/4.x/da/d54/group__imgproc__transform.html#gafbbc470ce83812914a70abfb604f4326) and then applying transformation to the image.

In [10]:
# Rotate by 20 degrees about the image centre, keeping the original scale and canvas size
h, w = img.shape[:2]
rotation_matrix = cv2.getRotationMatrix2D((w // 2, h // 2), 20, 1)
rotated_image = cv2.warpAffine(img, rotation_matrix, (w, h))

cv2.imshow('Rotated Image', rotated_image)

# Keep the window open until "q" is pressed
while cv2.waitKey(1) != ord("q"):
    pass
cv2.destroyAllWindows()

Exercise 1.2: Rotate an image through cv2.getAffineTransform().

In [14]:
angle = 45  # rotation angle in degrees (positive = counter-clockwise, as in cv2.getRotationMatrix2D)

# math.cos / math.sin work in radians, so convert the angle first
theta = math.radians(angle)
cos_t, sin_t = math.cos(theta), math.sin(theta)

# Rotate about the image centre, expressed as (x, y)
center_xy = np.float32([(img.shape[1] - 1) / 2, (img.shape[0] - 1) / 2])

# 2x2 rotation in image coordinates (y axis points down)
rotation = np.float32([[cos_t, sin_t], [-sin_t, cos_t]])

# Three reference points of the original image and their positions after the rotation:
# shift to the centre, rotate, shift back
ori_coord = np.array([[0, 0], [img.shape[1] - 1, 0], [0, img.shape[0] - 1]]).astype(np.float32)
tar_coord = ((ori_coord - center_xy) @ rotation.T + center_xy).astype(np.float32)

# The affine matrix that maps the reference points onto their rotated positions is the rotation itself
warp_mat = cv2.getAffineTransform(ori_coord, tar_coord)
warp_dst = cv2.warpAffine(img, warp_mat, (img.shape[1], img.shape[0]))

cv2.imshow("Original Image", img)
# Show the result computed in this cell (not the image from Exercise 1.1)
cv2.imshow('Rotated Image', warp_dst)
while True:
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
cv2.destroyAllWindows()


Exercise 1.3: Implement transformation matrices (without using getAffineTransform or getRotationMatrix2D) to achieve the following goals:
* Apply a translation of 100 pixels to the right
* Apply a rotation of 90 degrees 
* Scale the image to twice its size

In [21]:
height, width = img.shape[:2]

# Translation: move every pixel 100 px to the right (tx = 100, ty = 0)
T = np.float32([[1, 0, 100], [0, 1, 0]])
img_translation = cv2.warpAffine(img, T, (width, height))

#https://www.geeksforgeeks.org/python-opencv-getrotationmatrix2d-function/

# Rotation by 90 degrees about the image centre (cx, cy)
cx, cy = width // 2, height // 2

angle = np.radians(90)
cos_a = math.cos(angle)
sin_a = math.sin(angle)

# The third column translates so that the centre (cx, cy) stays fixed under the rotation
R = np.float32([[cos_a,  -sin_a, cx - cx * cos_a + cy * sin_a], [sin_a, cos_a, cy - cx * sin_a - cy * cos_a]])
img_rotated = cv2.warpAffine(img, R, (width, height))

# Scaling by a factor of s. The output canvas must grow by the same factor,
# otherwise only the top-left part of the enlarged image is kept.
s = 2
S = np.float32([[s,  0, 0], [0, s, 0]])
img_scaled = cv2.warpAffine(img, S, (width * s, height * s))

cv2.imshow("Original Image", img)
cv2.imshow('Translated Image', img_translation)
cv2.imshow('Rotated Image', img_rotated)
cv2.imshow('Scaled Image', img_scaled)
while True:
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
cv2.destroyAllWindows()


### Homography

Example of homography using feature matching from last week

In [23]:
# Load images as grayscale (cv2.imread returns None instead of raising when a file cannot be read)
query_path = os.path.join(imagesDir, 'match_box01a_1.png')
train_path = os.path.join(imagesDir, 'match_box01a_2.png')
img1 = cv2.imread(query_path, cv2.IMREAD_GRAYSCALE)
img2 = cv2.imread(train_path, cv2.IMREAD_GRAYSCALE)
if img1 is None:
    raise FileNotFoundError(f"Could not read image: {query_path}")
if img2 is None:
    raise FileNotFoundError(f"Could not read image: {train_path}")

cv2.imshow('Query', img1)
cv2.imshow('Train', img2)
# Keep the windows open until "q" is pressed
while True:
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
cv2.destroyAllWindows()

In [26]:
# Create the SIFT detector and extract keypoints + descriptors from both images
sift = cv2.SIFT_create()
kp1, des1 = sift.detectAndCompute(img1, None)
kp2, des2 = sift.detectAndCompute(img2, None)

# FLANN matcher configured with a KD-tree index; two nearest neighbours per query descriptor
FLANN_INDEX_KDTREE = 1
index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
search_params = dict(checks = 50)
flann = cv2.FlannBasedMatcher(index_params, search_params)
matches = flann.knnMatch(des1, des2, k=2)

# Lowe's ratio test: keep a match only if it is clearly better than the second-best candidate
good = [m for m, n in matches if m.distance < 0.7 * n.distance]

# Draw the surviving matches (all of them passed as a single group)
match_output = cv2.drawMatchesKnn(
    img1, kp1, img2, kp2, np.expand_dims(good, 0), None,
    flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS,
)

# Coordinates of the matched keypoints in the query and train images
query_pts = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
train_pts = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)

# Homography taking train-image points to their positions in the query image (RANSAC, 5 px threshold)
M, mask = cv2.findHomography(train_pts, query_pts, cv2.RANSAC, 5.0)

# Warp the train image into the query image's frame
warped_img = cv2.warpPerspective(img2, M, (img1.shape[1], img1.shape[0]), flags=cv2.INTER_LINEAR)

cv2.imshow('Query', img1)
cv2.imshow('Original Image', img2)
cv2.imshow('Warped Image', warped_img)
cv2.imshow('Feature Matching Image', match_output)

# Keep the windows open until "q" is pressed
while cv2.waitKey(1) != ord("q"):
    pass
cv2.destroyAllWindows()

Exercise 1.4: Apply the drawMatches() function (check last week's notebook) to visualize the feature matching after removing the outliers using RANSAC. Compare the results of removing outliers using RANSAC with the Lowe's ratio test implemented last week.

Hint: The output mask from cv2.findHomography() represents the inliers.

In [37]:
def detect_and_match(query, train, detector, lowe=True, ratio_test=0.75):
    """Match features between two images and draw the matches that survive filtering.

    Parameters
    ----------
    query, train
        Images handed to ``detector.detectAndCompute``.
    detector
        Feature detector/descriptor. Hamming distance is used when it is a
        ``cv2.ORB`` instance, L2 distance otherwise.
    lowe : bool
        If True, filter the matches with Lowe's ratio test. If False, take the
        best match of every query keypoint and keep only the inliers of a
        homography estimated with RANSAC.
    ratio_test : float
        Ratio used by Lowe's test (only relevant when ``lowe`` is True).

    Returns
    -------
    The image produced by ``cv2.drawMatches`` for the retained matches. No
    matches are drawn when nothing could be matched or no homography was found.
    """
    kp1, des1 = detector.detectAndCompute(query, None)
    kp2, des2 = detector.detectAndCompute(train, None)

    good = []

    # Descriptors are None when an image yields no keypoints; matching would fail
    if des1 is not None and des2 is not None:
        norm = cv2.NORM_HAMMING if isinstance(detector, cv2.ORB) else cv2.NORM_L2
        bf = cv2.BFMatcher(norm, crossCheck=False)
        knn = bf.knnMatch(des1, des2, k=2)

        if lowe:
            # The ratio test needs both neighbours; skip entries with fewer than two
            good = [
                pair[0] for pair in knn
                if len(pair) == 2 and pair[0].distance < ratio_test * pair[1].distance
            ]
        else:
            # Best candidate of every query keypoint (entries may hold fewer than k matches)
            best = [pair[0] for pair in knn if len(pair) > 0]

            if len(best) >= 4:  # Homography requires at least 4 points
                src_pts = np.float32([kp1[m.queryIdx].pt for m in best]).reshape(-1, 1, 2)
                dst_pts = np.float32([kp2[m.trainIdx].pt for m in best]).reshape(-1, 1, 2)

                H, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)

                # The mask is None when no homography could be estimated
                if mask is not None:
                    inliers = mask.ravel()
                    good = [m for m, keep in zip(best, inliers) if keep]

    img_matches = cv2.drawMatches(query, kp1, train, kp2, good, None, flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
    return img_matches

orb = cv2.ORB_create()

# One visualisation per filtering strategy, so the two can be compared
img_lowe = detect_and_match(img1, img2, orb, lowe=True)
img_ransac = detect_and_match(img1, img2, orb, lowe=False)

for window_title, match_image in (("Lowe's Ratio Test", img_lowe), ("RANSAC Filtering", img_ransac)):
    cv2.imshow(window_title, match_image)

# Keep the windows open until "q" is pressed
while (cv2.waitKey(1) & 0xFF) != ord("q"):
    pass

cv2.destroyAllWindows()


Exercise 1.5: Draw lines around the object by:
* Obtaining homography that transforms points from the query image to the train image
* Applying [the perspectiveTransform function](https://docs.opencv.org/3.4/d2/de8/group__core__array.html#gad327659ac03e5fd6894b90025e6900a7) to obtain the coordinates of the object of the query image on the train image
* Drawing lines on the train image that connect the coordinates of the object using [polylines](https://docs.opencv.org/3.4/d6/d6e/group__imgproc__draw.html#gaa3c25f9fb764b6bef791bf034f6e26f5)

In [39]:

def detect_and_draw_box(query, train, detector, ratio_test=0.75):
    """Locate the query image inside the train image and outline it.

    Features are matched with a brute-force matcher and filtered with Lowe's
    ratio test. A homography from query to train coordinates is estimated with
    RANSAC and used to project the corners of the query image onto the train image.

    Parameters
    ----------
    query, train
        Images handed to ``detector.detectAndCompute``.
    detector
        Feature detector/descriptor. Hamming distance is used when it is a
        ``cv2.ORB`` instance, L2 distance otherwise.
    ratio_test : float
        Ratio used by Lowe's test.

    Returns
    -------
    A copy of ``train`` with the projected outline drawn in green. A
    single-channel ``train`` is converted to BGR first so the colour is visible.
    ``train`` itself is returned unchanged when there are too few matches or no
    homography could be estimated.
    """
    kp1, des1 = detector.detectAndCompute(query, None)
    kp2, des2 = detector.detectAndCompute(train, None)

    # Descriptors are None when an image yields no keypoints; nothing to match
    if des1 is None or des2 is None:
        return train

    norm = cv2.NORM_HAMMING if isinstance(detector, cv2.ORB) else cv2.NORM_L2
    bf = cv2.BFMatcher(norm, crossCheck=False)
    knn = bf.knnMatch(des1, des2, k=2)

    # The ratio test needs both neighbours; skip entries with fewer than two
    good = [
        pair[0] for pair in knn
        if len(pair) == 2 and pair[0].distance < ratio_test * pair[1].distance
    ]

    if len(good) < 4:  # Homography needs at least 4 points
        return train

    src_pts = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
    dst_pts = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)

    H, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
    if H is None:
        return train

    # Corners of the query image; the last valid pixel index is size - 1
    h, w = query.shape[:2]
    obj_corners = np.float32([[0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1]]).reshape(-1, 1, 2)

    transformed_corners = cv2.perspectiveTransform(obj_corners, H)

    # On a single-channel image only the first colour component is used,
    # so (0, 255, 0) would be drawn as black; draw on a BGR copy instead.
    if train.ndim == 2:
        train_with_box = cv2.cvtColor(train, cv2.COLOR_GRAY2BGR)
    else:
        train_with_box = train.copy()

    cv2.polylines(train_with_box, [np.int32(transformed_corners)], isClosed=True, color=(0, 255, 0), thickness=3)

    return train_with_box

# Outline the query object inside the train image using ORB features
orb = cv2.ORB_create()
img_with_box = detect_and_draw_box(img1, img2, orb)

cv2.imshow("Detected Object", img_with_box)

# Keep the window open until "q" is pressed
while (cv2.waitKey(1) & 0xFF) != ord("q"):
    pass

cv2.destroyAllWindows()


## 2. Segmentation

### Thresholding

In [40]:
# Read image (cv2.imread returns None instead of raising when the file cannot be read)
sudoku_path = os.path.join(imagesDir, 'sudoku.png')
img = cv2.imread(sudoku_path)
if img is None:
    raise FileNotFoundError(f"Could not read image: {sudoku_path}")

# Convert to grayscale (the thresholding functions used below work on single-channel images)
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Show image (any key press closes the window)
cv2.imshow('Image', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

Otsu Thresholding

In [42]:
# Fixed global threshold at 127: brighter pixels become 255, the rest 0
ret, th_global = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)

# Same binarisation, but with the Otsu flag set so the threshold is chosen by Otsu's method
ret, th_otsu = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# Show both results until "q" is pressed
for window_title, binary_image in (('Global Threshold', th_global), ('Otsu Threshold', th_otsu)):
    cv2.imshow(window_title, binary_image)
while (cv2.waitKey(1) & 0xFF) != ord("q"):
    pass
cv2.destroyAllWindows()

Exercise 2.1: Verify the effects of blurring the image using a Gaussian filter, before applying the Otsu thresholding method.

In [43]:
# Otsu thresholding applied directly to the image
ret, th_otsu = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# Otsu thresholding applied after a 5x5 Gaussian blur (sigma derived from the kernel size)
blur = cv2.GaussianBlur(img, (5, 5), 0)
ret, th_otsu2 = cv2.threshold(blur, 127, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# Show both results until "q" is pressed
for window_title, binary_image in (('Otsu Threshold', th_otsu), ('Otsu Threshold after blur', th_otsu2)):
    cv2.imshow(window_title, binary_image)
while (cv2.waitKey(1) & 0xFF) != ord("q"):
    pass
cv2.destroyAllWindows()

Adaptive Threshold

In [45]:
# Adaptive threshold: Gaussian-weighted 11x11 neighbourhood, constant 2 subtracted from the local mean
th_adaptive = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)

# Show the adaptive result next to the Otsu and global ones until "q" is pressed
for window_title, binary_image in (
    ('Adaptive Threshold', th_adaptive),
    ('Otsu Threshold', th_otsu),
    ('Global Threshold', th_global),
):
    cv2.imshow(window_title, binary_image)
while (cv2.waitKey(1) & 0xFF) != ord("q"):
    pass
cv2.destroyAllWindows()

Exercise 2.2: Verify the effects of blurring with filters of increasing sizes before applying the adaptive threshold.

In [47]:
# Adaptive thresholding determines the threshold for each pixel from a small region around it.
# Here it is applied to the image blurred with Gaussian kernels of increasing size.

def get_adaptive_threshold(img):
    """Return the Gaussian adaptive binary threshold of img (11x11 neighbourhood, constant 2)."""
    return cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)


# Blurred versions of the image: 5x5, 15x15 and 25x25 Gaussian kernels
blur1 = cv2.GaussianBlur(img, (5, 5), 0)
blur2 = cv2.GaussianBlur(img, (15, 15), 0)
blur3 = cv2.GaussianBlur(img, (25, 25), 0)

# Threshold the unblurred image and each blurred version
th_adaptive = get_adaptive_threshold(img)
th_adaptive2 = get_adaptive_threshold(blur1)
th_adaptive3 = get_adaptive_threshold(blur2)
th_adaptive4 = get_adaptive_threshold(blur3)

# Show all four results until "q" is pressed
for window_title, binary_image in (
    ('Adaptive Threshold', th_adaptive),
    ('Adaptive Threshold with blur 1', th_adaptive2),
    ('Adaptive Threshold with blur 2', th_adaptive3),
    ('Adaptive Threshold with blur 3', th_adaptive4),
):
    cv2.imshow(window_title, binary_image)

while (cv2.waitKey(1) & 0xFF) != ord("q"):
    pass
cv2.destroyAllWindows()


### Segmentation with [K-Means](https://docs.opencv.org/master/d5/d38/group__core__cluster.html#ga9a34dc06c6ec9460e90860f15bcd2f88)

In [49]:
# Read image (cv2.imread returns None instead of raising when the file cannot be read)
home_path = os.path.join(imagesDir, 'home.jpg')
img2 = cv2.imread(home_path)
if img2 is None:
    raise FileNotFoundError(f"Could not read image: {home_path}")

# Show image (any key press closes the window)
cv2.imshow('Image', img2)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [50]:
# Reshape the image into a list of pixels (one row per pixel, one column per channel)
# and turn its values to float, as required by cv2.kmeans
print(f"Previous shape: {img2.shape}")

reshaped_image = img2.reshape((-1,3))
reshaped_image = np.float32(reshaped_image)

print(f"Current shape: {reshaped_image.shape}")

# Define criteria and number of clusters (k):
# stop after 10 iterations or once the centers move by less than 1.0
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
k = 4

# The initial centers are chosen at random; fix OpenCV's RNG seed so that
# re-running this cell produces the same clustering
cv2.setRNGSeed(42)

# Apply K-means (10 attempts with different random initial centers, best one is returned)
ret, label, center = cv2.kmeans(reshaped_image, k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)

Previous shape: (384, 512, 3)
Current shape: (196608, 3)


In [53]:
# Cluster centers back to 8-bit colours; every pixel then takes the colour of its cluster
center = np.uint8(center)
result = center[label.flatten()].reshape(img2.shape)

# Show the original next to the quantised image until "q" is pressed
for window_title, shown_image in (('Image', img2), ('K-Means Result', result)):
    cv2.imshow(window_title, shown_image)
while (cv2.waitKey(1) & 0xFF) != ord("q"):
    pass
cv2.destroyAllWindows()

Exercise 2.3: Experiment with different numbers of clusters.

In [None]:
def try_different_clusters(k):
    """Segment the image with K-means using k clusters.

    Relies on the notebook-level ``reshaped_image`` (float32 pixel list),
    ``criteria`` (termination criteria) and ``img2`` (source image, used for
    the output shape).

    Parameters
    ----------
    k : int
        Number of clusters.

    Returns
    -------
    Array with the shape of ``img2`` in which every pixel is replaced by the
    (uint8) center of the cluster it was assigned to.
    """
    ret, label, center = cv2.kmeans(reshaped_image, k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)
    center = np.uint8(center)
    return center[label.flatten()].reshape((img2.shape))

# Backward-compatible alias for the original (misspelled) function name
try_different_cluter = try_different_clusters

# Results for 2, 4 and 8 clusters respectively
res2 = try_different_clusters(2)
res = try_different_clusters(4)
res1 = try_different_clusters(8)

cv2.imshow('2 clusters', res2)
cv2.imshow('4 clusters', res)
cv2.imshow('8 clusters', res1)
# Keep the windows open until "q" is pressed
while True:
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
cv2.destroyAllWindows()

### Segmentation with [GrabCut](https://docs.opencv.org/4.x/d3/d47/group__imgproc__segmentation.html#ga909c1dda50efcbeaa3ce126be862b37f)

In [58]:
# Read image (cv2.imread returns None instead of raising when the file cannot be read)
giraffe_path = os.path.join(imagesDir, 'giraffe.jpg') # Change this, according to your image's path
img = cv2.imread(giraffe_path)
if img is None:
    raise FileNotFoundError(f"Could not read image: {giraffe_path}")

# Resize image to facilitate visualization
img = cv2.resize(img, (0, 0), fx = 0.6, fy = 0.6)

# Show image (any key press closes the window)
cv2.imshow('Image', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [61]:
# GrabCut writes its per-pixel labels into a mask with the same height and width as the image
mask = np.zeros(img.shape[:2], np.uint8)

# Bounding box containing the object of interest: (x, y, width, height)
bb = (0, 0, 400, 500)

# Work arrays used internally by GrabCut for the background and foreground models
bgModel = np.zeros((1, 65), np.float64)
fgModel = np.zeros((1, 65), np.float64)

# Run GrabCut for 5 iterations, initialised from the bounding box
mask, bgModel, fgModel = cv2.grabCut(img, mask, bb, bgModel, fgModel, 5, cv2.GC_INIT_WITH_RECT)

# Definite and probable background become 0; definite and probable foreground become 255
is_background = (mask == cv2.GC_BGD) | (mask == cv2.GC_PR_BGD)
output_mask = np.where(is_background, 0, 255).astype("uint8")

# Keep only the image pixels selected by the mask
grabcut_result = cv2.bitwise_and(img, img, mask=output_mask)

# Show the mask and the segmented image until "q" is pressed
for window_title, shown_image in (('Output Mask', output_mask), ('GrabCut Result', grabcut_result)):
    cv2.imshow(window_title, shown_image)
while (cv2.waitKey(1) & 0xFF) != ord("q"):
    pass
cv2.destroyAllWindows()

Exercise 2.4: Select a region of interest in the image (using cv2.selectROI function) for the GrabCut algorithm.

In [69]:
# Let the user draw the bounding box (x, y, width, height) around the object of interest
bb = cv2.selectROI("select the area", img)

# The selection window is no longer needed once the box has been chosen
cv2.destroyWindow("select the area")

if bb[2] == 0 or bb[3] == 0:
    # A cancelled selection yields an empty rectangle, which GrabCut cannot be initialised from
    print("No region selected; skipping GrabCut.")
else:
    # Define image mask for the GrabCut output with same dimensions as the image
    mask = np.zeros(img.shape[:2], np.uint8)

    # Allocate memory for the two arrays that this algorithm internally uses for the segmentation of the foreground and background
    bgModel = np.zeros((1, 65), np.float64)
    fgModel = np.zeros((1, 65), np.float64)

    # Apply GrabCut (5 iterations, initialised from the selected rectangle)
    (mask, bgModel, fgModel) = cv2.grabCut(img, mask, bb, bgModel, fgModel, 5, cv2.GC_INIT_WITH_RECT)

    # All definite background and probable background pixels are set to 0, and all definite foreground and probable foreground pixels are set to 1
    output_mask = np.where((mask == cv2.GC_BGD) | (mask == cv2.GC_PR_BGD), 0, 1)

    # Scale the mask from the range [0, 1] to [0, 255]
    output_mask = (output_mask * 255).astype("uint8")

    # Apply a bitwise AND to the image using the generated mask by GrabCut to obtain the final result
    grabcut_result = cv2.bitwise_and(img, img, mask=output_mask)

    # Show result until "q" is pressed
    cv2.imshow('Output Mask', output_mask)
    cv2.imshow('GrabCut Result', grabcut_result)
    while True:
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    cv2.destroyAllWindows()

Select a ROI and then press SPACE or ENTER button!
Cancel the selection process by pressing c button!
