In [1]:
# TRANSLATIONS
# A translation is an affine transform that simply shifts the position of an image

import cv2
import numpy as np

image = cv2.imread('images/signature.jpg')

# image.shape starts with (rows, cols); keep only those two entries
height, width = image.shape[:2]

# Shift amounts: a quarter of the image size along each axis
quarter_width = width / 4
quarter_height = height / 4

# Layout of the translation matrix:
#     | 1 0 Tx |
# T = | 0 1 Ty |
# Tx is the horizontal shift, Ty the vertical shift
T = np.float32([
    [1, 0, quarter_width],
    [0, 1, quarter_height],
])

# warpAffine applies T to the image; the output keeps the input's (width, height)
img_translation = cv2.warpAffine(image, T, (width, height))
cv2.imshow('Translation', img_translation)
cv2.waitKey()
cv2.destroyAllWindows()

In [9]:
# ROTATIONS
# cv2.getRotationMatrix2D((rotation_center_x, rotation_center_y), angle of rotation, scale)

import cv2
import numpy as np

image = cv2.imread('images/input.png')
cv2.imshow('Original image', image)
cv2.waitKey(0)

height, width = image.shape[:2]

# Rotate about the image centre: half the width (x) and half the height (y)
centre = (width / 2, height / 2)

# 90 is the rotation angle, 1 is the scale factor
rotation_matrix = cv2.getRotationMatrix2D(centre, 90, 1)

# The output canvas keeps the original (width, height)
rotated_image = cv2.warpAffine(image, rotation_matrix, (width, height))

cv2.imshow('Rotated Image', rotated_image)
cv2.waitKey()
cv2.destroyAllWindows()

In [6]:
# RE-SIZING, SCALING AND INTERPOLATION
# Interpolation is a method of constructing new data points within the range of a discrete set of known data points
# cv2.INTER_AREA - Good for shrinking or down sampling
# cv2.INTER_NEAREST - Fastest
# cv2.INTER_LINEAR - Good for zooming or up sampling (default)
# INTER_CUBIC - Better
# INTER_LANCZOS4 - Best

import cv2
import numpy as np

image = cv2.imread('images/signature.jpg')

# Let's make our image 3/4 of its original size.
# No interpolation flag is passed here, so cv2.resize uses its default
# (cv2.INTER_LINEAR); the window title is labelled accordingly.
image_scaled = cv2.resize(image, None, fx=0.75, fy=0.75)
cv2.imshow('Scaling - Linear Interpolation', image_scaled)
cv2.waitKey()

# Let's double the size of our image, this time with cubic interpolation
img_scaled = cv2.resize(image, None, fx=2, fy=2, interpolation = cv2.INTER_CUBIC)
cv2.imshow('Scaling - Cubic Interpolation', img_scaled)
cv2.waitKey()

# Let's skew the re-sizing by setting exact output dimensions (width, height)
# instead of scale factors; the aspect ratio is not preserved
img_scaled = cv2.resize(image, (900, 400), interpolation = cv2.INTER_AREA)
cv2.imshow('Scaling - Skewed Size', img_scaled)
cv2.waitKey()

cv2.destroyAllWindows()



In [9]:
# IMAGE PYRAMIDS
import cv2

image = cv2.imread('images/signature.jpg')

# One step down the pyramid, then one step back up from the reduced image
smaller = cv2.pyrDown(image)
larger = cv2.pyrUp(smaller)

# Show all three versions, each in its own window
for window_title, frame in (
    ('Original', image),
    ('Smaller', smaller),
    ('Larger', larger),
):
    cv2.imshow(window_title, frame)

cv2.waitKey(0)
cv2.destroyAllWindows()



In [11]:
# CROPPING IMAGES
# Cropping an image refers to extracting a segment of that image

import cv2
import numpy as np

image = cv2.imread('images/signature.jpg')
height, width = image.shape[:2]

# Top-left corner of the cropping rectangle: 25% in from the top and left
start_row = int(height * .25)
start_col = int(width * .25)

# Bottom-right corner of the cropping rectangle: 75% of the way down and across
end_row = int(height * .75)
end_col = int(width * .75)

# Cropping is plain array slicing: rows first, then columns
row_span = slice(start_row, end_row)
col_span = slice(start_col, end_col)
cropped = image[row_span, col_span]

cv2.imshow("Original Image", image)
cv2.waitKey(0)
cv2.imshow("Cropped Image", cropped)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [1]:
# ARITHMETIC OPERATIONS
# These are simple operations that allow us to directly add to or subtract from the color intensity
# Calculates the per-element operation of two arrays. The overall effect is increasing or decreasing brightness

import cv2
import numpy as np

image = cv2.imread("images/signature.jpg")

# Build a matrix with the same shape as the image in which every value is 75:
# start from a matrix of ones and multiply it by the scalar
brightness_step = 75
ones_like_image = np.ones(image.shape, dtype = "uint8")
M = ones_like_image * brightness_step

# Adding M to the image raises every channel value: the image gets brighter
added = cv2.add(image, M)
cv2.imshow("Added", added)

# Subtracting M lowers every channel value: the image gets darker
subtracted = cv2.subtract(image, M)
cv2.imshow("Subtracted", subtracted)

cv2.waitKey(0)
cv2.destroyAllWindows()

In [2]:
# BITWISE OPERATIONS AND MASKING
# To demonstrate these operations let's create some simple images

import cv2
import numpy as np

# The canvases below have only two dimensions because they are grayscale images.
# For a coloured image we would add a third axis, e.g.
# rectangle = np.zeros((300,300,3),np.uint8)

canvas_shape = (300, 300)

# A filled white square on a black canvas
# (a negative thickness tells OpenCV to fill the shape)
square = np.zeros(canvas_shape, np.uint8)
cv2.rectangle(square, (50, 50), (250, 250), 255, -2)
cv2.imshow("Square", square)
cv2.waitKey(0)

# A filled white half-ellipse: centre (150,150), axes (150,150),
# rotated by 30 degrees, drawn from angle 0 to 180
ellipse = np.zeros(canvas_shape, np.uint8)
cv2.ellipse(ellipse, (150,150), (150,150) ,30, 0, 180, 255, -1)
cv2.imshow("Ellipse", ellipse)
cv2.waitKey(0)

# BITWISE OPERATIONS ON THE TWO SHAPES

# AND: white only where the square and the ellipse overlap
bitwiseAND = cv2.bitwise_and(square, ellipse)
cv2.imshow("AND", bitwiseAND)
cv2.waitKey(0)

# OR: white wherever the square or the ellipse is present
bitwiseOR = cv2.bitwise_or(square, ellipse)
cv2.imshow("OR", bitwiseOR)
cv2.waitKey(0)

# XOR: white where exactly one of the two shapes is present
bitwiseXOR = cv2.bitwise_xor(square, ellipse)
cv2.imshow("XOR", bitwiseXOR)
cv2.waitKey(0)

# NOT: inverts the square image, so everything outside the square is white
bitwiseNOT_SQUARE = cv2.bitwise_not(square)
cv2.imshow("NOT - square", bitwiseNOT_SQUARE)
cv2.waitKey(0)

cv2.destroyAllWindows()



In [13]:
# CONVOLUTIONS & BLURRING
# A CONVOLUTION is a mathematical operation performed on two functions
# producing a third function which is typically a modified version of one
# of the original functions

# Output Image = Image x Function_KernelSize

# In Computer Vision we use kernels to specify the size of the region over which
# we run our manipulating function on the image

# BLURRING
# is an operation where we average the pixels within a region (kernel)

#               |  1  1  1  1  1  |
#               |  1  1  1  1  1  |
# Kernel = 1/25 |  1  1  1  1  1  |
#               |  1  1  1  1  1  |
#               |  1  1  1  1  1  |

# The above is a 5 x 5 kernel.
# We multiply by 1/25 to normalize i.e. sum to 1, otherwise we'd be increasing intensity.
# cv2.filter2D(image, -1, kernel)

import cv2
import numpy as np

def _mean_kernel(side):
    """Return a side x side float32 kernel whose entries all equal 1 / side**2."""
    return np.ones((side, side), np.float32) / (side * side)


image = cv2.imread("images/signature.jpg")
cv2.imshow("Original Image", image)
cv2.waitKey(0)

# 3 x 3 averaging kernel (entries sum to 1)
kernel_3x3 = _mean_kernel(3)

# cv2.filter2D convolves the kernel with the image; -1 keeps the source depth
blurred = cv2.filter2D(image, -1, kernel_3x3)
cv2.imshow('3x3 Kernel Blurring', blurred)
cv2.waitKey(0)

# 7 x 7 averaging kernel: a larger window gives a stronger blur
kernel_7x7 = _mean_kernel(7)

blurred2 = cv2.filter2D(image, -1, kernel_7x7)
cv2.imshow('7x7 Kernel Blurring', blurred2)
cv2.waitKey(0)

cv2.destroyAllWindows()

In [14]:
# OTHER COMMONLY USED BLURRING METHODS IN OPENCV
import cv2 
import numpy as np

image = cv2.imread('images/signature.jpg')

# blur - convolves the image with a normalized box filter: each pixel is
# replaced by the average of the pixels under the box.
# The box size needs to be odd and positive.
box_size = (3, 3)
blur = cv2.blur(image, box_size)
cv2.imshow("Averaging", blur)
cv2.waitKey(0)

# GaussianBlur - like averaging, but the window is Gaussian-weighted, so
# points around the centre count for more than points near the border
gaussian_window = (7, 7)
Gaussian = cv2.GaussianBlur(image, gaussian_window, 0)
cv2.imshow("Gaussian Blurring", Gaussian)
cv2.waitKey(0)

# medianBlur - the central element is replaced with the median of all
# the pixels under the kernel area
median_window = 5
median = cv2.medianBlur(image, median_window)
cv2.imshow("Median Blurring", median)
cv2.waitKey(0)

# bilateralFilter - removes noise while keeping edges sharp (slower).
# It combines a Gaussian filter in space with a second Gaussian that is a
# function of pixel difference, so only pixels with an intensity similar to
# the central pixel take part in the blur. Edges survive because pixels
# across an edge differ strongly in intensity.
bilateral = cv2.bilateralFilter(image, 9, 75, 75)
cv2.imshow('Bilateral Blurring', bilateral)
cv2.waitKey(0)

cv2.destroyAllWindows()


In [15]:
# IMAGE DE-NOISING - NON_LOCAL  MEANS DENOISING
import cv2
import numpy as np

image = cv2.imread('images/signature.jpg')

# The OpenCV non-local means family:
# cv2.fastNlMeansDenoising() - works with a single grayscale image
# cv2.fastNlMeansDenoisingColored() - works with a color image
# cv2.fastNlMeansDenoisingMulti() - works with an image sequence captured in a short period of time (grayscale images)
# cv2.fastNlMeansDenoisingColoredMulti() - same as above, but for color images

# Arguments after the image and the None placeholder:
# the filter strength 'h', then hForColorComponents (set to the same value as h),
# followed by the two window sizes
filter_strength = 6
color_strength = 6
template_window = 7
search_window = 21

dst = cv2.fastNlMeansDenoisingColored(
    image, None, filter_strength, color_strength, template_window, search_window
)

cv2.imshow('Fast Means Donising', dst)
cv2.waitKey(0)

cv2.destroyAllWindows()


In [16]:
# SHARPENING
# Sharpening is the opposite of blurring, it strengthens or emphasizing edges in an image
# Our kernel matrix sums to one, so there is no need to normalize (i.e multiply by a factor to
# retain the same brightness of the original)
#          |  -1  -1  -1  |
# Kernel = |  -1   9  -1  |  
#          |  -1  -1  -1  |

import cv2
import numpy as np

image = cv2.imread('images/signature.jpg')

# Sharpening kernel: -1 everywhere with 9 in the centre.
# Its entries sum to 1, so no normalization factor is needed
border_row = [-1, -1, -1]
centre_row = [-1, 9, -1]
kernel_sharpening = np.array([border_row, centre_row, border_row])

# Convolve the kernel with the input image; -1 keeps the source depth
sharpened = cv2.filter2D(image, -1, kernel_sharpening)
cv2.imshow('ImageSharpening', sharpened)

cv2.waitKey(0)
cv2.destroyAllWindows()

In [2]:
# THRESHOLDING, BINARIZATION & ADAPTIVE THRESHOLDING
# Thresholding is the act of converting an image to a binary form.
#
# cv2.threshold(image, Threshold Value, Max Value, Threshold Type)
#
# Threshold Types:
# cv2.THRESH_BINARY - Most Common
# cv2.THRESH_BINARY_INV - Most Common
# cv2.THRESH_TRUNC
# cv2.THRESH_TOZERO
# cv2.THRESH_TOZERO_INV
#
# NOTE : Image need to be converted to greyscale before thresholding

import cv2
import numpy as np

# Load our image as greyscale
image = cv2.imread('images/signature.jpg',0)
cv2.imshow("Original",image)

# Values below 127 goes to 0 (black), everything above goes to 255 (white))
ret, thresh1 = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)
cv2.imshow('1 Threshold Binary', thresh1)

# Values below 127 go to 255 and values above 127 go to 0 (reverse of above)
ret, thresh2 = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY_INV)
cv2.imshow('2 Threshold Binary Inverse', thresh2)

# Values above 127 are truncated (held) at 127 (the 255 argument is unused)
ret, thresh3 = cv2.threshold(image, 127, 255, cv2.THRESH_TRUNC)
cv2.imshow('3 THRESH TRUNC', thresh3)

# Values below 127 goes to 0 , above 127 are unchanged
ret, thresh4 = cv2.threshold(image, 127, 255, cv2.THRESH_TOZERO)
cv2.imshow('4 THRESH TOZERO', thresh4)

# resever of above, below 127 is unchanged, above 127 goes to 0
ret, thresh5 = cv2.threshold(image, 127, 255, cv2.THRESH_TOZERO_INV)
cv2.imshow('5 THRESH TOZERO_INV', thresh5)

cv2.waitKey(0)
cv2.destroyAllWindows()

In [18]:
# ADAPTIVE THRESHOLDING
# - Simple threshold methods require us to provide the threshold value ourselves
# - Adaptive threshold methods take that uncertainty away

# cv2.adaptiveThreshold(image, Max value, Adaptive type, Threshold Type, Block size, Constant that is subtracted from mean)
# NOTE: Block sizes need to be odd numbers!
#
# Adaptive Threshold Types:
# ADAPTIVE_THRESH_MEAN_C - based on the mean of the neighbourhood of pixels
# ADAPTIVE_THRESH_GAUSSIAN_C - weighted sum of neighbourhood pixels under the Gaussian window
# THRESH_OTSU (uses cv2.threshold function) - Clever algorithm that assumes there are two peaks in the grayscale histogram
# of the image and then tries to find an optimal value to separate these two peaks to find T.

import cv2
import numpy as np

# Load our image as greyscale
image = cv2.imread('images/signature.jpg',0)
cv2.imshow("Original",image)
cv2.waitKey(0)

# Plain global threshold for comparison:
# values below 127 go to 0 (black), everything above goes to 255 (white)
ret, thresh1 = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)
cv2.imshow('Threshold Binary', thresh1)
cv2.waitKey(0)

# It's good practice to blur images as it removes noise.
# Note: from here on `image` holds the blurred version.
image = cv2.GaussianBlur(image, (3,3) , 0)

# Adaptive mean threshold: block size 3 (must be odd), constant 5 subtracted from the mean
thresh = cv2.adaptiveThreshold(image, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 3, 5)
cv2.imshow("Adaptive Mean Thresholding", thresh)
cv2.waitKey(0)

# Otsu's thresholding: the threshold argument (0) is ignored and chosen automatically.
# Show th2, the Otsu result, rather than the earlier adaptive result `thresh`.
_, th2 = cv2.threshold(image,0 ,255 ,cv2.THRESH_BINARY + cv2.THRESH_OTSU)
cv2.imshow("Otsu's Thresholding", th2)
cv2.waitKey(0)

# Otsu's thresholding after Gaussian filtering with a 5x5 window.
# Show th3, the result computed from the blurred image.
blur = cv2.GaussianBlur(image, (5,5), 0)
_, th3 = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
cv2.imshow("Gaussian Otsu's Thresholding", th3)
cv2.waitKey(0)

cv2.destroyAllWindows()


In [6]:
# DILATION AND EROSION
# These are operations in the field of mathematical morphology
# Dilation - Adds pixels to the boundaries of objects in an image
# Erosion  - Removes pixels at the boundaries of object in an image
# Opening  - Erosion followed by dilation
# Closing  - Dilation followed by erosion

# Common StackOverflow question : "Why is dilation and erosion doing the reverse of what I expect?"
# Remember
# Dilation - Adds pixels to the boundaries of objects in an image
# Erosion - Removes pixels at the boundaries of object in an image
import cv2
import numpy as np

image = cv2.imread("images/signature.jpg")

cv2.imshow("Original", image)
cv2.waitKey(0)

# 5 x 5 structuring element of ones, shared by all four operations
kernel_shape = (5, 5)
kernel = np.ones(kernel_shape, np.uint8)

# Erosion, a single pass
erosion = cv2.erode(image, kernel, iterations = 1)
cv2.imshow("Erosion", erosion)
cv2.waitKey(0)

# Dilation, a single pass
dilation = cv2.dilate(image, kernel, iterations = 1)
cv2.imshow("Dilation", dilation)
cv2.waitKey(0)

# Opening (erosion followed by dilation) - good for removing noise
opening = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)
cv2.imshow("Opening", opening)
cv2.waitKey(0)

# Closing (dilation followed by erosion)
closing = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel)
cv2.imshow("Closing", closing)
cv2.waitKey(0)

cv2.destroyAllWindows()

# There are some other less popular morphology operations, see the official OpenCV site

In [5]:
# EDGE DETECTION & IMAGE GRADIENTS

# Edge Detection is a very important area in Computer Vision, especially when
# dealing with contours (you'll learn about these soon)
# Edges can be defined as sudden changes (discontinuities) in an image and they can encode just as much information as pixels

# Edge Detection Algorithms
# There are three types of Edge Detection
# - Sobel - to emphasize vertical or horizontal edges
# - Laplacian - Gets all orientations
# - Canny - Optimal due to low error rate, well defines edges and accurate detection

# Canny Edge Detection Algorithm (developed by John F. Canny in 1986)

# 1. Applies Gaussian blurring
# 2. Finds intensity gradient of the image
# 3. Applies non-maximum suppression (i.e. removes pixels that are not edges)
# 4. Hysteresis - Applies thresholds (i.e. if pixel is within the upper and lower thresholds, it's considered an edge)

import cv2
import numpy as np

# Load the image as greyscale (flag 0)
image = cv2.imread("images/signature.jpg",0)

height, width = image.shape

# Extract Sobel edges.
# cv2.Sobel takes (src, ddepth, dx, dy): dx=1, dy=0 is the derivative along x,
# dx=0, dy=1 the derivative along y. The arguments now match the variable
# names and window titles below.
sobel_x = cv2.Sobel(image, cv2.CV_64F, 1, 0, ksize=5)
sobel_y = cv2.Sobel(image, cv2.CV_64F, 0, 1, ksize=5)

cv2.imshow('Original', image)
cv2.waitKey(0)
cv2.imshow('Sobel X', sobel_x)
cv2.waitKey(0)
cv2.imshow('Sobel Y', sobel_y)
cv2.waitKey(0)

# Combine the two gradient images.
# NOTE(review): both inputs are CV_64F; OpenCV applies bitwise operations to the
# raw bit representation of floating-point arrays, so treat this as a visual
# demo rather than a gradient magnitude - confirm whether that is intended.
sobel_OR = cv2.bitwise_or(sobel_x, sobel_y)
cv2.imshow('sobel_OR', sobel_OR)
cv2.waitKey(0)

laplacian = cv2.Laplacian(image, cv2.CV_64F)
cv2.imshow("Laplacian", laplacian)
cv2.waitKey(0)

# Canny needs two values: threshold1 and threshold2. Any gradient value larger
# than threshold2 is considered to be an edge. Any value below threshold1 is
# considered not to be an edge. Values in between threshold1 and threshold2 are
# classified as edges or non-edges based on how their intensities are
# "connected". In this case, gradient values below 20 are considered non-edges
# whereas values above 170 are considered edges.
canny = cv2.Canny(image, 20, 170)
cv2.imshow("Canny", canny)
cv2.waitKey(0)

cv2.destroyAllWindows()


In [4]:
# PERSPECTIVE AND AFFINE TRANSFORMS
import cv2
import numpy as np
import matplotlib.pyplot as plt

image = cv2.imread("images/signature.jpg")

cv2.imshow("Original", image)
cv2.waitKey(0)

# Output size, using the ratio of an A4 page, 1 : 1.41
out_width, out_height = 420, 594

# Coordinates of the 4 corners in the original image
points_A = np.float32([
    [320, 15],
    [700, 215],
    [85, 610],
    [530, 780],
])

# Coordinates of the 4 corners of the desired output, in the same order
points_B = np.float32([
    [0, 0],
    [out_width, 0],
    [0, out_height],
    [out_width, out_height],
])

# The two sets of four points determine the perspective transformation matrix, M
M = cv2.getPerspectiveTransform(points_A, points_B)

warped = cv2.warpPerspective(image, M, (out_width, out_height))

cv2.imshow("warpPerspective", warped)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [3]:
# In Affine transforms you only need 3 coordinates to obtain the correct transform
import cv2
import numpy as np
import matplotlib.pyplot as plt

image = cv2.imread("images/signature.jpg")
rows, cols, ch = image.shape

cv2.imshow("Original", image)
cv2.waitKey(0)

# Coordinates of 3 reference points in the original image
points_A = np.float32([[320,15], [700,215], [85,610]])

# Coordinates of the same 3 points in the desired output
# We use the ratio of an A4 paper, 1 : 1.41
points_B = np.float32([[0,0], [420,0], [0,594]])

# Use the two sets of three points to compute
# the affine transformation matrix, M
M = cv2.getAffineTransform(points_A, points_B)

# The output canvas keeps the input's (cols, rows) size
warped = cv2.warpAffine(image, M, (cols,rows))

# The window title names the operation actually used (warpAffine)
cv2.imshow("warpAffine", warped)
cv2.waitKey(0)

cv2.destroyAllWindows()