# Reading an image

Youtube channel: ProgrammingKnowledge

In [1]:
# flags               integer value            description

# cv2.IMREAD_COLOR          1               Loads a color image
# cv2.IMREAD_GRAYSCALE      0               Loads image in grayscale mode
# cv2.IMREAD_UNCHANGED      -1              Loads image such as such including the alpha channel

In [1]:
import cv2

In [17]:
img = cv2.imread("NAO.jpg",0)    #wrong filename will return None but will not give error

# Displaying an image

In [18]:
cv2.imshow('image',img)  #first parameter is name of window
cv2.waitKey(5000)        #to hold the image for 5000 ms onto a window
cv2.destroyAllWindows()
# if we write ...cv2.waitKey(0)....it'll not wait for any amount of seconds
# ...it'll just close when close(top right) is clicked

# Writing an image to a file

In [19]:
cv2.imwrite("NAO_copy.png",img)   #first parameter is file name with extension

True

# Reading video

In [28]:
cap = cv2.VideoCapture(0)   #either file name with extension, or device index of the camera
#  which we want to use is passed as the parameter
#device index in most cases is either 0 or -1;  1 for 2nd camera ,2 for 3rd camera in case of multiple cameras

In [29]:
# while loop to capture frames indefinitely
while True:
    ret, frame = cap.read()   # in ret...true or false will be stored a/c to availability of frame
    
    gray = cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY)  # first argument is the source, second is the color
    cv2.imshow('frame_window',gray) #first parameter is name of window
    
    if cv2.waitKey(1) == ord('q'):
        break
        
cap.release()  #after reading the variable resources need to be released
cv2.destroyAllWindows()
 
#cap.isOpened()    
#while True na likhkr while cap.isOpened() bhi likh skte they... if the device index is wrong or the file passed 
#in VideoCapture().. as parameter does not  exists then the value of cap.isOpened() will be false  


#cap.get()
#takes property id as parameter
#cap.get(cv2.CAP_PROP_FRAME_WIDTH)....gives the width of the frame..similarly for height ; every property has a number 
# associated with it..for width it is 3 and for height it is 4
#there are other properties too

# Saving the video

In [4]:
#VideoWriter()
cap = cv2.VideoCapture(0)
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('output.avi',fourcc,20.0,(640,480))  #first argument: name with which video is to be saved, 
                                                           #2nd argument:FOURCC code
                                                           #3rd: number_of_frames_per_second
                                                           #4th: size
while True:
    ret, frame = cap.read()   
    if ret == True:   #i.e. if frame is available..
        
        out.write(frame)  #out is the instance of VideoWriter() created previously in the program
                          #video is saved to output.avi frame by frame
        gray = cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY) 
        cv2.imshow('frame_window',gray) 
        

        if cv2.waitKey(1) == ord('q'):
            break
    else:
        break
        
cap.release() 
out.release()
cv2.destroyAllWindows()


# Drawing, writing  on images

In [1]:
# import numpy as np
import cv2

img = cv2.imread("NAO.jpg",1)
#to get a black image
# img = np.zeros([500,300,3]) #no of rows,columns,...3 indicates 3 channels B,G,R


img = cv2.line(img,(0,0),(255,255),(0,255,0),5)  #1st parameter: image,  2nd: starting coordinates, 3rd:ending coords,
                                                 #4th:color in BGR format, 5th: thickness of line
img = cv2.arrowedLine(img,(0,255),(255,300),(255,0,0),5)
img = cv2.rectangle(img,(30,30),(100,80),(0,0,255),5)  #top-left and bottom-right coords; last parameter:thickness
                            #if in place of thickness -1 is written the rectangle will be filled with the given color
img = cv2.rectangle(img,(300,100),(600,200),(0,255,0),-1)
img = cv2.circle(img,(400,400),30,(0,255,0),5) #2nd parameter:centre, 3rd:radius, last:thickness

font = cv2.FONT_HERSHEY_SIMPLEX
img = cv2.putText(img,'OpenCV',(10,500),font,4,(255,255,255),10,cv2.LINE_8)#3rd argument:starting point; then font style,font size, 
                                                             #font color; then thickness,linetype
cv2.imshow('image',img)

cv2.waitKey(0)
cv2.destroyAllWindows()

# Setting camera parameters

In [3]:
import cv2 
cap = cv2.VideoCapture(0)
print(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
print(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

cap.set (3,1208)  #cap.get(cv2.CAP_PROP_FRAME_WIDTH)....gives the width of the frame..similarly for height
cap.set(4,720)#  every property has a number  associated with it..for width it is 3 and for height it is 4
#note: camera apne aukaat k hisab se hi resolution lega..aise koi bhi number cap.set() k andr pass krdene se kaam nhi hoga
#mtlb 3000,3000 pass kroge to bhi 1280,720 hi rhega
#2nd thing to note: resolution takes that available value that is closest to the number passed  
#mtlb dhyaan do maine 1208 pass kiya tha pr 1280 h resolution output me
print(cap.get(3))
print(cap.get(4))
while(cap.isOpened()):
    ret,frame = cap.read()
    if ret == True:
        
        gray = cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY)
        cv2.imshow('frame',gray)
        
        if cv2.waitKey(1) == ord('q'):
            break
     
    else:
        break
        
cap.release()
cv2.destroyAllWindows()                    

640.0
480.0
1280.0
720.0


# Showing date, time on videos

In [8]:
import cv2 
import time

cap = cv2.VideoCapture(0)

cap.set (3,1208)
cap.set(4,720)

while(cap.isOpened()):
    ret,frame = cap.read()
    if ret == True:
        
        font = cv2.FONT_HERSHEY_SIMPLEX
        
        cv2.imshow('frame',frame)
        localtime = time.asctime(time.localtime(time.time()))
        frame = cv2.putText(frame,localtime,(10,50),font,1,(0,0,0),2,cv2.LINE_AA)
        cv2.imshow('frame',frame)
        
        if cv2.waitKey(1) == ord('q'):
            break
     
    else:
        break
        
cap.release()
cv2.destroyAllWindows()                    

# Handling mouse events

listing all events in cv2 library

In [11]:
import cv2

events = [i for i in dir(cv2) if 'EVENT' in i] #dir(cv2) shows all the classes and member function inside the cv2 package
print(events)

['EVENT_FLAG_ALTKEY', 'EVENT_FLAG_CTRLKEY', 'EVENT_FLAG_LBUTTON', 'EVENT_FLAG_MBUTTON', 'EVENT_FLAG_RBUTTON', 'EVENT_FLAG_SHIFTKEY', 'EVENT_LBUTTONDBLCLK', 'EVENT_LBUTTONDOWN', 'EVENT_LBUTTONUP', 'EVENT_MBUTTONDBLCLK', 'EVENT_MBUTTONDOWN', 'EVENT_MBUTTONUP', 'EVENT_MOUSEHWHEEL', 'EVENT_MOUSEMOVE', 'EVENT_MOUSEWHEEL', 'EVENT_RBUTTONDBLCLK', 'EVENT_RBUTTONDOWN', 'EVENT_RBUTTONUP']


listening mouse events

example 1: getting the coordinates of where mouse has been clicked

In [44]:
import cv2

def click_event(event, x, y, flags, param): #x,y are coordinates where mouse has been clicked
    if event == cv2.EVENT_LBUTTONDOWN:
        font = cv2.FONT_HERSHEY_SIMPLEX
        strXY = str(x) + ", " + str(y)
        cv2.putText(img,strXY,(x,y),font,0.5,(0,0,0),2)
        cv2.imshow('image',img)
        
    if event == cv2.EVENT_RBUTTONDOWN:
        blue = img[y,x,0]
        green = img[y,x,1]
        red = img[y,x,2]
        font = cv2.FONT_HERSHEY_SIMPLEX
        strBGR = str(blue) + ", " + str(green)+ ", " + str(red)
        cv2.putText(img,strBGR,(x,y),font,0.5,(0,0,255),2)
        cv2.imshow('image',img)

img = cv2.imread('NAO.jpg')
cv2.imshow('image',img)

cv2.setMouseCallback('image',click_event)

cv2.waitKey(0)
cv2.destroyAllWindows()

example 2: drawing points and connecting them using line

In [21]:
import cv2

def click_event(event,x,y,flags,param):
    if event == cv2.EVENT_LBUTTONDOWN:
        cv2.circle(img,(x,y),3,(0,0,255),-1)
        points.append((x,y))
        if len(points) >= 2: # line bnane k liye km se km 2 points hone chahiye
            cv2.line(img,points[-1],points[-2],(255,0,0),5)   #we want to join the last 2 points
        cv2.imshow('image',img)
        
        
img = cv2.imread('NAO.jpg')
cv2.imshow('image',img)
points = []

cv2.setMouseCallback('image',click_event)

cv2.waitKey(0)
cv2.destroyAllWindows()

example 3: read image...click any point on the image...show the color of the point in a second window

In [32]:
import numpy as np
import cv2

def click_event(event, x, y, flags, param):
    if event == cv2.EVENT_LBUTTONDOWN:
        blue = img[x][y][0]  # dono notations to access an element correct h
        green = img[x,y,1]
        red = img[x,y,2]
        
#         x[0,2] = x[0][2] though the second case is more inefficient 
#         as a new temporary array is created after the first index that is subsequently indexed by 2
        
        cv2.circle(img, (x,y), 3, (0,0,255), -1)
        mycolorImage = np.zeros(img.shape,np.uint8)  #datatype np.uint8 hata dene se kaam nhi krta h
        
        mycolorImage[:] = [blue,green,red] # [:]..means fill every point in the image with the [blue,green,red]
        
        cv2.imshow('color',mycolorImage)
        
        
img = cv2.imread('NAO.jpg')
cv2.imshow('image',img)
points = []

cv2.setMouseCallback('image',click_event)

cv2.waitKey(0)
cv2.destroyAllWindows()

IndexError: index 537 is out of bounds for axis 0 with size 480

In [None]:
# doubt....image k lower portions pe click krne se error aata h ...kyun?
# shi color nhi pick krta h kuch points k liye...why?

# cv2.split, cv2.resize, cv2.add, cv2.addWeighted, ROI

In [33]:
import numpy as np
import cv2

img = cv2.imread('NAO.jpg')

print(img.shape)  # a tuple of number of rows,columns, and channels...channels means blue,green,red
print(img.size) # Total number of pixels
print(img.dtype) # Image datatype
b, g, r = cv2.split(img)   #note python k split jaisa nhi h...it's gonna split the image in bgr channels
img = cv2.merge((b,g,r))

cv2.imshow('imshow',img)
cv2.waitKey(0)
cv2.destroyAllWindows()


(639, 480, 3)
920160
uint8


ROI- Region Of Interest

In [45]:
import numpy as np
import cv2

img = cv2.imread('NAO.jpg')

print(img.shape)
print(img.size) 
print(img.dtype)
b, g, r = cv2.split(img)
img = cv2.merge((b,g,r))

item = img[314:413,344:439]
img[66:51,108:86] = item



cv2.imshow('imshow',img)
cv2.waitKey(0)
cv2.destroyAllWindows()


(639, 480, 3)
920160
uint8


ValueError: could not broadcast input array from shape (99,95,3) into shape (0,0,3)

adding  images

inorder to add two images their size need to be same 

In [61]:
import cv2

img = cv2.imread('NAO.jpg')
img = cv2.resize(img,(512,512))

img2 = cv2.imread('hero.jpeg')
img2 = cv2.resize(img2,(512,512))



# res_img = cv2.add(img,img2)
res_img = cv2.addWeighted(img,0.2,img2,0.8,0) #weighted addition....last parameter is the scalar we wanna add

cv2.imshow('image',res_img)
cv2.waitKey(0)
cv2.destroyAllWindows()


# Binding Trackbars to OpenCV

useful when we want to change some values in image dynamically at runtime

In [2]:
import  numpy as np
import cv2


def cmd_trackbar(x):
    pass
    
img = np.zeros((300,512,3),np.uint8)
cv2.namedWindow('image')

cv2.createTrackbar('B','image',0,255,cmd_trackbar) #1st argument: trackbar name, 2nd: window name,3rd: initial value
#(note not min value) at which the trackbar is set,4th:max value of trackbar,
#5th:callback function whenever trackbar value changes
cv2.createTrackbar('G','image',0,255,cmd_trackbar)
cv2.createTrackbar('R','image',0,255,cmd_trackbar)

while(1):
    cv2.imshow('image',img)
    k = cv2.waitKey(1)
    if k == 27:
        break
        
    b = cv2.getTrackbarPos('B','image')
    g = cv2.getTrackbarPos('G','image')
    r = cv2.getTrackbarPos('R','image')
    
    img[:] = [b,g,r]
    
cv2.destroyAllWindows()        

adding a switch to a trackbar

In [None]:
import  numpy as np
import cv2


def cmd_trackbar(x):
    pass
    
img = np.zeros((300,512,3),np.uint8)
cv2.namedWindow('image')

cv2.createTrackbar('B','image',0,255,cmd_trackbar) 
cv2.createTrackbar('G','image',0,255,cmd_trackbar)
cv2.createTrackbar('R','image',0,255,cmd_trackbar)

switch = 'OFF/ON'
cv2.createTrackbar(switch,'image',0,1,cmd_trackbar)

while(1):
    cv2.imshow('image',img)
    k = cv2.waitKey(1)
    if k == 27:
        break
        
    b = cv2.getTrackbarPos('B','image')
    g = cv2.getTrackbarPos('G','image')
    r = cv2.getTrackbarPos('R','image')
    s = cv2.getTrackbarPos(switch,'image')
    
    if s == 0:
        img[:] = 0
    else:
        img[:] = [b,g,r] #when the position of the switch trackbar is not zero only then the changes in other trackbar 
                         #values will be reflected
 
    
    
cv2.destroyAllWindows()        

changing the image to color/grayscale depending on position of trackbar & printing the position of the trackbar on the image 

In [1]:
import cv2


def cmd_trackbar(x):
    pass
    

cv2.namedWindow('image')

cv2.createTrackbar('Position','image',10,400,cmd_trackbar) 


switch = 'color/gray'
cv2.createTrackbar(switch,'image',0,1,cmd_trackbar)

while(1):
    
    img = cv2.imread('NAO.jpg')
    pos = cv2.getTrackbarPos('Position','image')
    font = cv2.FONT_HERSHEY_SIMPLEX
    cv2.putText(img, str(pos), (50,150), font, 6, (0,0,255), 10)
    
    k = cv2.waitKey(1)
    if k == 27:
        break
        
    s = cv2.getTrackbarPos(switch,'image')

    
    if s == 0:
        pass
    else:
        img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY) 
        
    cv2.imshow('image',img)                     
 
    
    
cv2.destroyAllWindows()        

# Object Detection and Object Tracking using HSV color space

HSV: Hue,Saturation,Value

HSV se shades mil skta h BGR se nhi milta tha

Hue corressponds to the color components(base pigment), hence just by selecting a range of Hue you can select any color(0-360)

Saturation is the amount of color(depth of the pigment)(dominance of Hue)(0-100%)

Value is basically the brightness of the color(0-100%)

object detection

In [21]:
import numpy as np
import cv2

def nothing(x):
    pass

cv2.namedWindow('Tracking')
cv2.createTrackbar('LH','Tracking',0,255,nothing)
cv2.createTrackbar('LS','Tracking',0,255,nothing)
cv2.createTrackbar('LV','Tracking',0,255,nothing)

cv2.createTrackbar('UH','Tracking',255,255,nothing)
cv2.createTrackbar('US','Tracking',255,255,nothing)
cv2.createTrackbar('UV','Tracking',255,255,nothing)

while True:
    frame = cv2.imread('balls.jpg')
    
    hsv = cv2.cvtColor(frame,cv2.COLOR_BGR2HSV)
    
    l_h = cv2.getTrackbarPos('LH','Tracking')
    l_s = cv2.getTrackbarPos('LS','Tracking')
    l_v = cv2.getTrackbarPos('LV','Tracking') #lower  #38.5° 76.55% 88.63%  for yellow 
                                              #...online image color picker
                                              #        u_h     l_s    l_v
    u_h = cv2.getTrackbarPos('UH','Tracking')
    u_s = cv2.getTrackbarPos('US','Tracking')
    u_v = cv2.getTrackbarPos('UV','Tracking')#upper
    
    
    l_b = np.array([l_h, l_s, l_v])
    u_b = np.array([u_h, u_s, u_v])
    
    mask = cv2.inRange(hsv,l_b,u_b)
    
    res = cv2.bitwise_and(frame, frame, mask = mask)
    
    cv2.imshow('frame',frame)
    cv2.imshow('mask',mask)
    cv2.imshow('res',res)
    
    key = cv2.waitKey(1)
    
    if key == 27:
        break
        
cv2.destroyAllWindows()        

object tracking is object detection in video

In [17]:
import cv2
import numpy as np

def nothing(x):
    pass

cap = cv2.VideoCapture(0);

cv2.namedWindow("Tracking")
cv2.createTrackbar("LH", "Tracking", 0, 255, nothing)
cv2.createTrackbar("LS", "Tracking", 0, 255, nothing)
cv2.createTrackbar("LV", "Tracking", 0, 255, nothing)
cv2.createTrackbar("UH", "Tracking", 255, 255, nothing)
cv2.createTrackbar("US", "Tracking", 255, 255, nothing)
cv2.createTrackbar("UV", "Tracking", 255, 255, nothing)

while True:
    
    _, frame = cap.read()

    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

    l_h = cv2.getTrackbarPos("LH", "Tracking")
    l_s = cv2.getTrackbarPos("LS", "Tracking")
    l_v = cv2.getTrackbarPos("LV", "Tracking")

    u_h = cv2.getTrackbarPos("UH", "Tracking")
    u_s = cv2.getTrackbarPos("US", "Tracking")
    u_v = cv2.getTrackbarPos("UV", "Tracking")

    l_b = np.array([l_h, l_s, l_v])
    u_b = np.array([u_h, u_s, u_v])

    mask = cv2.inRange(hsv, l_b, u_b)

    res = cv2.bitwise_and(frame, frame, mask=mask)

    cv2.imshow("frame", frame)
    cv2.imshow("mask", mask)
    cv2.imshow("res", res)

    key = cv2.waitKey(1)
    if key == 27:
        break

cap.release()
cv2.destroyAllWindows()

# Simple Image Thresholding

Thresholding is a segmentation technique used for separating an object from its background...each pixel of an image is compared with a predefined threshold value...this comparison forms two groups...pixels having intensity lower than the threshold value and pixels having intensity higher than the threshold value

threshold value is global for all pixels

In [28]:
import cv2
import numpy as np

img = cv2.imread('gradient.png',0)

#ret contains True or False 
ret,thresh1 = cv2.threshold(img,127,255,cv2.THRESH_BINARY) #2nd param:threshold, 3rd:max value,
                                                           #4th:threshold type
_,thresh2 = cv2.threshold(img,127,255,cv2.THRESH_BINARY_INV)
_,thresh3 = cv2.threshold(img,127,255,cv2.THRESH_TRUNC)#upto pixel value=127 pixel value will not
    #change...will remain as that in original image but after that it will become constant at 127
_,thresh4 = cv2.threshold(img,127,255,cv2.THRESH_TOZERO)#when pixel value is less than 127..it'll
#be reduced to zero...else it'll remain the same
_,thresh5 = cv2.threshold(img,127,255,cv2.THRESH_TOZERO_INV)
#note thresh4 inverse hokr thresh5 nhi bnaega...meaning me change h
#agr pixel value 127 se bda h to wo zero hojaega nhi to same rhega

cv2.imshow('Image',img)
cv2.imshow('SIT_BINARY',thresh1)
cv2.imshow('SIT_BINARY_INV',thresh2)
cv2.imshow('SIT_TRUNC',thresh3)
cv2.imshow('SIT_TOZERO',thresh4)
cv2.imshow('SIT_TOZERO_INV',thresh5)

cv2.waitKey(0)
cv2.destroyAllWindows()

# Adaptive Thresholding

threshold is not global for all pixels...it is instead calculated for a smaller region

used when the intensity of all regions in the image is not the same throughout

below program shows what is the problem with Simple Image Thresholding 

In [30]:
import cv2
import numpy as np

img = cv2.imread('sudoku.png',0)
_,th1 = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)

cv2.imshow('Image', img)
cv2.imshow('th1', th1)

cv2.waitKey(0)
cv2.destroyAllWindows()

below program shows how Adaptive thresholding solves the problem

In [34]:
import cv2
import numpy as np

img = cv2.imread('sudoku.png',0)
th2 = cv2.adaptiveThreshold(img,255, cv2.ADAPTIVE_THRESH_MEAN_C,cv2.THRESH_BINARY,11,2) 
#2nd: max value,3rd:adaptive method,4th:threshold type,5th,6th ;see description of function
th3 = cv2.adaptiveThreshold(img,255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2)

cv2.imshow('Image', img)
cv2.imshow('MEAN_C', th2)
cv2.imshow('GAUSSIAN_C',th3)

cv2.waitKey(0)
cv2.destroyAllWindows()

# matplotlib with OpenCV

In [15]:
import cv2
import matplotlib.pyplot as plt

img = cv2.imread('NAO.jpg',-1)
cv2.imshow('image',img)

img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)

%matplotlib 
plt.imshow(img) # to get the matplotlib output in a separate window use %matplotlib 
# plt.xticks([]),plt.yticks([]) #hides the markings on x and y axis
plt.axis('off') #this will do the same trick as above

#OpenCV reads image in BGR format but pyplot reads in RGB format

cv2.waitKey(0)
cv2.destroyAllWindows()

Using matplotlib backend: Qt5Agg


displaying multiple images in 1 matplotlib window

In [22]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

img = cv2.imread('gradient.png',0)


ret,thresh1 = cv2.threshold(img,127,255,cv2.THRESH_BINARY) 
_,thresh2 = cv2.threshold(img,127,255,cv2.THRESH_BINARY_INV)
_,thresh3 = cv2.threshold(img,127,255,cv2.THRESH_TRUNC)
_,thresh4 = cv2.threshold(img,127,255,cv2.THRESH_TOZERO)
_,thresh5 = cv2.threshold(img,127,255,cv2.THRESH_TOZERO_INV)


# cv2.imshow('Image',img)
# cv2.imshow('SIT_BINARY',thresh1)
# cv2.imshow('SIT_BINARY_INV',thresh2)
# cv2.imshow('SIT_TRUNC',thresh3)
# cv2.imshow('SIT_TOZERO',thresh4)
# cv2.imshow('SIT_TOZERO',thresh5)

titles = ['Image','SIT_BINARY','SIT_BINARY_INV','SIT_TRUNC','SIT_TOZERO','SIT_TOZERO_INV']
images = [img,thresh1,thresh2,thresh3,thresh4,thresh5]

for i in range(6):
    plt.subplot(2,3,i+1),plt.imshow(images[i],'gray') #1st:no of rows,2nd:no of cols,
                                                  #3rd:index of image
    plt.title(titles[i])

cv2.waitKey(0)
cv2.destroyAllWindows()

# Morphological Transformations

are operations based on image shape

normally performed on binary images

A binary image is one that consists of pixels that can have one of exactly two colors, usually black and white

A Kernel tells us how to change the value of any given pixel by combining it with different amounts of the neighbouring pixels

1)Dilation

In [29]:
import cv2

import matplotlib.pyplot as plt

img = cv2.imread('balls.jpg',cv2.IMREAD_GRAYSCALE)
_, mask = cv2.threshold(img, 220, 255, cv2.THRESH_BINARY_INV)
#mask is applied for images which are not binary


kernel = np.ones((2,2), np.uint8)
dilation = cv2.dilate(mask,kernel,iterations=2) #in the masked image..there are some black dots
# left on the white portion...to remove them we use dilation...ab bhi thoda bacha h
#..isliye iterations
# the bigger the shape of the kernel the better will be the result
#but there is a problem...if any pixel under the kernel is 1 the result will be 1...
#this increases the size of the white portion...which does not reflect the true size
titles = ['image','mask','dilation']
images = [img,mask,dilation]



for i in range(3):
    plt.subplot(1,3,i+1),plt.imshow(images[i],'gray')
    plt.title(titles[i])
    plt.axis('off')

2)Erosion

In [30]:
import cv2

import matplotlib.pyplot as plt

img = cv2.imread('balls.jpg',cv2.IMREAD_GRAYSCALE)
_, mask = cv2.threshold(img, 220, 255, cv2.THRESH_BINARY_INV)


kernel = np.ones((1,1), np.uint8)
dilation = cv2.dilate(mask,kernel,iterations=2)
erosion = cv2.erode(mask, kernel, iterations=1)

titles = ['image','mask','dilation','erosion']
images = [img,mask,dilation,erosion]
#a pixel in original image either 1 or 0 will be considered as 1 only if all the pixels 
#under the kernel is 1


for i in range(4):
    plt.subplot(2,2,i+1),plt.imshow(images[i],'gray')
    plt.title(titles[i])
    plt.axis('off')

3)Opening, 4)closing, 5)Morphological gradient

In [34]:
import cv2

import matplotlib.pyplot as plt

img = cv2.imread('balls.jpg',cv2.IMREAD_GRAYSCALE)
_, mask = cv2.threshold(img, 220, 255, cv2.THRESH_BINARY_INV)

kernel = np.ones((5,5), np.uint8)
dilation = cv2.dilate(mask,kernel,iterations=2)
erosion = cv2.erode(mask, kernel, iterations=1)
opening = cv2.morphologyEx(mask,cv2.MORPH_OPEN,kernel)
closing = cv2.morphologyEx(mask,cv2.MORPH_CLOSE,kernel)
mg = cv2.morphologyEx(mask,cv2.MORPH_GRADIENT,kernel)


titles = ['image','mask','dilation','erosion','opening','closing','mg']
images = [img,mask,dilation,erosion,opening,closing,mg]
#opening is just another name of erosion followed by dilation
#closing: dilation followed by erosion

for i in range(7):
    plt.subplot(4,2,i+1),plt.imshow(images[i],'gray')
    plt.title(titles[i])
    plt.axis('off')

# Smoothing Images

Homogeneous filter,Gaussian filter,Median filter,Bilateral filter

Homogeneous filter: each output filter is the mean of its kernel neighbours...each pixel has the same weight

median filter replaces each pixel's value with the median of the neighbouring pixels. used in case of 'salt and pepper noise'

Gaussian filter uses different weight kernel in both x and y direction..used to remove high frequency noises

In [14]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

img = cv2.imread('NAO.jpg')
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

kernel = np.ones((5,5), np.float32)/25
dst = cv2.filter2D(img, -1, kernel)

titles = ['image','2D Convolution']
images = [img,dst]

for i in range(2):
    plt.subplot(1,2,i+1), plt.imshow(images[i],'gray')
    plt.title(titles[i])
    plt.axis('off')
    

LPF(low pass filters) help in removing noises, blurring the images
HPF help in finding edges in the images

blurring methods in OpenCV

In [19]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

img = cv2.imread('NAO.jpg')
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)


kernel = np.ones((5,5), np.float32)/25
dst = cv2.filter2D(img, -1, kernel)
blur = cv2.blur(img, (5,5)) #simple averaging
gblur = cv2.GaussianBlur(img, (5,5),0)


titles = ['image','2D Convolution','blur()','GaussianBlur()']
images = [img,dst,blur,gblur]

for i in range(4):
    plt.subplot(2,2,i+1), plt.imshow(images[i],'gray')
    plt.title(titles[i])
    plt.axis('off')


median blur

In [41]:
import cv2
import matplotlib.pyplot as plt

img1 = cv2.imread('Noise_salt_and_pepper.png')
img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2RGB)
median = cv2.medianBlur(img1, 3 ) #here kernel size must be odd except 1

titles = ['image','medianBlur()']
images = [img1,median]

for i in range(2):
    plt.subplot(1,2,i+1), plt.imshow(images[i],'gray')
    plt.title(titles[i])
    plt.axis('off')


bilateral filter

uptill now in the process of smoothening we smoothened the edges too
.Sometimes we want to preserve the edges

In [44]:
import cv2
import matplotlib.pyplot as plt

img1 = cv2.imread('NAO.jpg')
img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2RGB)
blur = cv2.blur(img1, (5,5))
bilateralFilter= cv2.bilateralFilter(img1, 9,75, 75 ) 

titles = ['image','blur()','bilateralFilter()']
images = [img1,blur,bilateralFilter]

for i in range(3):
    plt.subplot(1,3,i+1), plt.imshow(images[i],'gray')
    plt.title(titles[i])
    plt.axis('off')


# Image Gradients and Edge Detection

An image gradient is the directional change in the intensity or color in an image

Laplacian Gradient, Sobel( in x direction), Sobel( in y direction)

In [47]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

img = cv2.imread('sudoku.png',cv2.IMREAD_GRAYSCALE)
lap = cv2.Laplacian(img,cv2.CV_64F,ksize=1) # 2nd parameter: 64 bit float datatype
                                            #3rd: kernel size..only odd nos
lap = np.uint8(np.absolute(lap)) #converting to unsigned int

sobelx = cv2.Sobel(img,cv2.CV_64F,1,0)#3rd param: order of derivative x
sobelx = np.uint8(np.absolute(sobelx)) #4th:order of derivative y
sobely = cv2.Sobel(img,cv2.CV_64F,0,1)
sobely = np.uint8(np.absolute(sobely))

sobelCombined = cv2.bitwise_or(sobelx,sobely) #combining the effect of sobelx and sobely

titles = ['image','Laplacian()','SobelX','SobelY','sobelCombined']
images = [img,lap,sobelx,sobely,sobelCombined]

for i in range(5):
    plt.subplot(3,2,i+1), plt.imshow(images[i],'gray')
    plt.title(titles[i])
    plt.axis('off')
    

# Canny Edge Detection

detects wide range of edges in images

In [50]:
import cv2
import matplotlib.pyplot as plt

img = cv2.imread('sudoku.png',0)
canny = cv2.Canny(img,100, 200)

sobelx = cv2.Sobel(img,cv2.CV_64F,1,0)
sobelx = np.uint8(np.absolute(sobelx))
sobely = cv2.Sobel(img,cv2.CV_64F,0,1)
sobely = np.uint8(np.absolute(sobely))

sobelCombined = cv2.bitwise_or(sobelx,sobely)

titles = ['image','sobelCombined','Canny Edge Detection']
images = [img,sobelCombined,canny]
for i in range(3):
    plt.subplot(1,3,i+1), plt.imshow(images[i],'gray')
    plt.title(titles[i])
    plt.axis('off')
    

# Image Pyramids