In [1]:
!pip install opencv-contrib-python

Collecting opencv-contrib-python
  Using cached opencv_contrib_python-4.6.0.66-cp36-abi3-win_amd64.whl (42.5 MB)
Installing collected packages: opencv-contrib-python
Successfully installed opencv-contrib-python-4.6.0.66


In [1]:
import cv2
import matplotlib.pyplot as plt
import numpy as np

### Reading Images

In [2]:
# cv2.imread() does not raise when the file is missing or unreadable; it
# returns None, which would otherwise only surface as a confusing error in a
# later cell. Fail here with a clear message instead.
img = cv2.imread('Cat.jpg')
if img is None:
    raise FileNotFoundError("Could not read image 'Cat.jpg'")

cv2.IMREAD_COLOR- To load a color image neglecting existing transparency (default flag)

cv2.IMREAD_GRAYSCALE- To load a grayscale image

cv2.IMREAD_UNCHANGED- To load an image including an alpha channel

### Displaying Images in Python

In [3]:
# Show the image in a window titled 'image'
cv2.imshow('image', img)

# Block until a key is pressed while the window has focus
cv2.waitKey(0)

# Close every OpenCV window that is still open
cv2.destroyAllWindows()

### Saving image

In [5]:
# Save the image. The imwrite() call must actually run, otherwise `status`
# is undefined and the print below raises NameError on a fresh kernel.
# An absolute destination can be used instead, e.g.
# status = cv2.imwrite(r'D:\softroniics\Data science\Computer vision\dog.jpeg', img)
status = cv2.imwrite('dog2.jpeg', img)

# imwrite() reports whether the file was written
print("Image written sucess? : ", status)

Image written sucess? :  True


# What is a pixel?

All images consist of pixels which are the raw building blocks of images. Images are made of pixels in a grid. A 640 x 480 image has 640 columns (the width) and 480 rows (the height). There are 640 * 480 = 307200 pixels in an image with those dimensions.

Each pixel in a grayscale image has a value representing the shade of gray. In OpenCV, there are 256 shades of gray — from 0 to 255. So a grayscale image would have a grayscale value associated with each pixel.

Pixels in a color image have additional information. There are several color spaces. For simplicity let’s only consider the RGB color space.

In OpenCV, each pixel of a color image in the RGB (Red, Green, Blue) color space is a 3-tuple. Note that OpenCV stores the channels in reverse order: (B, G, R).


Each value in the BGR 3-tuple has a range of [0, 255] . How many color possibilities are there for each pixel in an RGB image in OpenCV? That’s easy: 256 * 256 * 256 = 16777216 .

### Vector images

### Converting image to greyscale

In [7]:
# Convert the BGR colour image to a single-channel grayscale image
imgGray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Display the result until a key is pressed, then close the window
cv2.imshow("Gray Image", imgGray)
cv2.waitKey(0)
cv2.destroyAllWindows()

### Edge detection

Edge detection is an image processing technique used for finding the boundaries of objects within images. Here we will use a popular edge detection algorithm Canny Edge Detection, developed by John F. Canny. In OpenCV, we have Canny() method to implement this algorithm. Here is the syntax:


edges = cv2.Canny(img, minVal, maxVal, apertureSize, L2gradient)  


In [9]:
# Run Canny edge detection; 150 and 200 are the minVal / maxVal thresholds
# from the syntax shown above
imgCanny = cv2.Canny(img, 150, 200)

# Display the edge map until a key is pressed, then close the window
cv2.imshow("Canny Image", imgCanny)
cv2.waitKey(0)
cv2.destroyAllWindows()

### Cropping image

In [11]:
# Size of the output (cropped) image
width = 250
height = 350

# Four corner points of the region in the source image ...
point1 = np.array(
    [[111, 219], [287, 188], [154, 482], [352, 440]],
    dtype=np.float32,
)
# ... and the corners of the output image they are mapped onto
point2 = np.array(
    [[0, 0], [width, 0], [0, height], [width, height]],
    dtype=np.float32,
)

# Perspective transform taking point1 onto point2, applied to the image
matrix = cv2.getPerspectiveTransform(point1, point2)
cropped = cv2.warpPerspective(img, matrix, (width, height))

#cv2.imshow("Image", img)
cv2.imshow("Output", cropped)
cv2.waitKey(0)
cv2.destroyAllWindows()

### Access Image properties

In [8]:
# shape holds (rows, columns, channels) for a colour image
print(img.shape)

# The first two entries are the height and the width of the image
h = img.shape[0]
w = img.shape[1]

# Report both values
print("Height = {},  Width = {}".format(h, w))


(1199, 1200, 3)
Height = 1199,  Width = 1200


In [9]:
# Total number of array elements (rows * columns * channels)
print(img.size)

4316400


In [10]:
# Data type used to store each channel value
print(img.dtype)

uint8


### Extracting the RGB values of a pixel

In [16]:
# Read the pixel at row 100, column 100 (an arbitrary choice).
# The channel values come back in B, G, R order.
B, G, R = img[100, 100]

# Show the three channel values
print("R = {}, G = {}, B = {}".format(R, G, B))

# A third index selects one channel directly:
# 0 -> blue, 1 -> green, 2 -> red
B = img[100, 100, 0]
print("B = {}".format(B))

G = img[100, 100, 1]
print("G = {}".format(G))

R = img[100, 100, 2]
print("R = {}".format(R))

R = 236, G = 230, B = 214
B = 214
G = 230
R = 236


### Access pixel values and modify them

In [18]:
# All channel values of the pixel at row 100, column 100
px = img[100, 100]
print(px)

[214 230 236]


In [19]:
# accessing only blue pixel
blue = img[100,100,0]
print( blue )

214


In [20]:
# A pixel is modified by overwriting it with a new value;
# first define the replacement value
px = [255, 255, 255]
print(px)

[255, 255, 255]


In [22]:
# Overwrite the pixel at row 100, column 100 in place, then read it back
img[100, 100] = px
newpx = img[100, 100]
print(newpx)

[255 255 255]


In [24]:
# Show the image with the modified pixel
cv2.imshow('image', img)

# Block until a key is pressed, then close the window
cv2.waitKey(0)
cv2.destroyAllWindows()

### Extracting the Region of Interest (ROI)

In [None]:
# The region of interest is obtained by slicing the pixel array:
# rows 100..499 and columns 200..699
roi = img[100:500, 200:700]

# Display the extracted region until a key is pressed
cv2.imshow('image', roi)
cv2.waitKey(0)
cv2.destroyAllWindows()

### Resizing the Image

In [28]:
# resize() is called with the image and the target dimensions
resize = cv2.resize(img, (800, 800))

# Display the resized image until a key is pressed
cv2.imshow('image', resize)
cv2.waitKey(0)
cv2.destroyAllWindows()


In [32]:
# Shape of the original image, for comparison with the resize target
print(img.shape)

(1199, 1200, 3)


The problem with this approach is that the aspect ratio of the image is not maintained. So we need to do some extra work in order to maintain a proper aspect ratio.

In [33]:
# Take the dimensions from the image itself instead of hard-coding them:
# img.shape is (height, width, channels), so for the (1199, 1200, 3) image
# the height is 1199 and the width is 1200.
h, w = img.shape[:2]

# Scale factor that brings the width to 800 pixels
ratio = 800 / w

# Target size as a (width, height) tuple, the order cv2.resize() expects;
# the height is scaled by the same factor to keep the aspect ratio
dim = (800, int(h * ratio))

# Resizing the image
resize_aspect = cv2.resize(img, dim)

cv2.imshow('image', resize_aspect)
cv2.waitKey(0)
cv2.destroyAllWindows()


### Rotating the Image

In [34]:
# Read the dimensions from the image so this cell does not depend on values
# of `w` / `h` left behind by an earlier cell.
# img.shape is (height, width, channels).
h, w = img.shape[:2]

# Centre of the image as an (x, y) point
center = (w // 2, h // 2)

# Rotation matrix about the centre: positive angles rotate counter-clockwise
# in OpenCV, so -45 rotates clockwise; 1.0 keeps the original scale
matrix = cv2.getRotationMatrix2D(center, -45, 1.0)

# Apply the affine transformation; the output size is given as (width, height)
rotated = cv2.warpAffine(img, matrix, (w, h))

cv2.imshow('image', rotated)
cv2.waitKey(0)
cv2.destroyAllWindows()


### Drawing a Rectangle

It takes in 5 arguments –

. Image

. Top-left corner co-ordinates

. Bottom-right corner co-ordinates

. Color (in BGR format)

. Line width

In [35]:
# rectangle() draws directly onto the array it is given,
# so work on a copy to keep the original image untouched.
output = img.copy()

# Two opposite corners, a BGR colour and a line width describe the rectangle.
rectangle = cv2.rectangle(
    output,
    (1000, 850),
    (200, 200),
    (255, 0, 0),
    2,
)

cv2.imshow('image', rectangle)
cv2.waitKey(0)
cv2.destroyAllWindows()


### Drawing circle

cv2.circle(image, center_coordinates, radius, color, thickness)


In [20]:
# Draw on a copy so the original image stays unchanged
output = img.copy()

# Centre (600, 500), radius 400, BGR colour (255, 0, 0), line thickness 2
circle = cv2.circle(output, (600, 500), 400, (255, 0, 0), 2)

cv2.imshow('image', circle)
cv2.waitKey(0)
cv2.destroyAllWindows()

### Drawing Polylines

cv2.polylines(image, arr, is_closed, color, thickness)

In [21]:
# Draw on a copy so the original image stays unchanged
output = img.copy()

# Vertices of the polygon as 32-bit integer (x, y) points
pts = np.array(
    [[100, 50], [200, 300], [700, 200], [500, 100]],
    dtype=np.int32,
)
# pts = pts.reshape((-1,1,2))

# True closes the shape; colour is given in BGR, line thickness is 3
poly = cv2.polylines(output, [pts], True, (0, 255, 255), 3)

cv2.imshow('image', poly)
cv2.waitKey(0)
cv2.destroyAllWindows()

### Displaying text

It is also an in-place operation. It takes in 7 arguments –

    .Image
    .Text to be displayed
    .Bottom-left corner co-ordinates, from where the text should start
    .Font
    .Font size
    .Color (BGR format)
    .Line width

In [22]:
# Work on a copy because putText() draws onto the array it receives
output = img.copy()

# Arguments: image, text, bottom-left start point, font,
# font size, BGR colour, line width
text = cv2.putText(
    output,
    'OpenCV Demo',
    (100, 550),
    cv2.FONT_HERSHEY_SIMPLEX,
    4,
    (255, 0, 0),
    2,
)

# `output` was modified in place, so it already contains the text
cv2.imshow('image', output)
cv2.waitKey(0)
cv2.destroyAllWindows()

### Thresholding

Image thresholding is an important intermediary step for image processing pipelines. Thresholding can help us to remove lighter or darker regions and contours of images.

In [1]:
imgGray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Inverse binary threshold: every pixel with a value <= 225 is set to 255
# (white; foreground) and every pixel with a value > 225 is set to 0
# (black; background), thereby segmenting the image.
# cv2.threshold() returns a (retval, image) pair; [1] keeps only the image.
thresh = cv2.threshold(imgGray, 225, 255, cv2.THRESH_BINARY_INV)[1]
cv2.imshow("Thresh", thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()

NameError: name 'cv2' is not defined

### PLAYING VIDEO USING VIDEOCAPTURE() FUNCTION

In [27]:
# importing libraries
import cv2
import numpy as np

# Create a VideoCapture object and read from input file
cap = cv2.VideoCapture('bunny.mp4')

# try/finally guarantees the capture and the window are released even when
# the loop raises or the kernel is interrupted mid-playback.
try:
    # Check if the video file opened successfully
    if not cap.isOpened():
        print("Error opening video file")

    # Read until video is completed
    while cap.isOpened():
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret:
            # No frame returned: end of the video (or a read failure)
            break

        # Display the resulting frame
        cv2.imshow('Frame', frame)

        # Press Q on keyboard to exit
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break
finally:
    # When everything is done, release the video capture object
    cap.release()

    # Closes all the frames
    cv2.destroyAllWindows()


## Capture Video from Camera

In [12]:
import cv2
import numpy as np

# Create a VideoCapture object for the default camera (device 0)
cap = cv2.VideoCapture(0)

# try/finally guarantees the camera and the window are released even when
# the loop raises or the kernel is interrupted.
try:
    # Check if camera opened successfully
    if not cap.isOpened():
        print("Unable to read camera feed")

    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame could be read: stop
            break

        # Display the resulting frame
        cv2.imshow('frame', frame)

        # Press Q on keyboard to stop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # When everything is done, release the video capture object
    cap.release()

    # Closes all the frames
    cv2.destroyAllWindows()

In [None]:
import numpy as np
import cv2

cap = cv2.VideoCapture(0)

# try/finally guarantees the camera and the window are released even when
# the loop raises or the kernel is interrupted.
try:
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret:
            # read() failed (camera unavailable or stream ended); without this
            # check cvtColor() below would be called on an invalid frame
            print("Unable to read camera feed")
            break

        # Our operations on the frame come here
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Display the resulting frame
        cv2.imshow('frame', gray)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # When everything is done, release the capture
    cap.release()
    cv2.destroyAllWindows()

## Saving a Video

The cv2.VideoWriter class is used to save a video to a file. First, we need to create a VideoWriter object, specifying the output file name, the FourCC code, the number of frames per second (fps), and the frame size. Each captured frame is then written to the file with the object's write() method.

FourCC is a 4-byte code used to identify the video codec. The example is given below for saving the video.

In [14]:
import numpy as np
import cv2

cap = cv2.VideoCapture(0)

# Use the frame size the camera actually delivers: frames passed to
# VideoWriter.write() must match the size given when the writer is created,
# and a hard-coded (640, 480) is not guaranteed to match every camera.
frame_size = (
    int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
    int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
)

# Define the codec and create VideoWriter object (20 frames per second)
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('output.avi', fourcc, 20.0, frame_size)

# try/finally guarantees the camera, the output file and the window are
# released even when the loop raises or the kernel is interrupted.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame could be read: stop recording
            break

        # Write the frame to the output file
        out.write(frame)

        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release everything when the job is finished
    cap.release()
    out.release()
    cv2.destroyAllWindows()

In [2]:
import cv2
import numpy as np

# Create a VideoCapture object
cap = cv2.VideoCapture(0)

# Check if camera opened successfully
if not cap.isOpened():
    print("Unable to read camera feed")

# Default resolutions of the frame are obtained. The default resolutions are
# system dependent. We convert the resolutions from float to integer.
# (CAP_PROP_FRAME_WIDTH / CAP_PROP_FRAME_HEIGHT are the named forms of the
# property ids 3 and 4.)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the codec and create VideoWriter object. The output is stored in
# the 'outpy.avi' file at 10 frames per second.
out = cv2.VideoWriter(
    'outpy.avi',
    cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
    10,
    (frame_width, frame_height),
)

# try/finally guarantees the camera, the output file and the window are
# released even when the loop raises or the kernel is interrupted.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame could be read: stop recording
            break

        # Write the frame into the file 'outpy.avi'; without this call the
        # file is created but contains no frames
        out.write(frame)

        # Display the resulting frame
        cv2.imshow('frame', frame)

        # Press Q on keyboard to stop recording
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # When everything is done, release the video capture and video write objects
    cap.release()
    out.release()

    # Closes all the frames
    cv2.destroyAllWindows()