4. Devise a method to locate and identify cards in selected frames from video-001 and
video-002. You may want to use one of the matchers you have developed in steps 2 or 3.
Validate and demonstrate the performance of your method.

CODE HAS BEEN TESTED ON GOOGLE COLAB ON PYTHON VERSION 3.7.12

In [None]:
# Mount Google Drive at /content/drive (Colab only).
from google.colab import drive
drive.mount('/content/drive')

**Importing necessary libraries**

In [None]:
import matplotlib.pyplot as plt                                                   #for plotting images 
import numpy as np                                                                #library for mathematical operations            
from PIL import Image                                                             
import glob                                                                       #for reading files
import os                                                                         #for performing os operations
                    
import matplotlib.image as mpimg                                                  
import cv2                                                                        #opencv

from google.colab.patches import cv2_imshow

In [None]:
# Both paths are relative to the notebook's working directory.
# NOTE(review): presumably they resolve under the Drive mount at /content/drive - confirm.
data_dir='drive/My Drive/Image_Analysis/DATA/'                                    #input videos are read from here
results_dir='drive/My Drive/Image_Analysis/RESULTS_3/'                              #the ID model is loaded from here and the output videos are written here

In [None]:
# Report the OpenCV version this notebook is running against.
print('Using OpenCV version ', cv2.__version__)

In [None]:
def read_frames(filename):
    """Read every frame of a video file into memory as RGB images.

    Args:
        filename: Path of the video file, handed to ``cv2.VideoCapture``.

    Returns:
        A list with one image array per decoded frame, in file order. Each
        frame is converted from BGR to RGB before it is stored. The list is
        empty when the file cannot be opened or contains no readable frame.
    """
    video_cap = cv2.VideoCapture(filename)
    frames = []

    try:
        if not video_cap.isOpened():
            # Keep returning an empty list in this case, but say why it is empty.
            print('Could not open ', filename)

        while True:
            success, frame = video_cap.read()
            if not success:
                # read() reports failure once no further frame can be decoded.
                break

            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    finally:
        # Release the capture even if a conversion above raises.
        video_cap.release()

    print('Read ', len(frames), ' frames in total')

    return frames

def write_frames(frames, filename, codec='MP4V', grey='False', fps=24):
    """Write a sequence of equally sized frames to a video file.

    Args:
        frames: Non-empty sequence of image arrays. The shape of the first
            frame determines the output width and height. Frames are passed
            to the writer exactly as given, without any colour conversion.
        filename: Path of the video file to create.
        codec: Four-character codec code, e.g. 'MP4V', 'H264', 'VP08'/'VP09'.
        grey: Unused. Kept only so existing calls that pass it keep working.
        fps: Frame rate of the output video.

    Raises:
        ValueError: If ``frames`` is empty, since the output size is unknown.
        OSError: If the video writer cannot be opened for ``filename``.
    """
    # NOTE(review): read_frames() in this file returns RGB frames; callers
    # presumably need to convert them to BGR before writing - confirm.
    if len(frames) == 0:
        raise ValueError('write_frames() needs at least one frame')

    print('Writing frames to ', filename)
    fourcc = cv2.VideoWriter_fourcc(*codec)
    frame_height, frame_width = frames[0].shape[0], frames[0].shape[1]
    writer = cv2.VideoWriter(filename, fourcc, fps, (frame_width, frame_height), True)

    try:
        if not writer.isOpened():
            # Without this check every write() below would be a silent no-op.
            raise OSError('Could not open video writer for ' + str(filename))

        for frame in frames:
            writer.write(frame)
    finally:
        # Always finalise the file, even if a write fails part-way through.
        writer.release()

**To display video in Notebook**

In [None]:
import io
import base64
from IPython.display import HTML, display

def show_video(filename):
    """Embed a video file in the notebook output as an HTML5 player.

    The whole file is read into memory, base64-encoded and inlined into the
    page as a ``data:video/mp4`` URI, so large files make the notebook heavy.

    Args:
        filename: Path of the video file to display.

    Raises:
        OSError: If the file cannot be opened (e.g. FileNotFoundError).
    """
    # The context manager closes the file handle as soon as it has been read.
    with open(filename, 'rb') as video_file:
        encoded = base64.b64encode(video_file.read())

    display(HTML(data='''<video width="500" height="240" alt="test" controls><source src="data:video/mp4;base64,{0}" type="video/mp4" /> </video>'''.format(encoded.decode('ascii'))))

**Video-001**

In [None]:

# Embed the unprocessed input video in the notebook output.
show_video(data_dir + 'video-001.MOV')

In [None]:
# get properties of video
# Only the container metadata of video-001 is queried here; no frame is read.

video = cv2.VideoCapture(data_dir + 'video-001.MOV')

fps = video.get(cv2.CAP_PROP_FPS)
width = video.get(cv2.CAP_PROP_FRAME_WIDTH)
height = video.get(cv2.CAP_PROP_FRAME_HEIGHT)
frame_count = video.get(cv2.CAP_PROP_FRAME_COUNT)
codec = int(video.get(cv2.CAP_PROP_FOURCC))                                        # cast to int so to_bytes() can be used below

video.release()                                                                    # all values are captured; the handle is no longer needed

print('Frames per second (FPS) ', fps)
print('Width and height ', width, height)
print('Frames count ', frame_count)
print('Codec', codec.to_bytes(4, 'little'))                                        # show the FourCC integer as its four bytes, lowest byte first

**Read frames from video**

In [None]:
#read frames from video
# All frames of video-001 are loaded into the `frames` list (RGB, see read_frames).

frames = read_frames(data_dir + 'video-001.MOV')

print(frames[14].shape)                                                            # shape of one sample frame (index 14)

plt.imshow(frames[14])                                                             # preview the same sample frame
plt.show()

**Implementing localization and identification of card images**: Before localization, each frame is pre-processed. The background of the frame is turned white using the same approach as in task 3 for separating the test card images. The white-background frame is then thresholded into a binary image, and the bounding boxes of the cards are found and localized as in task 1.
For identification, each bounding box is cropped out as a card image and the prediction model obtained in task 3 is run on that crop. The model returns an ID, which is drawn on the frames of output_video_001.avi saved in results_dir.

In [None]:
from keras.models import load_model

# `model` is used by the localization cells below to predict an ID for every cropped card.
model = load_model(results_dir+'model_predict_ID.h5')                               #loading prediction model to find ID of card

In [None]:
import skimage.exposure
from google.colab.patches import cv2_imshow

# Locate every card in every frame of video-001, predict its ID with `model`
# and collect the annotated frames in `frames_localized`.
#
# NOTE(review): `frames` comes from read_frames(), which already converts to
# RGB, yet the BGR2HSV / BGR2GRAY conversion codes are used below. The HSV
# range and the grey threshold are left exactly as they were, because changing
# the conversion codes would presumably require re-tuning them - confirm.

resolution = (256, 256)                 # every card crop is resized to this before prediction
range1 = (20, 80, 80)                   # lower HSV bound of the pixels that are painted white
range2 = (90, 255, 255)                 # upper HSV bound of the pixels that are painted white
thresh = 245                            # grey levels above this are treated as (white) background
min_card_area = 20000                   # contours with a smaller area are discarded
color = (255, 0, 0)                     # colour of the drawn contours and rectangles

frames_localized = []
for frame in frames:
    # 1. Paint everything inside the HSV range white.
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    mask = 255 - cv2.inRange(hsv, range1, range2)       # 0 where the pixel is inside the range
    mask = cv2.GaussianBlur(mask, (0, 0), sigmaX=3, sigmaY=3, borderType=cv2.BORDER_DEFAULT)  # antialias mask
    mask = skimage.exposure.rescale_intensity(mask, in_range=(127.5, 255), out_range=(0, 255))
    whitened = frame.copy()
    whitened[mask == 0] = (255, 255, 255)

    # 2. Binarise the whitened frame and keep only the large contours.
    gray = cv2.cvtColor(whitened, cv2.COLOR_BGR2GRAY)
    # The third positional argument of GaussianBlur is sigmaX, so the numeric
    # value of cv2.BORDER_DEFAULT acts as the sigma here (kept unchanged).
    blurred = cv2.GaussianBlur(gray, (5, 5), cv2.BORDER_DEFAULT)
    fm = cv2.threshold(blurred, thresh, 255, cv2.THRESH_BINARY_INV)[1]
    fm = cv2.convertScaleAbs(fm)
    # [-2] selects the contour list from both the two-value and the
    # three-value return forms of findContours.
    cnt_mod = cv2.findContours(fm, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    contours = [c for c in cnt_mod if cv2.contourArea(c) > min_card_area]

    contours_poly = [cv2.approxPolyDP(c, 3, True) for c in contours]
    boundRect = [cv2.boundingRect(poly) for poly in contours_poly]

    # 3. Crop every bounding box and classify all crops of the frame in a
    #    single predict() call instead of one call per card.
    identity = []
    if boundRect:
        rois = [cv2.resize(frame[y:y + h, x:x + w], resolution) for x, y, w, h in boundRect]
        x_test = np.stack(rois).astype('float') / 255.0                 # normalised batch of crops
        y_pred_test = model.predict(x_test)
        # Plain ints so that the label is drawn as "3" rather than "[3]".
        identity = [int(card_id) for card_id in np.argmax(y_pred_test, axis=1)]

    # 4. Draw contour, bounding box and predicted ID on a copy of the frame.
    result = frame.copy()
    for i, ((x, y, w, h), card_id) in enumerate(zip(boundRect, identity)):
        cv2.drawContours(result, contours_poly, i, color)
        cv2.rectangle(result, (x, y), (x + w, y + h), color, 2)
        cv2.putText(result, str(card_id), (x, y - 10), fontFace=cv2.FONT_HERSHEY_DUPLEX, fontScale=1.0, color=(125, 246, 55), thickness=3)

    frames_localized.append(result)


In [None]:
# Number of annotated frames; the loop above appends one per input frame.
len(frames_localized)

In [None]:
# Preview the annotated version of sample frame 14.
plt.imshow(frames_localized[14])

**output_video_001.avi saved to results_dir**

In [None]:
# Write the annotated frames of video-001 to an XVID-encoded AVI in results_dir.
if not frames_localized:
    raise ValueError('frames_localized is empty; there is nothing to write')

# Take the output size from the frames themselves: the writer expects
# (width, height), while an image array's shape starts with (height, width).
frame_height, frame_width = frames_localized[0].shape[:2]

fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter(results_dir+'output_video_001.avi', fourcc, 30, (frame_width, frame_height))

try:
    if not out.isOpened():
        # Without this check every write() below would be a silent no-op.
        raise OSError('Could not open video writer for ' + results_dir+'output_video_001.avi')

    # The loop variable is not called `frames`, so the list of input frames
    # read earlier is not overwritten by a single image.
    for annotated_frame in frames_localized:
        # Swap the R and B channels back before handing the frame to the writer.
        f = cv2.cvtColor(annotated_frame, cv2.COLOR_RGB2BGR)
        out.write(f)
finally:
    # Finalise the file even if a write fails part-way through.
    out.release()

**Video-002**

Displaying the video in the notebook sometimes crashed the session, so video-002 is not displayed here.

In [None]:
#show_video(data_dir + 'video-002.MOV')

In [None]:
# get properties of video
# Only the container metadata of video-002 is queried here; no frame is read.

video = cv2.VideoCapture(data_dir + 'video-002.MOV')

fps = video.get(cv2.CAP_PROP_FPS)
width = video.get(cv2.CAP_PROP_FRAME_WIDTH)
height = video.get(cv2.CAP_PROP_FRAME_HEIGHT)
frame_count = video.get(cv2.CAP_PROP_FRAME_COUNT)
codec = int(video.get(cv2.CAP_PROP_FOURCC))                                        # cast to int so to_bytes() can be used below

video.release()                                                                    # all values are captured; the handle is no longer needed

print('Frames per second (FPS) ', fps)
print('Width and height ', width, height)
print('Frames count ', frame_count)
print('Codec', codec.to_bytes(4, 'little'))                                        # show the FourCC integer as its four bytes, lowest byte first

In [None]:
# read frames
# This rebinds `frames`: from here on it holds the frames of video-002.
frames = read_frames(data_dir + 'video-002.MOV')

print(frames[100].shape) # shape of frame 100

# NOTE(review): the shape above is printed for frame 100 but frame 200 is shown - confirm this is intended.
plt.imshow(frames[200])
plt.show()

Note: The code below can take 7-10 minutes to run because video-002 has more frames.

In [None]:
import skimage.exposure
from google.colab.patches import cv2_imshow

# Locate every card in every frame of video-002, predict its ID with `model`
# and collect the annotated frames in `frames_localized`.
#
# NOTE(review): `frames` comes from read_frames(), which already converts to
# RGB, yet the BGR2HSV / BGR2GRAY conversion codes are used below. The HSV
# range and the grey threshold are left exactly as they were, because changing
# the conversion codes would presumably require re-tuning them - confirm.

resolution = (256, 256)                 # every card crop is resized to this before prediction
range1 = (20, 80, 80)                   # lower HSV bound of the pixels that are painted white
range2 = (90, 255, 255)                 # upper HSV bound of the pixels that are painted white
thresh = 245                            # grey levels above this are treated as (white) background
min_card_area = 20000                   # contours with a smaller area are discarded
color = (255, 0, 0)                     # colour of the drawn contours and rectangles

frames_localized = []
for frame in frames:
    # 1. Paint everything inside the HSV range white.
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    mask = 255 - cv2.inRange(hsv, range1, range2)       # 0 where the pixel is inside the range
    mask = cv2.GaussianBlur(mask, (0, 0), sigmaX=3, sigmaY=3, borderType=cv2.BORDER_DEFAULT)  # antialias mask
    mask = skimage.exposure.rescale_intensity(mask, in_range=(127.5, 255), out_range=(0, 255))
    whitened = frame.copy()
    whitened[mask == 0] = (255, 255, 255)

    # 2. Binarise the whitened frame and keep only the large contours.
    gray = cv2.cvtColor(whitened, cv2.COLOR_BGR2GRAY)
    # The third positional argument of GaussianBlur is sigmaX, so the numeric
    # value of cv2.BORDER_DEFAULT acts as the sigma here (kept unchanged).
    blurred = cv2.GaussianBlur(gray, (5, 5), cv2.BORDER_DEFAULT)
    fm = cv2.threshold(blurred, thresh, 255, cv2.THRESH_BINARY_INV)[1]
    fm = cv2.convertScaleAbs(fm)
    # [-2] selects the contour list from both the two-value and the
    # three-value return forms of findContours.
    cnt_mod = cv2.findContours(fm, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    contours = [c for c in cnt_mod if cv2.contourArea(c) > min_card_area]

    contours_poly = [cv2.approxPolyDP(c, 3, True) for c in contours]
    boundRect = [cv2.boundingRect(poly) for poly in contours_poly]

    # 3. Crop every bounding box and classify all crops of the frame in a
    #    single predict() call instead of one call per card.
    identity = []
    if boundRect:
        rois = [cv2.resize(frame[y:y + h, x:x + w], resolution) for x, y, w, h in boundRect]
        x_test = np.stack(rois).astype('float') / 255.0                 # normalised batch of crops
        y_pred_test = model.predict(x_test)
        # Plain ints so that the label is drawn as "3" rather than "[3]".
        identity = [int(card_id) for card_id in np.argmax(y_pred_test, axis=1)]

    # 4. Draw contour, bounding box and predicted ID on a copy of the frame.
    result = frame.copy()
    for i, ((x, y, w, h), card_id) in enumerate(zip(boundRect, identity)):
        cv2.drawContours(result, contours_poly, i, color)
        cv2.rectangle(result, (x, y), (x + w, y + h), color, 2)
        cv2.putText(result, str(card_id), (x, y - 10), fontFace=cv2.FONT_HERSHEY_DUPLEX, fontScale=1.0, color=(125, 246, 55), thickness=3)

    frames_localized.append(result)


In [None]:
# Number of annotated frames; the loop above appends one per input frame.
len(frames_localized)

In [None]:
# Preview the annotated version of sample frame 200.
plt.imshow(frames_localized[200])

**Writing output_video_002 to results_dir**

In [None]:
# Write the annotated frames of video-002 to an XVID-encoded AVI in results_dir.
if not frames_localized:
    raise ValueError('frames_localized is empty; there is nothing to write')

# Take the output size from the frames themselves: the writer expects
# (width, height), while an image array's shape starts with (height, width).
frame_height, frame_width = frames_localized[0].shape[:2]

fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter(results_dir+'output_video_002.avi', fourcc, 30, (frame_width, frame_height))

try:
    if not out.isOpened():
        # Without this check every write() below would be a silent no-op.
        raise OSError('Could not open video writer for ' + results_dir+'output_video_002.avi')

    # The loop variable is not called `frames`, so the list of input frames
    # read earlier is not overwritten by a single image.
    for annotated_frame in frames_localized:
        # Swap the R and B channels back before handing the frame to the writer.
        f = cv2.cvtColor(annotated_frame, cv2.COLOR_RGB2BGR)
        out.write(f)
finally:
    # Finalise the file even if a write fails part-way through.
    out.release()