In [12]:
import os

'''
CS585_Lab3
CS585 Image and Video Computing
Lab 3
--------------
This program introduces the following concepts:
	a) Reading a stream of images from a webcamera, and displaying the video
	b) Skin color detection
	c) Background differencing
	d) Visualizing motion history
--------------
'''

'\nCS585_Lab3\nCS585 Image and Video Computing\nLab 3\n--------------\nThis program introduces the following concepts:\n\ta) Reading a stream of images from a webcamera, and displaying the video\n\tb) Skin color detection\n\tc) Background differencing\n\td) Visualizing motion history\n--------------\n'

In [13]:
# PART 1: object detection by skin color

# step 1,
# play a video
# Because I am using Google Colab, so I am not able to play video
# with the OpenCV window. Therefore I have to download the video and play it.

# In your case, if you are running OpenCV with a local desktop and
# set up the OpenCV correctly, you should consider the following ways
# to display video or anything captured by your camera.

# https://docs.opencv.org/3.4/dd/d43/tutorial_py_video_display.html

In [14]:
# VIDEO_NAME = "thumbUp"
VIDEO_NAME = "fingers"
# VIDEO_NAME = "cat"
FVideo = f'videos/{VIDEO_NAME}.mp4'
WORKDIR = f"videos/{VIDEO_NAME}/"
os.makedirs(WORKDIR, exist_ok=True)
FFirst = WORKDIR + "first.png"  # file of first image

In [15]:
SHOW = False
CROP = False

THRESHOLD = 145

In [16]:
import cv2

# step 2:
# To detect the cat in the video, it's not convenient to work with multiple frames
# in the beginning.
# Thus we should extract one or the first few frames in the video, and work on them first.

vidcap = cv2.VideoCapture(FVideo)
success,image = vidcap.read()
count = 0

cv2.imwrite(FFirst, image)     # save the frame as JPEG file

True

In [17]:
# I reuse some of the functions implemented in the last lab to save some time
# A more "organized" way to do it is to import it from our lab1.py

def morph_image(image,morph, kernel_size):
  def erosion(image_cv, kernel_size = 4):
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size,kernel_size))
    image_cv_eroded = cv2.erode(image_cv, kernel)
    return image_cv_eroded
  def dilate(image_cv, kernel_size = 4):
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size,kernel_size))
    image_cv_dilated = cv2.dilate(image_cv, kernel)
    return image_cv_dilated
  if morph == 'dilate':
    image_morphed = dilate(image,kernel_size = kernel_size)
  elif morph == 'erode':
    image_morphed = erosion(image,kernel_size = kernel_size)
  else:
    raise NotImplementedError
  return image_morphed

def get_contours(cv_image_thres):
  contours, hierarchy = cv2.findContours(cv_image_thres, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
  return contours
def draw_contours(cv_image, contours,fill='line'):
  if fill=='solid':
    cv_image_out = cv2.drawContours(cv_image, contours, -1,(0,255,0),cv2.FILLED)
  else:
    cv_image_out = cv2.drawContours(cv_image, contours, -1,(0,255,0),1)
  return cv_image_out

def get_largest_contour(contours):
  max_area = 0
  max_i = -1
  for i,contour in enumerate(contours):
    area = cv2.contourArea(contour)
    if area > max_area:
      max_i = i
      max_area = area
  return [contours[max_i]]

In [18]:
# from google.colab.patches import cv2_imshow

# One of the things to pay attention to in computer vision is
# always the size of images you are dealing with, the bigger it is,
# the more time it takes any algorithm to process it,
# so it is always preferable to resize the image to a smaller one first.
# the cost is the lost of "details" in a high resolution image,
# but during most of the cases, this is trivial.

def load_image(image_path):
  image = cv2.imread(image_path)
  return image


def resize_and_crop_image(image):
  rows = image.shape[0]
  cols = image.shape[1]

  # # resize the image size to be 1/10 of the original one
  # cat_image_resized = cv2.resize(image,(cols//10,rows//10))
  # # we can further crop the image to focus on the cat
  # crop_img = cat_image_resized[:cols//10, :cols//10]

  # resize
  ratio = max(max(rows, cols) // 640, 1)
  # print(f"resize ratio = {ratio}.")
  image_resized = cv2.resize(image,(cols//ratio,rows//ratio))
  # print(image_resized.shape)
  if CROP:
    size = min(cols, rows) // ratio
    crop_img = image_resized[:size, :size]
  else:
    crop_img = image_resized
  # print(crop_img.shape)

  return crop_img


# let's test with our first frame of the video
cat_image = load_image(FFirst) # the image size (4096 * 2160, 3)is too large, which is not good for efficiency
print("first image.shape", cat_image.shape)

cat_image_cropped = resize_and_crop_image(cat_image)


if SHOW:
  # check if the image is small enough
  # cv2_imshow(cat_image_cropped)
  cv2.imshow("cropped image", cat_image_cropped)
  cv2.waitKey(0)


first image.shape (568, 320, 3)


In [19]:
# step 3,
# skin detection, since the cat is white, the most straight-forward way is to turn it into binary image with
# a suitable threshold, and after that, find contours
# (and maybe just keep the largest one for better precision).
# to save our time, simply reuse the functions we learned from the last lab section

cat_image = load_image(FFirst) # the image size (4096 * 2160, 3)is too large, which is not good for efficiency
cat_image_cropped = resize_and_crop_image(cat_image)

crop_img_gray = cv2.cvtColor(cat_image_cropped,cv2.COLOR_BGR2GRAY)
_,crop_img_thres = cv2.threshold(crop_img_gray, THRESHOLD, 255, cv2.THRESH_BINARY) 

if SHOW:
  # cv2_imshow(crop_img_thres)
  cv2.imshow("cropped image threshold", cat_image_cropped)
  cv2.waitKey(0)

crop_img_thres = morph_image(crop_img_thres,morph='erode',kernel_size=6)
crop_img_thres = morph_image(crop_img_thres,morph='dilate',kernel_size=6)

# Here if you want the result to be more precise or you need
# the boundary info of the "cat" object,
# you can also find the largest contour like we did
# in our last lab:

# contours = get_contours(crop_img_thres)
# largest_contour = get_largest_contour(contours)
# draw_contours(...), so on and so forth

if SHOW:
  # cv2_imshow(crop_img_gray)
  cv2.imshow("cropped image gray", cat_image_cropped,)
  cv2.waitKey(0)
  
  # cv2_imshow(crop_img_thres)
  cv2.imshow("cropped image threshold", cat_image_cropped)
  cv2.waitKey(0)


In [20]:
# step 4,
# Detect the cat in every frame and output the processed video
# for better readability, let's wrap everything we did before into some functions
# first:

def preprocess_image(captured_image):
  return resize_and_crop_image(captured_image)
  
# def preprocess_image(captured_image):
#   rows = captured_image.shape[0]
#   cols = captured_image.shape[1]
#   cat_image_resized = cv2.resize(captured_image,(cols//10,rows//10))
#   # we can further crop the image to focus on the cat
#   crop_img = cat_image_resized[:cols//10, :cols//10]
#   return crop_img


def draw_detected_cat(crop_img):
  crop_img_gray = cv2.cvtColor(crop_img,cv2.COLOR_BGR2GRAY)
  _,crop_img_thres = cv2.threshold(crop_img_gray, THRESHOLD, 255, cv2.THRESH_BINARY) # 180 and 255 are hyper-parameters
  crop_img_thres = morph_image(crop_img_thres,morph='erode',kernel_size=6)
  crop_img_thres = morph_image(crop_img_thres,morph='dilate',kernel_size=6)
  contours = get_contours(crop_img_thres)
  crop_img = draw_contours(crop_img,contours,fill='solid')
  return crop_img

# output the video with our processed frame
vidcap = cv2.VideoCapture(FVideo)
success, image = vidcap.read()
image_process = preprocess_image(image)
assert success
w, h = image_process.shape[:2]
vidwrite = cv2.VideoWriter(WORKDIR + "detected.mp4", cv2.VideoWriter_fourcc(*'MP4V'), 30, (h, w))

while success:
  # run with the functions we tested before, and write the process frame into the video file
  image_process = preprocess_image(image)
  image_detect = draw_detected_cat(image_process)

  print(image_process.shape)
  print(image_detect.shape)
  print(image.shape)
  
  cv2.imshow("detected", image_detect)
  cv2.waitKey(0)
  print(image_detect.shape)
  break
  
  vidwrite.write(image_detect) # write frame into video
  success,image = vidcap.read() # read frame from video
vidwrite.release()

(568, 320, 3)
(568, 320, 3)
(568, 320, 3)
(568, 320, 3)


In [21]:
# PART 2
# Motion history
# Here we compare the intensity difference between neighboring frames,
# And then acculumate the differences and draw then in a "white" canvas to visualize
# the track of motion, in this case, mostly the motion of cat.

# It is the same idea, but this time you need additional arrays/images to achieve the goal
import numpy as np

# We did thresholding here because we want to get rid of the difference caused by the moving camera
def thresholding(image):
  crop_img_gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
  _,crop_img_thres = cv2.threshold(crop_img_gray, THRESHOLD, 255, cv2.THRESH_BINARY) # 180 and 255 are hyper-parameters
  crop_img_thres = morph_image(crop_img_thres,morph='erode',kernel_size=6)
  crop_img_thres = morph_image(crop_img_thres,morph='dilate',kernel_size=20)
  return crop_img_thres

# use cv2.absdiff or some numpy functions, it is up to you
# but cv2.absdiff seems to be more convenient.
def frame_differencing(image_last,image_cur):
  image_diff = cv2.absdiff(image_last, image_cur)
  image_diff_mask = (image_diff > 0) # probably don't need this but just in case
  return image_diff_mask

# build a "white" canvas (its all black)
def init_canvas(size,mode='color'):
  cols,rows = size
  if mode == 'color':
    canvas = np.zeros((rows,cols,3)).astype(np.uint8)
  elif mode == 'grayscale':
    canvas = np.zeros((rows,cols))
  elif mode == 'binary':
    canvas = np.zeros((rows,cols)).astype(np.uint8)
  else:
    raise NotImplementedError
  return canvas

# same as PART 1
vidcap = cv2.VideoCapture(FVideo)
success, image = vidcap.read()
assert success
w, h = image_process.shape[:2]
vidwrite = cv2.VideoWriter(WORKDIR + "motion.mp4", cv2.VideoWriter_fourcc(*'MP4V'), 30, (h, w))

# keep track of the intensity of pixels of the last frame
image_last_process = preprocess_image(image)
image_last_thres = thresholding(image_process)

# init the canvas
canvas = init_canvas((image_last_thres.shape[1],image_last_thres.shape[0]),mode='color')

# same thing, load the frames of the video one by one
while success:

  # get the processed current frame
  image_cur_process = preprocess_image(image)
  image_cur_thres = thresholding(image_cur_process)

  # compare difference
  image_diff = frame_differencing(image_last_thres,image_cur_thres)
  # change the format of the differencing result to apply to rgb channels
  image_diff_mask = np.repeat(image_diff[:, :, np.newaxis], 3, axis=2)

  # accumulate the difference in the canvas, but mind the data type
  canvas = np.logical_or(canvas.astype(bool),image_diff_mask).astype(np.uint8)*255
  image_last_thres = image_cur_thres

  vidwrite.write(canvas) # write frame into video
  success,image = vidcap.read() # read frame from video

vidwrite.release()

In [22]:
# Further things to do:

# OpenCV provides an access to capture camera input and return the image of each frame,
# You should have a try if that interest you:
# https://docs.opencv.org/3.4/dd/d43/tutorial_py_video_display.html


# This might be very helpful if you are developing mobile apps with OpenCV, say in Android OS:
# https://opencv.org/android/

# We demonstrate the video processing with a very "easy" example which is a white cat, things
# can be more interesting and complicated when trying to detect human, i.e., by their skin.
# Here's an article provides a very straight-forward way to determine the skin pixel by
# a range of colors
# Vezhnevets, Vladimir, Vassili Sazonov, and Alla Andreeva. "A survey on pixel-based skin color detection techniques." Proc. Graphicon. Vol. 3. 2003.
# Kakumanu, Praveen, Sokratis Makrogiannis, and Nikolaos Bourbakis.
# "A survey of skin-color modeling and detection methods." Pattern recognition 40.3 (2007): 1106-1122.
# https://www.graphicon.ru/html/2003/Proceedings/Technical/paper509.pdf
# https://dl.acm.org/doi/abs/10.1016/j.patcog.2006.06.010

# You can try to see if the algorithm detects your face.