In [1]:
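# Label videos: play a video, interactively mark the frames that show a page, and export the marked frames as PNG images.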

In [2]:
import numpy as np
import cv2
import os
import uuid
import math
from IPython.core.debugger import Tracer

In [3]:
def draw_text(img, select_frame, internal_idx, frame_idx, frame_count, is_pause=False):
    """Return a copy of ``img`` with the labeling overlay drawn on it.

    The overlay consists of:
      * ``<frame_idx>/<frame_count>`` near the bottom-left corner,
      * ``Page <n>`` near the top-right when ``frame_idx`` is in
        ``select_frame`` (``n`` is its 1-based position in that list),
      * the playback speed label selected by ``internal_idx`` at the top-left,
      * when ``is_pause`` is true, a ``Blurry: <value>`` line showing the
        variance of the Laplacian of the frame.

    Args:
        img: image array to annotate; it is not modified.
        select_frame: list of frame indices that have been marked as pages.
        internal_idx: index into the speed labels (0 -> "1X", 1 -> "0.5X",
            2 -> "0.25X").
        frame_idx: index of the frame being shown.
        frame_count: total number of frames, used only for display.
        is_pause: whether to add the blur measurement line.

    Returns:
        The annotated copy of ``img``.
    """
    height, width = img.shape[0], img.shape[1]
    speed_text = ["1X", "0.5X", "0.25X"]
    font = cv2.FONT_HERSHEY_SIMPLEX
    color = (0, 0, 255)

    # Measure sharpness on the untouched frame: once the overlay text has
    # been drawn, its edges would be counted in the Laplacian variance too.
    blurry = cv2.Laplacian(img, cv2.CV_64F).var() if is_pause else None

    img = img.copy()
    cv2.putText(img, str(frame_idx) + '/' + str(frame_count), (10, height - 20), font, 1, color, 2)
    if frame_idx in select_frame:
        page_number = select_frame.index(frame_idx) + 1
        cv2.putText(img, "Page " + str(page_number), (width - 400, 30), font, 1, color, 2)
    cv2.putText(img, speed_text[internal_idx], (10, 30), font, 1, color, 2)
    if is_pause:
        cv2.putText(img, "Blurry: {:.2f}".format(blurry), (10, 80), font, 1, color, 2)
    return img

def _capture_prop(name):
    """Return the VideoCapture property id for ``name`` (e.g. ``"POS_FRAMES"``).

    Looks for ``cv2.CAP_PROP_<name>`` first and falls back to the legacy
    ``cv2.cv.CV_CAP_PROP_<name>`` spelling when the former is missing.
    """
    prop = getattr(cv2, "CAP_PROP_" + name, None)
    if prop is None:
        prop = getattr(cv2.cv, "CV_CAP_PROP_" + name)
    return prop


def _read_preview(cap, preview_size):
    """Read the next frame from ``cap`` and resize it to ``preview_size``.

    Returns None when no frame could be read (e.g. at the end of the video).
    """
    ret, frame = cap.read()
    if not ret or frame is None:
        return None
    return cv2.resize(frame, preview_size, interpolation=cv2.INTER_AREA)


def _wait_for_end_choice():
    """Block until 'q', 'o' or 'c' is pressed and return that character."""
    while True:
        key = cv2.waitKey() & 0xFF
        for choice in 'qoc':
            if key == ord(choice):
                return choice


def _pause_session(cap, frame, frame_idx, select_frame, internal_idx,
                   frame_count, frame_num, preview_size):
    """Process key presses while paused; return when 't' is pressed again.

    'n'/'p' step one frame forward/backward, 'l'/'h' jump ``frame_num`` frames
    forward/backward, '0' marks the shown frame as a page and '1' unmarks it.
    ``select_frame`` is modified in place and kept sorted.
    """
    pos_prop = _capture_prop("POS_FRAMES")
    key = cv2.waitKey() & 0xFF
    while key != ord('t'):
        if key in (ord('n'), ord('p'), ord('h'), ord('l')):
            # The capture position is the index of the next frame to be read,
            # i.e. one past the frame currently on screen.
            target = int(cap.get(pos_prop))
            if key != ord('n'):
                if key == ord('p'):
                    target -= 2
                elif key == ord('h'):
                    target -= frame_num
                else:
                    target = min(frame_count - 1, target + frame_num)
                target = max(0, target)
                cap.set(pos_prop, target)
            preview = _read_preview(cap, preview_size)
            # A failed read (e.g. stepping past the last frame) keeps the
            # current frame on screen instead of passing None to cv2.resize.
            if preview is not None:
                frame, frame_idx = preview, target
                cv2.imshow('frame', draw_text(frame, select_frame, internal_idx,
                                              frame_idx, frame_count, True))
        elif key in (ord('0'), ord('1')):
            if key == ord('0'):
                # Marking the same frame twice must not create a second page.
                if frame_idx not in select_frame:
                    select_frame.append(frame_idx)
                    select_frame.sort()
            elif frame_idx in select_frame:
                # Unmarking a frame that was never marked is a no-op.
                select_frame.remove(frame_idx)
            cv2.imshow('frame', draw_text(frame, select_frame, internal_idx,
                                          frame_idx, frame_count))
        key = cv2.waitKey() & 0xFF


def _run_labeling_session(video_path, preview_size):
    """Play the video and let the user mark page frames.

    Returns the sorted list of marked frame indices, or None when the user
    quit with 'q'. The capture and all windows are released on every exit
    path, including exceptions.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        pos_prop = _capture_prop("POS_FRAMES")
        frame_count = int(cap.get(_capture_prop("FRAME_COUNT")))
        fps = cap.get(_capture_prop("FPS"))
        # Fall back to 24 fps when the reported rate is unusable; a rate of
        # 0 would otherwise divide by zero below.
        if math.isnan(fps) or fps <= 0:
            fps = 24
        internal = 1000.0 / fps
        # waitKey delays in ms for 1X, 0.5X and 0.25X; kept >= 1 because a
        # delay of 0 makes cv2.waitKey block until a key is pressed.
        internal_ary = [max(1, int(internal * factor)) for factor in (1, 2, 4)]
        internal_idx = 0
        # number of frames to go forward or backward when pressing 'l' or 'h'
        frame_num = 200
        select_frame = []

        while cap.isOpened():
            frame_idx = int(cap.get(pos_prop))
            frame = _read_preview(cap, preview_size)

            if frame is None:
                # reached the end of the video: 'q' quits without saving,
                # 'o' confirms saving, 'c' plays the video again
                choice = _wait_for_end_choice()
                if choice == 'q':
                    return None
                if choice == 'o':
                    break
                cap.set(pos_prop, 0)
                continue

            cv2.imshow('frame', draw_text(frame, select_frame, internal_idx,
                                          frame_idx, frame_count))
            key = cv2.waitKey(internal_ary[internal_idx]) & 0xFF

            if key == ord('q'):
                # quit without saving
                return None
            elif key == ord('o'):
                # stop playback and save
                break
            elif key == ord('t'):
                # pause until 't' is pressed again
                _pause_session(cap, frame, frame_idx, select_frame, internal_idx,
                               frame_count, frame_num, preview_size)
            elif key in (ord('h'), ord('l')):
                # jump backward ('h') or forward ('l'); the next loop
                # iteration reads and displays the frame at the new position
                position = int(cap.get(pos_prop))
                if key == ord('h'):
                    target = position - frame_num
                else:
                    target = min(frame_count - 1, position + frame_num)
                cap.set(pos_prop, max(0, target))
            elif key == ord('j'):
                # speed up one level
                internal_idx = max(internal_idx - 1, 0)
            elif key == ord('k'):
                # slow down one level
                internal_idx = min(internal_idx + 1, len(internal_ary) - 1)
        return select_frame
    finally:
        cap.release()
        cv2.destroyAllWindows()


def _save_selected_frames(video_path, select_frame):
    """Write each marked frame to ``pages/<i>.png`` (``i`` = position in the list).

    Frames are written as read from the capture, without resizing.
    """
    if not select_frame:
        return
    # cv2.imwrite needs the target directory to exist already.
    if not os.path.isdir("pages"):
        os.makedirs("pages")
    page_of = dict((frame, page) for page, frame in enumerate(select_frame))
    last_wanted = select_frame[-1]
    pos_prop = _capture_prop("POS_FRAMES")

    cap = cv2.VideoCapture(video_path)
    try:
        while cap.isOpened():
            frame_idx = int(cap.get(pos_prop))
            if frame_idx > last_wanted:
                # every marked frame has been written; skip the rest
                break
            ret, frame = cap.read()
            if not ret:
                break
            if frame_idx in page_of:
                cv2.imwrite("pages/" + str(page_of[frame_idx]) + ".png", frame)
    finally:
        cap.release()


def handle_video(video_path, save_width, save_height):
    """Interactively mark page frames in a video and export them as PNGs.

    The video is played in a window named 'frame' at half of the given size.
    Keys while playing:
        q  quit without saving          o  stop and save the marked frames
        t  pause / resume               h  jump 200 frames backward
        l  jump 200 frames forward      j  speed up one level
        k  slow down one level
    Keys while paused:
        n / p  next / previous frame    h / l  jump 200 frames back / forward
        0  mark the shown frame as a page
        1  unmark the shown frame
    At the end of the video: 'q' quits, 'o' saves, 'c' replays from the start.

    After confirmation the video is read a second time and every marked frame
    is written to ``pages/<i>.png``, where ``i`` is the frame's 0-based
    position among the marked frames. Saved frames are not resized.

    Args:
        video_path: path of the video file to open.
        save_width, save_height: the preview is resized with
            ``dsize=(save_height // 2, save_width // 2)``.
            NOTE(review): cv2.resize takes dsize as (width, height), so these
            two names look swapped - confirm the intended orientation.

    Returns:
        False when the user quit with 'q'; None after the save pass.
    """
    # Floor division keeps the preview size integral on Python 3 as well.
    preview_size = (int(save_height // 2), int(save_width // 2))
    select_frame = _run_labeling_session(video_path, preview_size)
    if select_frame is None:
        return False
    _save_selected_frames(video_path, select_frame)
    return

In [4]:
# Nominal frame dimensions handed to handle_video, which halves them to size
# its preview window.
save_width, save_height = 1080, 1920

# Start the interactive labeling session on the recording; retval is False
# when the session is abandoned with 'q'.
retval = handle_video("IMG_0139.MOV", save_width, save_height)