In [1]:
# Label videos: play each training video, tag its frames interactively, then save fixed-length labelled clips

In [5]:
import numpy as np
import cv2
import os
import uuid
import math
from IPython.core.debugger import Tracer

In [6]:
def draw_text(img, label, set_label, internal_idx, frame_idx, frame_count, is_pause=False):
    """Return a copy of ``img`` with the labelling overlay drawn on it.

    The input image is never modified.

    Args:
        img: image array; ``img.shape[0]`` is used as the height and
            ``img.shape[1]`` as the width.
        label: label stored for this frame (-1 to 3). -1 means "no label"
            and nothing is drawn for it.
        set_label: label currently selected by the user (-1 to 3), shown as
            "Current set: ...".
        internal_idx: playback speed index (0, 1 or 2 -> "1X", "0.5X", "0.25X").
        frame_idx: index of the displayed frame.
        frame_count: total number of frames, shown as ``frame_idx/frame_count``.
        is_pause: when true, a "Blurry" score (variance of the Laplacian) is
            drawn as well.

    Returns:
        The annotated copy of ``img``.
    """
    height = img.shape[0]
    width = img.shape[1]
    # Index 0 corresponds to label -1, hence the "+ 1" in the lookups below.
    label_names = ["none", "normal", "paging", "shake", "else"]
    speed_names = ["1X", "0.5X", "0.25X"]
    font = cv2.FONT_HERSHEY_SIMPLEX
    red = (0, 0, 255)

    # Measure sharpness on the clean frame, *before* any text is drawn:
    # the overlay's hard edges would otherwise inflate the Laplacian variance.
    blurry = None
    if is_pause:
        blurry = cv2.Laplacian(img, cv2.CV_64F).var()

    canvas = img.copy()
    cv2.putText(canvas, str(frame_idx) + '/' + str(frame_count), (10, height - 20), font, 1, red, 2)
    cv2.putText(canvas, "Current set: " + label_names[set_label + 1], (width - 400, 30), font, 1, red, 2)
    cv2.putText(canvas, speed_names[internal_idx], (10, 30), font, 1, red, 2)
    if label != -1:
        cv2.putText(canvas, label_names[label + 1], (width - 200, height - 20), font, 1, red, 2)
    if blurry is not None:
        cv2.putText(canvas, "Blurry: {:.2f}".format(blurry), (10, 80), font, 1, red, 2)
    return canvas

# Keys that change the label applied to frames played from now on
# ('e' clears the selection, i.e. stops labelling).
_LABEL_KEYS = {'0': 0, '1': 1, '2': 2, '3': 3, 'e': -1}
# Number of frames skipped by the 'h' (backward) and 'l' (forward) keys.
_JUMP_FRAMES = 200


def _cv_attr(modern_name, legacy_name):
    """Return an OpenCV attribute by its 3.x+ name, falling back to the 2.4 ``cv2.cv`` name."""
    if hasattr(cv2, modern_name):
        return getattr(cv2, modern_name)
    return getattr(cv2.cv, legacy_name)


def _read_key(delay=0):
    """Wait ``delay`` ms (0 = until a key is pressed) and return the key as a 1-char string."""
    return chr(cv2.waitKey(delay) & 0xFF)


def _grow_labels(labels, frame_idx):
    """Pad ``labels`` with -1 so that ``frame_idx`` is a valid index."""
    missing = frame_idx + 1 - len(labels)
    if missing > 0:
        labels.extend([-1] * missing)


def _pause(cap, pos_prop, frame, frame_idx, labels, current_label, internal_idx, frame_count):
    """Handle keys while playback is paused; return the selected label once 't' resumes."""
    while True:
        key = _read_key()
        if key == 't':
            return current_label

        show_blur = False
        if key in _LABEL_KEYS:
            # Selecting a label only affects frames shown afterwards.
            current_label = _LABEL_KEYS[key]
        elif key in ('n', 'p', 'h', 'l'):
            position = int(cap.get(pos_prop))
            if key == 'n':
                target = position
            else:
                if key == 'p':
                    # The capture already points one past the displayed frame.
                    target = position - 2
                elif key == 'h':
                    target = position - _JUMP_FRAMES
                else:
                    target = min(frame_count - 1, position + _JUMP_FRAMES)
                target = max(0, target)
                cap.set(pos_prop, target)
                # Seeking drops the selection so frames are not labelled by accident.
                current_label = -1
            ok, new_frame = cap.read()
            if not ok:
                # Ran past the end (or the read failed): keep showing the last good frame.
                continue
            frame, frame_idx = new_frame, target
            _grow_labels(labels, frame_idx)
            if key == 'n' and current_label != -1:
                labels[frame_idx] = current_label
            show_blur = True
        else:
            continue

        cv2.imshow('frame', draw_text(frame, labels[frame_idx], current_label,
                                      internal_idx, frame_idx, frame_count, show_blur))


def _label_frames(video_path):
    """Play ``video_path`` in a window and collect one label per frame from key presses.

    Returns the list of per-frame labels (-1 = unlabelled), or None when the
    user pressed 'q' to abandon the video.
    """
    count_prop = _cv_attr('CAP_PROP_FRAME_COUNT', 'CV_CAP_PROP_FRAME_COUNT')
    pos_prop = _cv_attr('CAP_PROP_POS_FRAMES', 'CV_CAP_PROP_POS_FRAMES')
    fps_prop = _cv_attr('CAP_PROP_FPS', 'CV_CAP_PROP_FPS')

    cap = cv2.VideoCapture(video_path)
    try:
        frame_count = max(int(cap.get(count_prop)), 0)
        labels = [-1] * frame_count
        fps = cap.get(fps_prop)
        if math.isnan(fps) or fps <= 0:
            # Missing/invalid FPS metadata: fall back to a sensible default.
            fps = 24
        frame_delay = 1000.0 / fps
        # Per-frame wait (ms) for the 1X, 0.5X and 0.25X speeds; never 0,
        # because waitKey(0) would block until a key is pressed.
        delays = [max(1, int(frame_delay * factor)) for factor in (1, 2, 4)]
        internal_idx = 0
        current_label = -1

        while cap.isOpened():
            frame_idx = int(cap.get(pos_prop))
            ok, frame = cap.read()

            if not ok:
                # End of the video: wait for quit / confirm / replay.
                key = _read_key()
                while key not in ('q', 'o', 'c'):
                    key = _read_key()
                if key == 'q':
                    return None
                if key == 'o':
                    break
                cap.set(pos_prop, 0)
                continue

            # The reported frame count can be too small; never index out of range.
            _grow_labels(labels, frame_idx)
            if current_label != -1:
                labels[frame_idx] = current_label
            cv2.imshow('frame', draw_text(frame, labels[frame_idx], current_label,
                                          internal_idx, frame_idx, frame_count))

            key = _read_key(delays[internal_idx])
            if key == 'q':
                return None
            if key == 'o':
                break
            if key == 't':
                current_label = _pause(cap, pos_prop, frame, frame_idx, labels,
                                       current_label, internal_idx, frame_count)
            elif key == 'h':
                cap.set(pos_prop, max(0, int(cap.get(pos_prop)) - _JUMP_FRAMES))
                current_label = -1
            elif key == 'l':
                forward = min(frame_count - 1, int(cap.get(pos_prop)) + _JUMP_FRAMES)
                cap.set(pos_prop, max(0, forward))
                current_label = -1
            elif key == 'j':
                internal_idx = max(internal_idx - 1, 0)
            elif key == 'k':
                internal_idx = min(internal_idx + 1, len(delays) - 1)
            elif key in _LABEL_KEYS:
                current_label = _LABEL_KEYS[key]
        return labels
    finally:
        cap.release()
        cv2.destroyAllWindows()


def _fill_unlabeled(labels):
    """Replace every -1 in ``labels`` in place.

    A gap inherits the label of the closest labelled frame before it; frames
    before the first labelled one take that first label (0 if nothing was
    labelled at all).
    """
    for i in range(1, len(labels)):
        if labels[i] == -1:
            labels[i] = labels[i - 1]
    first_label = next((value for value in labels if value != -1), 0)
    for i in range(len(labels)):
        if labels[i] == -1:
            labels[i] = first_label


def _save_segments(video_path, data_path, data_length, save_width, save_height, labels):
    """Re-read the video and write it out as resized clips of ``data_length`` frames.

    Each clip ``<uuid>.avi`` gets a sibling ``<uuid>.txt`` holding one digit
    (label + 1) per frame.
    """
    fourcc = _cv_attr('VideoWriter_fourcc', 'CV_FOURCC')(*'XVID')
    cap = cv2.VideoCapture(video_path)
    try:
        label_idx = 0
        while True:
            name = data_path + str(uuid.uuid1()) + '.avi'
            out = cv2.VideoWriter(name, fourcc, 25, (save_width, save_height))
            label_start_idx = label_idx
            written = 0
            try:
                while written < data_length and cap.isOpened():
                    ok, frame = cap.read()
                    if not ok:
                        break
                    frame = cv2.resize(frame, (save_width, save_height),
                                       interpolation=cv2.INTER_AREA)
                    out.write(frame)
                    written += 1
                    label_idx += 1
            finally:
                out.release()

            if written == 0:
                # Nothing left to write (video length was an exact multiple of
                # data_length, or the video is unreadable): drop the empty clip
                # and do not create a label file for it.
                if os.path.exists(name):
                    os.remove(name)
                break

            print(name)
            segment = labels[label_start_idx:label_idx]
            if len(segment) < written:
                # More frames decoded than were labelled: repeat the last label
                # so the label file still has one entry per frame.
                filler = labels[-1] if labels else 0
                segment = segment + [filler] * (written - len(segment))
            # Swap only the extension; a plain replace('avi', 'txt') would also
            # rewrite any "avi" inside the directory part of the path.
            with open(os.path.splitext(name)[0] + '.txt', 'w') as label_file:
                label_file.write(''.join(str(value + 1) for value in segment))

            if written < data_length:
                # Short clip => the video is exhausted.
                break
    finally:
        cap.release()


def handle_video(video_path, data_path, data_length, save_width, save_height):
    """Interactively label a video, then split it into fixed-length labelled clips.

    Keys during playback:
        q       quit without saving (returns False)
        o       stop labelling and save
        t       pause / resume; while paused: n = next frame, p = previous
                frame, h / l = jump, 0-3 / e = change the selected label
        h / l   jump 200 frames backward / forward (clears the selected label)
        j / k   speed up / slow down one level
        0-3     select the label applied to every frame shown from now on
        e       clear the selected label
    At the end of the video: q = quit, o = save, c = play again.

    Frames left unlabelled inherit the label of the closest labelled frame
    before them (leading frames take the first label given, 0 if none).

    Args:
        video_path: path of the video to label.
        data_path: prefix for output files; clip names are built as
            ``data_path + <uuid> + '.avi'``.
        data_length: number of frames per saved clip (must be >= 1).
        save_width: width of the saved clips in pixels.
        save_height: height of the saved clips in pixels.

    Returns:
        False when the user quit with 'q' (nothing is saved), None otherwise.

    Raises:
        ValueError: if ``data_length`` is smaller than 1.
    """
    if data_length < 1:
        # A non-positive clip length would make the split loop run forever.
        raise ValueError("data_length must be >= 1, got {}".format(data_length))

    label_result = _label_frames(video_path)
    if label_result is None:
        return False

    _fill_unlabeled(label_result)
    _save_segments(video_path, data_path, data_length, save_width, save_height, label_result)
    return

In [7]:
# Input videos are searched recursively under train_video_path; the labelled
# clips and their label files are written to train_data_path.
train_video_path = 'videos/train/'
train_data_path = 'datasets/train/'
data_length = 1000  # frames per saved clip
extension = 'avi'
save_width = 124
save_height = 124

# The clips are written with a plain path prefix, so the folder must exist.
if not os.path.isdir(train_data_path):
    os.makedirs(train_data_path)

quit_requested = False
for (dirpath, dirnames, filenames) in os.walk(train_video_path):
    for filename in filenames:
        if not filename.endswith(extension):
            continue
        # Join with dirpath (not train_video_path) so that videos found in
        # sub-directories are opened from where they actually live.
        retval = handle_video(os.path.join(dirpath, filename), train_data_path,
                              data_length, save_width, save_height)
        if retval is False:
            # The user pressed 'q': stop the whole session, not just this folder.
            quit_requested = True
            break
    if quit_requested:
        break

datasets/train/88ddb382-ac06-11e6-bfe2-d017c2cf90d9.avi
datasets/train/89c5abce-ac06-11e6-bfe2-d017c2cf90d9.avi
datasets/train/30616608-ac07-11e6-bfe2-d017c2cf90d9.avi
datasets/train/314bb7f8-ac07-11e6-bfe2-d017c2cf90d9.avi
datasets/train/e9a20898-ac07-11e6-bfe2-d017c2cf90d9.avi
datasets/train/ea87de4a-ac07-11e6-bfe2-d017c2cf90d9.avi
datasets/train/7b0b8dea-ac08-11e6-bfe2-d017c2cf90d9.avi
datasets/train/7beecf4c-ac08-11e6-bfe2-d017c2cf90d9.avi
