In [None]:
import os
import pickle
import subprocess
from multiprocessing import Pool

import boto3
import cv2
import numpy as np
import pandas as pd
from botocore.exceptions import ClientError
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip

In [None]:
def upload_file(file_name, bucket, object_name=None):
    """Upload a file to an S3 bucket

    Failures are printed and reported through the return value instead of
    being raised, so callers can simply branch on the result.

    :param file_name: File to upload
    :param bucket: Bucket to upload to
    :param object_name: S3 object name. If not specified then file_name is used
    :return: True if file was uploaded, else False
    """

    # If S3 object_name was not specified, use file_name
    if object_name is None:
        object_name = file_name

    # Upload the file
    s3_client = boto3.client('s3')
    try:
        s3_client.upload_file(file_name, bucket, object_name)
    except (ClientError, boto3.exceptions.S3UploadFailedError, OSError) as e:
        # The managed transfer behind upload_file re-raises service-side
        # ClientErrors as S3UploadFailedError, which is not a ClientError
        # subclass, so it has to be listed explicitly. OSError covers a
        # missing or unreadable local file.
        print(str(e))
        return False
    return True


def download_video(remote_path, local_path):
    """Copy a video from S3 to disk with the AWS CLI unless it already exists.

    :param remote_path: Source location handed to ``aws s3 cp``
    :param local_path: Destination path on the local filesystem
    :return: True if local_path already existed or the copy exited with
        status 0, else False
    """
    # Anything already present at local_path is reused as-is.
    if os.path.exists(local_path):
        return True

    try:
        # An argument list (no shell) keeps spaces and shell metacharacters in
        # either path from being split or interpreted as extra commands.
        completed = subprocess.run(['aws', 's3', 'cp', remote_path, local_path])
    except OSError as e:
        # Raised when the `aws` executable cannot be started at all.
        print(str(e))
        return False
    return completed.returncode == 0


def fix_tag(tag):
    """Translate a known annotation label into its canonical tag.

    The four recognised spellings collapse to ``'left_h'`` / ``'right_h'``;
    every other value is handed back untouched.

    :param tag: Raw tag value taken from the annotations
    :return: Canonical tag for a recognised spelling, otherwise ``tag`` itself
    """
    canonical_by_alias = {
        'left h visible': 'left_h',
        'right h visible': 'right_h',
        'left h': 'left_h',
        'right h': 'right_h',
    }
    # Unknown values fall through unchanged.
    return canonical_by_alias.get(tag, tag)

    
def extract_and_upload_clip(video_path, start_time, end_time, bucket, tag):
    """Cut one time range out of a local video and upload the clip to S3.

    The clip is written next to the source video as
    ``<video path without extension>-<start>-<end>.mp4`` (both times rounded
    to whole seconds and zero-padded to six digits), uploaded under the key
    ``<tag>/<clip file name>``, and the local clip is deleted after a
    successful upload.

    :param video_path: Path of the local source video
    :param start_time: Clip start in seconds (rounded to the nearest second)
    :param end_time: Clip end in seconds (rounded to the nearest second)
    :param bucket: S3 bucket that receives the clip
    :param tag: Annotation tag, used as the key prefix of the uploaded clip
    :return: True if the clip was uploaded, else False
    """
    start_time = int(round(start_time))
    end_time = int(round(end_time))
    if end_time <= start_time:
        # A non-positive frame count used to defeat the stop condition and
        # copy everything up to the end of the video.
        print('Skipping empty clip range ' + str(start_time) + '-' + str(end_time) + ' for ' + video_path)
        return False

    # splitext only strips the real extension; splitting on '.mp4' truncated
    # the path at the first occurrence of that text anywhere in it.
    output_video_path = (os.path.splitext(video_path)[0]
                         + '-' + str(start_time).zfill(6)
                         + '-' + str(end_time).zfill(6) + '.mp4')

    vs = cv2.VideoCapture(video_path)
    writer = None
    try:
        fps = vs.get(cv2.CAP_PROP_FPS)
        if not vs.isOpened() or not fps > 0:
            print('Unable to read video ' + video_path)
            return False

        height = int(vs.get(cv2.CAP_PROP_FRAME_HEIGHT))
        width = int(vs.get(cv2.CAP_PROP_FRAME_WIDTH))
        # NOTE(review): the 'XVID' fourcc is kept from the original; confirm the
        # local OpenCV/FFmpeg build accepts it for an .mp4 container.
        writer = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'XVID'), fps, (width, height), True)
        if not writer.isOpened():
            print('Unable to open clip for writing ' + output_video_path)
            return False

        # Use the unrounded frame rate: rounding it first (e.g. 29.97 -> 30)
        # makes the seek position drift further off the longer the video is.
        start_frame = int(round(start_time * fps))
        end_frame = int(round(end_time * fps))

        vs.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
        for _ in range(end_frame - start_frame):
            grabbed, frame = vs.read()
            if not grabbed:
                # Source ended before the requested end time.
                break
            writer.write(frame)
    finally:
        # Release both handles even if reading or writing raised.
        if writer is not None:
            writer.release()
        vs.release()

    object_name = os.path.join(tag, os.path.basename(output_video_path))
    if not upload_file(file_name=output_video_path,
                       bucket=bucket,
                       object_name=object_name):
        # The local clip is left in place when the upload fails.
        return False
    os.remove(output_video_path)
    return True

In [None]:
# S3 location of the source videos; '<video>.mp4' is appended to it for download.
VIDEOS_BUCKET = 's3://bf-editor-videos/clubbertv'
# S3 bucket that receives the extracted clips.
QUANTIGO_BUCKET = 'quantigo'
# One sub-directory per video, each holding that video's annotation CSV.
ANNOTATIONS_DIR = '../annotations/'
# Local directory the source videos are downloaded into.
VIDEOS_DIR = '../raw_videos'
# Keep directories only (each entry is later listed as a directory) and sort
# them so the processing order does not depend on os.listdir ordering.
videos = sorted(d for d in os.listdir(ANNOTATIONS_DIR)
                if not d.startswith('.')
                and os.path.isdir(os.path.join(ANNOTATIONS_DIR, d)))
videos

In [None]:
# For every annotated video: load its annotations, download the video, cut and
# upload one clip per 'left_h' / 'right_h' annotation, then delete the local video.
for video in videos:

    # read annotations for the current video
    video_ann_dir = os.path.join(ANNOTATIONS_DIR, video)
    # sorted() makes the choice deterministic when several CSVs are present
    csv_files = sorted(f for f in os.listdir(video_ann_dir) if f.endswith('.csv'))
    if not csv_files:
        raise FileNotFoundError('No annotation CSV found in ' + video_ann_dir)
    ann_df = pd.read_csv(os.path.join(video_ann_dir, csv_files[0]))
    ann_df['tag'] = ann_df['tag'].map(fix_tag)
    ann_df = ann_df[ann_df['tag'].isin(['left_h', 'right_h'])].reset_index(drop=True)

    # nothing to extract: skip the download entirely
    if ann_df.empty:
        print(video, 'SKIPPED (no left_h / right_h annotations)')
        continue

    # download the current video
    remote_path = os.path.join(VIDEOS_BUCKET, video + '.mp4')
    local_path = os.path.join(VIDEOS_DIR, video + '.mp4')
    if not download_video(remote_path=remote_path, local_path=local_path):
        raise Exception('Unable to download the video ' + remote_path)

    # extract and upload clips
    args_list = [(local_path, start_time, end_time, QUANTIGO_BUCKET, tag)
                 for start_time, end_time, tag in zip(ann_df['start_time'],
                                                      ann_df['end_time'],
                                                      ann_df['tag'])]
    # NOTE(review): the worker function is defined in this notebook, so the pool
    # presumably relies on a fork-based start method - confirm on macOS/Windows.
    with Pool() as pool:
        results = pool.starmap(extract_and_upload_clip, args_list)

    os.remove(local_path)

    # surface clips whose extraction or upload reported failure
    failed = [(args[1], args[2]) for args, ok in zip(args_list, results) if not ok]
    if failed:
        print(video, 'DONE with', len(failed), 'of', len(args_list),
              'clips failed (start_time, end_time):', failed)
    else:
        print(video, 'DONE')