In [1]:
import os
import pickle
import shutil
import subprocess
from multiprocessing import Pool

import boto3
import cv2
import numpy as np
import pandas as pd
from botocore.exceptions import ClientError

In [2]:
def upload_file(file_name, bucket_name, object_name=None):
    """Upload a local file to an S3 bucket.

    Args:
        file_name: Path of the local file to upload.
        bucket_name: Name of the destination S3 bucket.
        object_name: Destination S3 key. When omitted, the base name of
            ``file_name`` is used.

    Returns:
        True if the upload succeeded, False if S3 reported an error (the
        error message is printed).
    """
    # boto3 requires a key; fall back to the file's own name.
    if object_name is None:
        object_name = os.path.basename(file_name)

    s3_client = boto3.client('s3')
    try:
        s3_client.upload_file(file_name, bucket_name, object_name)
    except (ClientError, boto3.exceptions.S3UploadFailedError) as e:
        # boto3's transfer layer re-raises client errors that occur during an
        # upload as S3UploadFailedError, so both types must be handled here.
        print(str(e))
        return False
    return True


def get_file(bucket, remote_path, local_path):
    """Download one object from a bucket to a local file.

    Args:
        bucket: Object exposing ``download_file(key, filename)`` (for example
            a boto3 ``Bucket`` resource).
        remote_path: Key of the object inside the bucket.
        local_path: Destination path on the local filesystem.

    Returns:
        True if the download succeeded, False if it raised an error (the
        error message is printed).
    """
    try:
        bucket.download_file(remote_path, local_path)
    except Exception as e:
        # Deliberately not a bare ``except``: KeyboardInterrupt and SystemExit
        # must propagate so a long-running batch can still be stopped.
        print(str(e))
        return False
    return True
    

def download_video(remote_path, local_path):
    """Copy a video from S3 to disk with the AWS CLI, unless it already exists.

    Args:
        remote_path: Source location passed to ``aws s3 cp``
            (e.g. ``s3://bucket/key``).
        local_path: Destination path on the local filesystem.

    Returns:
        True if ``local_path`` already existed or the copy exited with status
        0, False otherwise.
    """
    # An existing local file is reused as-is.
    if os.path.exists(local_path):
        return True

    try:
        # An argument list (no shell) keeps paths containing spaces or shell
        # metacharacters intact and prevents them from being interpreted.
        completed = subprocess.run(['aws', 's3', 'cp', remote_path, local_path])
    except OSError as e:
        # Raised when the ``aws`` executable cannot be started at all.
        print(str(e))
        return False
    return completed.returncode == 0


def sample_video(bucket_name, video_path, custom_fps):
    """Download a video from S3, save sampled frames, and upload them zipped.

    The video is fetched into a working directory ``frames_<video_name>``,
    where ``<video_name>`` is the last path component of ``video_path`` up to
    its first dot. Frames are written there as ``img_<8-digit counter>.jpg``,
    the directory is zipped, and the archive is uploaded to the same bucket
    under ``<video_name up to its first '-'>/frames_<video_name>.zip``.
    The working directory and the archive are removed afterwards, whether or
    not the run succeeded.

    Args:
        bucket_name: S3 bucket that holds the video and receives the archive.
        video_path: Key of the video inside the bucket.
        custom_fps: Sampling stride in frames. A frame is saved when its
            index is a non-zero multiple of this value, so frame 0 is never
            saved.

    Raises:
        Exception: If the video cannot be downloaded, the frames cannot be
            zipped, or the archive cannot be uploaded.
    """
    # NOTE(review): the working directory and upload key depend only on the
    # file name, so two keys with the same base name under different prefixes
    # would share them -- confirm that cannot happen for the processed videos.
    video_name = video_path.split('/')[-1].split('.')[0]
    tmp_dir = 'frames_' + video_name
    archive_path = tmp_dir + '.zip'
    os.makedirs(tmp_dir, exist_ok=True)

    try:
        # S3 URIs and keys always use '/', independent of the local OS.
        remote_path = 's3://' + bucket_name + '/' + video_path
        local_path = os.path.join(tmp_dir, video_name + '.mp4')
        if not download_video(remote_path, local_path):
            raise Exception('Unable to download the video ' + video_path)

        vs = cv2.VideoCapture(local_path)
        try:
            frame_id, saved_frames_counter = 0, 0
            while True:
                grabbed, frame = vs.read()
                if not grabbed:
                    break

                if frame_id % custom_fps == 0 and frame_id != 0:
                    frame_name = 'img_' + str(saved_frames_counter).zfill(8) + '.jpg'
                    cv2.imwrite(os.path.join(tmp_dir, frame_name), frame)
                    saved_frames_counter += 1

                frame_id += 1
        finally:
            # Release the capture even if reading or writing a frame fails.
            vs.release()

        # Drop the source video so that only the frames end up in the archive.
        os.remove(local_path)

        try:
            # Creates <tmp_dir>.zip with entries stored under '<tmp_dir>/'.
            shutil.make_archive(tmp_dir, 'zip', base_dir=tmp_dir)
        except Exception as e:
            raise Exception('Unable to zip ' + tmp_dir) from e

        object_name = video_name.split('-')[0] + '/' + archive_path
        if not upload_file(archive_path, bucket_name, object_name=object_name):
            raise Exception('Unable to upload ' + tmp_dir + '.zip')
    finally:
        # Always clean up, so a failed run cannot leak files or leave stale
        # frames that a later run would pick up.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        if os.path.exists(archive_path):
            os.remove(archive_path)

In [3]:
# Name of the S3 bucket that is listed below and passed to sample_video.
QUANTIGO_BUCKET = "quantigo"

# Passed to sample_video as `custom_fps`: one frame is saved every FPS frames.
FPS = 25

In [4]:
# Collect the key of every object stored under the two prefixes, in order.
s3 = boto3.resource('s3')
bucket = s3.Bucket(QUANTIGO_BUCKET)

remote_video_paths = []
for tag in ('left_h', 'right_h'):
    listing = bucket.objects.filter(Prefix=tag + '/')
    remote_video_paths.extend(entry.key for entry in listing)

# Preview the first and last five keys.
remote_video_paths[:5], remote_video_paths[-5:]

(['left_h/clubber_video1-000276-000315.mp4',
  'left_h/clubber_video1-000380-000389.mp4',
  'left_h/clubber_video1-000496-000516.mp4',
  'left_h/clubber_video1-000545-000551.mp4',
  'left_h/clubber_video1-000556-000569.mp4'],
 ['right_h/clubber_video9-003600-003622.mp4',
  'right_h/clubber_video9-003939-003951.mp4',
  'right_h/clubber_video9-004102-004123.mp4',
  'right_h/clubber_video9-004135-004143.mp4',
  'right_h/clubber_video9-004158-004177.mp4'])

In [5]:
# One [bucket, key, stride] argument list per video; starmap unpacks each
# into a sample_video(bucket_name, video_path, custom_fps) call.
args_list = [[QUANTIGO_BUCKET, key, FPS] for key in remote_video_paths]

# Pool() with no arguments starts one worker process per available CPU.
with Pool() as pool:
    pool.starmap(sample_video, args_list)