In [82]:
import subprocess
import json
import pycamhd.lazycache as pycamhd
import tempfile

# Path of the frame_stats executable that frame_stats() launches as a subprocess.
frame_stats_path = "../build-Debug/bin/frame_stats"

# Movie to analyse; used both for the metadata lookup and as the tool's input.
mov_path = "/RS03ASHS/PN03B/06-CAMHDA301/2016/01/01/CAMHDA301-20160101T000000Z.mov"

# Output location = metadata_repo + data_filename (plain string concatenation,
# which is why data_filename starts with "/").
metadata_repo = "/tmp"
data_filename = "/CAMHDA301-20160101T000000Z_optical_flow.json"

# Alternative output location, kept for reference:
# metadata_repo = "/home/aaron/workspace/camhd_analysis/CamHD_motion_metadata"
# data_filename = "/RS03ASHS/PN03B/06-CAMHDA301/2016/01/01/CAMHDA301-20160101T000000Z_optical_flow.json"

# Step between the frame numbers that get analysed.
stride = 10

# Retrieve movie metadata

Fetch the movie's metadata, which includes the number of frames (`NumFrames`) and the duration.

In [40]:
# Object used below to look up movie metadata; built with default arguments.
# NOTE(review): presumably a client for the CamHD lazycache service - confirm
# against the pycamhd documentation.
repo = pycamhd.lazycache()

# Fetch the metadata record for the movie at mov_path and show it. In the
# recorded run it printed a dict with 'URL', 'NumFrames' and 'Duration' keys.
movie_info = repo.get_metadata( url=mov_path )
print(movie_info)

{'URL': '/RS03ASHS/PN03B/06-CAMHDA301/2016/01/01/CAMHDA301-20160101T000000Z.mov', 'NumFrames': 25155, 'Duration': 839.33856}


In [56]:
def frame_stats( path, start, end = -1, stride = 10 ):
    """Run the external frame_stats tool on part of a movie and return its JSON.

    The executable at ``frame_stats_path`` is launched with
    ``-o <temp file> --start-at <start> --stop-at <end> --stride <stride> <path>``.
    The tool is expected to write its JSON report to the temp file, which is
    then parsed and returned.

    Parameters
    ----------
    path : str
        Movie path, passed to the tool as its positional argument.
    start : int
        Value passed as ``--start-at``.
    end : int, optional
        Value passed as ``--stop-at``. A negative value (the default) is
        replaced by ``start + 1``.
    stride : int, optional
        Value passed as ``--stride``.

    Returns
    -------
    dict or str
        The parsed JSON report on success. If the output file does not hold
        valid JSON, the tool's captured stdout (a string) is returned instead,
        and a ``RuntimeWarning`` carrying the exit status and stderr is issued
        so the failure is not silent.
    """
    import warnings

    if end < 0:
        end = start + 1

    # The temp file only provides a unique path for the tool's -o option; it is
    # removed automatically when the with-block exits.
    with tempfile.NamedTemporaryFile() as t:
        procout = subprocess.run( [frame_stats_path,
                                   "-o", t.name,
                                   "--start-at", str(start),
                                   "--stop-at", str(end),
                                   "--stride", str(stride),
                                   path ],
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE,
                                  encoding='utf8')

        try:
            # Read the JSON report the tool wrote to the temp file (not stdout).
            results = json.load( t )
        except json.JSONDecodeError:
            # Keep the legacy fallback of returning stdout, but surface the
            # exit status and stderr, which were previously thrown away.
            warnings.warn(
                f"frame_stats produced no valid JSON for {path} "
                f"(start={start}, end={end}, stride={stride}); "
                f"exit status {procout.returncode}; stderr: {procout.stderr!r}",
                RuntimeWarning,
            )
            return procout.stdout

        #TODO: Check output quality here

        return results

## Single-threaded version

In [57]:
%%time

# Frame numbers to analyse, spaced by the notebook-level stride.
frames = range( 5000, 5010, stride )

# Pass the same stride to the tool so the spacing of the analysed frames and
# the tool's --stride option cannot drift apart if the config value changes
# (frame_stats() would otherwise fall back to its own default of 10).
results = [frame_stats(mov_path, f, stride=stride) for f in frames]
print(results)

[{'contents': {'frame_stats': {'contents': {'optical_flow': '1.0'}}, 'movie': '1.0', 'timing': '1.0'}, 'frame_stats': [{'frame_number': 5000, 'similarity': {'center': [-960.0, -540.0], 'duration_us': 10209514, 'flowScale': 1.0, 'fromFrame': 4995, 'imgScale': 0.25, 'similarity': [1.00015877249236, 0.0, 18.386438176448, 27.9921960533508], 'toFrame': 5005, 'valid': True}}], 'movie': {'Duration': 839.338562011719, 'NumFrames': 25155, 'URL': '/RS03ASHS/PN03B/06-CAMHDA301/2016/01/01/CAMHDA301-20160101T000000Z.mov', 'cacheURL': 'https://camhd-app-dev.appspot.com/v1/org/oceanobservatories/rawdata/files/RS03ASHS/PN03B/06-CAMHDA301/2016/01/01/CAMHDA301-20160101T000000Z.mov'}, 'timing': {'elapsed_system_time_s': 10.447170824}}]
CPU times: user 16 ms, sys: 4 ms, total: 20 ms
Wall time: 10.6 s


## Parallelize using Dask scheduler

In [58]:
from dask import compute, delayed

# Frame numbers to analyse, spaced by the notebook-level stride.
frames = range( 5000, 5020, stride )

# One lazy task per frame; the tool is not run until compute() is called.
# The notebook-level stride is passed through so the frame spacing and the
# tool's --stride option stay in step with each other.
values = [delayed(frame_stats)(mov_path, f, stride=stride) for f in frames]


In [78]:
%%time 

# Kept from the original cell so the threaded scheduler module is loaded.
import dask.threaded

# The `get=` keyword has been removed from dask.compute; scheduler="threads"
# selects the same thread-based scheduler. compute() returns a tuple with one
# entry per delayed task, in the order the tasks were passed.
results = compute(*values, scheduler="threads")

CPU times: user 8 ms, sys: 16 ms, total: 24 ms
Wall time: 11.3 s


In [83]:
## build a compliant JSON output

if not results:
    raise ValueError("no frame_stats results to merge")

# Take the top-level keys from the first result, but give the merged document
# its own "frame_stats" list. Aliasing results[0] and extending it in place
# would mutate the first result, so re-running this cell would append the
# remaining entries a second time.
joutput = dict(results[0])
joutput["frame_stats"] = [
    entry
    for result in results
    for entry in result["frame_stats"]
]

# Plain concatenation is intentional: data_filename begins with "/", which
# would make os.path.join() discard metadata_repo.
with open(metadata_repo + data_filename, 'w') as f:
    json.dump(joutput, f, indent=2)