# Step 1. Inventory the Files

The first 4 steps in this notebook are the same as for stage 01 (modifications commented out with #SEB)

To run this notebook, first install ffmpeg using:

`conda install -c menpo ffmpeg`

WARNING: ffmpeg is distributed under an LGPL license.

In [None]:
import os # Import the os module, for the os.walk function
import re
import datetime
import subprocess
from PIL import Image

# Bounding box handed to Image.thumbnail() when the sample frames are written.
size = (400, 400)

rootDir = 'O:/ProRail'       # directory tree that is inventoried
destDir = 'Y:/susanb/ProRail_iRODS' # iRODS directory that results are written to

# Every file below rootDir whose directory path mentions "ProRail",
# stored as "<directory>/<file name>" in the order os.walk reports them.
full_file_list = [
    folder + "/" + entry
    for folder, _subfolders, entries in os.walk(rootDir)
    if "ProRail" in folder
    for entry in entries
]

In [None]:
#print(full_file_list)

# Step 2. Sort by Color, Infrared and Frame Metadata

In [None]:
# Classify every inventoried file by what its name says it contains.
color_list = []           # paths whose stub contains "C_DEV"
infra_list = []           # paths whose stub contains "NIR_DEV"
frame_metadata_list = []  # text files that are not timestamp files
# ext_list and stub_list are initialised but stay empty: the code that used
# to fill them is disabled.
ext_list = []
stub_list = []

# Compile the patterns once instead of on every loop iteration.
color_pattern = re.compile("C_DEV")
infra_pattern = re.compile("NIR_DEV")
text_pattern = re.compile("txt")
timestamp_pattern = re.compile("timestamp")

for file in full_file_list:
    # os.path.splitext splits on the last dot of the final path component, so
    # paths with several dots (or none at all) no longer raise ValueError the
    # way the two-value unpacking of file.split(".") did.
    stub, ext = os.path.splitext(file)
    ext = ext[1:]  # drop the leading "." so ext is "avi", "txt", ... or ""

    if color_pattern.search(stub):
        color_list.append(file)
    if infra_pattern.search(stub):
        infra_list.append(file)
    # A text file counts as frame metadata unless it is a timestamp file.
    if text_pattern.search(ext) and not timestamp_pattern.search(stub):
        frame_metadata_list.append(file)

In [None]:
#print(frame_metadata_list)

In [None]:
#print(infra_list)

In [None]:
#print(color_list)

# Step 3. Expand out the Frames

In [None]:
# Normally the individual frames would be written out here, but there is not enough room (storage space) for them, so this cell is intentionally empty.

# Step 4. Assign Event IDs and Create Collections

In [None]:
timestamp_list = []

# File name -> recording timestamp, saved so the names need not be re-parsed.
colorfile_stamp = {}
infrafile_stamp = {}


def _recording_stamp(path):
    """Return ``(src, stamp)`` for one inventoried recording path.

    ``src`` is the text after the last "/" of ``path``. ``stamp`` is the
    ``datetime`` built from the first six "_"-separated fields of ``src``
    (year, month, day, hour, minute, second).

    Taking the last path component, instead of unpacking ``path.split("/")``
    into exactly three names, keeps this working when the root directory is
    not exactly two components deep.

    Raises:
        ValueError: if ``src`` has fewer than six "_"-separated fields, or a
            field is not an integer / not a valid date-time component.
    """
    src = path.rsplit("/", 1)[-1]
    fields = src.split("_")
    if len(fields) < 6:
        raise ValueError("No YYYY_MM_DD_hh_mm_ss stamp in file name: " + src)
    return src, datetime.datetime(*[int(field) for field in fields[:6]])


# Color and infrared recordings are handled identically; only the lookup
# table that remembers their stamps differs.
for file_group, stamp_by_name in ((color_list, colorfile_stamp),
                                  (infra_list, infrafile_stamp)):
    for file in file_group:
        src, d = _recording_stamp(file)
        timestamp_list.append(d)
        stamp_by_name[src] = d

#sort
timestamp_list.sort()

# save this for collection identification; None when no recordings were found
# (indexing the empty list used to raise IndexError)
collection_stamp = timestamp_list[0] if timestamp_list else None

In [None]:
#print(timestamp_list)

In [None]:
# in future we will get this out of the iRODS database
# SEB need to work on this
init_event = 0

#SEB if not os.path.exists(destDir):
#SEB     os.makedirs(destDir)
#SEB 
#SEB # Create a collection for the the json files
#SEB collectionDir = destDir+"/"+str(collection_stamp.strftime("%Y%m%d%H%M%S"))+"_collection_initial_event_"+str(init_event+1)
#SEB if not os.path.exists(collectionDir):
#SEB     os.makedirs(collectionDir)

# Stamps more than this far after the first stamp of an event start a new event.
fixed_delta = datetime.timedelta(seconds=10)

timestamp_event_dict = {}   # timestamp -> event number
event_initiate = {}         # event number -> first timestamp of that event
event_collection = {}       # event number -> destination directory of that event

# Walk the sorted stamps and hand out event numbers.
event_no = init_event
event_stamp = datetime.datetime(1900, 1, 1, 0, 0, 0) # initialise
for stamp in timestamp_list:
    starts_new_event = (stamp - event_stamp) > fixed_delta

    if starts_new_event:
        event_stamp = stamp
        event_no += 1
        stamp_text = str(stamp.strftime("%Y%m%d%H%M%S"))
        newDir = destDir + "/" + stamp_text + "_event_" + str(event_no)
        event_collection[event_no] = newDir
#SEB         if not os.path.exists(newDir):
#SEB             os.makedirs(newDir)
        event_initiate[event_no] = stamp

    # Whether it opened a new event or not, the stamp belongs to the current one.
    timestamp_event_dict[stamp] = event_no
    

# Step 5. Extract Sample Frames

In [None]:
# Write one thumbnail (.jpg) per .avi movie that is already present in its
# event directory and has no thumbnail yet. The frame is taken from the middle
# of the movie with ffmpeg and scaled down with PIL.

# A recording stamp looks like YYYY_MM_DD_hh_mm_ss. Requiring digits (instead
# of "any character") keeps the int() conversions below from failing on a
# look-alike piece of the path.
matchstamp = re.compile(r'\d{4}_\d{2}_\d{2}_\d{2}_\d{2}_\d{2}')
if os.path.exists("tempfile.jpg"): os.remove("tempfile.jpg")
for file in full_file_list:

    startloc = matchstamp.search(file)
    # Take the last path component and split on its last dot only; unpacking
    # file.split("/") / file.split(".") into a fixed number of names raised
    # ValueError for deeper paths and for names with several (or no) dots.
    src = file.rsplit("/", 1)[-1]
    base, ext = os.path.splitext(src)
    if not startloc or ext != ".avi": # need a date stamp and an .avi movie
        continue

    stamp = startloc.group(0)
    try:
        d = datetime.datetime(*[int(part) for part in stamp.split("_")])
    except ValueError:
        # Digits that do not form a real date cannot be in the timestamp list.
        continue

    if d not in timestamp_event_dict: #ignore files if their timestamp is not in the timestamplist
        continue

    event_id = timestamp_event_dict[d]
    avi_file = event_collection[event_id]+"/"+base+".avi" # for check that .avi already in iRODS
    jpg_file = event_collection[event_id]+"/"+base+".jpg"
    if not os.path.exists(avi_file) or os.path.exists(jpg_file):
        continue

    try:
        # Ask ffprobe for the bare duration value. Slicing the default
        # "[FORMAT] duration=..." output at fixed byte offsets depended on the
        # platform's line endings and on the number of digits printed.
        probe_output = subprocess.check_output(
            ['ffprobe', '-v', 'error', '-i', file,
             '-show_entries', 'format=duration',
             '-of', 'default=noprint_wrappers=1:nokey=1'])
        frame_time = 0.5*float(probe_output.decode().strip())

        # Argument list without a shell: paths containing spaces or shell
        # metacharacters are passed to ffmpeg unchanged.
        ffmpeg_cmd = ["ffmpeg", "-ss", str(datetime.timedelta(seconds=frame_time)),
                      "-i", file, "-frames:v", "1", "tempfile.jpg"]
        print("calltext is "+" ".join(ffmpeg_cmd))
        subprocess.call(ffmpeg_cmd)

        if os.path.exists("tempfile.jpg"):
            # The with-block closes the image even if thumbnail()/save() fail,
            # so the temporary file can always be removed afterwards.
            with Image.open("tempfile.jpg") as im:
                im.thumbnail(size)
                im.save(jpg_file, "JPEG")
    except Exception as err:
        # Best effort: report the failure and carry on with the next movie.
        # KeyboardInterrupt is no longer swallowed.
        print("Problem creating thumbnail for "+file)
        print(err)
    finally:
        # A leftover tempfile.jpg would make the next ffmpeg call stop at its
        # overwrite prompt.
        if os.path.exists("tempfile.jpg"):
            os.remove("tempfile.jpg")