# Step 1. Inventory the Files

In [None]:
import os # Import the os module, for the os.walk function
import re
from shutil import copy2
import datetime
import json

# Directory to start the inventory from - in this case a USB drive
rootDir = 'P:/ProRail'
# iRODS directory to write to - davrods is used to see iRODS as a network drive
destDir = 'Y:/susanb/ProRail_iRODS'

# Path of every file that sits in a directory whose path mentions "ProRail"
full_file_list = []

for dirName, subdirList, fileList in os.walk(rootDir):
    # Every file name is echoed, whether or not its directory is kept
    for fname in fileList:
        print('\t%s' % fname)
    # The directory test does not depend on the file, so it is made once per directory
    if "ProRail" in dirName:
        full_file_list.extend(dirName + "/" + fname for fname in fileList)

In [None]:
#print(full_file_list)

# Step 2. Sort by Color, Infrared and Frame Metadata

In [None]:
# Buckets filled by the loop below. A file can land in more than one bucket:
# a "C_DEV" .txt file is both a colour file and a frame-metadata file.
color_list = []
infra_list = []
frame_metadata_list = []
ext_list = []
stub_list = []

# Compile the patterns once instead of once per file
color_pattern = re.compile("C_DEV")
infra_pattern = re.compile("NIR_DEV")
text_pattern = re.compile("txt")
timestamp_pattern = re.compile("timestamp")

for file in full_file_list:
    # splitext only looks at the last dot of the final path component, so
    # names with no dot or with several dots no longer raise ValueError.
    stub, ext = os.path.splitext(file)
    ext = ext[1:]  # drop the leading dot, leaving e.g. "txt"

    if color_pattern.search(stub):
        color_list.append(file)
    if infra_pattern.search(stub):
        infra_list.append(file)
    # Text files hold the per-frame metadata, except the "timestamp" ones
    if text_pattern.search(ext) and not timestamp_pattern.search(stub):
        frame_metadata_list.append(file)

In [None]:
#print(frame_metadata_list)

In [None]:
#print(infra_list)

In [None]:
#print(color_list)

# Step 3. Expand out the Frames

In [None]:
# Normally we would write out the frames but there is not enough room

# Step 4. Assign Event IDs and Create Collections

In [None]:
# Every timestamp found in a colour or infrared file name (duplicates kept)
timestamp_list = []

# File name -> timestamp, kept so the name does not have to be parsed again
colorfile_stamp = {}
infrafile_stamp = {}

# Colour files are handled first, then the infrared files
for path_list, stamp_by_name in ((color_list, colorfile_stamp), (infra_list, infrafile_stamp)):
    for path in path_list:
        # Paths are expected to have exactly three "/"-separated parts
        drive, root, src = path.split("/")
        # ... and names exactly nine "_"-separated parts, the first six being the date and time
        year, month, day, hour, minute, second, color, dev, tail = src.split("_")
        moment = datetime.datetime(*map(int, (year, month, day, hour, minute, second)))
        timestamp_list.append(moment)
        stamp_by_name[src] = moment

# Chronological order
timestamp_list.sort()

# The earliest timestamp identifies the collection
collection_stamp = timestamp_list[0]

In [None]:
#print(timestamp_list)

In [None]:
# In future the starting event number will come out of the iRODS database
# SEB need to work on this
init_event = 0

# The destination has to exist before anything is written below it
if not os.path.exists(destDir):
    os.makedirs(destDir)

# Create a collection for the json files
collection_label = collection_stamp.strftime("%Y%m%d%H%M%S")
collectionDir = destDir + "/" + collection_label + "_collection_initial_event_" + str(init_event + 1)
if not os.path.exists(collectionDir):
    os.makedirs(collectionDir)

# Fixed delta for sensitivity purposes: a stamp more than this long after the
# stamp that opened the current event opens a new event
fixed_delta = datetime.timedelta(seconds=10)

# timestamp -> event number
timestamp_event_dict = {}
# event number -> first recorded timestamp of that event
event_initiate = {}
# event number -> directory created for that event
event_collection = {}

# Walk the sorted timestamps and hand out common event numbers
event_no = init_event
event_stamp = datetime.datetime(1900, 1, 1, 0, 0, 0)  # initialise
for stamp in timestamp_list:
    if stamp - event_stamp > fixed_delta:
        # Too far from the start of the current event: open a new one
        event_stamp = stamp
        event_no += 1
        newDir = destDir + "/" + stamp.strftime("%Y%m%d%H%M%S") + "_event_" + str(event_no)
        event_collection[event_no] = newDir
        if not os.path.exists(newDir):
            os.makedirs(newDir)
        event_initiate[event_no] = stamp
    # New or not, the stamp belongs to the current event
    timestamp_event_dict[stamp] = event_no

In [None]:
#print(timestamp_event_dict)

In [None]:
#print(event_collection)

# Step 5. Copy Over the Files to the Correct Place

In [None]:
# Datestamp embedded in the paths: YYYY_MM_DD_hh_mm_ss. Only digits are
# accepted, so a look-alike such as "abcd_ef_..." can no longer reach int().
matchstamp = re.compile(r'\d{4}_\d{2}_\d{2}_\d{2}_\d{2}_\d{2}')
for file in full_file_list:

    startloc = matchstamp.search(file)
    if not startloc or os.stat(file).st_size <= 0:
        continue  # no datestamp in the path, or the file is empty

    try:
        d = datetime.datetime(*(int(part) for part in startloc.group().split("_")))
    except ValueError:
        continue  # digits that do not form a real date/time (e.g. month 13)

    if d not in timestamp_event_dict:
        continue  # ignore files whose timestamp is not in the timestamp list

    event_id = timestamp_event_dict[d]
    # Take the file name as whatever follows the last "/", so paths with
    # more than three components no longer break the unpacking.
    src = file.rsplit("/", 1)[-1]
    if not os.path.exists(event_collection[event_id] + "/" + src):  # do not overwrite
        copy2(file, event_collection[event_id])

# Step 7. Make the Manifest

In [None]:

# One schema per table of the manifest. Each schema lists its fields
# (name + type) and the field name(s) that form its primary key.

location_schema = {
    'fields': [
        {'name': 'location_id', 'type': 'integer'},
        {'name': 'location', 'type': 'geopoint'},
        {'name': 'location_name', 'type': 'string'},
    ],
    "primaryKey": ["location_id"],
}

camera_schema = {
    'fields': [
        {'name': 'camera_id', 'type': 'integer'},
        {'name': 'manufacturer', 'type': 'string'},
        {'name': 'spectrum', 'type': 'string'},
    ],
    # The key has to name a field of this table; it used to be "camera_schema",
    # which is not a field at all.
    "primaryKey": ["camera_id"],
}

settings_schema = {
    'fields': [
        {'name': 'camera_id', 'type': 'integer'},
        {'name': 'time_stamp', 'type': 'datetime'},
        {'name': 'settings', 'type': 'string'},
        {'name': 'height', 'type': 'integer'},
        {'name': 'width', 'type': 'integer'},
    ],
    "primaryKey": ["camera_id", "time_stamp"],
}

collection_schema = {
    'fields': [
        {'name': 'collection_id', 'type': 'integer'},
        {'name': 'location_id', 'type': 'integer'},
        {'name': 'time_stamp', 'type': 'datetime'},
        {'name': 'source', 'type': 'integer'},
    ],
    "primaryKey": ["collection_id", "location_id", "time_stamp"],
}

event_schema = {
    'fields': [
        {'name': 'event_id', 'type': 'integer'},
        {'name': 'collection_id', 'type': 'integer'},
        {'name': 'illumination', 'type': 'integer'},
        # 'precipation' (sic): the event rows and the metadata writer use
        # this exact key, so the spelling is kept.
        {'name': 'precipation', 'type': 'integer'},
        {'name': 'time_stamp', 'type': 'datetime'},
    ],
    "primaryKey": ["event_id"],
}

conversion_schema = {
    'fields': [
        {'name': 'event_id', 'type': 'integer'},
        {'name': 'camera_id', 'type': 'integer'},
        {'name': 'conversion_id', 'type': 'integer'},
        {'name': 'format', 'type': 'string'},
        {'name': 'speed', 'type': 'integer'},
        {'name': 'description', 'type': 'string'},
        {'name': 'file_location', 'type': 'string'},
        {'name': 'conversion_date', 'type': 'datetime'},
    ],
    "primaryKey": ["event_id", "camera_id", "conversion_id"],
}

frame_schema = {
    'fields': [
        {'name': 'event_id', 'type': 'integer'},
        {'name': 'camera_id', 'type': 'integer'},
        {'name': 'frame_no', 'type': 'integer'},
        {'name': 'capture_time', 'type': 'time'},
        {'name': 'file_location', 'type': 'string'},
    ],
    "primaryKey": ["event_id", "camera_id", "frame_no"],
}



In [None]:
# The manifest maps each table name to the schema describing that table
manifest = {
    "location": location_schema,
    "camera": camera_schema,
    "settings": settings_schema,
    "collection": collection_schema,
    "event": event_schema,
    "conversion": conversion_schema,
    "frame": frame_schema,
}

# Step 8. Write the Tables

In [None]:
# Location table: a single row
# NOTE(review): the pair looks like [latitude, longitude]; if the 'geopoint'
# type follows Table Schema's array format, that is [lon, lat] - confirm.
location_source = [
    {
        "location_id": 0,
        "location": [51.8707149, 4.2901064],
        "location_name": "Botlek",
    },
]

In [None]:
# Camera table: camera 0 is the optical camera, camera 1 the infrared one
# (not sure about the dimensions of the infrared frame)
camera_source = [
    {
        "camera_id": camera_id,
        "manufacturer": "Battenburg",
        "spectrum": spectrum,
        "height": 500,
        "width": 1080,
    }
    for camera_id, spectrum in enumerate(("optical", "infrared"))
]

In [None]:
# Settings table: one row per camera, stamped with the collection timestamp.
# We skip over the settings file, so 'settings' stays empty.
settings_source = [
    {"camera_id": camera_id, "time_stamp": str(collection_stamp), "settings": None}
    for camera_id in (0, 1)
]



In [None]:
# Event table: one row per event, in the order the events were recorded in
# event_initiate, stamped with the first timestamp seen for the event.
event_source = [
    {
        "event_id": event,
        "location_id": 0,
        "time_stamp": str(first_stamp),
        "precipation": 0,
        "illumination": 0,
    }
    for event, first_stamp in event_initiate.items()
]

In [None]:
#print(event_source)

In [None]:
# Collection table: a single row, stamped with the collection timestamp
collection_source = [
    {
        "collection_id": 0,
        "location_id": 0,
        "time_stamp": str(collection_stamp),
        "source": 1,
    },
]

In [None]:
# conversion source
# One record per movie file. The list is created once and appended to; it
# used to be reassigned on every iteration, which kept only the last record
# and left the name undefined when there were no files at all.
conversion_source = []

# Camera 0 recorded the colour files, camera 1 the infrared files
for camera_id, file_stamps in ((0, colorfile_stamp), (1, infrafile_stamp)):
    for file_name, stamp in file_stamps.items():
        # The stamp dictionaries also hold the .txt sidecar files; only the
        # movies are "avi" conversions (same filter as the metadata step).
        if not file_name.endswith(".avi"):
            continue
        conversion_source.append({
            "event_id": timestamp_event_dict[stamp],
            "camera_id": camera_id,
            "conversion_id": 0,
            "format": "avi",
            "speed": "full",
            "file_location": file_name,
        })



In [None]:
frame_source = []
# Frame table: one record per line of each frame-metadata (.txt) file
for file in frame_metadata_list:
    # File name is whatever follows the last "/"; splitext copes with names
    # that contain no dot or several dots.
    src = file.rsplit("/", 1)[-1]
    stub = os.path.splitext(src)[0]
    # The movie the metadata belongs to has the same name with an .avi extension
    assoc_file = stub + ".avi"

    if assoc_file in colorfile_stamp:
        stamp = colorfile_stamp[assoc_file]
        camera_id = 0
    elif assoc_file in infrafile_stamp:
        stamp = infrafile_stamp[assoc_file]
        camera_id = 1
    else:
        # No matching movie: skip the file rather than reuse the event and
        # camera of the previous file (or fail on the very first one).
        continue
    event_id = timestamp_event_dict[stamp]

    with open(file) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue  # blank lines carry no frame
            # Four tab-separated columns per frame line
            frame_no, timedate, ms, mms = line.split('\t')
            timedate_string = timedate + " " + ms + mms
            frame_source.append({
                'event_id': event_id,
                'camera_id': camera_id,
                "frame_no": frame_no,
                "capture_time": timedate_string,
                "file_location": assoc_file,
            })

# Step 9. Dump to File

In [None]:
# Serialise the manifest into the collection directory
manifest_path = collectionDir + "/manifest.json"
with open(manifest_path, 'w') as manifest_out:
    manifest_out.write(json.dumps(manifest))

In [None]:
# Write every table into the collection directory as json.
# The location table is listed in the manifest and is now written as well;
# it used to be the only table without a file.
tables_to_write = (
    ("location_table.json", location_source),
    ("camera_table.json", camera_source),
    ("settings_table.json", settings_source),
    ("collection_table.json", collection_source),
    ("event_table.json", event_source),
    ("conversion_table.json", conversion_source),
    ("frame_table.json", frame_source),
)

for table_file, table_rows in tables_to_write:
    with open(collectionDir + "/" + table_file, 'w') as out:
        json.dump(table_rows, out)


# Step 6. Write the Metadata

In [None]:
# iRODS 'imeta' commands are generated in a text file, to be read into iRODS from the command line

# Look records up by their id instead of by list position, so the output stays
# correct when event numbering does not start at 1.
events_by_id = {record["event_id"]: record for record in event_source}
locations_by_id = {record["location_id"]: record for record in location_source}

# 'with' closes the file even when one of the lookups below fails
with open(collectionDir+"/metadata.txt", "w") as metadata_file:
    # Per-event attributes, attached to the event's collection (-C)
    for event_id in event_collection:
        event_record = events_by_id[event_id]
        location_record = locations_by_id[event_record['location_id']]
        command = "imeta add -C "+str(event_collection[event_id])
        metadata_file.write(command+" event_id "+str(event_id)+"\n")
        metadata_file.write(command+" collection_stamp '"+str(collection_stamp)+"'\n")
        metadata_file.write(command+" precipitation "+str(event_record['precipation'])+"\n")
        metadata_file.write(command+" illumination "+str(event_record['illumination'])+"\n")
        metadata_file.write(command+" event_time_stamp '"+str(event_record['time_stamp'])+"'\n")
        metadata_file.write(command+" location_name '"+location_record['location_name']+"'\n")

    # Flag the events that have a colour and/or an infrared movie
    for attribute, file_stamps in (("color_movie", colorfile_stamp), ("IR_movie", infrafile_stamp)):
        for file_name, stamp in file_stamps.items():
            # endswith copes with names that have no dot or several dots
            if not file_name.endswith(".avi"):
                continue
            event_id = timestamp_event_dict[stamp]
            metadata_file.write("imeta add -C "+str(event_collection[event_id])+
                                " "+attribute+" yes "+"\n")