In [103]:
import os
import json
import pandas as pd
import arrow

In [104]:
# init

# configuration holder; starts empty and is replaced when the config file is read
cfg = {}

# one row per event: its id, the file it came from, and the start / end of the
# event both as the original text and as epoch seconds
data = pd.DataFrame(
    columns=[
        'eventID', 'fileRef',
        'timestart', 'epochstart',
        'timeend', 'epochend',
    ]
)

# for every supported event type: the key names accepted as the "start" and
# the "end" of the event inside its time entry
eventmodes = dict(
    lifespan=dict(
        start=["birth", "start"],
        end=["death", "end"],
    ),
)

In [105]:
def readJsonFile(filename):
    """Read a JSON file and return its decoded content.

    Parameters:
        filename: path of the JSON file to read.

    Returns:
        The Python object the JSON document decodes to (a dict for the
        configuration and event files used in this notebook).

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the locale's
    # default encoding (which breaks non-ASCII text on some platforms).
    with open(filename, 'r', encoding='utf-8') as jfl:
        return json.load(jfl)
    
def detTimeKeys(keys, evtmodetype="lifespan"):
    """Work out which of the given keys mark the start and end of an event.

    Parameters:
        keys: iterable of key names found in an event's time entry.
        evtmodetype: name of the event type to look up in `eventmodes`.

    Returns:
        A dict mapping a role of that event type (e.g. "start", "end") to the
        key from `keys` that is listed under that role. Roles without a
        matching key are absent; if several keys match the same role the last
        one wins.

    Raises:
        KeyError: if `evtmodetype` is not present in `eventmodes`.
    """
    aliases = eventmodes[evtmodetype]
    unmatched = object()
    resolved = {}
    for candidate in keys:
        # first role (in definition order) whose alias list contains this key
        role = next(
            (name for name in aliases if candidate in aliases[name]),
            unmatched,
        )
        if role is not unmatched:
            resolved[role] = candidate
    return resolved

In [106]:
# load the notebook configuration; the data-loading cell reads the file list
# (cfg['files']) and the data folder (cfg['config']['datafolder']) from it
cfg = readJsonFile(filename='data.json')

In [107]:
# read actual data

def _toEpoch(timestr):
    """Return the Unix epoch, in whole seconds, of a date/time string.

    The string is parsed with arrow.get and moved to 'Asia/Calcutta'.
    The result is negative for instants before 1970; to convert such a value
    back use e.g.
    datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=(-10725091200))
    """
    # NOTE(review): moving to another timezone presumably leaves the epoch
    # value untouched (the displayed values are whole UTC days) - confirm
    # whether the dates were meant to be read as local time instead.
    local = arrow.get(timestr).to('Asia/Calcutta')
    stamp = local.timestamp
    # Older arrow releases expose `timestamp` as an integer property, newer
    # ones as a method returning a float. Accept both so a library upgrade
    # does not silently store bound methods in the frame.
    return int(stamp()) if callable(stamp) else stamp


# Rows are collected in a plain list and turned into a frame once: this is
# linear instead of quadratic, and re-running the cell rebuilds `data`
# rather than appending the same events a second time.
records = []

for eventfile in cfg['files']:
    fileinfo = cfg['files'][eventfile]

    # defined only type group
    if fileinfo['type'] != "group":
        raise ValueError("Invalid eventfile type: " + fileinfo['type'])

    # read the sub file
    evtdata = readJsonFile(os.path.join(cfg['config']['datafolder'], fileinfo['file']))

    # loop through the events and collect one row per usable event
    for evt, event in evtdata.items():
        # skip events without a time entry or of a type we do not know
        if "time" not in event or event['type'] not in eventmodes:
            continue

        evttimes = event['time'][event['type']]
        # define the start and end keys
        startendkey = detTimeKeys(evttimes.keys(), event['type'])

        # every field defaults to None so an event that has only a start
        # (or only an end) still yields a complete row instead of a KeyError
        times = {"start": None, "epochstart": None, "end": None, "epochend": None}

        # set start and end value which ever applicable
        for bound in ("start", "end"):
            if bound in startendkey:
                times[bound] = evttimes[startendkey[bound]]
                times["epoch" + bound] = _toEpoch(times[bound])

        records.append([
            evt, fileinfo['file'],
            times['start'], times['epochstart'],
            times['end'], times['epochend'],
        ])

data = pd.DataFrame(
    records,
    columns=['eventID', 'fileRef', 'timestart', 'epochstart', 'timeend', 'epochend'],
)
data.head(10)

Unnamed: 0,eventID,fileRef,timestart,epochstart,timeend,epochend
0,6a7c96a7-4894-4b5e-a49f-7735d9404234,maratha.json,1630-02-19,-10725091200,1680-04-03,-9143452800
1,0661c7bb-0f52-4cba-bd79-4f4a639391d4,maratha.json,1674-01,-9340790400,1818-01,-4796668800
2,92e3bc7f-4691-48e9-8782-b8b137be470a,maratha.json,1775-01,-6153580800,1782-05-17,-5920905600
3,7389b283-ef27-4ebf-bbeb-4964b3061217,maratha.json,1803-01,-5270054400,1805-01,-5206896000
4,b566b5e6-a171-42bf-b61f-3a1996ce7de0,maratha.json,1817-09-05,-4806864000,1818-06-03,-4783449600


In [108]:
## Filters and Sorts

In [116]:
# Events that began before 1674-01 (epoch -9340790400) and ended on or after
# 1680-04-03 (epoch -9143452800, hence the "greater than one second earlier").
data[
    (data['epochstart'] < -9340790400)
    & (data['epochend'] > -9143452801)
]

Unnamed: 0,eventID,fileRef,timestart,epochstart,timeend,epochend
0,6a7c96a7-4894-4b5e-a49f-7735d9404234,maratha.json,1630-02-19,-10725091200,1680-04-03,-9143452800
