In [23]:
# Render a small script + link in the notebook output: on page load the
# script hides every `div.input` (the code cells), and the link calls
# `code_toggle()` to show/hide them again. Relies on jQuery (`$`) being
# available in the page that displays the notebook.
from IPython.display import HTML
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<p>The raw code for this IPython notebook is by default hidden for easier reading.
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.</p>''')

In [3]:
import warnings

warnings.filterwarnings('ignore')

import os
import pandas as pd
import requests
import time
import sys
import numpy as np
import csv
import datetime
import osmnx as ox
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
from descartes import PolygonPatch
import networkx as nx
import pandana as pdna
import geopandas as gpd
import ast
import math
import multiprocessing as mproc
import copy

In [4]:
# Make the parent directory importable so the project package
# `delhi_ambulance_study` can be imported from this notebook's folder.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    # Guard keeps sys.path free of duplicates when the cell is re-run.
    sys.path.append(module_path)
    
# Project-local helpers used throughout the notebook.
from delhi_ambulance_study import gis_util, util

In [5]:
# All input/output artefacts of this notebook live in ../data.
DATA_FOLDER = os.path.join('..', 'data')

# keys.csv holds the API keys on its first row; the first key on that row
# is the one used for geocoding below.
keys_path = os.path.join(DATA_FOLDER, 'keys.csv')
with open(keys_path, 'r') as key_file:
    key_rows = list(csv.reader(key_file))
    keys = key_rows[0]

API_KEY = keys[0]
%matplotlib inline

# Hospital Locations

Using RML, Hindu Rao, Amarindu, AIIMS, Sushrut, Deen Dayal Upadhyay, Safdarjung, Lal Bahadur Shastri and GTB as the list of hospitals in Delhi. **(There is no hospital by the name Amarindu, so it does not appear in the table below.)**

In [9]:
# One record per hospital: (name, street, neighbourhood, pin).
# City, state and country are identical for every hospital and are filled in
# below, so each hospital's details stay together on adjacent lines.
hospital_records = [
    ('Ram Manohar Lohia Hospital',
     'Baba Kharak Singh Marg',
     'Connaught Place',
     '110001'),
    ('North DMC Medical College & Hindu Rao Hospital',
     'DR. J.S. Kkaranwal Memorial Road',
     'Malka Ganj',
     '110007'),
    ('AIIMS',
     'Safdarjung Enclave, Aurobindo Marg, Ansari Nagar',
     'Haus Khas',
     '110029'),
    ('Sushrut Trauma Centre',
     'Ring Road, Behind I.P. College, Near Civil Lines Metro Station, Metcalf Road',
     'Civil Lines',
     '110054'),
    ('Deen Dayal Upadhyay Hospital',
     'Clock Tower Chowk, Hari Enclave',
     'Hari Nagar',
     '110064'),
    ('Safdarjung Hospital',
     'Safdarjung Campus',
     'Ansari Nagar West',
     '110029'),
    ('Lal Bahadur Shastri Hospital',
     'Near Kalyanvas Colony, Mayur Vihar, Phase -II',
     'Khichripur',
     '110091'),
    ('GTB Hospital',
     'GTB Enclave',
     'Shahdara',
     '110095'),
]

# Transpose the records into per-column lists.
hosp_names, hosp_streets, hosp_neighbourhoods, hosp_pins = (
    list(column) for column in zip(*hospital_records)
)
n_hospitals = len(hospital_records)

# Key order matches the original cell so the column order is unchanged.
df_delhi_hospitals = pd.DataFrame({
    'name': hosp_names,
    'street': hosp_streets,
    'neighbourhood': hosp_neighbourhoods,
    'city': ['New Delhi'] * n_hospitals,
    'state': ['Delhi'] * n_hospitals,
    'pin': hosp_pins,
    'country': ['India'] * n_hospitals,
})

In [10]:
# Build one geocoder query string per hospital in the form
# "name,street,neighbourhood,city,state pin,country".
addresses = [
    name + ',' + street + ',' + neighbourhood + ',' + city + ',' + state + ' ' + pin + "," + country
    for name, street, neighbourhood, city, state, pin, country in zip(
        df_delhi_hospitals['name'],
        df_delhi_hospitals['street'],
        df_delhi_hospitals['neighbourhood'],
        df_delhi_hospitals['city'],
        df_delhi_hospitals['state'],
        df_delhi_hospitals['pin'],
        df_delhi_hospitals['country'],
    )
]

Geocoding using Google geocoding API,

In [13]:
# Geocode every address with the project helper. The following cells read
# `results` as a sequence of dicts carrying 'status', 'latitude' and
# 'longitude' keys, in the same order as `addresses`.
# NOTE(review): the file name argument presumably names an output/cache file
# inside DATA_FOLDER -- confirm against gis_util.batch_geocode.
results = gis_util.batch_geocode(addresses, DATA_FOLDER, 'delhi_hospitals_latlong.csv', API_KEY)

Geocoded: Ram Manohar Lohia Hospital,Baba Kharak Singh Marg,Connaught Place,New Delhi,Delhi 110001,India: OK
Geocoded: North DMC Medical College & Hindu Rao Hospital,DR. J.S. Kkaranwal Memorial Road,Malka Ganj,New Delhi,Delhi 110007,India: OK
Geocoded: AIIMS,Safdarjung Enclave, Aurobindo Marg, Ansari Nagar,Haus Khas,New Delhi,Delhi 110029,India: OK
Geocoded: Sushrut Trauma Centre,Ring Road, Behind I.P. College, Near Civil Lines Metro Station, Metcalf Road,Civil Lines,New Delhi,Delhi 110054,India: OK
Geocoded: Deen Dayal Upadhyay Hospital,Clock Tower Chowk, Hari Enclave,Hari Nagar,New Delhi,Delhi 110064,India: OK
Geocoded: Safdarjung Hospital,Safdarjung Campus,Ansari Nagar West,New Delhi,Delhi 110029,India: OK
Geocoded: Lal Bahadur Shastri Hospital,Near Kalyanvas Colony, Mayur Vihar, Phase -II,Khichripur,New Delhi,Delhi 110091,India: OK
Geocoded: GTB Hospital,GTB Enclave,Shahdara,New Delhi,Delhi 110095,India: OK


In [14]:
# Keep only the hospitals whose geocoding request succeeded.
# Positions come from enumerate() rather than results.index(entry): index()
# is a linear search by value, so it was quadratic overall and would return
# the wrong row whenever two result dicts compare equal.
ok_positions = [position for position, entry in enumerate(results) if entry['status'] == 'OK']

# .copy() detaches the selection from the original frame so that the column
# assignments in the following cells do not operate on a slice.
df_delhi_hospitals = df_delhi_hospitals.iloc[ok_positions].copy()

In [15]:
# Attach coordinates from the successful geocoding results; the order of
# `results` matches the row order of the (already filtered) hospital table.
geocoded_ok = [entry for entry in results if entry['status'] == 'OK']
df_delhi_hospitals['lat'] = [entry['latitude'] for entry in geocoded_ok]
df_delhi_hospitals['lng'] = [entry['longitude'] for entry in geocoded_ok]

In [26]:
# Sequential 1-based identifier for each hospital row.
n_hospital_rows = len(df_delhi_hospitals)
df_delhi_hospitals['hosp_id'] = range(1, n_hospital_rows + 1)

In [27]:
# Persist the geocoded hospital table; it is read back in the
# "Getting closest hospital & taxi information" section.
df_delhi_hospitals.to_csv(os.path.join(DATA_FOLDER, 'delhi_hospitals_final_geocoded.csv'), index=False)

In [28]:
# Display the final hospital table (one row per hospital).
df_delhi_hospitals

Unnamed: 0,city,country,name,neighbourhood,pin,state,street,lat,lng,hosp_id
0,New Delhi,India,Ram Manohar Lohia Hospital,Connaught Place,110001,Delhi,Baba Kharak Singh Marg,28.627123,77.207337,1
1,New Delhi,India,North DMC Medical College & Hindu Rao Hospital,Malka Ganj,110007,Delhi,DR. J.S. Kkaranwal Memorial Road,28.650374,77.182668,2
2,New Delhi,India,AIIMS,Haus Khas,110029,Delhi,"Safdarjung Enclave, Aurobindo Marg, Ansari Nagar",28.566827,77.20812,3
3,New Delhi,India,Sushrut Trauma Centre,Civil Lines,110054,Delhi,"Ring Road, Behind I.P. College, Near Civil Lin...",28.679779,77.228379,4
4,New Delhi,India,Deen Dayal Upadhyay Hospital,Hari Nagar,110064,Delhi,"Clock Tower Chowk, Hari Enclave",28.628012,77.112397,5
5,New Delhi,India,Safdarjung Hospital,Ansari Nagar West,110029,Delhi,Safdarjung Campus,28.567839,77.205795,6
6,New Delhi,India,Lal Bahadur Shastri Hospital,Khichripur,110091,Delhi,"Near Kalyanvas Colony, Mayur Vihar, Phase -II",28.617721,77.311242,7
7,New Delhi,India,GTB Hospital,Shahdara,110095,Delhi,GTB Enclave,28.683812,77.311004,8


# Crash & taxi Locations

## Real data availability assumptions

* This study's geographical area of interest spans the entire National Capital Region, which is comprised of a number of districts in Haryana, Rajasthan and Uttar Pradesh along with the National Capital Territory of Delhi. 
* Taxi location data is supposed to include log information of Delhi taxis reported at a frequency of 1 minute, spanning the entire duration of the study, which is [(12 am, 1st January 2016), (12 am, 1st January 2017))
* Crash information is sourced from fatal road accidents data in NCR for the year 2017. This dataset is supposed to have about 2000 records. 

## Simulated Data

* To avoid prohibitively long computation times, the simulated crash dataset will have only 100 records, with timepoints anywhere in [(12 am, 1st January 2016), (12 am, 1st January 2017))
* Simulated data for taxi locations will have 1 log for 10000 taxis for [(12 am, 30th December 2015), (12 am, 1st January 2016)).
* All events in this dataset will be from within the National Capital Territory of Delhi and not the whole NCR.

Using 'as the crow flies' distance to get the closest taxis from a crash location will result in data that is not representative of actual travel distances. We need information on the road network. This study uses Open Street Map (OSM)'s road network information. This is accomplished with [osmnx](https://github.com/gboeing/osmnx), a Python package that makes API calls to the OSM API. We do not directly use the Google Directions API, which has the capability to give estimated travel times based on the time of day, because other factors such as API call latency for each query and the daily quota mean that we must keep the number of these queries as small as possible. An advantage of osmnx is that there will be only one OSM API call, when the entire road network dataset is downloaded, and all shortest distance computations will be local.

In [None]:
# dctlst_places = [ {'state':'National Capital Territory', 'country':'India'},
#                 {'district': 'Meerut', 'state':'Uttar Pradesh', 'country':'India'},
#                 {'district': 'Muzzafarnagar', 'state':'Uttar Pradesh', 'country':'India'},
#                 {'district': 'Ghaziabad', 'state':'Uttar Pradesh', 'country':'India'},
#                 {'district': 'Gautam Budh Nagar', 'state':'Uttar Pradesh', 'country':'India'},
#                 {'district': 'Bulandshahr', 'state':'Uttar Pradesh', 'country':'India'},
#                 {'district': 'Baghpat', 'state':'Uttar Pradesh', 'country':'India'},
#                 {'district': 'Hapur', 'state':'Uttar Pradesh', 'country':'India'},
#                 {'district': 'Shamli', 'state':'Uttar Pradesh', 'country':'India'},
#                 {'district': 'Faridabad', 'state': 'Haryana', 'country': 'India'},
#                 {'district': 'Gurugram', 'state': 'Haryana', 'country': 'India'},
#                 {'district': 'Mahendragarh', 'state': 'Haryana', 'country': 'India'},
#                 {'district': 'Bhiwani', 'state': 'Haryana', 'country': 'India'},
#                 {'district': 'Charkhi Dhadri', 'state': 'Haryana', 'country': 'India'},
#                 {'district': 'Nuh', 'state': 'Haryana', 'country': 'India'},
#                 {'district': 'Rohtak', 'state': 'Haryana', 'country': 'India'},
#                 {'district': 'Sonipat', 'state': 'Haryana', 'country': 'India'},
#                 {'district': 'Rewari', 'state': 'Haryana', 'country': 'India'},
#                 {'district': 'Jhajjar', 'state': 'Haryana', 'country': 'India'},
#                 {'district': 'Panipat', 'state': 'Haryana', 'country': 'India'},
#                 {'district': 'Palwal', 'state': 'Haryana', 'country': 'India'},
#                 {'district': 'Jind', 'state': 'Haryana', 'country': 'India'},
#                 {'district': 'Karnal', 'state': 'Haryana', 'country': 'India'},
#                 {'district': 'Alwar', 'state': 'Rajasthan', 'country': 'India'},
#                 {'district': 'Bharatpur', 'state': 'Rajasthan', 'country': 'India'}
#                 ]

# Only the National Capital Territory is downloaded for now; the full NCR
# district list is kept (commented out) above.
dctlst_places = [ {'state':'National Capital Territory', 'country':'India'}]

# Fix: the original passed `place_names`, a name that is never defined in
# this notebook, so the cell raised NameError on a fresh kernel. The list of
# place dicts built above is the query to download.
ncr_roads = ox.graph_from_place(dctlst_places, network_type='drive_service')

We then proceed with the download of NCR road network using osmnx. The downloaded data can be saved locally as shape files to avoid lengthy download times in subsequent analyses. The saved files will be placed into two folders, Nodes and Edges. Edges have information on road segments and Nodes can be thought of as (lat, long) points that mark curves, intersections, etc., that is essential to reconstruct the roads. 

In [None]:
# Download the street network through the project helper.
# NOTE(review): the helper presumably also writes ncr.graphml and the
# nodes/edges shapefiles under DATA_FOLDER/ncr, since both are read back
# below -- confirm against gis_util.get_street_network.
ncr_network = gis_util.get_street_network(dctlst_places, DATA_FOLDER, 'ncr')
# Re-load the graph from disk; this overwrites the value returned above.
ncr_network = ox.load_graphml(filename='ncr.graphml', folder=os.path.join(DATA_FOLDER, 'ncr'))

# Node shapefile of the same network, used later as the pool of candidate
# locations for simulated crashes and taxis.
gdf_ncr = gpd.read_file(os.path.join(DATA_FOLDER, 'ncr', 'nodes', 'nodes.shp'))

In [None]:
To simulate crashes until we have access to the actual crash data, this study makes use of these node points. So the node data points are also saved as a csv file. We sample 2000 random points (with replacement) from this nodes csv and 2000 time points in 2016 to simulate crashes.

In [4]:
# Round-trip the node table through CSV: the file is kept as a plain-text
# copy of the nodes, and the frame read back (first CSV column as index) is
# the pool from which crash and taxi locations are sampled.
gdf_ncr.to_csv(os.path.join(DATA_FOLDER, 'ncr', 'ncr.csv'))
df_delhi_points = pd.read_csv(os.path.join(DATA_FOLDER, 'ncr', 'ncr.csv'), index_col=0)

In [6]:
# Draw 2000 node *positions* (with replacement) for the simulated crashes.
# The values are consumed with `.iloc` further down, so they must be
# positions, not index labels; the original sampled labels, which is only
# correct while the index happens to be 0..n-1.
crash_loc_indices = np.random.choice(len(df_delhi_points), size=2000, replace=True)

In [7]:
# Candidate crash times: every 15 minutes in [2016-01-01 00:00, 2017-01-01 00:00).
# * The last grid point is 2016-12-31 23:45, so the interval is half-open as
#   stated in the text (the original grid included 2017-01-01 00:00).
# * The grid is deliberately timezone-naive: the sampled values are labelled
#   as IST by the later `tz_localize('Asia/Kolkata')` cell. Sampling from a
#   tz-aware index via numpy and then localizing again either shifts the
#   times by the UTC offset or raises "Already tz-aware", depending on the
#   pandas version.
time_points = pd.date_range(start='2016-01-01 00:00:00', end='2016-12-31 23:45:00', freq='15min')

In [8]:
# Simulated crash table: one row per event with a random time from
# `time_points` and the coordinates of a randomly chosen network node.
n_crashes = 2000
crash_nodes = df_delhi_points.iloc[crash_loc_indices]
df_crash_events = pd.DataFrame({
    'event_id': range(1, n_crashes + 1),
    'event_time': np.random.choice(time_points, size=n_crashes, replace=True),
    'event_lat': crash_nodes['lat'],
    'event_lng': crash_nodes['lon'],
})

The simulated crash points' time information should be in IST.

In [9]:
# Label the sampled event times as Indian Standard Time.
# NOTE(review): tz_localize only works on timezone-naive values, so this
# assumes 'event_time' is naive at this point -- confirm for the pandas
# version in use.
df_crash_events['event_time'] = pd.to_datetime(df_crash_events['event_time']).dt.tz_localize('Asia/Kolkata')

In [10]:
# Persist the simulated crashes; read back in the closest-hospital/taxi section.
df_crash_events.to_csv(os.path.join(DATA_FOLDER, 'crashes.csv'), index=False)

We create 10000 taxis and add one log entry for each of them at a random time point (with replacement) in [(12 am, 30th December 2015), (12 am, 1st January 2016)), with 10000 random locations chosen from the nodes csv file with replacement.

In [36]:
# Simulated taxi log: one log entry per taxi at a random node and time.
n_taxis = 10000

# Sample node *positions* because they are consumed with `.iloc` below
# (the original sampled index labels, which only works for a 0..n-1 index).
taxi_loc_indices = np.random.choice(len(df_delhi_points), size=n_taxis, replace=True)

# Candidate log times on a 30-second grid. The grid is timezone-naive on
# purpose: the values are labelled as IST by tz_localize below. Sampling from
# a tz-aware index via numpy and then localizing again either shifts the times
# by the UTC offset or raises "Already tz-aware", depending on pandas version.
# NOTE(review): the accompanying text says the window starts on 30th December
# 2015 while this grid starts on the 29th -- confirm which one is intended.
time_points = pd.date_range(start='2015-12-29 00:00:00', end='2015-12-31 23:59:00', freq='0.5min')

taxi_nodes = df_delhi_points.iloc[taxi_loc_indices]
df_taxi_log = pd.DataFrame({'log_id': range(1, n_taxis + 1),
                            'taxi_id': range(1, n_taxis + 1),
                            'log_time': np.random.choice(time_points, size=n_taxis, replace=True),
                            'log_lat': taxi_nodes['lat'],
                            'log_lng': taxi_nodes['lon']
                            })
df_taxi_log['log_time'] = pd.to_datetime(df_taxi_log['log_time']).dt.tz_localize('Asia/Kolkata')

# Every simulated taxi is flagged as on call.
df_taxi_log['log_on_call'] = True

Since each taxi is assumed to have numerous logs, and it makes sense to restrict it to taxis which are available for hire, we introduce the following fields:

* log_id - Uniquely identify each log. Helps keep track of multiple spatiotemporal logs for each taxi.
* log_on_call - To denote the taxi's availability

In [37]:
# Persist the simulated taxi log; read back in the closest-hospital/taxi section.
df_taxi_log.to_csv(os.path.join(DATA_FOLDER, 'taxi_log.csv'), index=False)

# Getting closest hospital & taxi information

In [6]:
# Reload the artefacts written by the earlier sections so that this part of
# the notebook can be run without re-running the simulation.
df_delhi_hospitals = pd.read_csv(os.path.join(DATA_FOLDER, 'delhi_hospitals_final_geocoded.csv'))
df_crash_events = pd.read_csv(os.path.join(DATA_FOLDER, 'crashes.csv'))
df_taxi_log = pd.read_csv(os.path.join(DATA_FOLDER, 'taxi_log.csv'))


# The timestamps were localized to Asia/Kolkata before being written, so the
# CSV strings carry a UTC offset. `utc=True` parses them straight to
# timezone-aware UTC, after which they are converted back to IST. The
# original `to_datetime(...).dt.tz_localize('UTC')` only works when parsing
# yields naive values and raises when the parsed values are already tz-aware.
df_taxi_log['log_time'] = pd.to_datetime(df_taxi_log['log_time'], utc=True).dt.tz_convert(
    'Asia/Kolkata')
df_crash_events['event_time'] = pd.to_datetime(df_crash_events['event_time'], utc=True).dt.tz_convert(
    'Asia/Kolkata')

# Street network graph saved by the download step.
ncr_network = ox.load_graphml(filename='ncr.graphml', folder=os.path.join(DATA_FOLDER, 'ncr'))

# Node shapefile of the same network.
gdf_ncr = gpd.read_file(os.path.join(DATA_FOLDER, 'ncr', 'nodes', 'nodes.shp'))

Our problem, finding the closest taxis and hospitals to crash locations, is similar to a modified closest pair of points problem. We have two sets (Crashes, Hospitals) or (Crashes, Taxis), and our objective is to match each point in set A (Crashes) with the closest point in set B (Hospitals or Taxis). Naive approaches will mean there will be (nCrash X nHospital + nCrash X nTaxi) computations. 

[Pandana](https://github.com/UDST/pandana) is an open source package that has built-in features to perform network level aggregations, i.e. buffer queries, which can then be used to reduce the subset to query from to obtain the closest n points of interest from each node in a graph. Pandana performs hundreds of thousands of network queries in under a second (for walking-scale distances) using a Pandas-like API. The computations are parallelized for use on multi-core computers using an underlying C library. The implication is that there will only be a single query (1 each for taxi and hospital) for each crash, on the aggregated list of closest points of interest to each node. This aggregated list can be thought of as a list of all nodes in the network, along with information on the k closest points of interest to each of them. These points of interest can be hospitals or taxi locations.

Let us visualise information on roads provided by OSM.

In [7]:
# Convert the graph into two GeoDataFrames in one call: with both `nodes`
# and `edges` left at their defaults, graph_to_gdfs returns (nodes, edges).
gdf_nodes, gdf_edges = ox.graph_to_gdfs(ncr_network)

In [8]:
# Peek at the first few road segments and the attributes OSM provides.
gdf_edges.head(n=5)

Unnamed: 0,access,area,bridge,geometry,highway,key,landuse,lanes,length,maxspeed,name,oneway,osmid,ref,service,tunnel,u,v,width
0,,,,"LINESTRING (77.1680173 28.5426036, 77.16793149...",residential,0,,,62.355284,,,False,7892104,,,,58047704,58047707,
1,,,,"LINESTRING (77.16769119999999 28.5430787, 77.1...",residential,0,,,64.212083,,,False,7892369,,,,58047707,58051020,
2,,,,"LINESTRING (77.16769119999999 28.5430787, 77.1...",residential,0,,,62.355284,,,False,7892104,,,,58047707,58047704,
3,,,,"LINESTRING (77.16769119999999 28.5430787, 77.1...",residential,0,,,23.925042,,,False,7892369,,,,58047707,2265700198,
4,,,,"LINESTRING (77.16437809999999 28.5383963, 77.1...",residential,0,,,48.454379,,JNU Ring Road,False,7892285,,,,58049717,4231408578,


Closest pair of points computations in graphs (networks) use weights (impedances) on edges as the sense in which points are close; in our case this is the distance or the time taken to travel. Distance alone cannot give a true estimate of the required time, since a trip that makes use of too many short side lanes can be slower than a longer path on an expressway. The speed limit is thus valuable information. This however is not available for most of this road network. We can still mimic this by assigning limits based on the type of the segments. This can then be used to weight the edges. Checking the types of road segments present,

In [9]:
# Normalise the road type to a string so that combined types (lists) become
# hashable and can be used as group-by / dictionary keys.
gdf_edges['highway'] = [str(road_type) for road_type in gdf_edges['highway']]

In [10]:
#gdf_edges['highway'].unique()

In [11]:
# Clean every raw OSM maxspeed value with the project helper.
gdf_edges['maxspeed'] = [util.tidy_maxspeed_tuple_to_int(raw_speed) for raw_speed in gdf_edges['maxspeed']]

Some road segments do have max speed information.

In [12]:
# Sample of the segments that do carry a maxspeed value.
gdf_edges.loc[gdf_edges['maxspeed'].notnull()].head(n=5)

Unnamed: 0,access,area,bridge,geometry,highway,key,landuse,lanes,length,maxspeed,name,oneway,osmid,ref,service,tunnel,u,v,width
241,,,,"LINESTRING (77.2388844 28.5780587, 77.2388935 ...",residential,0,,,302.863721,20.0,Prachin Shiv Mandir Road,False,38116089,,,,250100205,250100312,
266,,,,"LINESTRING (77.24312279999999 28.57865, 77.241...",residential,0,,,168.725184,20.0,Prachin Shiv Mandir Road,False,38116089,,,,250100307,250100312,
268,,,,"LINESTRING (77.24312279999999 28.57865, 77.245...",residential,0,,,682.872896,20.0,Prachin Shiv Mandir Road,False,38116089,,,,250100307,448444374,
270,,,,"LINESTRING (77.2413954 28.5786133, 77.24312279...",residential,0,,,168.725184,20.0,Prachin Shiv Mandir Road,False,38116089,,,,250100312,250100307,
272,,,,"LINESTRING (77.2413954 28.5786133, 77.2413382 ...",residential,0,,,302.863721,20.0,Prachin Shiv Mandir Road,False,38116089,,,,250100312,250100205,


In [13]:
#Getting the minimum maxspeed for each category of highways,

In [14]:
# Lowest observed maxspeed per road type, computed only from segments that
# have a maxspeed; used below as a conservative default for the rest.
edges_with_speed = gdf_edges.loc[gdf_edges['maxspeed'].notnull()]
min_speed_by_highway = edges_with_speed.groupby('highway')['maxspeed'].min()
dct_min_speed_by_category = dict(min_speed_by_highway)

Since we already have representative lowerbound speed limits for each of the categories, we can use this to impute the missing speed limits. Assuming the worst case, I proceed to replace missing maxspeed values with the least maxspeed value seen for that particular category.

In [15]:
def _lowest_known_speed(highway, speed_by_category):
    """Return the fallback maxspeed for one road-type string.

    Parameters
    ----------
    highway : str
        Either a single road type (e.g. ``'residential'``) or the string form
        of a list of road types (e.g. ``"['residential', 'tertiary']"``).
    speed_by_category : dict
        Maps a single road type to its lowest observed maxspeed.

    Returns
    -------
    float
        The category's speed when the type is known; otherwise the smallest
        known speed among the types of a combined entry; NaN when nothing
        applies.
    """
    speed = speed_by_category.get(highway, np.nan)
    if not math.isnan(speed):
        return speed

    try:
        # Combined types are stored as the str() of a list; parse it back.
        combo_types = ast.literal_eval(highway)
        candidate_speeds = [speed_by_category.get(highway_type, np.nan) for highway_type in combo_types]
    except (ValueError, SyntaxError, TypeError):
        # Not a parsable/iterable list of types (e.g. a plain unknown type
        # name): leave the speed missing. Only these expected failures are
        # caught, instead of the original bare `except: pass`.
        return np.nan

    known_speeds = [candidate for candidate in candidate_speeds if not math.isnan(candidate)]
    return min(known_speeds) if known_speeds else np.nan


# Fill every missing maxspeed in one assignment rather than one `.loc` write
# per row inside an iterrows() loop.
missing_speed = gdf_edges['maxspeed'].isnull()
if missing_speed.any():
    gdf_edges.loc[missing_speed, 'maxspeed'] = [
        _lowest_known_speed(highway, dct_min_speed_by_category)
        for highway in gdf_edges.loc[missing_speed, 'highway']
    ]

In [16]:
#gdf_edges.loc[gdf_edges['maxspeed'].isnull()]['highway'].unique()

Since primary_link has a speed limit of 40 and tertiary_link 30, we can fix secondary_link to a limit of 35.

In [17]:
# secondary_link gets 35, midway between primary_link (40) and
# tertiary_link (30); update both the edge table and the lookup.
is_secondary_link = gdf_edges['highway'] == 'secondary_link'
gdf_edges.loc[is_secondary_link, 'maxspeed'] = 35
dct_min_speed_by_category['secondary_link'] = 35

Since the rest of the segments are combinations of types, let us assign the lowest limit possible based on the types in the combination. For example, for a segment of type [A, B], let us assign min(min(A), min(B)).

Road is the only type that has no maxspeed. Let us investigate what these segments usually mean.

In [18]:
# Distinct street names among segments tagged with the generic 'road' type.
gdf_edges.loc[gdf_edges['highway'] == 'road', 'name'].unique()

array([nan, 'Upper gali'], dtype=object)

'Gali' is Hindi equivalent of street, but not as broad as the Hindi designation for broad streets, पथ (transliterated as 'path'). Also, OSM uses 'road' for a varied classification of path segments. So this is possibly a non homogeneous set. For this reason, I am assigning it a level higher than the lowest designation, residential, assigning it a maxspeed of 15. I do not assign it the lowermost designation, as that would mean that residential segments would then have the same preference as roads.

In [19]:
# Generic 'road' segments get a maxspeed of 15, as argued in the text above;
# update both the edge table and the lookup.
is_generic_road = gdf_edges['highway'] == 'road'
gdf_edges.loc[is_generic_road, 'maxspeed'] = 15
dct_min_speed_by_category['road'] = 15

Let us construct the time taken to traverse weights,

In [20]:
# Edge weight: segment length divided by its (possibly imputed) maxspeed.
# NOTE(review): the units of `length` and `maxspeed` are not converted here,
# so the result is a relative cost rather than a time in a standard unit --
# confirm this is acceptable for every downstream use.
gdf_edges['time_to_traverse'] = gdf_edges['length'].div(gdf_edges['maxspeed'])

In [21]:
# Save the edited edge table (with imputed maxspeed and traversal weight)
# alongside the downloaded network files.
ox.save_load.save_gdf_shapefile(gdf_edges, filename="edges_edited.shp", folder=os.path.join(DATA_FOLDER, 'ncr'))

We are now all set to proceed with setting up a Pandana network. We use this network for network related queries such as closest hospital and taxis from crash nodes.

In [None]:
# `twoway` is a single boolean for the whole network, not a per-edge list.
# The edge table produced from the osmnx graph already has one row per travel
# direction (e.g. both 58047704->58047707 and 58047707->58047704 appear in
# the preview above), so every row must be treated as one-directional. The
# original passed a per-edge list here, which is not a valid value for this
# parameter.
twoway = False
pdnet_ncr = pdna.Network(gdf_nodes["x"], gdf_nodes["y"], gdf_edges["u"], gdf_edges["v"],
                         gdf_edges[["time_to_traverse"]], twoway=twoway)

## Closest Pair matching computation

Pandana allows aggregated network queries on road networks with custom points by letting new 'points of interest' (POIs) be overlaid on the base network. The closest node, ignoring impedance, to each of these POIs is determined, and these base nodes are then used to compute distances from other nodes with impedance. For example, say we have a POI 'XYZ Hospital' to be added to the Delhi network. Pandana would choose the closest 'as the crow flies' node to this POI and assign this node as the POI's home node. Then, each of the other nodes' distance from the POI will be calculated from this node, taking account of the usual impedances, such as the length or maxspeed of the edges connecting them. Expanding this to a list of POIs, one feature of pandana is to compute an aggregated table for all nodes with the k closest POIs to them. This table can be used to get the closest POIs (say hospitals) from our nodes of interest (say crash locations).

Converting the street network to a pandana network object and adding hospitals as 'POI' to them,

In [91]:
# Register the hospitals as the "hospitals" POI category on the network,
# located by their longitude/latitude columns.
# NOTE(review): 1000000 and 3 are presumably the maximum search distance (in
# units of the edge weight) and the maximum number of POIs returned per
# node -- confirm against the installed pandana version's set_pois signature.
pdnet_ncr.set_pois("hospitals", 1000000, 3, df_delhi_hospitals['lng'], df_delhi_hospitals['lat'])

* To accelerate this step, and since this has a great potential to be done in parallel, we use multiprocessing.
* A situation to take note of, when allotting taxis to crashes, is that there can be a timepoint when multiple crashes occur all at once. With non-availability of taxis, the response time should be high. 
* In order to account for the situation above, we can divide the crashes into multiple chunks for parallel processing in such a way that the crashes between chunks are separated by at least 6 hours.
* The present version of code doesn't account for the situation described above.

# Computation system modules:

## Google keys repository management:

1. A dictionary object is used to keep track of the last time a key was used and number of times it was accessed in the day it was used for the last time.
2. Given a list of keys, this module will return a key that either hasn't been used in the past 24 hours, or a key which hasn't been used more than the API limit.
3. If no such key is present, this module will make the entire execution process wait till the key with the earliest last access time's quota is renewed. This key will be returned. For example, say we have keys (1, 2, 3) whose quota have all been exhausted. Lets say key 2 has the earliest last call time. So the program will wait for 24 hours after this last call time and return key 2.

## Get the next weekday: 

An underlying assumption is that trip durations have weekly and hourly patterns. Since trip duration is fetched from Google Directions API, which does not allow historic travel time requests, and since crash events are all in the past, trip duration calculator module will send requests for days in the future which have the same weekday and time of the day as the crash event. The current logic will post date it to a suitable timepoint within a week from the time the API call is made. For example, if a crash happened on Dec 1, 2016, 9 AM IST, which falls on a Thursday, API call request will have the date set to Mar 1, 2018, 9 AM IST. 

## Get nearest POIs:

**Arguments:** crash list,<br/>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Streetnetwork graph,<br/>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Poi name (can be hospitals or taxis,<br/>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;impedence - a measure that determines distance between points,<br/>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;npoi - number of closest pois to be returned for further analysis with google API,<br/>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;poi location logs - Taxi logs,<br/>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;taxi ids - list of taxi ids part of the program<br/>
                      

**Returns:** npoi - pois for each crash<br/>

This submodule matches each crash event with the npoi POIs which are closest to the crash location. Time taken to travel is the metric used in this calculation. Pandana's spatial aggregation feature, which relies on closest-pair-of-nodes algorithms on graphs, makes it easier to arrive at this without resorting to ncrash $*$ ntaxi or ncrash $*$ nhospital graph distance computations. While the closest hospital calculation is straightforward, for taxis the query set must be restricted to taxis that are available as per the logs prior to the crash event. An important assumption is that the taxi logs are at 1 minute frequency, and all taxis report their availability every minute, regardless of whether they are on or off service. 

## Get shortest travel times:

**Arguments:** crash & closest poi matches - returned by nearest POI calculator, <br/>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;dct_key_stats - google API key usage information register,<br/>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;crash list,<br/>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;poi logs,<br/>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;api keys,<br/>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;api_name - Google Directions,<br/>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;api_limit - 2500 for Google Directions every 24 hrs,<br/>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;poi_lat_col - column name in poi log table which has latitude info,<br/>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;poi_lng_col - column name in poi log table which has longitude info,<br/>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;poi_id_col - column name in poi log table which has log id<br/>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;crash list,<br/>
                      

**Returns** : minimum time required to get to the closest POI from each crash spot. This consists of 3 possible values, corresponding to the 'best_guess', 'optimistic' and 'pessimistic' scenarios.

Three API calls are made for each crash - POI pair, to fetch 'best_guess', 'optimistic' and 'pessimistic' times. The shortest times in each scenario, among all the POIs matched to a crash, are returned from this block.


Putting all the above modules together,

![title](Modules.png)