In [None]:
import math
import os

import geopandas as gpd
import h3 # h3 bins from uber
import pandas as pd

In [None]:
def create_crash_df(train_file = '../Inputs/Train.csv'):
    """Load the crash records CSV into a DataFrame.

    Parameters
    ----------
    train_file : str
        Path of the CSV file to read. Its ``datetime`` column is parsed
        into timestamps while loading.

    Returns
    -------
    pandas.DataFrame
        The file contents with ``datetime`` parsed as dates.
    """
    return pd.read_csv(train_file, parse_dates=['datetime'])

def create_temporal_features(df):
    """Add calendar and time-of-day columns derived from ``df["datetime"]``.

    The frame is modified in place and also returned, so both
    ``create_temporal_features(df)`` and ``df = create_temporal_features(df)``
    work.

    Columns added
    -------------
    time_window     : 1-8, index of the 3-hour slot the hour falls in
    time_window_str : label of that slot, e.g. ``"06-09"``
    day             : day of the month
    month           : three-letter English month abbreviation
    year            : calendar year
    weekday         : 0 (Monday) .. 6 (Sunday)

    Missing timestamps (NaT) produce missing values in the derived columns
    instead of raising.
    """
    dict_windows = {1: "00-03", 2: "03-06", 3: "06-09", 4: "09-12", 5: "12-15", 6: "15-18", 7: "18-21", 8: "21-24"}
    dict_months = {1: "Jan", 2: "Feb", 3: "Mar", 4: "Apr", 5: "May", 6: "Jun",
               7: "Jul", 8: "Aug", 9: "Sep", 10: "Oct", 11: "Nov", 12: "Dec"}

    # No-op for a column that is already datetime; converts date strings otherwise.
    stamps = pd.to_datetime(df["datetime"])

    # The .dt accessors work on the whole column at once (no per-row lambda)
    # and carry NaT through as NaN rather than failing in math.floor.
    df["time_window"] = stamps.dt.hour // 3 + 1
    df["time_window_str"] = df["time_window"].map(dict_windows)
    df["day"] = stamps.dt.day
    df["month"] = stamps.dt.month.map(dict_months)
    df["year"] = stamps.dt.year
    df["weekday"] = stamps.dt.weekday
    return df

def assign_hex_bin(df,lat_column="latitude",lon_column="longitude",resolutions=(5, 6, 7)):
    """Add one ``h3_zone_<res>`` column of H3 cell ids per requested resolution.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the coordinates; it is modified in place and returned.
    lat_column, lon_column : str
        Names of the latitude and longitude columns.
    resolutions : iterable of int
        H3 resolutions to index at. The default reproduces the original
        ``h3_zone_5``, ``h3_zone_6`` and ``h3_zone_7`` columns.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with the new columns.
    """
    # h3-py 4.x renamed geo_to_h3 to latlng_to_cell; use whichever the
    # installed version provides so the notebook runs on both.
    to_cell = getattr(h3, "latlng_to_cell", None) or getattr(h3, "geo_to_h3")

    # Pull the coordinates out once instead of building a row Series per
    # record for every resolution; this also works for an empty frame.
    lats = df[lat_column].tolist()
    lons = df[lon_column].tolist()
    for res in resolutions:
        df[f"h3_zone_{res}"] = [to_cell(lat, lon, res) for lat, lon in zip(lats, lons)]
    return df

def export_df_to_csv(df,path_file='../Inputs/train_h3.csv'):
    """Write ``df`` to ``path_file`` as CSV without the index column.

    Prints a one-line confirmation naming the file once it is written.
    """
    df.to_csv(path_or_buf=path_file, index=False)
    confirmation = 'file created {}'.format(path_file)
    print(confirmation)
    

In [None]:
# Build the `mdt` command lines for downloading Uber Movement data with OSM
# segment info, one pair of commands per month of 2018 and 2019.
# The commands are only printed (to be pasted into a shell), never run.
# Set PRINT_MDT_COMMANDS to True when you want them.
PRINT_MDT_COMMANDS = False

# (month, last day of that month). February is fixed at 28 days, which is
# correct for 2018 and 2019 but would be wrong for a leap year.
month_list = [('01','31'),
              ('02','28'),
              ('03','31'),
              ('04','30'),
              ('05','31'),
              ('06','30'),
              ('07','31'),
              ('08','31'),
              ('09','30'),
              ('10','31'),
              ('11','30'),
              ('12','31')]

if PRINT_MDT_COMMANDS:
    for year in ['2018','2019']:
        for month, end_day in month_list:
            # NOTE(review): this writes nairobi_<year>_<month>geojson.geojson, while a later
            # cell reads nairobi_2018_01_speeds.geojson — confirm which file name is intended.
            print(f'mdt speeds-to-geojson nairobi {year}-{month}-01 {year}-{month}-{end_day} --output=Inputs/nairobi_{year}_{month}geojson.geojson')
            print(f'mdt speeds-transform historical nairobi {year}-{month}-1 {year}-{month}-{end_day} --output=Inputs/nairobi_{year}_{month}_osm.csv')

In [None]:
def join_segment_files(path='../Inputs/', road_surveys='Segment_info.csv',segments_geometry='segments_geometry.geojson'):
    '''
        Load the survey data, load the segment geometry and join the two on ``segment_id``.

        path              -- directory containing both input files
        road_surveys      -- file name of the survey CSV inside ``path``
        segments_geometry -- file name of the segment geometry file inside ``path``

        Returns the geometry frame left-joined with the survey columns, plus the
        ``longitude``/``latitude`` of each geometry's centroid and the
        ``h3_zone_*`` columns added by ``assign_hex_bin``.
    '''
    # os.path.join works whether or not ``path`` ends with a separator;
    # plain string concatenation silently built a wrong file name without one.
    surveys_df = pd.read_csv(os.path.join(path, road_surveys))
    segment_locs = gpd.read_file(os.path.join(path, segments_geometry))
    # Left join: every geometry row is kept even when it has no survey row.
    segments_merged = pd.merge(segment_locs, surveys_df, on='segment_id', how='left')
    # Compute the centroids once and reuse them for both coordinates.
    # NOTE(review): assumes the geometries are in lon/lat degrees so that x is
    # longitude and y is latitude — confirm the CRS of the geometry file.
    centroids = segments_merged.geometry.centroid
    segments_merged["longitude"] = centroids.x
    segments_merged["latitude"] = centroids.y
    segments_merged = assign_hex_bin(segments_merged)
    return segments_merged

In [None]:
# Build the crash frame: load the CSV, then add the temporal and hex-bin columns.
crash_df = (
    create_crash_df(train_file = '../Inputs/Train.csv')
    .pipe(create_temporal_features)
    .pipe(assign_hex_bin)
)
#crash_df.head()

In [None]:
# Load and join the segment survey and geometry files from their default locations.
segments_merged = join_segment_files()

In [None]:
# Summary statistics of the merged segment frame.
segments_merged.describe()

In [None]:
# This needs work
# Collapse the segments to one row per resolution-7 hex: column-wise maximum of
# the attributes, with the coordinates replaced by the group mean.
zone7_groups = segments_merged.groupby(by='h3_zone_7')
segments_h3_zone_7 = zone7_groups.max()
# The coarser zone ids have to be taken per h3_zone_7 group so that they line up
# with this frame's index. Grouping by h3_zone_5 / h3_zone_6 produces a different
# index, so the assignment aligned on nothing and filled the columns with NaN
# (and the values were latitude maxima, not zone ids).
# NOTE(review): first() keeps the first id seen in each group — confirm that one
# id per resolution-7 hex is what is wanted here.
segments_h3_zone_7['h3_zone_5'] = zone7_groups['h3_zone_5'].first()
segments_h3_zone_7['h3_zone_6'] = zone7_groups['h3_zone_6'].first()
segments_h3_zone_7['latitude'] = zone7_groups['latitude'].mean()
segments_h3_zone_7['longitude'] = zone7_groups['longitude'].mean()
segments_h3_zone_7.head()

In [None]:
# Reload the two raw segment inputs on their own so their ids can be inspected.
path = '../Inputs/'
segments_geometry = 'segments_geometry.geojson'
road_segment_locs = gpd.read_file(f'{path}{segments_geometry}')
# `road_surveys` is bound to the survey DataFrame (not the file name).
road_surveys = pd.read_csv(f'{path}Segment_info.csv')

In [None]:
# Number of distinct segment ids in the geometry file.
road_segment_locs.segment_id.nunique()

In [None]:
# Number of distinct segment ids in the survey file.
road_surveys.segment_id.nunique()

In [None]:
def join_segment_crash_files(crash_data=None, segments=None, h3_zone='h3_zone_5'):
    '''
        Combine the crash data and the segment data on the chosen hex column.

        crash_data -- crash DataFrame; defaults to the notebook-level ``crash_df``
        segments   -- segment DataFrame; defaults to the notebook-level ``segments_merged``
        h3_zone    -- name of the hex-id column present in both frames

        Returns the left join of ``crash_data`` with ``segments``: every crash row
        is kept and repeated once per segment sharing its hex; crashes in a hex
        without segments get missing values in the segment columns.
    '''
    # Resolve the defaults at call time. Frames named in the signature were bound
    # once, when the defining cell ran, so later re-assignments of crash_df /
    # segments_merged were ignored and the definition failed if they did not exist yet.
    if crash_data is None:
        crash_data = crash_df
    if segments is None:
        segments = segments_merged
    # Add some groupby function here
    segment_crash_df = pd.merge(crash_data, segments, on=h3_zone, how='left')
    return segment_crash_df

In [None]:
# Join crashes to segments with the function's defaults (crash_df, segments_merged, 'h3_zone_5').
segment_crash_df = join_segment_crash_files()

In [None]:
segment_crash_df.head()

### The crash data and the segment data need to be grouped (aggregated) before this join makes sense.
### The issue of missing segments also still needs to be dealt with.


In [None]:
# Load the Uber Movement CSV with OSM segment info for January 2018.
uber_movement_osm = pd.read_csv('../Inputs/nairobi_2018_01_osm.csv')

In [None]:
# Preview the first rows.
uber_movement_osm.head()

In [None]:
# `parse_dates` is a pandas.read_csv argument, not a documented option of
# geopandas.read_file, so read the file first and convert the timestamp after.
geojsonfile = gpd.read_file('../Inputs/nairobi_2018_01_speeds.geojson')
# Guarded so the cell still loads the file when the column is absent.
if 'utc_timestamp' in geojsonfile.columns:
    geojsonfile['utc_timestamp'] = pd.to_datetime(geojsonfile['utc_timestamp'])

In [None]:
# Distinct values of the `osmhighway` column.
geojsonfile.osmhighway.unique()

In [None]:
# Number of distinct values in the `speed_mean_kph` column.
geojsonfile.speed_mean_kph.nunique()

In [None]:
# Preview the first rows of the speeds GeoJSON frame.
geojsonfile.head()