# Summary
This notebook details the creation of the rush-hour and city (downtown) labels for the MTL Trajet 2016 and 2017 trips.

In [1]:
import sys

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shapely

sys.path.append('../../../Scripts/') # link to scripts
import utils.spatial_utils

%matplotlib inline

In [2]:
# CRS spec for EPSG:3347, used below as the "Canada Lambert" projection.
CAN_LAM = dict(init='epsg:3347')

In [3]:
# Locations of the preprocessed MTL Trajet shapefiles, one per survey year.
path_2016, path_2017 = (
    "../../../Data/mtl_trajet/mtl_trajet_2016_translated.shp",
    "../../../Data/mtl_trajet/mtl_trajet_2017_translated.shp",
)

In [4]:
# Load each year's trips into its own GeoDataFrame (2016 first, then 2017).
gdf_2016, gdf_2017 = (gpd.read_file(shp_path) for shp_path in (path_2016, path_2017))

# Downtown

In [5]:
## downtown
# Region polygons; keep only rows typed 'Arrondissement' that have a positive AIRE.
mtl_regions = gpd.read_file('../../../Data/shapes/mtl_regions.json')
has_area = mtl_regions.AIRE > 0
is_arrondissement = mtl_regions.TYPE == 'Arrondissement'
city_of_montreal = mtl_regions.loc[has_area & is_arrondissement]

## Get start and end points

In [None]:
%%time
## calculate start and end points (WGS84)
gdf_2016 = gdf_2016.to_crs(crs={'init':'epsg:4326'})
gdf_2016['start_wgs'] =  gdf_2016.geometry.apply(utils.spatial_utils.get_point_from_linestring, X=0) 
gdf_2016['end_wgs'] =  gdf_2016.geometry.apply(utils.spatial_utils.get_point_from_linestring, X=-1) 

## calculate start and end points (Canada Lambert)
gdf_2016 = gdf_2016.to_crs(CAN_LAM)
gdf_2016['start_can'] =  gdf_2016.geometry.apply(utils.spatial_utils.get_point_from_linestring, X=0) 
gdf_2016['end_can'] =  gdf_2016.geometry.apply(utils.spatial_utils.get_point_from_linestring, X=-1) 


## calculate start and end points (WGS84)
gdf_2017 = gdf_2017.to_crs(crs={'init':'epsg:4326'})
gdf_2017['start_wgs'] =  gdf_2017.geometry.apply(utils.spatial_utils.get_point_from_linestring, X=0) 
gdf_2017['end_wgs'] =  gdf_2017.geometry.apply(utils.spatial_utils.get_point_from_linestring, X=-1) 

## calculate start and end points (Canada Lambert)
gdf_2017 = gdf_2017.to_crs(CAN_LAM)
gdf_2017['start_can'] =  gdf_2017.geometry.apply(utils.spatial_utils.get_point_from_linestring, X=0) 
gdf_2017['end_can'] =  gdf_2017.geometry.apply(utils.spatial_utils.get_point_from_linestring, X=-1) 

In [None]:
%%time
gdf_2016['start_downtown'] = gdf_2016['start_can'].apply(lambda row: any(city_of_montreal.intersects(row)))
gdf_2016['end_downtown'] = gdf_2016['end_can'].apply(lambda row: any(city_of_montreal.intersects(row)))

gdf_2017['start_downtown'] = gdf_2017['start_can'].apply(lambda row: any(city_of_montreal.intersects(row)))
gdf_2017['end_downtown'] = gdf_2017['end_can'].apply(lambda row: any(city_of_montreal.intersects(row)))

In [None]:
# Tally the downtown flags: start then end, 2016 then 2017.
for trips in (gdf_2016, gdf_2017):
    for flag_col in ('start_downtown', 'end_downtown'):
        print(trips[flag_col].value_counts())

In [13]:
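# # NOTE(review): disabled cell -- `gdf` is not defined anywhere in the visible notebook; confirm the target frame before re-enabling.
# # convert the point columns (WKT strings) back into shapely Point objects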
# %%time
# gdf['start_wgs'] = gdf['start_wgs'].apply(shapely.wkt.loads)
# gdf['start_can'] = gdf['start_can'].apply(shapely.wkt.loads)
# gdf['end_wgs'] = gdf['end_wgs'].apply(shapely.wkt.loads)
# gdf['end_can'] = gdf['end_can'].apply(shapely.wkt.loads)

# Rush hour

In [None]:
# Flag trips whose start falls on a weekday (dayofweek < 5).
# assumes starttime holds pandas Timestamps (Monday=0 ... Sunday=6) -- TODO confirm
gdf_2016['weekday'] = gdf_2016.starttime.apply(lambda dt: dt.dayofweek < 5)
# Fixed: this line previously read `gdf.starttime`, but no `gdf` exists here;
# the 2017 flag must be derived from the 2017 frame's own start times.
gdf_2017['weekday'] = gdf_2017.starttime.apply(lambda dt: dt.dayofweek < 5)

In [None]:
# Weekday / weekend split per year.
for trips in (gdf_2016, gdf_2017):
    print(trips['weekday'].value_counts())

In [None]:
# Inclusive hour-of-day bounds (0-23) of the morning and evening rush windows.
rush_hour_times = {'morning_min':6,'morning_max':10,'evening_min':15, 'evening_max':19}
def rushhour(row):
    """
    Classify a trip against the rush-hour windows in ``rush_hour_times``.

    Parameters
    ----------
    row : object
        Must expose ``weekday`` (truthy for weekday trips) and ``starttime`` /
        ``endtime`` objects with an ``hour`` attribute.

    Returns
    -------
    tuple of (bool, bool, bool)
        ``(start, end, through)``:
        * start   -- the start hour lies inside the morning or evening window
        * end     -- the end hour lies inside the morning or evening window
        * through -- start and end are both in rush hour, and the start hour is
          at/after a window's lower bound while the end hour is at/before that
          same window's upper bound

        Non-weekday trips always yield ``(False, False, False)``.

    Notes
    -----
    Only the hour of day is compared (bounds inclusive); dates are ignored.
    """
    if not row.weekday:
        # Fixed: this used to return a 2-tuple, so weekend rows produced a
        # different arity than weekday rows when expanded into three columns.
        return False, False, False

    start_hour = row.starttime.hour
    end_hour = row.endtime.hour
    windows = (
        (rush_hour_times['morning_min'], rush_hour_times['morning_max']),
        (rush_hour_times['evening_min'], rush_hour_times['evening_max']),
    )

    start = any(low <= start_hour <= high for low, high in windows)
    end = any(low <= end_hour <= high for low, high in windows)

    # check if trip passed through rush hours
    through = start and end and any(
        low <= start_hour and end_hour <= high for low, high in windows
    )

    return start, end, through
    

In [None]:
# Row-wise rush-hour classification; each cell of 'stend_rh' holds the tuple from rushhour().
for trips in (gdf_2016, gdf_2017):
    trips['stend_rh'] = trips.apply(rushhour, axis=1)

In [None]:
# Expand the per-row tuples in 'stend_rh' into one column per tuple element.
gdf_rushhour_16, gdf_rushhour_17 = (
    trips['stend_rh'].apply(pd.Series) for trips in (gdf_2016, gdf_2017)
)

In [None]:
# Name the expanded columns in the order rushhour() returns them.
for rush_flags in (gdf_rushhour_16, gdf_rushhour_17):
    rush_flags.columns = ['startrush', 'endrush', 'thrurush']

In [None]:
# Attach the rush-hour flag columns to each year's trips (column-wise, aligned on index).
# Note: re-running this cell appends the flag columns again.
gdf_2016, gdf_2017 = (
    pd.concat([trips, rush_flags], axis=1)
    for trips, rush_flags in ((gdf_2016, gdf_rushhour_16), (gdf_2017, gdf_rushhour_17))
)