# Summary
details the creation of Rush hour and City Labels

In [54]:
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import sys
import shapely
import shapely.wkt
import datetime
import pandas as pd
sys.path.append('../../../Scripts/') # link to scripts
import utils.spatial_utils
%matplotlib inline

In [2]:
CAN_LAM = {'init':'epsg:3347'}

In [3]:
# path to the preprocessed MTL Trajet data 
path_2016 = "../../../Data/mtl_trajet/mtl_trajet_2016_translated.shp"
path_2017 = "../../../Data/mtl_trajet/mtl_trajet_2017_translated.shp"

In [4]:
gdf_2016 = gpd.read_file(path_2016)
gdf_2017 = gpd.read_file(path_2017) 

# Downtown

In [5]:
## downtown
mtl_regions = gpd.read_file('../../../Data/shapes/mtl_regions.json')
city_of_montreal = mtl_regions.loc[(mtl_regions.AIRE > 0) & (mtl_regions.TYPE == 'Arrondissement')]

## Get start and end points

In [6]:
%%time
## calculate start and end points (WGS84)
gdf_2016 = gdf_2016.to_crs(crs={'init':'epsg:4326'})
gdf_2016['start_wgs'] =  gdf_2016.geometry.apply(utils.spatial_utils.get_point_from_linestring, X=0) 
gdf_2016['end_wgs'] =  gdf_2016.geometry.apply(utils.spatial_utils.get_point_from_linestring, X=-1) 

## calculate start and end points (Canada Lambert)
gdf_2016 = gdf_2016.to_crs(CAN_LAM)
gdf_2016['start_can'] =  gdf_2016.geometry.apply(utils.spatial_utils.get_point_from_linestring, X=0) 
gdf_2016['end_can'] =  gdf_2016.geometry.apply(utils.spatial_utils.get_point_from_linestring, X=-1) 


## calculate start and end points (WGS84)
gdf_2017 = gdf_2017.to_crs(crs={'init':'epsg:4326'})
gdf_2017['start_wgs'] =  gdf_2017.geometry.apply(utils.spatial_utils.get_point_from_linestring, X=0) 
gdf_2017['end_wgs'] =  gdf_2017.geometry.apply(utils.spatial_utils.get_point_from_linestring, X=-1) 

## calculate start and end points (Canada Lambert)
gdf_2017 = gdf_2017.to_crs(CAN_LAM)
gdf_2017['start_can'] =  gdf_2017.geometry.apply(utils.spatial_utils.get_point_from_linestring, X=0) 
gdf_2017['end_can'] =  gdf_2017.geometry.apply(utils.spatial_utils.get_point_from_linestring, X=-1) 

CPU times: user 35min 38s, sys: 34.9 s, total: 36min 13s
Wall time: 39min 18s


In [44]:
%%time
# convert the point columns back into a Point class
gdf_2016['start_wgs'] = gdf_2016['start_wgs'].apply(shapely.geometry.Point)
gdf_2016['start_can'] = gdf_2016['start_can'].apply(shapely.geometry.Point)
gdf_2016['end_wgs'] = gdf_2016['end_wgs'].apply(shapely.geometry.Point)
gdf_2016['end_can'] = gdf_2016['end_can'].apply(shapely.geometry.Point)

gdf_2017['start_wgs'] = gdf_2017['start_wgs'].apply(shapely.geometry.Point)
gdf_2017['start_can'] = gdf_2017['start_can'].apply(shapely.geometry.Point)
gdf_2017['end_wgs'] = gdf_2017['end_wgs'].apply(shapely.geometry.Point)
gdf_2017['end_can'] = gdf_2017['end_can'].apply(shapely.geometry.Point)

CPU times: user 17 s, sys: 1.37 s, total: 18.4 s
Wall time: 19.9 s


In [45]:
%%time
gdf_2016['start_downtown'] = gdf_2016['start_can'].apply(lambda row: any(city_of_montreal.intersects(row)))
gdf_2016['end_downtown'] = gdf_2016['end_can'].apply(lambda row: any(city_of_montreal.intersects(row)))

gdf_2017['start_downtown'] = gdf_2017['start_can'].apply(lambda row: any(city_of_montreal.intersects(row)))
gdf_2017['end_downtown'] = gdf_2017['end_can'].apply(lambda row: any(city_of_montreal.intersects(row)))

CPU times: user 9min 35s, sys: 4.47 s, total: 9min 40s
Wall time: 10min 36s


In [46]:
print(gdf_2016.start_downtown.value_counts())
print(gdf_2016.end_downtown.value_counts())

print(gdf_2017.start_downtown.value_counts())
print(gdf_2017.end_downtown.value_counts())

True     213076
False     62548
Name: start_downtown, dtype: int64
True     211945
False     63679
Name: end_downtown, dtype: int64
True     159522
False     25763
Name: start_downtown, dtype: int64
True     158477
False     26808
Name: end_downtown, dtype: int64


# Rush hour

In [55]:
if not type(gdf_2016.starttime[0]) == datetime.datetime or not type(gdf_2017.starttime[0]) == datetime.datetime:
    print("converting start and end timestamps to datetime objects")
    gdf_2016['starttime'] = pd.to_datetime(gdf_2016['starttime'])
    gdf_2016['endtime'] = pd.to_datetime(gdf_2016['endtime'])
    gdf_2017['starttime'] = pd.to_datetime(gdf_2017['starttime'])
    gdf_2017['endtime'] = pd.to_datetime(gdf_2017['endtime'])

converting start and end timestamps to datetime objects


In [56]:
gdf_2016['weekday'] = gdf_2016.starttime.apply(lambda dt: dt.dayofweek < 5)
gdf_2017['weekday'] = gdf_2017.starttime.apply(lambda dt: dt.dayofweek < 5)

In [57]:
print(gdf_2016.weekday.value_counts())
print(gdf_2017.weekday.value_counts())

True     211368
False     64256
Name: weekday, dtype: int64
True     137576
False     47709
Name: weekday, dtype: int64


In [58]:
rush_hour_times = {'morning_min':6,'morning_max':10,'evening_min':15, 'evening_max':19}
def rushhour(row):
    """
    function to find out whether trip is in rush hour or not
    """
    start = False
    end = False
    through = False
    if not row.weekday:
        return start, end
    
    if row.starttime.hour >= rush_hour_times['morning_min'] and row.starttime.hour <= rush_hour_times['morning_max']\
    or row.starttime.hour >= rush_hour_times['evening_min'] and row.starttime.hour <= rush_hour_times['evening_max']:
        start = True
    if row.endtime.hour >= rush_hour_times['morning_min'] and row.endtime.hour <= rush_hour_times['morning_max']\
    or row.endtime.hour >= rush_hour_times['evening_min'] and row.endtime.hour <= rush_hour_times['evening_max']:
        end = True
        
    # check if trip passed through rush hours  
    if start and end:
        if row.starttime.hour >= rush_hour_times['morning_min'] and row.endtime.hour <= rush_hour_times['morning_max']\
        or row.starttime.hour >= rush_hour_times['evening_min'] and row.endtime.hour <= rush_hour_times['evening_max']:
            through = True
        
        
    return start, end, through
    

In [59]:
gdf_2016['stend_rh'] = gdf_2016.apply(rushhour, axis=1)
gdf_2017['stend_rh'] = gdf_2017.apply(rushhour, axis=1)

In [60]:
gdf_rushhour_16 = gdf_2016['stend_rh'].apply(pd.Series)
gdf_rushhour_17 = gdf_2017['stend_rh'].apply(pd.Series)

In [61]:
gdf_rushhour_16.columns = ['startrush', 'endrush', 'thrurush']
gdf_rushhour_17.columns = ['startrush', 'endrush', 'thrurush']

In [62]:
gdf_2016 = pd.concat([gdf_2016, gdf_rushhour_16], axis=1)
gdf_2017 = pd.concat([gdf_2017, gdf_rushhour_17], axis=1)

In [63]:
gdf_2016

Unnamed: 0,id_trip,avg_speed,duration,mode,purpose,n_coord,segments,starttime,endtime,geometry,...,end_wgs,start_can,end_can,start_downtown,end_downtown,weekday,stend_rh,startrush,endrush,thrurush
0,1724206,4.4,460,walking,returning_home,12,"[{""id"": 1150192, ""source"": ""geobase_mtl""}, {""i...",2016-09-07 20:37:26,2017-09-18 04:16:58,LINESTRING (7628287.236741195 1247680.17623496...,...,POINT (-73.600218484 45.53927656139993),POINT (7628287.236741195 1247680.17623496),POINT (7628055.645262061 1247949.380617326),True,True,True,"(False, False, False)",False,False,False
1,1724208,10.7,2146,combination,work,120,"[{""id"": 1140016, ""source"": ""geobase_mtl""}, {""i...",2016-09-08 07:43:23,2017-09-18 06:17:46,LINESTRING (7627830.069607561 1247172.27553199...,...,POINT (-73.57808240529999 45.49740148689994),POINT (7627830.069607561 1247172.275531994),POINT (7631077.858255229 1243916.893272278),True,True,True,"(True, True, True)",True,True,True
2,1889461,15.4,447,public_transport,leisure,36,"[{""id"": 1390715, ""source"": ""geobase_mtl""}, {""i...",2016-09-08 19:46:14,2017-09-18 09:30:24,LINESTRING (7632055.840015981 1247584.15415876...,...,POINT (-73.5724489686 45.53516780669992),POINT (7632055.840015981 1247584.154158761),POINT (7630297.958008135 1248128.900960694),True,True,True,"(True, True, True)",True,True,True
3,1724219,16.8,591,car,returning_home,45,"[{""id"": 1210250, ""source"": ""geobase_mtl""}, {""i...",2016-09-08 21:41:37,2017-09-18 10:02:50,LINESTRING (7630236.131991105 1247926.25343474...,...,POINT (-73.59958149539999 45.54000781329992),POINT (7630236.131991105 1247926.253434745),POINT (7628080.704474011 1248042.784601376),True,True,True,"(False, True, False)",False,True,False
4,2071985,6.9,279,combination,pick_up_drop_off,12,"[{""id"": 1140287, ""source"": ""geobase_mtl""}, {""i...",2016-09-09 16:49:12,2017-09-18 10:18:40,LINESTRING (7628479.88311419 1247193.386491392...,...,POINT (-73.59568662480001 45.53413090109994),POINT (7628479.88311419 1247193.386491392),POINT (7628564.58366184 1247494.516726829),True,True,True,"(True, True, True)",True,True,True
5,2071991,12.4,844,cycling,food_drink,56,"[{""id"": 1150004, ""source"": ""geobase_mtl""}, {""i...",2016-09-09 18:22:22,2017-09-18 10:42:13,LINESTRING (7628002.075274518 1247761.31724215...,...,POINT (-73.62516750650001 45.53706117749991),POINT (7628002.075274518 1247761.31724215),POINT (7626229.644805726 1247149.931923375),True,True,True,"(True, True, True)",True,True,True
6,1667922,4.2,1211,cycling,food_drink,29,"[{""id"": 1240379, ""source"": ""geobase_mtl""}, {""i...",2016-09-09 23:41:59,2017-09-18 10:47:52,LINESTRING (7628517.528697009 1246292.61362128...,...,POINT (-73.5898306544 45.52093026759992),POINT (7628517.528697009 1246292.613621284),POINT (7629431.778009225 1246198.082987967),True,True,True,"(False, True, False)",False,True,False
7,1667924,3.9,1330,walking,returning_home,29,"[{""id"": 1220168, ""source"": ""geobase_mtl""}, {""i...",2016-09-10 00:10:40,2017-09-18 10:53:49,LINESTRING (7629840.839065068 1246606.40804376...,...,POINT (-73.5739219686 45.53360049689993),POINT (7629840.839065068 1246606.40804376),POINT (7630236.131991105 1247926.253434745),True,True,False,"(False, False)",False,False,
8,2071994,9.8,930,cycling,returning_home,49,"[{""id"": 1120087, ""source"": ""geobase_mtl""}, {""i...",2016-09-09 20:04:18,2017-09-18 10:54:50,LINESTRING (7626472.681291105 1247124.48181703...,...,POINT (-73.600218484 45.53927656139993),POINT (7626472.681291105 1247124.481817035),POINT (7628055.645262061 1247949.380617326),True,True,True,"(False, True, False)",False,True,False
9,2072003,4.8,1266,walking,food_drink,34,"[{""id"": 5640, ""source"": ""reseau_cyclable""}, {""...",2016-09-10 10:29:44,2017-09-18 10:57:01,LINESTRING (7628222.92791587 1247461.407682431...,...,POINT (-73.60437498269999 45.52495198219995),POINT (7628222.92791587 1247461.407682431),POINT (7628197.239168105 1246306.438952793),True,True,False,"(False, False)",False,False,
