In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
import shapely
import shapely.wkt as wkt
import sys
sys.path.append('../../../Scripts/') # link to scripts
import preprocessing.translate
import utils.spatial_utils
import utils.temporal_utils

%matplotlib inline

# Load in data

In [2]:
# GLOBALS
# Target projection used for every layer in this notebook: EPSG:3347 ("Canada Lambert")
CAN_LAM = dict(init='epsg:3347')

In [3]:
# Locations of the MTL Trajet trip shapefiles, one per survey year
path_2016, path_2017 = (
    "../../../Data/mtl_trajet/mtl_trajet_2016.shp",
    "../../../Data/mtl_trajet/trajets_mtl_trajet_2017.shp",
)

In [4]:
# translate and reproject data (see reprojection_timezone_and_translation notebook for details)
def _ensure_can_lam(trips):
    """Return ``trips`` with a CRS declared and projected to ``CAN_LAM``.

    Parameters
    ----------
    trips : GeoDataFrame
        Trip layer as returned by ``preprocessing.translate.translate_data``.

    Returns
    -------
    GeoDataFrame
        The same layer, reprojected when its CRS is not already ``CAN_LAM``.
    """
    # initialise the CRS: a layer loaded without one is declared as WGS84 (EPSG:4326)
    if trips.crs == {}:
        print("initialising crs")
        trips.crs = {'init': 'epsg:4326'}

    # reproject data -- change_projection presumably targets EPSG:3347; confirm in utils.spatial_utils
    if not trips.crs == CAN_LAM:
        trips = utils.spatial_utils.change_projection(trips)
    return trips


gdf_2016 = preprocessing.translate.translate_data(path_2016)
gdf_2017 = preprocessing.translate.translate_data(path_2017)

# Same CRS handling for both survey years
gdf_2016 = _ensure_can_lam(gdf_2016)
gdf_2017 = _ensure_can_lam(gdf_2017)

initialising crs
translating data
translating data


In [None]:
%%time
## calculate start and end points (Canada Lambert)
gdf_2016 = gdf_2016.to_crs(CAN_LAM)
gdf_2016['start_can'] =  gdf_2016.geometry.apply(utils.spatial_utils.get_point_from_linestring, X=0) 
gdf_2016['end_can'] =  gdf_2016.geometry.apply(utils.spatial_utils.get_point_from_linestring, X=-1) 

gdf_2017 = gdf_2017.to_crs(CAN_LAM)
gdf_2017['start_can'] =  gdf_2017.geometry.apply(utils.spatial_utils.get_point_from_linestring, X=0) 
gdf_2017['end_can'] =  gdf_2017.geometry.apply(utils.spatial_utils.get_point_from_linestring, X=-1) 

In [5]:
# Montreal Points of interest
# The first CSV column is the row index written at export time; read it as the index
# so it does not show up as a stray "Unnamed: 0" data column.
mtl_pois = pd.read_csv("../../../Data/supplementary_data/POI/mtl_pois.csv", index_col=0)

# View data

In [7]:
# Preview the first five POI records
mtl_pois.head(n=5)

Unnamed: 0,venue_id,venue_name,lat,lon,city,address,categories_id,categories_name,verified,checkinsCount,usersCount,tipsCount,postcode
0,4bed60019868a5932e435d46,Pont de l'ÃŽle-aux-Tourtes,45.420522,-73.982794,Vaudreuil-Dorion,Autoroute 40_Vaudreuil-Dorion QC_Canada,4bf58dd8d48988d1df941735,Bridge,False,0,0,0,
1,4d475881f046a1cde64106f6,Harmoni Yoga,45.396776,-73.955805,,Canada,4bf58dd8d48988d102941735,Yoga Studio,False,0,0,0,
2,4c6734ddaebea59333a375d0,Terrasse Vaudreuil Bord De Leau,45.391709,-73.994914,,Canada,56aa371be4b08b9a8d5734c3,Waterfront,False,0,0,0,
3,58a610469c954845fa18c3ba,DÃ©panneur Du Coin,45.395748,-73.957807,Ile-Perrot,100 Grand Blvd_Ile-Perrot QC J7V 4W8_Canada,4d954b0ea243a5684a65b473,Convenience Store,True,0,0,0,J7V 4W8
4,4c9b67fd553cef3bfca5db77,Groupe Sutton - Distinction inc. - Ile Perrot,45.39428,-73.9659,Ile Perrot,"25 boulevard Don Quichotte,_Ile Perrot QC J7V ...",,,True,0,0,0,J7V 7X4


In [8]:
# Every distinct raw category label (NaN included)
pd.unique(mtl_pois['categories_name'])

array(['Bridge', 'Yoga Studio', 'Waterfront', 'Convenience Store', nan,
       'Warehouse Store', 'Hardware Store', 'Grocery Store',
       'Gas Station', 'Automotive Shop', 'Harbor / Marina',
       'Financial or Legal Service', 'Miscellaneous Shop',
       'Fast Food Restaurant', 'Restaurant', 'Arts & Crafts Store',
       'Train', 'Sandwich Place', 'Jewelry Store', 'Greek Restaurant',
       'Video Store', 'Italian Restaurant', 'Pizza Place',
       'Mexican Restaurant', 'Rental Car Location', 'Train Station',
       'Breakfast Spot', 'Japanese Restaurant', 'Pharmacy', 'Coffee Shop',
       'Farm', 'Field', 'High School', 'School', 'Factory',
       'Historic Site', "Dentist's Office", 'Office', 'Post Office',
       'Hotel', 'Veterinarian', 'Real Estate Office', 'Scenic Lookout',
       'Auto Dealership', 'Donut Shop', 'Pet Store', 'Fire Station',
       'College Soccer Field', 'College Gym', 'College Science Building',
       'College Library', 'College Academic Building', 'Lake',

In [16]:
# How many POIs are flagged as verified versus not
mtl_pois['verified'].value_counts()

False    49696
True      5894
Name: verified, dtype: int64

# Begin sub-categorising data

In [10]:
# Number of distinct raw categories before grouping
mtl_pois.categories_name.nunique()

647

In [11]:
# Keywords searched for (as substrings, in this order) inside a lower-cased category name
sub_categories = [
    'restaurant', 'office', 'school', 'store', 'gym', 'train', 'library',
    'museum', 'stadium', 'shop', 'station', 'hotel', 'bar', 'venue', 'education',
    'salon', 'college', 'farm', 'pool',
]

# Exact-name overrides for categories the substring search would mislabel.
# NOTE(review): 'Shop & Service' is the only key that is not lower-case, while
# categorise_data looks names up in lower case -- confirm this entry can match.
tricky_categories = {
    'parking': 'parking',
    'automotive shop': 'automotive shop',
    'barbershop': 'salon',
    'Shop & Service': 'automotive shop',
    'college library': 'library',
    'pub': 'bar',
    'public art': 'museum',
    'police station': 'police',
    'theme park': 'theme park',
    'tv station': 'tv station',
    'ev charging station': 'charging station',
    'pharmacy': 'shop',
    'hostel': 'hotel',
    'hotel pool': 'pool',
    'hotel bar': 'bar',
    'bus line': 'station',
    'medical center': 'healthcare',
    'bakery': 'shop',
    'breakfast spot': 'restaurant',
    'university': 'college',
    'general entertainment': 'entertainment',
    'coworking space': 'office',
    'tech startup': 'office',
}

In [12]:
def categorise_data(row, sub_categories, tricky_map=None):
    """Collapse a raw POI category name into a coarser sub-category.

    Rules are tried in this order, all on the lower-cased text of ``row``:

    1. exact match against the override table (result is capitalised);
    2. fixed keyword groups ('store' -> 'Shop', 'place'/'joint'/'caf'/'diner'
       -> 'Restaurant', 'health'/'clinic'/'hospital' -> 'Healthcare',
       'church'/'temple' -> 'Religious', 'spot'/'club'/'boutique' -> 'Entertainment');
    3. the first entry of ``sub_categories`` found as a substring (capitalised);
    4. otherwise ``row`` is returned unchanged (so NaN stays NaN).

    Parameters
    ----------
    row : object
        One raw category value; it is converted with ``str`` before matching.
    sub_categories : iterable of str
        Lower-case keywords for rule 3, in priority order.
    tricky_map : dict, optional
        Exact-name overrides for rule 1. Defaults to the notebook-level
        ``tricky_categories``.

    Returns
    -------
    object
        The sub-category label, or ``row`` itself when nothing matches.
    """
    if tricky_map is None:
        tricky_map = tricky_categories

    name = str(row).lower()

    # Names are compared in lower case, so the override keys must be too;
    # otherwise a mixed-case key such as 'Shop & Service' could never match.
    overrides = {str(key).lower(): value for key, value in tricky_map.items()}
    if name in overrides:
        return overrides[name].capitalize()

    keyword_groups = (
        (('store',), 'Shop'),
        (('place', 'joint', 'caf', 'diner'), 'Restaurant'),
        (('health', 'clinic', 'hospital'), 'Healthcare'),
        (('church', 'temple'), 'Religious'),
        (('spot', 'club', 'boutique'), 'Entertainment'),
    )
    for keywords, label in keyword_groups:
        if any(keyword in name for keyword in keywords):
            return label

    for sub_cat in sub_categories:
        if sub_cat in name:
            return sub_cat.capitalize()
    return row

In [13]:
# Derive the coarse sub-category for every POI from its raw category name
mtl_pois['sub_category'] = mtl_pois['categories_name'].apply(
    lambda name: categorise_data(name, sub_categories))

In [14]:
# Number of distinct labels left after grouping
mtl_pois.sub_category.nunique()

304

In [15]:
# The 20 most frequent sub-categories
mtl_pois['sub_category'].value_counts().head(20)

Shop                                        12596
Restaurant                                   7501
Office                                       5155
Station                                      1552
Building                                     1513
Residential Building (Apartment / Condo)     1259
Entertainment                                1013
Bar                                           960
College                                       947
School                                        909
Park                                          902
Automotive shop                               880
Healthcare                                    817
Bank                                          763
Gym                                           708
Bus Stop                                      507
Religious                                     491
Factory                                       476
Road                                          434
Spa                                           313


In [17]:
# Sub-category frequencies among verified POIs only
mtl_pois.loc[mtl_pois['verified'] == True, 'sub_category'].value_counts()

Shop                      2386
Restaurant                 912
Station                    291
Office                     253
Bank                       204
                          ... 
Moving Target                1
Transportation Service       1
Airport Service              1
Daycare                      1
Laser Tag                    1
Name: sub_category, Length: 135, dtype: int64

# Turn data into a GeoDataFrame

In [None]:
## keep only the verified POIs for this example
# reset_index() keeps the previous row labels in a new 'index' column
verified_mask = mtl_pois['verified'] == True
ver_mtl_pois = mtl_pois.loc[verified_mask].reset_index()

In [None]:
# Build one point per POI from its coordinates (x = lon, y = lat)
poi_points = [
    shapely.geometry.Point(lon, lat)
    for lon, lat in zip(ver_mtl_pois['lon'], ver_mtl_pois['lat'])
]
# Wrap in a GeoDataFrame declared as WGS84 (EPSG:4326), then project to Canada Lambert
ver_mtl_pois = gpd.GeoDataFrame(
    ver_mtl_pois.assign(geometry=poi_points),
    crs={'init':'epsg:4326'},
).to_crs(CAN_LAM)

In [None]:
# Peek at the first two projected POIs
ver_mtl_pois.iloc[:2]

# Add buffer of 50 meters

In [None]:
# Buffer radius in CRS units; the layer is in Canada Lambert (EPSG:3347), i.e. metres
POI_BUFFER_M = 50

# Only buffer while the geometries are still points, so re-running this cell
# does not grow the polygons by another 50 m each time.
if (ver_mtl_pois.geom_type == 'Point').all():
    ver_mtl_pois['geometry'] = ver_mtl_pois.geometry.buffer(POI_BUFFER_M)

In [None]:
# NOTE(review): this cell originally read from `gdf`, which is never defined in the
# notebook (NameError on a fresh kernel). Both survey years are combined here --
# confirm that is the intent and that id_trip is unique across years.
gdf = pd.concat([gdf_2016, gdf_2017], ignore_index=True)


def _trip_points(trips, column):
    """Return a GeoDataFrame of ``id_trip`` plus the points stored in ``column``.

    Parameters
    ----------
    trips : DataFrame
        Trip table holding ``id_trip`` and the point column.
    column : str
        Name of the point column ('start_can' or 'end_can').

    Returns
    -------
    GeoDataFrame
        Columns ``id_trip`` and ``geometry``, with CRS ``CAN_LAM`` (the points were
        derived from trip lines already projected to ``CAN_LAM``).
    """
    points = pd.DataFrame(trips[['id_trip', column]]).rename(columns={column: 'geometry'})
    # The point columns are parsed as WKT text; values that are already geometry
    # objects are passed through unchanged.
    points['geometry'] = points['geometry'].apply(
        lambda geom: wkt.loads(geom) if isinstance(geom, str) else geom)
    return gpd.GeoDataFrame(points, geometry='geometry', crs=CAN_LAM)


start_points = _trip_points(gdf, 'start_can')
end_points = _trip_points(gdf, 'end_can')

# Spatially join trip end points with the buffered POIs

In [None]:
# Left spatial join of trip end points onto the POI layer, then count the
# joined rows per (trip, POI sub-category) pair
poi_and_end = (
    gpd.sjoin(end_points, ver_mtl_pois, how='left')
    .groupby(['id_trip', 'sub_category'])
    .agg({'sub_category': 'count'})
)

In [None]:
# Keep the renamed frame: the value column otherwise shares its name with the
# 'sub_category' index level. Renaming again on a re-run is a no-op.
poi_and_end = poi_and_end.rename(columns={'sub_category':'count'})
poi_and_end.head()

In [None]:
# Pivot the innermost index level (sub_category) into columns: one row per trip
unstacked_pois = poi_and_end.unstack(level=-1)

In [None]:
# Show the table with missing trip/sub-category combinations as 0 (display only; not stored)
unstacked_pois.fillna(value=0)