## How to get this data

In [1]:
def get_opening_hours_from_osm_data(path_to_osm_geojson):
    """
        path_to_osm_geojson (geojson): path to the geojson file. We get data from https://download.bbbike.org/osm/bbbike/ 
    """

    f = open(path_to_osm_geojson, "r")
    all_lines = f.readlines()
    print("Number of lines in file:", len(all_lines))

    line_index = 0
    opening_hours_indexes = []
    
    f = open(path_to_osm_geojson, "r")
    for x in f:
        if 'opening_hours' in x:
            opening_hours_indexes.append(line_index)
        line_index += 1
    print("Number of opening_hours entries:", len(opening_hours_indexes))
    return all_lines, opening_hours_indexes


def write_opening_hours_to_geojson(path_to_output_geojson, all_lines, opening_hours_indexes):
    """
        all_lines (list): all lines in the overall file
        
    """
    with open(path_to_output_geojson, "w+") as my_file:
        my_file.write(all_lines[0])
        for line in opening_hours_indexes[:-1]:
            my_file.write(all_lines[line])
        # remove comma from final line
        final_line = all_lines[opening_hours_indexes[-1]].replace(",\n","\n")
        my_file.write(final_line)
        my_file.write(all_lines[-1])
        my_file.close()


In [2]:
# all_lines, opening_hours_indexes = get_opening_hours_from_osm_data(path_to_osm_geojson="../Data/shapes/Montreal.osm.geojson")
# write_opening_hours_to_geojson(all_lines=all_lines, opening_hours_indexes=opening_hours_indexes, path_to_output_geojson="../Data/model_inputs/mtl_opening_hours.geojson")
##then
# COLUMNS_TO_USE = ['name','opening_hours','amenity','tourism', 'shop', 'healthcare', 'leisure', 'sport','craft', 'building','geometry']
# all_opening_hours[COLUMNS_TO_USE].to_csv('../Data/model_inputs/mtl_opening_hours_cleaned.csv',index=False, encoding='utf-8')

# Opening Hour analysis

In [122]:
import geopandas as gpd
import pandas as pd
import shapely
import datetime
import re
import numpy as np

In [4]:
all_opening_hours = gpd.read_file("../../Data/model_inputs/mtl_opening_hours.geojson")

X_all = pd.read_csv('../../Data/model_inputs/gdf_2017_X.csv')
y_all = pd.read_csv('../../Data/model_inputs/gdf_2017_y.csv')


In [5]:
all_opening_hours = all_opening_hours.dropna(subset=['opening_hours'])

In [6]:
all_opening_hours.head()

Unnamed: 0,url,name,phone,name:en,tourism,wikidata,wikipedia,wheelchair,alt_name:en,alt_name:fr,...,u-pick,building:part,fuel:octane_92,fuel:octane_98,layer,lit,service:vehicle:tyres,service:vehicle:suspension,playground:theme,geometry
0,http://www.mbam.qc.ca,Musée des Beaux-Arts de Montréal,+1 514 285 2000,Montreal Museum of Fine Arts,museum,Q860812,en:Montreal Museum of Fine Arts,yes,Museum of Fine Arts,Musée des Beaux-Arts,...,,,,,,,,,,POINT (-73.57940 45.49870)
1,,Noodles Star,+1-514-932-2888,,,,,limited,,,...,,,,,,,,,,POINT (-73.58042 45.49307)
2,,,,,,,,,,,...,,,,,,,,,,POINT (-73.51726 45.49226)
3,,Maxi,+1-450-672-3201,,,,,,,,...,,,,,,,,,,POINT (-73.46781 45.46025)
4,,Ben & Florentine,,,,,,,,,...,,,,,,,,,,POINT (-73.46731 45.46930)


In [7]:
all_opening_hours['opening_hours']

0                               We-Su 10:00-17:00; Mo off
1                                       Mo-Su 11:00-23:00
2                                              6:00-22:00
3                                       Mo-Su 08:00-22:00
4                       Mo-Sa 06:00-15:00, Su 07:00-15:00
                              ...                        
2044                 Mo-Fr 09:00-18:00; Sa-Su 09:00-17:00
2045                    Mo-Fr 08:30-18:00; Sa 08:00-13:00
2046    Apr-Nov: Th-Tu 08:00-17:00;Nov-Apr: Su 08:00-1...
2047                                           6:00-22:00
2048                                           6:00-22:00
Name: opening_hours, Length: 2042, dtype: object

In [8]:
all_opening_hours['shop'].value_counts()

supermarket         98
convenience         72
clothes             65
hairdresser         46
bakery              39
                    ..
anime                1
brewing_supplies     1
fabric               1
radiotechnics        1
second_hand          1
Name: shop, Length: 107, dtype: int64

In [9]:
def change_crs_of_X(X, crs_from="EPSG:4236",crs_to="EPSG:3347"):
    """
        Function for translating the data into Canada Lambert projection 
        EPSG: 3347, so that the base unit is 1 m
    """
    new_X = X.copy()
    new_X['geometry'] = new_X.apply(lambda row: shapely.geometry.Point(row['long'],row['lat']),axis=1)
    new_X = gpd.GeoDataFrame(new_X,crs=crs_from)
    new_X = new_X.to_crs(crs_to)
    return new_X

In [10]:
## append end time to trips
start_end_times = gpd.read_file('../../Data/mtl_trajet/mtl_trajet_2017_final.shp')[['id_trip','starttime','endtime']]
X_all = X_all.merge(start_end_times, on='id_trip')

In [11]:
buffer_size_m = 50
geo_X = change_crs_of_X(X_all)

geo_X['buffers'] = geo_X['geometry'].apply(lambda row: row.buffer(buffer_size_m))

# get a geo-dataframe with only the Trip ID,trip end buffer and trip purpose    
only_buffers = geo_X[['id_trip','endtime','buffers']]
only_buffers =  gpd.GeoDataFrame(only_buffers.rename(columns={'buffers':'geometry'}), crs="EPSG:3347")

In [12]:
all_opening_hours = gpd.GeoDataFrame(all_opening_hours, crs='EPSG:4326')
all_opening_hours = all_opening_hours.to_crs('EPSG:3347')

In [13]:
def get_prop_open_hours(buffers, opening_hours):
    joined_data = gpd.sjoin(buffers, opening_hours, op='intersects', how='left')
    return joined_data

In [14]:
joined_data = get_prop_open_hours(only_buffers, all_opening_hours)

In [18]:
joined_data = joined_data.dropna(subset=['opening_hours'])

In [66]:
for i in joined_data['opening_hours'].unique():
    print(i)

Mo-Tu 09:00-18:00;We-Fr 09:00-21:00; Sa 10:00-17:00; Su 11:00-17:00
Mo-Fr 6:30-15:00; Sa-Su 6:30-16:00
Mo-Su 11:30-22:00
Tu-Su 08:00-18:00
Mo-Su 07:00-23:00
Mo-Su 11:00-04:00
Mo-Su 15:00-03:00
Fr-Sa 22:00-03:00
Tu-Su 11:00-21:00
Tu-Th 17:00-22:00; Fr-Sa 12:00-21:00; Su 15:00-21:00
09:00-18:00
Mo-Su 11:00-23:00
Tu-Th 12:00-23:00, Fr-Sa 12:00-01:00, Su 12:30-23:00
Mo-Su 08:00-22:00
12:00-21:00
Mo-Fr 10:00-21:00; Sa 10:00-18:00; Su 11:00-17:00
Mo-We 10:00-19:00; Th,Fr 10:00-21:00; Sa 10:00-17:00; Su 11:00-17:00
Mo-Fr 06:30-16:00; Sa,Su 07:00-17:00
Mo-Fr 10:00-22:00; Sa,Su 11:00-22:00
Mo-Sa 09:00-18:00; Th,Fr 09:00-19:00; Su 10:00-16:00
Tu-Fr 10:00-19:00; Sa 11:00-17:00
Mo-Th 10:00-21:00; Tu-Su 10:00-18:00
Mo-Fr 08:00-21:00; Sa 09:00-17:00; Su 10:00-15:00
24/7
Mo-Fr 09:00-21:00; Sa-Su 09:00-18:00
Mo,Tu 16:00-01:00; We-Fr 16:00-03:00; Sa,Su 10:00-15:00,16:00-03:00
Mo-Su 20:00-01:00
Mo-Fr 12:00-19:00; Tu-We 10:00-19:00; Sa-Su 10:00-17:00
Mo-Fr 09:00-17:00; We-Th 09:00-20:00
We-Su 10:00-17:00

In [33]:
joined_data['endtime'] = pd.to_datetime(joined_data['endtime'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  joined_data['endtime'] = pd.to_datetime(joined_data['endtime'])


In [71]:
IS_OPEN = ["24/7", "All Day, 7/24", "7/24"]
DAYOFWEEK_NAMES = {0:"Mo", 1:"Tu", 2:"We", 3:"Th", 4:"Fr", 5:"Sa", 6:"Su"}
LIST_DAYS = list(DAYOFWEEK_NAMES.values())
# EDGE CASES
# if no day
# Mo off; 
# Sa, Su off
# May-Dec
# 17:00+
# No time
# dim-lun
# until 22h
# 10:00AM to 5:00PM

In [116]:
datetime.datetime.strptime('03:55', '%H:%M').time() == datetime.datetime.strptime('03:55', '%H:%M').time()

True

In [136]:
test_ot = "Mo-We 11:30-24:00, Th 11:30-02:00, Fr,Sa 11:30-03:00, Su 11:30-24:00"

In [139]:
extract_time(test_ot)

TODO:  ['Mo', 'We 11:30', '24:00, Th 11:30', '02:00, Fr,Sa 11:30', '03:00, Su 11:30', '24:00']


In [138]:
def work_out_if_open(end_time, opening_time):
    day_of_week = DAYOFWEEK_NAMES[end_time.dayofweek]
    time_of_day = str(end_time.hour) + ':' + str(end_time.minute)
    
    for opening_time_part in opening_time.split(';'):
        # check if opening time string contains any days in it (will return True or False)
        all_days_not_in_opening_time = all([day not in opening_time_part for day in LIST_DAYS])

        ## 1. Check if just the time is provided or always open
        if all_days_not_in_opening_time:
            bound_start, bound_end = extract_time(opening_time_part, day_of_week)
            if within_opening_hours(bound_start, bound_end, time_of_day):
                return True
        elif opening_time_part in IS_OPEN:
            return True
        
        ## 2. Check day if open during
        elif day_of_week in opening_time_part:
            bound_start, bound_end = extract_time(opening_time_part, day_of_week)
            
        else:
            pass
        
    return False

        
def extract_time(opening_time_part, day_of_week=None):
    split_parts = opening_time_part.split('-')
    if not day_of_week:
        if len(split_parts) == 2:
            return [datetime.datetime.strptime(part, '%H:%M').time() for part in split_parts]
    
        else:
            print('TODO: ', split_parts)
    

            
def within_opening_hours(bound_start, bound_end, time_of_day):
    # if the opening time is 22:00 - 03:00 for example
    if bound_start > bound_end:
        is_within = bound_start < time_of_day
    else:
        is_within = bound_start < time_of_day < bound_end
        
    return is_within
        

In [135]:
list(DAYOFWEEK_NAMES.values()).index(day_of_week)

0

In [51]:
test_date = joined_data['endtime'].iloc[0]
test_date

Timestamp('2017-09-18 17:36:34')

In [63]:
day_of_week = DAYOFWEEK_NAMES[test_date.dayofweek]
time_of_day = str(test_date.hour) + ':' + str(test_date.minute)
time_of_trip_end = day_of_week + ' ' + time_of_day

In [65]:
time_of_trip_end

'Mo 17:36'