In [5]:
def get_opening_hours_from_osm_data(path_to_osm_geojson):
    """
    Read an OSM GeoJSON export line by line and locate the lines that mention
    opening hours.

    Args:
        path_to_osm_geojson (str): path to the geojson file. We get data from
            https://download.bbbike.org/osm/bbbike/

    Returns:
        tuple: ``(all_lines, opening_hours_indexes)`` where ``all_lines`` is the
        list of every line in the file (line endings kept) and
        ``opening_hours_indexes`` is the list of 0-based indexes into
        ``all_lines`` of the lines containing the substring ``'opening_hours'``.

    Side effects:
        Prints the total number of lines and the number of matching lines.
    """
    # Read the file exactly once; the context manager guarantees the handle is
    # closed even if reading fails.
    with open(path_to_osm_geojson, "r") as f:
        all_lines = f.readlines()
    print("Number of lines in file:", len(all_lines))

    # Plain substring match: any line mentioning 'opening_hours' anywhere
    # (key or value) is selected. Indexes are computed from the in-memory lines
    # instead of re-reading the file from disk.
    opening_hours_indexes = [
        line_index
        for line_index, line in enumerate(all_lines)
        if 'opening_hours' in line
    ]
    print("Number of opening_hours entries:", len(opening_hours_indexes))
    return all_lines, opening_hours_indexes


def write_opening_hours_to_geojson(path_to_output_geojson, all_lines, opening_hours_indexes):
    """
    Write a reduced geojson file containing only the selected lines.

    The output consists of the first line of ``all_lines``, then every line
    referenced by ``opening_hours_indexes``, then the last line of
    ``all_lines``. The first and last lines are presumably the opening and
    closing of the GeoJSON collection -- verify against the input format.

    Args:
        path_to_output_geojson (str): path of the file to create/overwrite.
        all_lines (list): all lines in the overall file
        opening_hours_indexes (list): indexes into ``all_lines`` of the lines
            to keep. May be empty, in which case only the first and last line
            are written.
    """
    # Plain write mode is enough: nothing is read back from the file. The
    # context manager closes the file, so no explicit close() is needed.
    with open(path_to_output_geojson, "w") as my_file:
        my_file.write(all_lines[0])
        # Guard against an empty selection: indexing [-1] on an empty list
        # would raise after the output file was already truncated.
        if opening_hours_indexes:
            for line in opening_hours_indexes[:-1]:
                my_file.write(all_lines[line])
            # remove comma from final line so the last kept entry is not
            # followed by a dangling separator
            final_line = all_lines[opening_hours_indexes[-1]].replace(",\n", "\n")
            my_file.write(final_line)
        my_file.write(all_lines[-1])

In [6]:
# Read the raw Montreal OSM export and collect the indexes of the lines that
# mention opening hours.
all_lines, opening_hours_indexes = get_opening_hours_from_osm_data(
    path_to_osm_geojson="../Data/shapes/Montreal.osm.geojson"
)

Number of lines in file: 973099
Number of opening_hours entries: 2049


In [8]:
# Write the filtered lines out as a smaller geojson file used by later cells.
write_opening_hours_to_geojson(
    path_to_output_geojson="../Data/model_inputs/mtl_opening_hours.geojson",
    all_lines=all_lines,
    opening_hours_indexes=opening_hours_indexes,
)

In [10]:
import geopandas as gpd

In [11]:
# Load the filtered geojson written by write_opening_hours_to_geojson with geopandas.
all_opening_hours = gpd.read_file("../Data/model_inputs/mtl_opening_hours.geojson")

In [12]:
# Preview the raw opening_hours strings (left as the last expression so the notebook displays it).
all_opening_hours['opening_hours']

0                               We-Su 10:00-17:00; Mo off
1                                       Mo-Su 11:00-23:00
2                                              6:00-22:00
3                                       Mo-Su 08:00-22:00
4                       Mo-Sa 06:00-15:00, Su 07:00-15:00
                              ...                        
2044                 Mo-Fr 09:00-18:00; Sa-Su 09:00-17:00
2045                    Mo-Fr 08:30-18:00; Sa 08:00-13:00
2046    Apr-Nov: Th-Tu 08:00-17:00;Nov-Apr: Su 08:00-1...
2047                                           6:00-22:00
2048                                           6:00-22:00
Name: opening_hours, Length: 2049, dtype: object

In [13]:
# Frequency of each distinct value in the `shop` column.
all_opening_hours['shop'].value_counts()

supermarket     98
convenience     72
clothes         65
hairdresser     46
bakery          39
                ..
carpet           1
anime            1
food             1
estate_agent     1
beverages        1
Name: shop, Length: 107, dtype: int64

In [14]:
# Frequency of each distinct value in the `tourism` column.
all_opening_hours['tourism'].value_counts()

museum         14
gallery         5
attraction      3
guest_house     2
apartment       2
yes             2
hotel           2
Name: tourism, dtype: int64

In [15]:
# Frequency of each distinct value in the `amenity` column.
all_opening_hours['amenity'].value_counts() # pharmacy, school

restaurant                357
cafe                      135
fast_food                  97
pharmacy                   59
bank                       55
bar                        41
library                    31
fuel                       28
pub                        18
bicycle_repair_station     16
clinic                     13
atm                        12
veterinary                 12
post_office                12
dentist                    11
ice_cream                  10
car_rental                 10
social_facility             8
parking                     7
community_centre            6
charging_station            6
bureau_de_change            6
marketplace                 4
toilets                     3
recycling                   3
shop|clothes                3
childcare                   3
nightclub                   3
driving_school              3
food_court                  2
car_wash                    2
dojo                        2
theatre                     2
embassy   

In [17]:
# Frequency of each distinct value in the `healthcare` column.
all_opening_hours['healthcare'].value_counts()

pharmacy           42
dentist             4
clinic              4
alternative         2
physiotherapist     2
centre              2
hospital            1
Name: healthcare, dtype: int64

In [18]:
# Frequency of each distinct value in the `leisure` column.
all_opening_hours['leisure'].value_counts()

park              34
fitness_centre    11
sports_centre      9
ice_rink           4
garden             2
picnic_table       2
tanning_salon      2
hackerspace        2
playground         2
dog_park           2
bowling_alley      2
sauna              1
dance              1
horse_riding       1
Name: leisure, dtype: int64

In [19]:
# Frequency of each distinct value in the `sport` column (values are free-form, e.g. ';' or ',' separated lists).
all_opening_hours['sport'].value_counts()

climbing                                          3
skating;hockey                                    2
billiards                                         2
10pin                                             2
yoga;pilates;essentrics                           2
Skating, Hockey, Basketball                       2
All                                               1
karate;boxing;kickboxing;exercise;martial_arts    1
Kenjutsu                                          1
kendo                                             1
Name: sport, dtype: int64

In [20]:
# Frequency of each distinct value in the `building` column.
all_opening_hours['building'].value_counts()

yes                212
retail              40
roof                10
commercial           8
civic                6
public               6
public_building      2
farm_auxiliary       2
library              2
garage               2
university           2
residential          2
hotel                2
office               1
warehouse            1
Name: building, dtype: int64

In [21]:
# Frequency of each distinct value in the `craft` column.
all_opening_hours['craft'].value_counts()

tailor                4
shoemaker             4
electronics_repair    3
brewery               2
electrician           1
construction          1
sign_maker            1
gardener              1
optician              1
mason                 1
floorer               1
locksmith             1
caterer               1
handicraft            1
Name: craft, dtype: int64

In [16]:
# Print the value distribution of every column except the first one
# (columns[1:] skips index 0) to see which tags are worth keeping.
for col in all_opening_hours.columns[1:]:
    print(col)
    print(all_opening_hours[col].value_counts())
    

name
McDonald's        25
Pharmaprix        23
Tim Hortons       21
Service Canada    16
A&W               10
                  ..
Le Blueboy         1
Aqu@net            1
FOFA Gallery       1
Globex 2000        1
Touchattou         1
Name: name, Length: 1554, dtype: int64
phone
+1 514 937 7620    3
+1 514 485 3543    2
+1-450-736-1323    2
+1-514-271-9622    2
+1-800-363-9028    2
                  ..
+1-514-419-7997    1
+1-514-483-7777    1
+1-514-625-3061    1
+1-514-663-8042    1
+1 514 274 0919    1
Name: phone, Length: 839, dtype: int64
name:en
National Bank                                           3
Heinz Garage                                            2
Hymus Auto Parts                                        2
Costco Gas                                              2
Shoppers Drug Mart                                      2
                                                       ..
Olive + gourmando                                       1
LOV                                

Name: service:bicycle:diy, dtype: int64
comment
Wednesday is "women-trans-femme only"    1
Name: comment, dtype: int64
created_by
iLOE 1.9    2
Name: created_by, dtype: int64
building:levels
1      6
3      4
0      3
2.5    2
5      2
7      2
Name: building:levels, dtype: int64
bicycle_parking
stands    1
Name: bicycle_parking, dtype: int64
male
yes    4
no     2
Name: male, dtype: int64
female
no     3
yes    2
Name: female, dtype: int64
drinking_water
yes    1
Name: drinking_water, dtype: int64
image
File:Parc de Dieppe, panneau d'accueil.jpg                                                                                  2
http://www.britandchips.com/english/wp-content/uploads/2012/07/britpic1_lg2.jpeg                                            1
http://www.shaktirockgym.com/wp-content/uploads/2012/ftp/IMG_0001.jpg                                                       1
https://commons.wikimedia.org/wiki/File:L%27Insoumise_Montreal.jpg                                              

Name: socket:type1_combo, dtype: int64
languages
Français, English    1
Name: languages, dtype: int64
bic
BOFMCAM2    1
Name: bic, dtype: int64
service:bicycle:fitting
yes    1
Name: service:bicycle:fitting, dtype: int64
surface
asphalt    4
grass      2
wood       1
Name: surface, dtype: int64
cash_withdrawal
No    1
Name: cash_withdrawal, dtype: int64
operator:wikidata
Q483551    1
Name: operator:wikidata, dtype: int64
operator:wikipedia
en:Walmart    1
Name: operator:wikipedia, dtype: int64
ref:isil
QMBN    2
Name: ref:isil, dtype: int64
name:formal
Grande Bibliothèque    2
Name: name:formal, dtype: int64
building:material
glass    2
Name: building:material, dtype: int64
building:levels:underground
3    2
Name: building:levels:underground, dtype: int64
loc_name
Costco               2
Costco rue Bridge    2
Name: loc_name, dtype: int64
membership
yes    6
Name: membership, dtype: int64
attribution
Natural Resources Canada    12
Name: attribution, dtype: int64
canvec:CODE
2010252    6

In [25]:
# Columns kept in the cleaned export; order here is the column order of the CSV.
COLUMNS_TO_USE = [
    'name',
    'opening_hours',
    'amenity',
    'tourism',
    'shop',
    'healthcare',
    'leisure',
    'sport',
    'craft',
    'building',
    'geometry',
]

In [36]:
# Export only the selected columns to CSV, without the index column.
all_opening_hours[COLUMNS_TO_USE].to_csv(
    '../Data/model_inputs/mtl_opening_hours_cleaned.csv',
    index=False,
    encoding='utf-8',
)