## Importing Libraries And Setting Working Directory

In [62]:
import pandas as pd
import os # use this to access your environment variables
import requests # this will be used to call the APIs
import numpy as np
import time


In [None]:
os.chdir("C:/Users/Laven/Documents/Data_Analysis/Capstone_Project")

## Getting API Key


In [7]:
google_api_key = os.getenv('GOOGLE_PLACES_API_KEY')

## Querying Database

In [5]:
google_parameters = {
    'query': 'bar in Montreal',
    'key': google_api_key,
    'location': '45.5088,-73.5878',
    'radius': 30000, 
}

In [19]:
data = []
response = requests.get('https://maps.googleapis.com/maps/api/place/textsearch/json', params=google_parameters)


In [20]:
while True:
    if response.status_code == 200:
        response_json = response.json()
        results = response_json.get('results', [])
        for place in results:
            data.append({
                'Name': place.get('name'),
                'Address': place.get('formatted_address'),
                'Latitude': place['geometry']['location']['lat'],
                'Longitude': place['geometry']['location']['lng'],
                'Rating': place.get('rating', 'N/A'),  # Default if not available
                'Price Level': place.get('price_level', 'N/A')  # Default if not available
            })
        next_page_token = response_json.get('next_page_token')
        if not next_page_token:
            break
        time.sleep(2)  # Pause before making the next request
        google_parameters['pagetoken'] = next_page_token
        response = requests.get('https://maps.googleapis.com/maps/api/place/textsearch/json', params=google_parameters)
    else:
        print(f"Request failed with status code {response.status_code}")
        break

    

In [31]:
data

[{'Name': 'Bootlegger cocktail bar & cuisine',
  'Address': '3481 St Laurent Blvd 2eme Etage, Montreal, Quebec H2X 2T6, Canada',
  'Latitude': 45.5131942,
  'Longitude': -73.5706793,
  'Rating': 4.6,
  'Price Level': 2},
 {'Name': 'Cloakroom Bar',
  'Address': '2175 Rue de la Montagne #100, Montréal, QC H3G 1Z8, Canada',
  'Latitude': 45.4994655,
  'Longitude': -73.5778416,
  'Rating': 4.7,
  'Price Level': 3},
 {'Name': 'Bar Courcelle',
  'Address': '4685 Notre-Dame St W, Montreal, Quebec H4C 1S7, Canada',
  'Latitude': 45.472933,
  'Longitude': -73.5888025,
  'Rating': 4.4,
  'Price Level': 2},
 {'Name': 'Philemon Bar',
  'Address': '111 Saint-Paul St W, Montreal, Quebec H2Y 1Z5, Canada',
  'Latitude': 45.5040576,
  'Longitude': -73.5547406,
  'Rating': 4,
  'Price Level': 2},
 {'Name': 'Grumpys Bar',
  'Address': '1242 Bishop St, Montreal, Quebec H3G 2E3, Canada',
  'Latitude': 45.4960667,
  'Longitude': -73.5757711,
  'Rating': 4.4,
  'Price Level': 1},
 {'Name': 'La Distillerie no

In [32]:
len(data)

60

In [63]:
# Convert the data into a DataFrame
montreal_bars_df = pd.DataFrame(data)

In [23]:
montreal_bars_df



Unnamed: 0,Name,Address,Latitude,Longitude,Rating,Price Level
0,Bootlegger cocktail bar & cuisine,"3481 St Laurent Blvd 2eme Etage, Montreal, Que...",45.513194,-73.570679,4.6,2.0
1,Cloakroom Bar,"2175 Rue de la Montagne #100, Montréal, QC H3G...",45.499465,-73.577842,4.7,3.0
2,Bar Courcelle,"4685 Notre-Dame St W, Montreal, Quebec H4C 1S7...",45.472933,-73.588802,4.4,2.0
3,Philemon Bar,"111 Saint-Paul St W, Montreal, Quebec H2Y 1Z5,...",45.504058,-73.554741,4.0,2.0
4,Grumpys Bar,"1242 Bishop St, Montreal, Quebec H3G 2E3, Canada",45.496067,-73.575771,4.4,1.0
5,La Distillerie no.1,"300 Ontario St E, Montreal, Quebec H2X 1H6, Ca...",45.51481,-73.565424,4.6,2.0
6,London Pub,"1337 Maisonneuve Blvd W, Montreal, Quebec H3G ...",45.498297,-73.577499,4.3,2.0
7,N sur Mackay,"1244 Mackay St, Montreal, Quebec H3G 2H4, Canada",45.4953,-73.576237,4.5,2.0
8,Bar Le Mal Nécessaire,"1106B St Laurent Blvd, Montreal, Quebec H2Z 1J...",45.508523,-73.561196,4.5,2.0
9,Nhậu bar,"600 William St, Montreal, Quebec H3C 1N6, Canada",45.499734,-73.556187,4.2,


### Roadblock. At this point I checked with the documentation and found out that Google Places API returns a maximum of 60 locations per API call. The problem is not with my code but the API itself. Therefore I have prepared a list of the coordinates of all the subway, light rail, and citybike stations in montreal. I'm going to run my an API query for all of them and then concatenate the resulting data frames together and remove duplicates.

In [72]:
lat_lon_list_full_montreal = pd.read_csv("lat_lon_list_full_montreal_ex.csv")


In [73]:
lat_lon_list_full_montreal

Unnamed: 0,Latitude,Longitude
0,45.510253,-73.556777
1,45.510599,-73.566925
2,45.513294,-73.550578
3,45.514339,-73.561685
4,45.518143,-73.568004
...,...,...
475,45.440000,-73.490000
476,45.440000,-73.480000
477,45.440000,-73.470000
478,45.440000,-73.460000


In [246]:
append_this = [(45.52, -73.66), (45.52, -73.65), (45.52, -73.64), (45.52, -73.63), (45.52, -73.62), (45.52, -73.61), (45.52, -73.6), (45.52, -73.59), (45.52, -73.58), (45.52, -73.57), (45.52, -73.76)]

In [248]:
new_tuples_df = pd.DataFrame(append_this, columns=['Latitude', 'Longitude'])

In [249]:
new_tuples_df

Unnamed: 0,Latitude,Longitude
0,45.52,-73.66
1,45.52,-73.65
2,45.52,-73.64
3,45.52,-73.63
4,45.52,-73.62
5,45.52,-73.61
6,45.52,-73.6
7,45.52,-73.59
8,45.52,-73.58
9,45.52,-73.57


In [132]:
lat_lon_list_full_montreal = pd.concat([lat_lon_list_full_montreal, new_tuples_df], ignore_index=True)


In [133]:
lat_lon_list_full_montreal

Unnamed: 0,Latitude,Longitude
0,45.510253,-73.556777
1,45.510599,-73.566925
2,45.513294,-73.550578
3,45.514339,-73.561685
4,45.518143,-73.568004
...,...,...
511,45.520000,73.600000
512,45.520000,73.590000
513,45.520000,73.580000
514,45.520000,73.570000


In [140]:
lat_lon_list_full_montreal.tail(50)

Unnamed: 0,Latitude,Longitude
466,45.44,-73.58
467,45.44,-73.57
468,45.44,-73.56
469,45.44,-73.55
470,45.44,-73.54
471,45.44,-73.53
472,45.44,-73.52
473,45.44,-73.51
474,45.44,-73.5
475,45.44,-73.49


In [135]:
lat_lon_list_full_montreal.to_csv('lat_lon_list_full_montreal_4', index=False)

In [136]:
lat_lon_list_full_montreal_1 = lat_lon_list_full_montreal.iloc[:10,:]
lat_lon_list_full_montreal_2 = lat_lon_list_full_montreal.iloc[10:20,:]
lat_lon_list_full_montreal_3 = lat_lon_list_full_montreal.iloc[20:30,:]
lat_lon_list_full_montreal_4 = lat_lon_list_full_montreal.iloc[30:40,:]
lat_lon_list_full_montreal_5 = lat_lon_list_full_montreal.iloc[40:50,:]
lat_lon_list_full_montreal_6 = lat_lon_list_full_montreal.iloc[50:60,:]
lat_lon_list_full_montreal_7 = lat_lon_list_full_montreal.iloc[60:70,:]
lat_lon_list_full_montreal_8 = lat_lon_list_full_montreal.iloc[70:80,:]
lat_lon_list_full_montreal_9 = lat_lon_list_full_montreal.iloc[80:90,:]
lat_lon_list_full_montreal_10 = lat_lon_list_full_montreal.iloc[90:100,:]
lat_lon_list_full_montreal_11 = lat_lon_list_full_montreal.iloc[100:110,:]
lat_lon_list_full_montreal_12 = lat_lon_list_full_montreal.iloc[110:120,:]
lat_lon_list_full_montreal_13 = lat_lon_list_full_montreal.iloc[120:130,:]
lat_lon_list_full_montreal_14 = lat_lon_list_full_montreal.iloc[130:140,:]
lat_lon_list_full_montreal_15 = lat_lon_list_full_montreal.iloc[140:150,:]
lat_lon_list_full_montreal_16 = lat_lon_list_full_montreal.iloc[150:160,:]
lat_lon_list_full_montreal_17 = lat_lon_list_full_montreal.iloc[160:170,:]
lat_lon_list_full_montreal_18 = lat_lon_list_full_montreal.iloc[170:180,:]
lat_lon_list_full_montreal_19 = lat_lon_list_full_montreal.iloc[180:190,:]
lat_lon_list_full_montreal_20 = lat_lon_list_full_montreal.iloc[190:200,:]
lat_lon_list_full_montreal_21 = lat_lon_list_full_montreal.iloc[200:210,:]
lat_lon_list_full_montreal_22 = lat_lon_list_full_montreal.iloc[210:220,:]
lat_lon_list_full_montreal_23 = lat_lon_list_full_montreal.iloc[220:230,:]
lat_lon_list_full_montreal_24 = lat_lon_list_full_montreal.iloc[230:240,:]
lat_lon_list_full_montreal_25 = lat_lon_list_full_montreal.iloc[240:250,:]
lat_lon_list_full_montreal_26 = lat_lon_list_full_montreal.iloc[250:260,:]
lat_lon_list_full_montreal_27 = lat_lon_list_full_montreal.iloc[260:270,:]
lat_lon_list_full_montreal_28 = lat_lon_list_full_montreal.iloc[270:280,:]
lat_lon_list_full_montreal_29 = lat_lon_list_full_montreal.iloc[280:290,:]
lat_lon_list_full_montreal_30 = lat_lon_list_full_montreal.iloc[290:300,:]
lat_lon_list_full_montreal_31 = lat_lon_list_full_montreal.iloc[300:310,:]
lat_lon_list_full_montreal_32 = lat_lon_list_full_montreal.iloc[310:320,:]
lat_lon_list_full_montreal_33 = lat_lon_list_full_montreal.iloc[320:330,:]
lat_lon_list_full_montreal_34 = lat_lon_list_full_montreal.iloc[330:340,:]
lat_lon_list_full_montreal_35 = lat_lon_list_full_montreal.iloc[340:350,:]
lat_lon_list_full_montreal_36 = lat_lon_list_full_montreal.iloc[350:360,:]
lat_lon_list_full_montreal_37 = lat_lon_list_full_montreal.iloc[360:370,:]
lat_lon_list_full_montreal_38 = lat_lon_list_full_montreal.iloc[370:380,:]
lat_lon_list_full_montreal_39 = lat_lon_list_full_montreal.iloc[380:390,:]
lat_lon_list_full_montreal_40 = lat_lon_list_full_montreal.iloc[390:400,:]
lat_lon_list_full_montreal_41 = lat_lon_list_full_montreal.iloc[400:410,:]
lat_lon_list_full_montreal_42 = lat_lon_list_full_montreal.iloc[410:420,:]
lat_lon_list_full_montreal_43 = lat_lon_list_full_montreal.iloc[420:430,:]
lat_lon_list_full_montreal_44 = lat_lon_list_full_montreal.iloc[430:440,:]
lat_lon_list_full_montreal_45 = lat_lon_list_full_montreal.iloc[440:450,:]
lat_lon_list_full_montreal_46 = lat_lon_list_full_montreal.iloc[450:460,:]
lat_lon_list_full_montreal_47 = lat_lon_list_full_montreal.iloc[460:470,:]
lat_lon_list_full_montreal_48 = lat_lon_list_full_montreal.iloc[470:480,:]
lat_lon_list_full_montreal_49 = lat_lon_list_full_montreal.iloc[480:490,:]
lat_lon_list_full_montreal_50 = lat_lon_list_full_montreal.iloc[480:490,:]
lat_lon_list_full_montreal_51 = lat_lon_list_full_montreal.iloc[490:500,:]
lat_lon_list_full_montreal_52 = lat_lon_list_full_montreal.iloc[500:510,:]
lat_lon_list_full_montreal_53 = lat_lon_list_full_montreal.iloc[510:520,:]

In [76]:
lat_lon_list_full_montreal_1

Unnamed: 0,Latitude,Longitude
0,45.510253,-73.556777
1,45.510599,-73.566925
2,45.513294,-73.550578
3,45.514339,-73.561685
4,45.518143,-73.568004
5,45.51086,-73.54983
6,45.507144,-73.555119
7,45.50761,-73.551836
8,45.502054,-73.573465
9,45.506314,-73.559671


In [77]:
lat_lon_list_full_montreal_2

Unnamed: 0,Latitude,Longitude
10,45.505312,-73.560891
11,45.504242,-73.553469
12,45.499965,-73.556154
13,45.50294,-73.56064
14,45.501143,-73.560773
15,45.500643,-73.565408
16,45.50206,-73.56295
17,45.521564,-73.570367
18,45.541448,-73.598394
19,45.49837,-73.560556


In [168]:
lat_lon_list_full_montreal_53

Unnamed: 0,Latitude,Longitude
510,45.52,73.61
511,45.52,73.6
512,45.52,73.59
513,45.52,73.58
514,45.52,73.57
515,45.52,73.76


In [78]:
data = []  # This will hold data for all bars from all queries



## Running API Call Loop.

## The Loop Returns A Maximum Of 60 Results Per Call. So I'm Running It Many Times Over A List Of Latitude-Longitude Coordinate Pairs. This Will Find All The Restaurants In Montreal That Are In The Downtown Area Or The Transit Serviced Areas Covered By My Coordinate Pairs.

## This Will Have Enough Redundancy That The Same Restaurants Will Be Found Many Times. The Duplicates Will Be Filtered Out

In [250]:
# Loop through each coordinate pair
for index, row in new_tuples_df.iterrows():
    google_parameters = {
        'query': 'bars',
        'key': google_api_key,
        'location': f"{row['Latitude']},{row['Longitude']}",
        'radius': 1000,
        'type': 'bar'
    }

    response = requests.get('https://maps.googleapis.com/maps/api/place/textsearch/json', params=google_parameters)

    # Collect results for the current location
    local_data = []
    while True:
        if response.status_code == 200:
            response_json = response.json()
            results = response_json.get('results', [])
            for place in results:
                local_data.append({
                    'Name': place.get('name'),
                    'Address': place.get('formatted_address'),
                    'Latitude': place['geometry']['location']['lat'],
                    'Longitude': place['geometry']['location']['lng'],
                    'Rating': place.get('rating', 'N/A'),
                    'Price Level': place.get('price_level', 'N/A')
                })

            # Check for a next page token and continue if present
            next_page_token = response_json.get('next_page_token')
            if not next_page_token:
                break
            time.sleep(2)
            google_parameters['pagetoken'] = next_page_token
            response = requests.get('https://maps.googleapis.com/maps/api/place/textsearch/json', params=google_parameters)
        else:
            print(f"Request failed for {row['Latitude']},{row['Longitude']} with status code {response.status_code}")
            break
    # Extend the master data list with the results from this local query
    data.extend(local_data)

In [80]:
# Convert the data collected so far into a DataFrame
bars_1_df = pd.DataFrame(data)

In [81]:
bars_1_df

Unnamed: 0,Name,Address,Latitude,Longitude,Rating,Price Level
0,Pub St-Paul,"124 Rue Saint-Paul E, Montréal, QC H2Y 1G6",45.506944,-73.552500,3.9,2
1,Piranha Bar,"680 Saint-Catherine St W, Montreal, Quebec H3B...",45.502852,-73.569942,4.1,1
2,Patrick's Pub,"1707 Saint Denis St, Montreal, Quebec H2X 3K4",45.515570,-73.564079,4.2,2
3,MVP Resto-Bar Sportif,"200 St Catherine St E, Montreal, Quebec H2X 1L1",45.511730,-73.561981,3.8,2
4,Pub John Michaels,"458 Pl. Jacques-Cartier, Montréal, QC H2Y 3Z2",45.508011,-73.553953,4.0,2
...,...,...,...,...,...,...
595,Bar Pamplemousse,"1579 St Laurent Blvd, Montreal, Quebec H2X 2S9",45.510883,-73.565665,4.5,2
596,Mad Hatter Pub,"1240 Crescent St, Montreal, Quebec H3G 2A9",45.496702,-73.575262,4.3,1
597,Pub Quartier Latin,"318 Ontario St E, Montreal, Quebec H2X 1H6",45.515003,-73.565362,4.6,2
598,HOOTERS,"1433 Crescent St, Montreal, Quebec H3G 2B2",45.497673,-73.576425,3.2,2


In [83]:
bars_1_df.nunique(axis=0)

Name           125
Address        124
Latitude       126
Longitude      126
Rating          20
Price Level      4
dtype: int64

In [88]:
# Convert the data collected so far into a DataFrame
bars_2_df = pd.DataFrame(data)

In [89]:
bars_2_df


Unnamed: 0,Name,Address,Latitude,Longitude,Rating,Price Level
0,Pub St-Paul,"124 Rue Saint-Paul E, Montréal, QC H2Y 1G6",45.506944,-73.552500,3.9,2
1,Piranha Bar,"680 Saint-Catherine St W, Montreal, Quebec H3B...",45.502852,-73.569942,4.1,1
2,Patrick's Pub,"1707 Saint Denis St, Montreal, Quebec H2X 3K4",45.515570,-73.564079,4.2,2
3,MVP Resto-Bar Sportif,"200 St Catherine St E, Montreal, Quebec H2X 1L1",45.511730,-73.561981,3.8,2
4,Pub John Michaels,"458 Pl. Jacques-Cartier, Montréal, QC H2Y 3Z2",45.508011,-73.553953,4.0,2
...,...,...,...,...,...,...
1195,Restaurant & Bar Le Kube,"Restaurant Hôtel, 900 Rue De la Gauchetière O,...",45.499151,-73.565686,3.4,
1196,Le Warehouse,"1446 Crescent St, Montreal, Quebec H3G 2B6",45.497574,-73.576840,4.0,1
1197,Bar Sarah B,"360 Rue Saint-Antoine O, Montréal, QC H2Y 3X4",45.502808,-73.560584,4.2,3
1198,Le Bureau Bar Tapas,"1642 Notre-Dame St W, Montreal, Quebec H3J 1M1",45.489383,-73.567560,4.4,2


In [91]:
bars_3_df = pd.DataFrame(data)

In [92]:
len(bars_3_df)

1800

In [94]:
bars_4_df = pd.DataFrame(data)
len(bars_4_df)

2400

In [96]:
bars_5_df = pd.DataFrame(data)
len(bars_5_df)

3000

In [98]:
bars_6_df = pd.DataFrame(data)
len(bars_6_df)

3600

In [100]:
bars_7_df = pd.DataFrame(data)
len(bars_7_df)

4200

In [102]:
bars_8_df = pd.DataFrame(data)
len(bars_8_df)

4800

In [113]:
bars_9_df = pd.DataFrame(data)
len(bars_9_df)

6000

In [115]:
bars_10_df = pd.DataFrame(data)
len(bars_10_df)

6600

In [117]:
bars_11_df = pd.DataFrame(data)
len(bars_11_df)

7200

In [120]:
bars_12_df = pd.DataFrame(data)
len(bars_12_df)

7800

In [138]:
bars_13_df = pd.DataFrame(data)
len(bars_13_df)

8400

In [141]:
bars_14_df = pd.DataFrame(data)
len(bars_14_df)

9000

In [143]:
bars_15_df = pd.DataFrame(data)
len(bars_15_df)

9600

In [145]:
bars_16_df = pd.DataFrame(data)
len(bars_16_df)

10200

In [147]:
bars_17_df = pd.DataFrame(data)
len(bars_17_df)

10800

In [149]:
bars_18_df = pd.DataFrame(data)
len(bars_18_df)

11340

In [151]:
bars_19_df = pd.DataFrame(data)
len(bars_19_df)

11940

In [154]:
bars_20_df = pd.DataFrame(data)
len(bars_20_df)

13140

In [156]:
bars_21_df = pd.DataFrame(data)
len(bars_21_df)

13740

In [160]:
bars_21_df.to_csv('bars_21_df', index=False)

In [162]:
bars_22_df = pd.DataFrame(data)
len(bars_22_df)

14340

In [164]:
bars_23_df = pd.DataFrame(data)
len(bars_23_df)

14940

In [166]:
bars_24_df = pd.DataFrame(data)
len(bars_24_df)

15540

In [169]:
bars_25_df = pd.DataFrame(data)
len(bars_25_df)

16140

In [171]:
bars_26_df = pd.DataFrame(data)
len(bars_26_df)

16740

In [173]:
bars_27_df = pd.DataFrame(data)
len(bars_27_df)

17340

In [175]:
bars_28_df = pd.DataFrame(data)
len(bars_28_df)

17940

In [177]:
bars_29_df = pd.DataFrame(data)
len(bars_29_df)

18540

In [179]:
bars_30_df = pd.DataFrame(data)
len(bars_30_df)

19140

In [181]:
bars_31_df = pd.DataFrame(data)
len(bars_31_df)

19740

In [183]:
bars_32_df = pd.DataFrame(data)
len(bars_32_df)

20340

In [186]:
bars_33_df = pd.DataFrame(data)
len(bars_33_df)

20940

In [188]:
bars_34_df = pd.DataFrame(data)
len(bars_34_df)

21540

In [190]:
bars_35_df = pd.DataFrame(data)
len(bars_35_df)

22140

In [192]:
bars_36_df = pd.DataFrame(data)
len(bars_36_df)

22740

In [194]:
bars_37_df = pd.DataFrame(data)
len(bars_37_df)

23340

In [196]:
bars_38_df = pd.DataFrame(data)
len(bars_38_df)

23940

In [198]:
bars_39_df = pd.DataFrame(data)
len(bars_39_df)

24540

In [200]:
bars_40_df = pd.DataFrame(data)
len(bars_40_df)

25140

In [202]:
bars_41_df = pd.DataFrame(data)
len(bars_41_df)

25740

In [204]:
bars_42_df = pd.DataFrame(data)
len(bars_42_df)

26340

In [206]:
bars_43_df = pd.DataFrame(data)
len(bars_43_df)

26940

In [208]:
bars_44_df = pd.DataFrame(data)
len(bars_44_df)

27540

In [210]:
bars_45_df = pd.DataFrame(data)
len(bars_45_df)

28140

In [212]:
bars_46_df = pd.DataFrame(data)
len(bars_46_df)

28740

In [214]:
bars_47_df = pd.DataFrame(data)
len(bars_47_df)

29340

In [216]:
bars_48_df = pd.DataFrame(data)
len(bars_48_df)

29940

In [218]:
bars_49_df = pd.DataFrame(data)
len(bars_49_df)

30540

In [220]:
bars_50_df = pd.DataFrame(data)
len(bars_50_df)

31140

In [222]:
bars_51_df = pd.DataFrame(data)
len(bars_51_df)

31740

In [224]:
bars_52_df = pd.DataFrame(data)
len(bars_52_df)


32340

In [226]:
bars_53_df = pd.DataFrame(data)
len(bars_53_df)



32700

In [252]:
bars_54_df = pd.DataFrame(data)

In [253]:
len(bars_54_df)

33360

In [227]:
bars_53_df

Unnamed: 0,Name,Address,Latitude,Longitude,Rating,Price Level
0,Pub St-Paul,"124 Rue Saint-Paul E, Montréal, QC H2Y 1G6",45.506944,-73.552500,3.9,2
1,Piranha Bar,"680 Saint-Catherine St W, Montreal, Quebec H3B...",45.502852,-73.569942,4.1,1
2,Patrick's Pub,"1707 Saint Denis St, Montreal, Quebec H2X 3K4",45.515570,-73.564079,4.2,2
3,MVP Resto-Bar Sportif,"200 St Catherine St E, Montreal, Quebec H2X 1L1",45.511730,-73.561981,3.8,2
4,Pub John Michaels,"458 Pl. Jacques-Cartier, Montréal, QC H2Y 3Z2",45.508011,-73.553953,4.0,2
...,...,...,...,...,...,...
32695,Barvikha,"Konayev Ave 39, Shymkent",42.329149,69.601676,4.6,
32696,SVOYBAR.KZ,"Magzhan Zhumabaev Avenue 24, Astana 020000",51.140154,71.488687,2.9,
32697,Угли Bar & More Лаунж-бар,"Respublika Ave 23/1, Astana 010000",51.164357,71.426272,4.6,
32698,lounge bar,"9V4X+VM4, Stepnogorsk 020000",52.357152,71.899304,3.2,


# Find and filter out duplicates


In [None]:

duplicate_rows = bars_54_df.duplicated()


In [255]:
num_duplicates = duplicate_rows.sum()


In [256]:
num_duplicates

32765

In [257]:
duplicates = bars_53_df[bars_53_df.duplicated(keep=False)]


In [258]:
duplicates

Unnamed: 0,Name,Address,Latitude,Longitude,Rating,Price Level
0,Pub St-Paul,"124 Rue Saint-Paul E, Montréal, QC H2Y 1G6",45.506944,-73.552500,3.9,2
1,Piranha Bar,"680 Saint-Catherine St W, Montreal, Quebec H3B...",45.502852,-73.569942,4.1,1
2,Patrick's Pub,"1707 Saint Denis St, Montreal, Quebec H2X 3K4",45.515570,-73.564079,4.2,2
3,MVP Resto-Bar Sportif,"200 St Catherine St E, Montreal, Quebec H2X 1L1",45.511730,-73.561981,3.8,2
4,Pub John Michaels,"458 Pl. Jacques-Cartier, Montréal, QC H2Y 3Z2",45.508011,-73.553953,4.0,2
...,...,...,...,...,...,...
32695,Barvikha,"Konayev Ave 39, Shymkent",42.329149,69.601676,4.6,
32696,SVOYBAR.KZ,"Magzhan Zhumabaev Avenue 24, Astana 020000",51.140154,71.488687,2.9,
32697,Угли Bar & More Лаунж-бар,"Respublika Ave 23/1, Astana 010000",51.164357,71.426272,4.6,
32698,lounge bar,"9V4X+VM4, Stepnogorsk 020000",52.357152,71.899304,3.2,


In [259]:
bars_in_montreal_collected_unique = bars_54_df.drop_duplicates(keep='first')

In [260]:
bars_in_montreal_collected_unique.tail(20)

Unnamed: 0,Name,Address,Latitude,Longitude,Rating,Price Level
29995,Guten Pub,"Mukimi Street 88, Tashkent, Toshkent Shahri, U...",41.285095,69.234993,4.1,
29996,Radisson Бар Cigar,"Saryarka Avenue 2, Astana 020000",51.159281,71.412689,5.0,
29997,Barsuk Restobar,"Молдагалиева 29Б, Atyrau",47.093408,51.915408,4.3,
29998,WWK Bar,"Prospekt Mangilik Yel. 27, Astana 020000",51.109234,71.43108,5.0,
29999,Baoli Lounge & Cafe Nur-Sultan,"Turkistan Street 16, Astana 020000",51.106972,71.424823,4.1,2.0
30043,Resto Bar 24/7,"R3FF+JC8, Karaganda 100000",49.824014,73.073505,5.0,
30046,Мята Lounge Мендикуловта,"Khadzhimukan Street 49, Almaty 050051",43.225372,76.954621,4.3,2.0
30047,Мята lounge на Кунаева,"Kunaev St 83, Almaty 050000",43.256972,76.949263,4.7,
30048,Kafe Gril' Bar Kabinet,"Nurken Abdirov Avenue 32, Karaganda 100000",49.805729,73.10121,5.0,
30051,Pivnoy Bar,"4FJH+H94, Astana 020000",51.131247,71.47838,0.0,


In [261]:
bars_in_montreal_collected_unique.to_csv('bars_in_montreal_collected_unique.csv', index=False)



In [263]:
bars_in_montreal_collected_unique.to_csv('bars_in_montreal_collected_unique_conv.csv', index=False)



In [264]:
bars_in_montreal_collected_unique.to_csv('bars_in_montreal_collected_unique_utf8.csv', index=False, encoding='utf-8-sig')


## Add GeoHashes

In [265]:
import geohash

# Function to generate geohash
def compute_geohash(row, precision=5):
    return geohash.encode(row['Latitude'], row['Longitude'], precision=precision)


In [266]:
# Apply the function to each row in the DataFrame
bars_in_montreal_collected_unique['Geohash'] = bars_in_montreal_collected_unique.apply(compute_geohash, axis=1)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bars_in_montreal_collected_unique['Geohash'] = bars_in_montreal_collected_unique.apply(compute_geohash, axis=1)


In [267]:
bars_in_montreal_collected_unique


Unnamed: 0,Name,Address,Latitude,Longitude,Rating,Price Level,Geohash
0,Pub St-Paul,"124 Rue Saint-Paul E, Montréal, QC H2Y 1G6",45.506944,-73.552500,3.9,2,f25dy
1,Piranha Bar,"680 Saint-Catherine St W, Montreal, Quebec H3B...",45.502852,-73.569942,4.1,1,f25dv
2,Patrick's Pub,"1707 Saint Denis St, Montreal, Quebec H2X 3K4",45.515570,-73.564079,4.2,2,f25dy
3,MVP Resto-Bar Sportif,"200 St Catherine St E, Montreal, Quebec H2X 1L1",45.511730,-73.561981,3.8,2,f25dy
4,Pub John Michaels,"458 Pl. Jacques-Cartier, Montréal, QC H2Y 3Z2",45.508011,-73.553953,4.0,2,f25dy
...,...,...,...,...,...,...,...
30953,Tennisi Arena Pub,"Zhandosov Street 2, Almaty 050000",43.236895,76.913159,4.5,2,txwty
30957,"Лаунж-бар ""Gin""","R432+VFF, Nurken Abdirov Avenue, Karaganda 100000",49.804691,73.101182,4.1,,v8ggr
33155,Barbossa,"3956 A St Laurent Blvd, Montreal, Quebec H2W 1Y3",45.516651,-73.578984,4.1,2,f25dv
33236,KARAOKE BAR 3 MINOTS,"3812 St Laurent Blvd, Montreal, Quebec H2W 1X6",45.515533,-73.576472,3.4,1,f25dv


In [268]:
unique_geohashes = bars_in_montreal_collected_unique['Geohash'].nunique()
unique_geohashes


77

In [None]:
bars_in_montreal_collected_unique.to_csv('bars_in_montreal_collected_unique_utf8.csv', index=False, encoding='utf-8-sig')


This code worked to get all the bars in these areas of montreal. I accidently looped over some coordinates in the Eastern Hemisphere. (I already removed the code where I did that above. You cannot see it.) I removed the mistaken results in Excel.