In [1]:
# !conda install -c conda-forge geopy --yes 
# !pip install geocoder
# !pip install --upgrade pandas pgeocode
# !pip install pgeocode
# !conda install -c conda-forge folium=0.5.0 --yes
import time                             # For pausing between API calls
import pandas as pd                     # For DataFrames, plotting etc.
import numpy as np                      # For mathematical calculations
import zipfile                          # For unzipping the web-scraped files
import os                               # For correcting the file paths
import requests                         # For getting files and JSON responses
import json                             # For reading and wrangling JSON files
from bs4 import BeautifulSoup           # For scraping HTML data from websites
from pandas import json_normalize       # For flattening nested JSON into tables
import folium                           # For plotting maps
import matplotlib.pyplot as plt                # For plotting charts
import matplotlib.cm as cm              # For functions to handle colormaps
import matplotlib.colors as colors      # For colour maps
import pgeocode                         # For getting lat and lon for postal codes
from geopy.geocoders import Nominatim   # For turning addresses into latitude and longitude values on maps
from sklearn.cluster import KMeans      # k-means clustering
import warnings                         # For ignoring all warnings
from api_key import google_key          # API key kept out of version control via .gitignore
from api_key import places_api_key
from pprint import pprint               # For printing JSON
# Silence every warning for the rest of the session
warnings.filterwarnings('ignore')
# Show every column and every row when a DataFrame is displayed
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [3]:
# 11126	Business and Professional Services > Office > Corporate Coffee Shop
# 13032	Dining and Drinking > Cafe, Coffee, and Tea House
# 13033	Dining and Drinking > Cafe, Coffee, and Tea House > Bubble Tea Shop
# 13034	Dining and Drinking > Cafe, Coffee, and Tea House > Café
# 13035	Dining and Drinking > Cafe, Coffee, and Tea House > Coffee Shop
# 13036	Dining and Drinking > Cafe, Coffee, and Tea House > Tea Room
# 13063	Dining and Drinking > Cafe, Coffee, and Tea House > Pet Café
# 13372	Dining and Drinking > Restaurant > Turkish Restaurant > Turkish Coffeehouse

toronto_data = pd.read_csv('extracted_files/toronto_data.csv')

# Postal-code geocoder backed by pgeocode's dataset for Canada
nomi = pgeocode.Nominatim('CA')

# Resolve all postal codes with one batched lookup instead of one query per
# row. A plain list is passed (not the Series) so pgeocode builds its own
# frame from the values; the result holds one row per input code, in input
# order, with NaN coordinates for codes it cannot match.
geocoded = nomi.query_postal_code(toronto_data['Postal Code'].tolist())

# .to_numpy() discards the lookup's own index so the values are assigned
# positionally; the columns end up as floats rather than object dtype.
toronto_data['Latitude'] = geocoded['latitude'].to_numpy()
toronto_data['Longitude'] = geocoded['longitude'].to_numpy()

# Helper that queries the Foursquare Places API (authenticated with `places_api_key`, imported above)
def get_coffee_shops(latitude, longitude, radius=500, categories="13032,13033,13034,13035,13036,13063,13072", limit=50, timeout=10):
    """Count and name the places Foursquare's place search returns near a point.

    Sends a single GET request to the ``/v3/places/search`` endpoint,
    authorised with the module-level ``places_api_key``.

    Parameters
    ----------
    latitude, longitude
        Centre of the search; sent together as the ``ll`` query parameter.
    radius : default 500
        Sent as the ``radius`` query parameter (presumably metres — confirm
        against the API documentation).
    categories : str
        Comma-separated category ids sent as the ``categories`` parameter.
        NOTE(review): the default contains 13072, which does not appear in the
        category list written in this cell's comments, and omits 11126 and
        13372, which do — confirm the intended ids (13072 may be a typo).
    limit : default 50
        Maximum number of places requested per call.
    timeout : default 10
        Seconds to wait for the HTTP request before giving up, so a stalled
        connection cannot hang the notebook.

    Returns
    -------
    tuple
        ``(count, names)`` where ``names`` is the list of place names and
        ``count`` is its length. ``(0, [])`` is returned, after printing a
        message, when the request fails, the status is not 200, or the body
        is not valid JSON. Places without a name are skipped.
    """
    url = "https://api.foursquare.com/v3/places/search"
    headers = {
        "Accept": "application/json",
        "Authorization": places_api_key,
    }
    params = {
        "ll": f"{latitude},{longitude}",
        "radius": radius,
        "categories": categories,
        "limit": limit,
    }

    # Network problems are treated like HTTP failures: report and return an
    # empty result so one bad lookup does not abort a long loop of calls.
    try:
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to fetch data: {exc}")
        return 0, []

    if response.status_code != 200:
        print(f"Failed to fetch data: {response.status_code}")
        return 0, []

    try:
        payload = response.json()
    except ValueError as exc:
        print(f"Failed to fetch data: invalid JSON ({exc})")
        return 0, []

    # Tolerate a body without a "results" list or entries without a name
    # instead of raising KeyError.
    results = payload.get("results") if isinstance(payload, dict) else None
    coffee_shop_names = [
        place["name"]
        for place in results or []
        if isinstance(place, dict) and place.get("name")
    ]
    return len(coffee_shop_names), coffee_shop_names
# Look up nearby coffee shops for every geocoded row, gathering the results
# column-wise; rows without coordinates keep the defaults (0 and None).
shop_counts = []
shop_name_lists = []
for _, record in toronto_data.iterrows():
    lat, lon = record['Latitude'], record['Longitude']
    if pd.isna(lat) or pd.isna(lon):
        shop_counts.append(0)
        shop_name_lists.append(None)
        continue
    found, names = get_coffee_shops(lat, lon)
    shop_counts.append(found)
    shop_name_lists.append(', '.join(names))

# Attach the collected values as the two new columns
toronto_data['Coffee Counts'] = shop_counts
toronto_data['Coffee Names'] = shop_name_lists

# Display settings: show all columns and rows, with a wide console width
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.max_rows', None),
    ('display.width', 500),
):
    pd.set_option(option_name, option_value)

# Display the updated DataFrame
toronto_data

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Population,Average Income,Cafe Counts,Cafe Names,Coffee Counts,Coffee Names
0,M1B,Scarborough,Malvern / Rouge,43.8113,-79.193,65555.0,69126.0,0,,0,
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek,43.7878,-79.1564,35642.0,109785.0,0,,0,
2,M1E,Scarborough,Guildwood / Morningside / West Hill,43.7678,-79.1866,48033.0,62047.0,4,"Tim Hortons, Eggsmart, McDonald's, Mr. Puffs",4,"Tim Hortons, Kapit Bahay, Wok Stop Chinese Res..."
3,M1G,Scarborough,Woburn,43.7712,-79.2144,30894.0,54450.0,0,,1,Korean Grill House
4,M1H,Scarborough,Cedarbrae,43.7686,-79.2389,23964.0,58492.0,0,,0,
5,M1J,Scarborough,Scarborough Village,43.7464,-79.2323,37002.0,54507.0,0,,0,
6,M1K,Scarborough,Kennedy Park / Ionview / East Birchmount Park,43.7298,-79.2639,48175.0,53260.0,1,Tim Hortons,5,"Tim Hortons, Tagpuan Inc, Hakka No 1 Restauran..."
7,M1L,Scarborough,Golden Mile / Clairlea / Oakridge,43.7122,-79.2843,35833.0,56779.0,2,"Tim Hortons, Tim Hortons",3,"Tim Hortons, Bakery on the Go, Tim Hortons"
8,M1M,Scarborough,Cliffside / Cliffcrest / Scarborough Village West,43.7247,-79.2312,23258.0,68550.0,1,Tim Hortons,5,"Tim Hortons, The Village Tea Room, Sushi & Tea..."
9,M1N,Scarborough,Birch Cliff / Cliffside West,43.6952,-79.2646,22976.0,73256.0,2,"The Birchcliff, City Cottage Market",2,"The Birchcliff, Ume Fashion Sushi"
