## Part 1

In [1]:
import numpy as np # for working with vectorized data
import pandas as pd # for data analysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import requests # needed to retrive html from a web address
from bs4 import BeautifulSoup # for parsing html so data can be extracted

The first step is to scrape the relevent data and store it in a parsable object

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M' # toronto postcodes begin with M and the table has all desired data so joins are not needed to complete the dataframe
html_data = requests.get(url).text # saves html as a single string
soup = BeautifulSoup(html_data, 'html5lib') # creates a BeautifulSoup object which parses the html_data in the same way as a browser

The next step is to create an itterable of the elements each containing a single record

In [3]:
table = soup.tbody.find_all('td') # finds the first tbody element and creates an iterable of td elements within (cells from the table)

The records will now be iterated through, cleaned, and reorganised into a useful format

In [4]:
data = {'PostalCode':[], 'Borough':[], 'Neighborhood':[]} # creates a dictionary of empty lists. the keys will become column titles
for cell in table: # iterates through cells in the table taken from wikipedia
    
    if cell.span.text == 'Not assigned': # checks if the post code is not in use and skips if True
        continue
    
    else:
        data['PostalCode'].append(cell.text[1:4]) # slices postcode from cell because length and format are consistent and appends to the 'PostalCode' list in data
        
        borough, neighborhood = cell.text[4:].split('(', 1) # splits all text after the postcode at the first bracket (how borough and neighborhood are seperated on the website table)
        
        data['Borough'].append(borough) # borough is already cleaned and gets appended to the 'Borough' list in data
        
        neighborhood = neighborhood.strip('\n()').replace(' /', ',') # before neighborhood can be appended to its list all remaining brackets and newlines are stripped and replace is used to change / seperators into comma seperators
        
        # checks if the neighborhood(s) is listed for the borough and lables as the borough if not, although the table doesn't actually have this issue currently so it is commented out
        #if neighborhood == 'Not assigned': 
            #neighborhood = borough
        #else:
            #pass
            
        data['Neighborhood'].append(neighborhood) 

data

{'PostalCode': ['M3A',
  'M4A',
  'M5A',
  'M6A',
  'M7A',
  'M9A',
  'M1B',
  'M3B',
  'M4B',
  'M5B',
  'M6B',
  'M9B',
  'M1C',
  'M3C',
  'M4C',
  'M5C',
  'M6C',
  'M9C',
  'M1E',
  'M4E',
  'M5E',
  'M6E',
  'M1G',
  'M4G',
  'M5G',
  'M6G',
  'M1H',
  'M2H',
  'M3H',
  'M4H',
  'M5H',
  'M6H',
  'M1J',
  'M2J',
  'M3J',
  'M4J',
  'M5J',
  'M6J',
  'M1K',
  'M2K',
  'M3K',
  'M4K',
  'M5K',
  'M6K',
  'M1L',
  'M2L',
  'M3L',
  'M4L',
  'M5L',
  'M6L',
  'M9L',
  'M1M',
  'M2M',
  'M3M',
  'M4M',
  'M5M',
  'M6M',
  'M9M',
  'M1N',
  'M2N',
  'M3N',
  'M4N',
  'M5N',
  'M6N',
  'M9N',
  'M1P',
  'M2P',
  'M4P',
  'M5P',
  'M6P',
  'M9P',
  'M1R',
  'M2R',
  'M4R',
  'M5R',
  'M6R',
  'M7R',
  'M9R',
  'M1S',
  'M4S',
  'M5S',
  'M6S',
  'M1T',
  'M4T',
  'M5T',
  'M1V',
  'M4V',
  'M5V',
  'M8V',
  'M9V',
  'M1W',
  'M4W',
  'M5W',
  'M8W',
  'M9W',
  'M1X',
  'M4X',
  'M5X',
  'M8X',
  'M4Y',
  'M7Y',
  'M8Y',
  'M8Z'],
 'Borough': ['North York',
  'North York',
  'Downtown Tor

Now that the data is appropriatly formated as a dictionary of equal length lists it can be converted into a pandas dataframe

In [5]:
df = pd.DataFrame(data)
df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills)North
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [6]:
df.shape

(103, 3)

## Part 2

Now that we have collected 103 postcodes with their list of neighborhoods, we can find the corresponding latitudes and longitudes

In [7]:
#!pip install geocoder # uncomment if geocoder is not installed
import geocoder # for converting addresses into latitude and longitude
from geopy.geocoders import Nominatim # an alternitive api library to geocoder in case it doesn't work

```python
# couldn't get the geocoder library to work

latitude = []

longitude = []

for post_code in df['PostalCode']:
    
    lat_lng_coords = None # initialize variable to None
    
    while(lat_lng_coords is None): # loop until the server responds
    
        g = geocoder.osm('{}, Toronto, Ontario'.format(post_code))
        lat_lng_coords = g.osm
        
    latitude.append(lat_lng_coords['x'])
    longitude.append(lat_lng_coords['y'])

df['Latitude'] = latitude
df['Longitude'] = longitude
```

```python
# other library also timed out
geolocator = Nominatim(user_agent="scott_pilgrim")
latitude = []
longitude = []

for address in df['PostalCode']:
    location = None
    while(location == None):
        location = geolocator.geocode('{}, Toronto, Ontario'.format(address))

    print('{} address got'.format(address))
    latitude.append(location.latitude)
    longitude.append(location.longitude)

df['Latitude'] = latitude
df['Longitude'] = longitude
```

In [8]:
# had to resort to using a premade csv
lat_lng_df = pd.read_csv('Geospatial_Coordinates.csv')
lat_lng_df

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


Downloding a full dataframe like this requires another step to combine the previous dataframe

In [9]:
df = pd.merge(df, lat_lng_df, left_on='PostalCode', right_on='Postal Code') # uses post code as a key to align the tables
df = df.drop('Postal Code', axis=1)

## Part 3

In [10]:
address = 'Toronto, ON' # create search string. this doesn't need a set format but performs better on comma seperated key words

geolocator = Nominatim(user_agent="canada_eh") # creates an object out of the geocoder service's API, which abstracts its functions
location = geolocator.geocode(address) # the geocode method turns a query string into a dictionary like geopy.location.Location object for any geocoder
latitude = location.latitude # latitude and longitude are keys
longitude = location.longitude
print('Toronto is located at {}, {}.'.format(latitude, longitude))

Toronto is located at 43.6534817, -79.3839347.


In [11]:
# fixed argumets for Foursquare api
CLIENT_ID = 'IESGEVMGJBKRSU3C4PVB2HKV5DSTHLKH1DSQKYS2WQ4JHPP2' # Foursquare ID
CLIENT_SECRET = 'EPYE2PPFYKLAR23NB50ZODOAFKOJCW5DBISX25DKWXORZHFP' # Foursquare Secret
VERSION = '20210623' # Foursquare API version YYYY/MM/DD = 2021/06/23
RADIUS = 500 # meters away that venues can be from the coordinates given
LIMIT = 100 # the maximum number of items the API will return

Here I will collect information on boroughs rather than individule neighborhoods to limit api calls

In [12]:
toronto_df = df.copy(deep=True) # creates a copy so this section can be done seperately

# iterates through each row
for i in range(toronto_df.shape[0]):
    
    borough_latitude = toronto_df.loc[i, 'Latitude'] # borough latitude value
    borough_longitude = toronto_df.loc[i, 'Longitude'] # borough longitude value

    # creates an api call to be used for each borough coordinates
    url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID,CLIENT_SECRET,borough_latitude,borough_longitude,VERSION,RADIUS,LIMIT)
    # creates a GET request and filters it to just the relevent list
    results = requests.get(url).json()["response"]['groups'][0]['items']
        
    # extracts all venue types and creates a column for each which counts the number within 500m of the borough's coordinates
    for v in results:
        
        # filter applied to each item the api returns
        venue_type = v['venue']['categories'][0]['name']
        
        #  skips neighborhood venue type since it is uninformative
        if venue_type == 'Neighborhood':
            continue
        
        # if the venue types has not appeared in the data previously this will make a column of zero counts on each row
        elif venue_type not in toronto_df.columns:
            
            toronto_df[venue_type] = 0
            
        # itterates the relevent cell by one for each occurence 
        toronto_df.loc[i, venue_type] += 1
    
toronto_df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Fast Food Restaurant,Park,Hotel,Food & Drink Shop,Hockey Arena,Portuguese Restaurant,Coffee Shop,Intersection,Bakery,Distribution Center,Spa,Restaurant,Historic Site,Farmers Market,Chocolate Shop,Performing Arts Venue,Greek Restaurant,Dessert Shop,Pub,Breakfast Spot,Event Space,Bank,Café,Sandwich Place,Theater,French Restaurant,Asian Restaurant,Art Gallery,Beer Store,Mexican Restaurant,Electronics Store,Discount Store,Wine Shop,Antique Shop,Boutique,Vietnamese Restaurant,Clothing Store,Miscellaneous Shop,Carpet Store,Furniture / Home Store,Sushi Restaurant,Yoga Studio,Burger Joint,Persian Restaurant,Diner,Japanese Restaurant,Burrito Place,Bar,Gym,College Auditorium,College Cafeteria,Falafel Restaurant,Caribbean Restaurant,Gastropub,Gym / Fitness Center,Pharmacy,Pizza Place,Flea Market,Athletics & Sports,Comic Shop,Music Venue,Middle Eastern Restaurant,Plaza,Modern European Restaurant,Steakhouse,Tea Room,Cosmetics Shop,Ramen Restaurant,Bookstore,Thai Restaurant,Ethiopian Restaurant,Seafood Restaurant,College Rec Center,Video Game Store,Shopping Mall,Chinese Restaurant,Juice Bar,Bubble Tea Shop,Italian Restaurant,New American Restaurant,Cocktail Bar,Candy Store,Lingerie Store,Office,Ice Cream Shop,Kitchen Supply Store,Shoe Store,Wine Bar,Hookah Bar,Poutine Place,Playground,Print Shop,Sporting Goods Shop,Dim Sum Restaurant,Supermarket,Bus Line,Smoke Shop,Dance Studio,Skating Rink,Creperie,BBQ Joint,American Restaurant,Salon / Barbershop,Molecular Gastronomy Restaurant,Vegetarian / Vegan Restaurant,Grocery Store,Cheese Shop,German Restaurant,Beer Bar,Comfort Food Restaurant,Department Store,Moroccan Restaurant,Bistro,Belgian Restaurant,Field,Garden,Trail,Liquor Store,Shopping Plaza,Convenience Store,Pet Store,Rental Car Location,Donut Shop,Medical Center,Health Food Store,Museum,Jazz Club,Basketball Stadium,Bagel Shop,Beach,Butcher,Indian Restaurant,Women's Store,Pool,Korean BBQ Restaurant,Soccer Field,Sports Bar,Fish & Chips Shop,Brewery,Bike Shop,Poke Place,Arts & Crafts Store,Salad Place,Korean Restaurant,Video Store,Baby Store,Nightclub,Hakka Restaurant,Gas Station,Golf Course,Mediterranean Restaurant,Dog Run,Bridal Shop,Gift Shop,Warehouse Store,Concert Hall,Opera House,Lounge,Speakeasy,Record Shop,Deli / Bodega,Gluten-free Restaurant,Brazilian Restaurant,Colombian Restaurant,Soup Place,Toy / Game Store,Food Court,Movie Theater,Men's Store,Luggage Store,Jewelry Store,Accessories Store,Bus Station,Baseball Field,Lake,IT Services,Train Station,History Museum,Aquarium,Monument / Landmark,Scenic Lookout,Fried Chicken Joint,Baseball Stadium,Indie Movie Theater,Hotel Bar,Gym Pool,Cuban Restaurant,Malay Restaurant,Cupcake Shop,Airport,Frozen Yogurt Shop,Climbing Gym,Stadium,Metro Station,Basketball Court,Construction & Landscaping,Motel,Food Truck,Fish Market,Stationery Store,Latin American Restaurant,Thrift / Vintage Store,General Entertainment,College Stadium,Business Service,Swim School,Light Rail Station,Cajun / Creole Restaurant,Auto Garage,Martial Arts School,Flower Shop,Eastern European Restaurant,Mobile Phone Shop,Tennis Court,College Gym,College Arts Building,Noodle House,Gourmet Shop,Supplement Shop,South American Restaurant,Organic Grocery,Gaming Cafe,Doner Restaurant,Bed & Breakfast,Hospital,Harbor / Marina,Airport Lounge,Airport Food Court,Airport Terminal,Airport Gate,Plane,Airport Service,Sculpture Garden,Boat or Ferry,Hobby Shop,Church,Drugstore,Garden Center,Truck Stop,Taiwanese Restaurant,Snack Place,Market,River,Theme Restaurant,Adult Boutique,Escape Room,Gay Bar,Sake Bar,Afghan Restaurant,Strip Club,Auto Workshop,Skate Park,Wings Joint,Tanning Salon,Kids Store
0,M3A,North York,Parkwoods,43.753259,-79.329656,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,M4A,North York,Victoria Village,43.725882,-79.315572,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0,3,1,0,0,0,7,0,3,1,1,1,1,1,1,1,1,1,3,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,3,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494,0,1,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,3,1,1,1,1,1,2,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,M3B,North York,Don Mills)North,43.745906,-79.352188,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,1,3,0,0,0,13,0,1,1,1,1,0,0,0,0,0,0,0,0,0,3,6,7,2,0,0,0,0,0,0,0,0,0,0,1,7,1,0,1,0,0,1,0,1,3,1,1,0,0,0,1,0,1,0,1,3,0,0,1,1,2,1,1,1,1,3,2,2,2,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


This code was used to determin "can only concatenate str (not "int") to str" errors were caused by neighborhood being a venue type in the raw data
```python
try:
    toronto_df.loc[i, venue_type] += 1
            
except: 
    print('The title is {}, the row is {}, the dtype is {}, and...'.format(venue_type, i, toronto_df[venue_type].dtypes))
    print('This is the venue:', v)
```

Now there is a glut of data to build a clustering model

In [24]:
from sklearn.preprocessing import StandardScaler # will be used to normalize the dataset
from sklearn.cluster import KMeans # the k means model builder

centres = 5 # the number of clusters the data will be split into

X = toronto_df.values[:,5:] # the features to be normalized
cluster_dataset = StandardScaler().fit_transform(X)
print("normalized data: ", cluster_dataset)
k_means = KMeans(init="k-means++", n_clusters=centres, n_init=20)
k_means.fit(cluster_dataset)
labels = k_means.labels_

df["Labels"] = labels # the catagories are appended to the original dataframe
df.head()

normalized data:  [[ 1.21603891  0.78716867  0.61864129 ... -0.09901475 -0.09901475
  -0.09901475]
 [-0.43201382 -0.71428268 -0.30483774 ... -0.09901475 -0.09901475
  -0.09901475]
 [-0.43201382  3.79007136  0.61864129 ... -0.09901475 -0.09901475
  -0.09901475]
 ...
 [ 1.21603891  0.78716867 -0.30483774 ... -0.09901475 -0.09901475
  -0.09901475]
 [-0.43201382 -0.71428268 -0.30483774 ... -0.09901475 -0.09901475
  -0.09901475]
 [-0.43201382 -0.71428268 -0.30483774 ... 10.09950494 10.09950494
  10.09950494]]


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Labels
0,M3A,North York,Parkwoods,43.753259,-79.329656,0
1,M4A,North York,Victoria Village,43.725882,-79.315572,0
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,0
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494,0


In [14]:
import folium # module for rendering a map

# cm stands for color map. 
# it takes an array of floats, maps them to positions on a given spectrum and returns arrays of floats denoting intensity of:
# Red, Green, Blue, and alpha (RGBa)
import matplotlib.cm as cm

# This module includes functions and classes for color specification conversions
# will be used to convert RGBa float tuples into  hex codes, which are in ubiquitus use for rendering colour (can also convert RGB, html names, or a single float for greyscale)
import matplotlib.colors as colors 

In [25]:
# set colour scheme for the clusters
colors_array = cm.rainbow(np.linspace(0, 1, centres)) # creates evenly spaced values on the 'rainbow' spectrum
rainbow = [colors.rgb2hex(i) for i in colors_array]  # converts RGBa array to hex code array

In [34]:
# will create a map centered on Toronto
toronto_map = folium.Map(location=[latitude + 0.07, longitude], zoom_start=11) # adjusted latitude for better view of whole city

# adds markers to the map for each borough
for lat, lon, name, cluster in zip(df['Latitude'], df['Longitude'], df['Neighborhood'], df['Labels']):
    label = folium.Popup(str(name) + ' assigned to Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius = 5,
        popup = label,
        color = rainbow[cluster],
        fill = True,
        fill_color = rainbow[cluster],
        fill_opacity = 0.9).add_to(toronto_map)
       
toronto_map