This is the Applied Science Data Science Capstone for the IBM Coursera Specialisation. 

In this Capstone we'll be comparing two iconic cities - Melbourne and New York. 

### Business Case Scenario
I'm a post-graduate applicant considering offers from two universities - Columbia University and the University of Melbourne. Both universities are equally attractive and the tie-breaker is city livability. 
I am measuring this by evaluating both cities based on two questions:

1. What amenities are available in each university area within walking distance (<= 5 km or 3.11 miles)?
2. Which university area has the lowest crime rate?

### Note: I will be referencing my datasets to those mentioned in the report. Please see the .pdf report  on GitHub :)

# Installation of Python 3 libraries

In [5]:
# Import the necessary libraries
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json

!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim 

import requests 
from pandas.io.json import json_normalize 

import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans


!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library

print('Folium installed')
print('Libraries imported.')

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

Folium installed
Libraries imported.


In [6]:
print('Hello Capstone Project Course!')

Hello Capstone Project Course!


In [7]:
print('Test!')

Test!


# Load the Melbourne Neighbourhood Boundary Data into a Dataframe

In [8]:
# Load the australian postcodes [Dataset E] onto a dataframe
df = pd.read_csv('australian_postcodes(clean).csv') 
df.head()

Unnamed: 0,id,postcode,locality,state,long,lat,dc,type,status,sa3,sa3name,sa4,sa4name
0,230,200,Anu,ACT,0.0,0.0,,,,,,,
1,21820,200,Australian National University,ACT,149.1189,-35.2777,,,Added 19-Jan-2020,,,,
2,232,800,Darwin,NT,130.83668,-12.458684,,,Updated 6-Feb-2020,70101.0,Darwin City,701.0,Darwin
3,233,801,Darwin,NT,130.83668,-12.458684,,,Updated 25-Mar-2020 SA3,70101.0,Darwin City,701.0,Darwin
4,234,804,Parap,NT,130.873315,-12.428017,,,Updated 25-Mar-2020 SA3,70102.0,Darwin Suburbs,701.0,Darwin


In [9]:
df_mel = df[(df['state']=='VIC') & (df['type']=='Delivery Area')]
df_mel.head(20)

Unnamed: 0,id,postcode,locality,state,long,lat,dc,type,status,sa3,sa3name,sa4,sa4name
6099,4746,3000,Melbourne,VIC,144.956776,-37.817403,CITY DELIVERY CENTRE,Delivery Area,Updated 6-Feb-2020,20604.0,Melbourne City,206.0,Melbourne - Inner
6101,4748,3002,East Melbourne,VIC,144.982207,-37.818517,CITY DELIVERY CENTRE,Delivery Area,Updated 6-Feb-2020,20604.0,Melbourne City,206.0,Melbourne - Inner
6102,4749,3003,West Melbourne,VIC,144.949592,-37.810871,CITY DELIVERY CENTRE,Delivery Area,Updated 6-Feb-2020,20604.0,Melbourne City,206.0,Melbourne - Inner
6103,4750,3004,Melbourne,VIC,144.970161,-37.844246,CITY DELIVERY CENTRE,Delivery Area,Updated 6-Feb-2020,20605.0,Port Phillip,206.0,Melbourne - Inner
6106,4752,3005,World Trade Centre,VIC,144.950858,-37.824608,CITY DELIVERY CENTRE,Delivery Area,Updated 6-Feb-2020,20604.0,Melbourne City,206.0,Melbourne - Inner
6107,20754,3006,South Wharf,VIC,144.952074,-37.825287,Melbourne Metro,Delivery Area,Updated 6-Feb-2020,20605.0,Port Phillip,206.0,Melbourne - Inner
6108,4753,3006,Southbank,VIC,144.965926,-37.823258,SOUTH MELBOURNE DC,Delivery Area,Updated 6-Feb-2020,20605.0,Port Phillip,206.0,Melbourne - Inner
6109,4754,3008,Docklands,VIC,144.948039,-37.814719,CITY DELIVERY CENTRE,Delivery Area,Updated 6-Feb-2020,20604.0,Melbourne City,206.0,Melbourne - Inner
6110,4755,3010,University Of Melbourne,VIC,144.961351,-37.796152,CARLTON DC,Delivery Area,Updated 6-Feb-2020,20604.0,Melbourne City,206.0,Melbourne - Inner
6111,4756,3011,Footscray,VIC,144.907953,-37.807101,WEST FOOTSCRAY DC,Delivery Area,Updated 6-Feb-2020,21303.0,Maribyrnong,213.0,Melbourne - West


In [10]:
# Rename the columns
df_mel = df_mel.dropna()
df_mel.rename(columns={'locality':'neighborhood', 'sa4name':'region'},inplace=True)

# Select the columns and create a new dataframe
df_mel = df_mel[['neighborhood','lat','long',]]
df_mel = df_mel.drop_duplicates(subset = ['neighborhood'])


# Select only the neighbourhoods around the University of Melbourne campus
df_mel = df_mel.set_index('neighborhood')
df_mel = df_mel.loc[['North Melbourne','Melbourne','East Melbourne','Carlton','Carlton North','Brunswick','Carlton North',
            'Collingwood','Fitzroy','Fitzroy North','Parkville','South Yarra']]
df_mel.reset_index(drop=False,inplace=True)
df_mel.rename(columns={'neighborhood':'Neighborhood'},inplace=True)

Ensure the Melbourne dataframe is parred down to the university surrounds

In [11]:
print(df_mel.shape)
df_mel.head()

(12, 3)


Unnamed: 0,Neighborhood,lat,long
0,North Melbourne,-37.8006,144.943559
1,Melbourne,-37.817403,144.956776
2,East Melbourne,-37.818517,144.982207
3,Carlton,-37.803569,144.966112
4,Carlton North,-37.786971,144.96724


# University of Melbourne Area Analysis
### Step 1: Send a GET request to the FourSquare API for venue data

In [65]:
CLIENT_ID = '#########2G40IE' # your Foursquare ID
CLIENT_SECRET = '#########CEYRRG2U' # your Foursquare Secret
VERSION = '20180604'

In [64]:
# Obtain the latlong coordinates for the University of Melbourne
address = 'University of Melbourne'

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

-37.7970796 144.96130134414312


In [63]:
# Set a 5km radius limit around the University of Melbourne 
radius = 5000
LIMIT = 100
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, radius, LIMIT)

In [62]:
results = requests.get(url).json()

In [16]:
# Define a "get venue categories" function
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name'] 
    
# load and structure the venues into a dataframe
venues = results['response']['groups'][0]['items']
    
melbourne_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.neighborhood', 'venue.location.city', 'venue.location.lat', 'venue.location.lng']
melbourne_venues =melbourne_venues.loc[:, filtered_columns]

# filter the category for each row
melbourne_venues['venue.categories'] = melbourne_venues.apply(get_category_type, axis=1)

# clean the venue columns
melbourne_venues.columns = [col.split(".")[-1] for col in melbourne_venues.columns]

melbourne_venues.drop(columns=['neighborhood'],inplace=True)
melbourne_venues.rename(columns={'city':'neighborhood'},inplace=True)
melbourne_venues = pd.DataFrame(melbourne_venues).dropna()
melbourne_venues.head()                      

  app.launch_new_instance()


Unnamed: 0,name,categories,neighborhood,lat,lng
0,University House,Lounge,Parkville,-37.796282,144.960054
1,Pidapipó Gelateria,Ice Cream Shop,Melbourne,-37.7983,144.96706
2,Readings,Bookstore,Carlton,-37.797971,144.967405
3,Carlton Wine Room,Wine Bar,Carlton,-37.798584,144.96861
4,Carte Crêpes,Café,Parkville,-37.799052,144.960357


Check the collection of venues around the University of Melbourne

In [17]:
print('{} venues were returned by Foursquare.'.format(melbourne_venues.shape[0]))

98 venues were returned by Foursquare.


### Step 2: Neighbourhood Analysis

Define a function to get nearby venues around the University of Melbourne

In [18]:
def getNearbyVenues(names, latitudes, longitudes, radius=5000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    MELBnearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    MELBnearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(MELBnearby_venues)

Collect the venues in the neighbourhoods surround the university based on our original melbourne dataframe

In [19]:
# Create a new dataframe called Melb neighbourhoods

venues = results['response']['groups'][0]['items']
    
MELBnearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
MELBnearby_venues = MELBnearby_venues.loc[:, filtered_columns]

# filter the category for each row
MELBnearby_venues['venue.categories'] = MELBnearby_venues.apply(get_category_type, axis=1)

# clean columns
MELBnearby_venues.columns = [col.split(".")[-1] for col in MELBnearby_venues.columns]
MELBnearby_venues.head()

melbourne_venues = getNearbyVenues(names=df_mel['Neighborhood'],
                                   latitudes=df_mel['lat'],
                                   longitudes=df_mel['long']
                                  )

  """


North Melbourne
Melbourne
East Melbourne
Carlton
Carlton North
Brunswick
Carlton North
Collingwood
Fitzroy
Fitzroy North
Parkville
South Yarra


In [20]:
melbourne_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Brunswick,100,100,100,100,100,100
Carlton,100,100,100,100,100,100
Carlton North,200,200,200,200,200,200
Collingwood,100,100,100,100,100,100
East Melbourne,100,100,100,100,100,100
Fitzroy,100,100,100,100,100,100
Fitzroy North,100,100,100,100,100,100
Melbourne,100,100,100,100,100,100
North Melbourne,100,100,100,100,100,100
Parkville,100,100,100,100,100,100


### Step 3: Venue Analysis

Summarize the data into a onehot encoding to generate the frequency mean of each venue

In [21]:
# one hot encoding
melbourne_onehot = pd.get_dummies(melbourne_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
melbourne_onehot['Neighborhood'] = melbourne_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [melbourne_onehot.columns[-1]] + list(melbourne_onehot.columns[:-1])
melbourne_onehot = melbourne_onehot[fixed_columns]

melbourne_onehot.head()

Unnamed: 0,Neighborhood,African Restaurant,Argentinian Restaurant,Art Gallery,Asian Restaurant,Australian Restaurant,BBQ Joint,Bakery,Bar,Beach,Beach Bar,Beer Bar,Beer Garden,Beer Store,Bike Rental / Bike Share,Bookstore,Botanical Garden,Boutique,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Café,Cheese Shop,Chinese Restaurant,Climbing Gym,Cocktail Bar,Coffee Shop,Comic Shop,Community Center,Concert Hall,Cricket Ground,Cultural Center,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Dog Run,Donut Shop,Ethiopian Restaurant,Falafel Restaurant,Farm,Farmers Market,Fish & Chips Shop,Food Truck,Football Stadium,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Gastropub,General Entertainment,Golf Course,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Hotel,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Lebanese Restaurant,Library,Liquor Store,Lounge,Malay Restaurant,Market,Mexican Restaurant,Middle Eastern Restaurant,Mini Golf,Monument / Landmark,Movie Theater,Museum,Music Venue,Night Market,Opera House,Outdoors & Recreation,Park,Pedestrian Plaza,Performing Arts Venue,Peruvian Restaurant,Pizza Place,Playground,Plaza,Polish Restaurant,Pool,Pub,Racetrack,Restaurant,River,Road,Roof Deck,Sake Bar,Sandwich Place,Seafood Restaurant,Shopping Mall,South American Restaurant,Speakeasy,Sri Lankan Restaurant,Sushi Restaurant,Szechuan Restaurant,Taco Place,Tennis Stadium,Thai Restaurant,Theater,Track,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Yoga Studio,Zoo,Zoo Exhibit
0,North Melbourne,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,North Melbourne,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,North Melbourne,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,North Melbourne,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,North Melbourne,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [22]:
# Group melbourne neighbourhoods and analyse their venues
melbourne_grouped = melbourne_onehot.groupby('Neighborhood').mean().reset_index()
melbourne_grouped

Unnamed: 0,Neighborhood,African Restaurant,Argentinian Restaurant,Art Gallery,Asian Restaurant,Australian Restaurant,BBQ Joint,Bakery,Bar,Beach,Beach Bar,Beer Bar,Beer Garden,Beer Store,Bike Rental / Bike Share,Bookstore,Botanical Garden,Boutique,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Café,Cheese Shop,Chinese Restaurant,Climbing Gym,Cocktail Bar,Coffee Shop,Comic Shop,Community Center,Concert Hall,Cricket Ground,Cultural Center,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Dog Run,Donut Shop,Ethiopian Restaurant,Falafel Restaurant,Farm,Farmers Market,Fish & Chips Shop,Food Truck,Football Stadium,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Gastropub,General Entertainment,Golf Course,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Hotel,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Lebanese Restaurant,Library,Liquor Store,Lounge,Malay Restaurant,Market,Mexican Restaurant,Middle Eastern Restaurant,Mini Golf,Monument / Landmark,Movie Theater,Museum,Music Venue,Night Market,Opera House,Outdoors & Recreation,Park,Pedestrian Plaza,Performing Arts Venue,Peruvian Restaurant,Pizza Place,Playground,Plaza,Polish Restaurant,Pool,Pub,Racetrack,Restaurant,River,Road,Roof Deck,Sake Bar,Sandwich Place,Seafood Restaurant,Shopping Mall,South American Restaurant,Speakeasy,Sri Lankan Restaurant,Sushi Restaurant,Szechuan Restaurant,Taco Place,Tennis Stadium,Thai Restaurant,Theater,Track,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Yoga Studio,Zoo,Zoo Exhibit
0,Brunswick,0.0,0.0,0.0,0.0,0.0,0.0,0.07,0.05,0.0,0.0,0.0,0.02,0.0,0.01,0.02,0.0,0.0,0.0,0.01,0.0,0.01,0.14,0.01,0.0,0.01,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.01,0.03,0.0,0.01,0.0,0.0,0.01,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.03,0.03,0.0,0.03,0.01,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.02,0.03,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.03,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.01,0.03,0.01,0.0,0.02,0.01,0.0,0.01,0.03
1,Carlton,0.01,0.01,0.0,0.01,0.0,0.0,0.04,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.0,0.13,0.01,0.01,0.0,0.04,0.1,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.02,0.03,0.0,0.01,0.02,0.02,0.02,0.01,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.01,0.01,0.01,0.0,0.01,0.0,0.0,0.03,0.01,0.0,0.01,0.01,0.01,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.03,0.0,0.03,0.03,0.01,0.0,0.0,0.0
2,Carlton North,0.02,0.01,0.0,0.01,0.0,0.01,0.04,0.05,0.0,0.0,0.02,0.0,0.0,0.0,0.03,0.0,0.0,0.01,0.01,0.0,0.0,0.18,0.01,0.0,0.0,0.03,0.07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.02,0.01,0.01,0.03,0.02,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.01,0.01,0.01,0.0,0.01,0.04,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.03,0.0,0.02,0.03,0.02,0.0,0.0,0.0
3,Collingwood,0.0,0.01,0.01,0.02,0.0,0.01,0.03,0.04,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.02,0.0,0.0,0.13,0.01,0.01,0.0,0.04,0.08,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.02,0.03,0.01,0.01,0.0,0.02,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.01,0.0,0.07,0.01,0.01,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.01,0.01,0.01,0.0,0.02,0.0,0.02,0.04,0.01,0.0,0.0,0.0
4,East Melbourne,0.0,0.01,0.01,0.02,0.0,0.01,0.02,0.04,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.02,0.01,0.0,0.12,0.0,0.01,0.0,0.04,0.09,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.02,0.02,0.03,0.0,0.0,0.0,0.02,0.01,0.01,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.02,0.0,0.01,0.01,0.0,0.01,0.0,0.08,0.01,0.01,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.01,0.01,0.01,0.0,0.01,0.01,0.02,0.03,0.01,0.0,0.0,0.0
5,Fitzroy,0.0,0.01,0.01,0.02,0.0,0.01,0.03,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.01,0.01,0.0,0.13,0.01,0.01,0.0,0.04,0.09,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.02,0.03,0.01,0.01,0.0,0.02,0.01,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.01,0.0,0.0,0.06,0.01,0.01,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.03,0.0,0.02,0.04,0.01,0.0,0.0,0.0
6,Fitzroy North,0.01,0.01,0.0,0.01,0.0,0.01,0.04,0.05,0.0,0.0,0.02,0.0,0.0,0.0,0.03,0.0,0.0,0.01,0.03,0.0,0.0,0.19,0.01,0.0,0.0,0.03,0.05,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.02,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.03,0.02,0.01,0.03,0.02,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.03,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.04,0.01,0.02,0.04,0.02,0.0,0.0,0.0
7,Melbourne,0.01,0.01,0.01,0.01,0.02,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.02,0.01,0.0,0.01,0.01,0.13,0.0,0.01,0.0,0.02,0.1,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.01,0.02,0.03,0.01,0.0,0.0,0.01,0.02,0.02,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.02,0.01,0.01,0.01,0.01,0.01,0.0,0.07,0.01,0.02,0.01,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.03,0.0,0.0,0.0,0.0,0.0
8,North Melbourne,0.01,0.01,0.01,0.01,0.01,0.0,0.01,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.01,0.0,0.0,0.01,0.02,0.17,0.01,0.01,0.0,0.02,0.1,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.01,0.02,0.0,0.01,0.01,0.02,0.02,0.01,0.01,0.0,0.01,0.0,0.01,0.01,0.0,0.01,0.01,0.01,0.01,0.0,0.01,0.0,0.01,0.03,0.01,0.0,0.01,0.01,0.01,0.03,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.03,0.01,0.0,0.0,0.0,0.0
9,Parkville,0.01,0.01,0.0,0.0,0.0,0.0,0.02,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.01,0.0,0.01,0.18,0.01,0.0,0.0,0.03,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.02,0.01,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.01,0.01,0.02,0.0,0.02,0.01,0.01,0.0,0.01,0.01,0.01,0.02,0.01,0.01,0.01,0.0,0.01,0.01,0.01,0.0,0.01,0.04,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.04,0.0,0.02,0.03,0.01,0.0,0.0,0.0


In [23]:
# Create a fuction to return the most common venues based on frequency of occurence
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [24]:
# Create a listing of top 10 venues for each neighbourhood

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = melbourne_grouped['Neighborhood']

for ind in np.arange(melbourne_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(melbourne_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Brunswick,Café,Bakery,Bar,Zoo Exhibit,Thai Restaurant,Falafel Restaurant,Indian Restaurant,Italian Restaurant,Middle Eastern Restaurant,Park
1,Carlton,Café,Coffee Shop,Bakery,Cocktail Bar,Ice Cream Shop,Park,Wine Bar,Whisky Bar,Vegetarian / Vegan Restaurant,Bar
2,Carlton North,Café,Coffee Shop,Bar,Park,Bakery,Wine Bar,Bookstore,Vegetarian / Vegan Restaurant,Cocktail Bar,Italian Restaurant
3,Collingwood,Café,Coffee Shop,Park,Cocktail Bar,Bar,Wine Bar,Ice Cream Shop,Bakery,Hotel,Brewery
4,East Melbourne,Café,Coffee Shop,Park,Cocktail Bar,Bar,Ice Cream Shop,Plaza,Wine Bar,Asian Restaurant,Monument / Landmark
5,Fitzroy,Café,Coffee Shop,Park,Cocktail Bar,Wine Bar,Bar,Bakery,Ice Cream Shop,Vegetarian / Vegan Restaurant,Asian Restaurant
6,Fitzroy North,Café,Coffee Shop,Bar,Wine Bar,Vegetarian / Vegan Restaurant,Bakery,Ice Cream Shop,Bookstore,Italian Restaurant,Cocktail Bar
7,Melbourne,Café,Coffee Shop,Park,Plaza,Hotel,Bookstore,Whisky Bar,Australian Restaurant,Bar,Japanese Restaurant
8,North Melbourne,Café,Coffee Shop,Plaza,Whisky Bar,Park,Bar,Bookstore,Ice Cream Shop,Gym / Fitness Center,Cocktail Bar
9,Parkville,Café,Coffee Shop,Park,Bar,Vegetarian / Vegan Restaurant,Bookstore,Cocktail Bar,Wine Bar,Mexican Restaurant,Korean Restaurant


### Step 4: Cluster neighbourhoods

Cluster Melbourne neighbourhoods and label them

In [25]:
import sklearn.cluster.k_means_ as kmean
kmeans = kmean.KMeans()

# set number of clusters
kclusters = 3

melbourne_grouped_clustering = melbourne_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(melbourne_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 



array([2, 0, 2, 0, 0, 0, 2, 0, 0, 2])

In [26]:
# add clustering labels
neighborhoods_venues_sorted.insert(1, 'Cluster Labels', kmeans.labels_)

# Merge neighbourhood clusters with original Melbourne neighbourhood dataframe to obtain the latlong coordinates
df1 = neighborhoods_venues_sorted
df2 = df_mel

melbourne_merged = pd.merge(df1,df2,on='Neighborhood')
melbourne_merged

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,lat,long
0,Brunswick,2,Café,Bakery,Bar,Zoo Exhibit,Thai Restaurant,Falafel Restaurant,Indian Restaurant,Italian Restaurant,Middle Eastern Restaurant,Park,-37.766292,144.960108
1,Carlton,0,Café,Coffee Shop,Bakery,Cocktail Bar,Ice Cream Shop,Park,Wine Bar,Whisky Bar,Vegetarian / Vegan Restaurant,Bar,-37.803569,144.966112
2,Carlton North,2,Café,Coffee Shop,Bar,Park,Bakery,Wine Bar,Bookstore,Vegetarian / Vegan Restaurant,Cocktail Bar,Italian Restaurant,-37.786971,144.96724
3,Carlton North,2,Café,Coffee Shop,Bar,Park,Bakery,Wine Bar,Bookstore,Vegetarian / Vegan Restaurant,Cocktail Bar,Italian Restaurant,-37.786971,144.96724
4,Collingwood,0,Café,Coffee Shop,Park,Cocktail Bar,Bar,Wine Bar,Ice Cream Shop,Bakery,Hotel,Brewery,-37.804846,144.986906
5,East Melbourne,0,Café,Coffee Shop,Park,Cocktail Bar,Bar,Ice Cream Shop,Plaza,Wine Bar,Asian Restaurant,Monument / Landmark,-37.818517,144.982207
6,Fitzroy,0,Café,Coffee Shop,Park,Cocktail Bar,Wine Bar,Bar,Bakery,Ice Cream Shop,Vegetarian / Vegan Restaurant,Asian Restaurant,-37.802608,144.977795
7,Fitzroy North,2,Café,Coffee Shop,Bar,Wine Bar,Vegetarian / Vegan Restaurant,Bakery,Ice Cream Shop,Bookstore,Italian Restaurant,Cocktail Bar,-37.790974,144.98622
8,Melbourne,0,Café,Coffee Shop,Park,Plaza,Hotel,Bookstore,Whisky Bar,Australian Restaurant,Bar,Japanese Restaurant,-37.817403,144.956776
9,North Melbourne,0,Café,Coffee Shop,Plaza,Whisky Bar,Park,Bar,Bookstore,Ice Cream Shop,Gym / Fitness Center,Cocktail Bar,-37.8006,144.943559


In [27]:
print(melbourne_merged.shape)

(12, 14)


In [28]:
# Map the 5 melbourne neighbourhood clusters

# create map
mel_map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# To further refine our search for ideal neighbourhood clusters within walking distance from our university, 
# we mark out a 1-km (0.62 mile) radius from the university by using the .Circle method and setting radius to 1000
folium.features.Circle(
    [latitude, longitude],
    radius=1000,
    color='steelblue',
    popup='University of Melbourne',
    fill = True,
    fill_color = 'steelblue',
    fill_opacity = 0.5
).add_to(mel_map_clusters)

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(melbourne_merged['lat'], melbourne_merged['long'], melbourne_merged['Neighborhood'], melbourne_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)],
        fill=True,
        fill_color=rainbow[int(cluster)],
        fill_opacity=0.7).add_to(mel_map_clusters)
       
mel_map_clusters

### Examine Melbourne Cluster 1

In [29]:
melbourne_merged.loc[melbourne_merged['Cluster Labels'] == 0, 'Neighborhood':'10th Most Common Venue']

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Carlton,0,Café,Coffee Shop,Bakery,Cocktail Bar,Ice Cream Shop,Park,Wine Bar,Whisky Bar,Vegetarian / Vegan Restaurant,Bar
4,Collingwood,0,Café,Coffee Shop,Park,Cocktail Bar,Bar,Wine Bar,Ice Cream Shop,Bakery,Hotel,Brewery
5,East Melbourne,0,Café,Coffee Shop,Park,Cocktail Bar,Bar,Ice Cream Shop,Plaza,Wine Bar,Asian Restaurant,Monument / Landmark
6,Fitzroy,0,Café,Coffee Shop,Park,Cocktail Bar,Wine Bar,Bar,Bakery,Ice Cream Shop,Vegetarian / Vegan Restaurant,Asian Restaurant
8,Melbourne,0,Café,Coffee Shop,Park,Plaza,Hotel,Bookstore,Whisky Bar,Australian Restaurant,Bar,Japanese Restaurant
9,North Melbourne,0,Café,Coffee Shop,Plaza,Whisky Bar,Park,Bar,Bookstore,Ice Cream Shop,Gym / Fitness Center,Cocktail Bar


### Examine Melbourne Cluster 2

In [30]:
melbourne_merged.loc[melbourne_merged['Cluster Labels'] == 1, 'Neighborhood':'10th Most Common Venue']

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,South Yarra,1,Café,Park,Ice Cream Shop,Coffee Shop,Beach,Plaza,Pub,Restaurant,Breakfast Spot,Beer Bar


### Examine Melbourne Cluster 3

In [31]:
melbourne_merged.loc[melbourne_merged['Cluster Labels'] == 2, 'Neighborhood':'10th Most Common Venue']

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Brunswick,2,Café,Bakery,Bar,Zoo Exhibit,Thai Restaurant,Falafel Restaurant,Indian Restaurant,Italian Restaurant,Middle Eastern Restaurant,Park
2,Carlton North,2,Café,Coffee Shop,Bar,Park,Bakery,Wine Bar,Bookstore,Vegetarian / Vegan Restaurant,Cocktail Bar,Italian Restaurant
3,Carlton North,2,Café,Coffee Shop,Bar,Park,Bakery,Wine Bar,Bookstore,Vegetarian / Vegan Restaurant,Cocktail Bar,Italian Restaurant
7,Fitzroy North,2,Café,Coffee Shop,Bar,Wine Bar,Vegetarian / Vegan Restaurant,Bakery,Ice Cream Shop,Bookstore,Italian Restaurant,Cocktail Bar
10,Parkville,2,Café,Coffee Shop,Park,Bar,Vegetarian / Vegan Restaurant,Bookstore,Cocktail Bar,Wine Bar,Mexican Restaurant,Korean Restaurant


# Analyzing Melbourne Crime Rate

Here, we are going to generate the Choropleth map of using a 
1) a json file marking neighbourhood boundaries [Dataset F], and 
2) a VIC crime statistics report [Dataset D]

### Load melbourne crime statistics into a dataframe 

In [32]:
## Load VIC crime statistics - 2010 to 2019 [Dataset D]
mel_crim = pd.read_excel('VIC_Crime_2010_to_2019.xlsx')
mel_crim.head()

Unnamed: 0,Year,Year ending,Local Government Area,Postcode,Suburb/Town Name,Offence Division,Offence Subdivision,Offence Subgroup,Incidents Recorded
0,2019,December,Alpine,3691,Dederang,B Property and deception offences,B30 Burglary/Break and enter,B321 Residential non-aggravated burglary,1
1,2019,December,Alpine,3691,Dederang,E Justice procedures offences,E10 Justice procedures,E14 Pervert the course of justice or commit pe...,1
2,2019,December,Alpine,3691,Dederang,A Crimes against the person,A20 Assault and related offences,A212 Non-FV Serious assault,1
3,2019,December,Alpine,3691,Dederang,D Public order and security offences,D20 Disorderly and offensive conduct,D22 Drunk and disorderly in public,2
4,2019,December,Alpine,3691,Dederang,B Property and deception offences,B20 Property damage,B21 Criminal damage,1


In [33]:
## Transform and load the crime statistics [Dataset D] onto a dataframe
mel_crim = mel_crim.set_index('Suburb/Town Name')
mel_crim = mel_crim.loc[['North Melbourne','Melbourne','East Melbourne','Carlton','Carlton North','Brunswick','Carlton North',
            'Collingwood','Fitzroy','Fitzroy North','Parkville','South Yarra']]
mel_crim = mel_crim.groupby(['Postcode','Suburb/Town Name'])['Incidents Recorded'].sum().reset_index()
mel_crim = mel_crim.rename(columns={'Suburb/Town Name':'Neighbourhood','Incidents Recorded':'Count'})
mel_crim 

Unnamed: 0,Postcode,Neighbourhood,Count
0,3000,Melbourne,148273
1,3002,East Melbourne,7572
2,3004,Melbourne,7453
3,3010,Parkville,880
4,3050,Parkville,4045
5,3051,North Melbourne,12432
6,3052,Parkville,5028
7,3053,Carlton,14720
8,3054,Carlton North,8274
9,3056,Brunswick,22249


In [34]:
# We matched the json file and the crime stats datasets, using Postcode.
# Here we must remember to change the Postcode to a STRING. the key_on function in Folium matches strings, not integers

mel_crim = mel_crim.astype({'Postcode': 'str','Count':'int64'})
mel_crim.head()
mel_crim.dtypes

Postcode         object
Neighbourhood    object
Count             int64
dtype: object

In [35]:
mel_crim

Unnamed: 0,Postcode,Neighbourhood,Count
0,3000,Melbourne,148273
1,3002,East Melbourne,7572
2,3004,Melbourne,7453
3,3010,Parkville,880
4,3050,Parkville,4045
5,3051,North Melbourne,12432
6,3052,Parkville,5028
7,3053,Carlton,14720
8,3054,Carlton North,8274
9,3056,Brunswick,22249


### Plot the consolidated University of Melbourne map

In [36]:
mel_geo = 'Melb_Postcodes.geojson'

mel_crim_map = folium.Map(location=[-37.7970, 144.9613], zoom_start=12)
mel_crim_map.choropleth(
    geo_data=mel_geo,
    data=mel_crim,
    columns=['Postcode','Count'],
    key_on='feature.properties.mccid_int',
    fill_color='YlOrRd', 
    fill_opacity=0.7, 
    line_opacity=0.2,
    threshold_scale=[0, 50000, 100000, 150000],
    legend_name='Crime Rate in Melbourne',
    reset=True
)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

folium.features.Circle(
    [latitude, longitude],
    radius=1000,
    color='steelblue',
    popup='University of Melbourne',
    fill = True,
    fill_color = 'steelblue',
    fill_opacity = 0.5
).add_to(mel_crim_map)

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(melbourne_merged['lat'], melbourne_merged['long'], melbourne_merged['Neighborhood'], melbourne_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)],
        fill=True,
        fill_color=rainbow[int(cluster)],
        fill_opacity=0.7).add_to(mel_crim_map)

# display map
mel_crim_map

# Load the NYC Neighbourhood Boundary Data into a Dataframe

In [37]:
# Load City of New York Boundary Data [Dataset C] onto a Dataframe
with open('nyu_2451_34572-geojson.json') as json_data:
    newyork_data = json.load(json_data)
newyork_data

{'type': 'FeatureCollection',
 'totalFeatures': 306,
 'features': [{'type': 'Feature',
   'id': 'nyu_2451_34572.1',
   'geometry': {'type': 'Point',
    'coordinates': [-73.84720052054902, 40.89470517661]},
   'geometry_name': 'geom',
   'properties': {'name': 'Wakefield',
    'stacked': 1,
    'annoline1': 'Wakefield',
    'annoline2': None,
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.84720052054902,
     40.89470517661,
     -73.84720052054902,
     40.89470517661]}},
  {'type': 'Feature',
   'id': 'nyu_2451_34572.2',
   'geometry': {'type': 'Point',
    'coordinates': [-73.82993910812398, 40.87429419303012]},
   'geometry_name': 'geom',
   'properties': {'name': 'Co-op City',
    'stacked': 2,
    'annoline1': 'Co-op',
    'annoline2': 'City',
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.82993910812398,
     40.87429419303012,
     -73.82993910812398,
     40.87429419303012]}},
  {'type': 'Feature',
 

In [38]:
# define key to access all relevant data 
neighborhoods_data = newyork_data['features']

# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)

In [39]:
for data in neighborhoods_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)
neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


# Columbia University Area Analysis
### Step 1: Send a GET request to the FourSquare API for venue data

In [40]:
# We will restrict the dataset to the borough of Manhattan, wherein Columbia University is located
manhattan_data = neighborhoods[neighborhoods['Borough'] == 'Manhattan'].reset_index(drop=True)
manhattan_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Manhattan,Marble Hill,40.876551,-73.91066
1,Manhattan,Chinatown,40.715618,-73.994279
2,Manhattan,Washington Heights,40.851903,-73.9369
3,Manhattan,Inwood,40.867684,-73.92121
4,Manhattan,Hamilton Heights,40.823604,-73.949688


In [41]:
# Get Columbia University latlong coordinates
address = 'Columbia University'

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

40.8079488 -73.96179735775709


In [67]:
# Set a 5km (or 3.11 miles) radius limit around the Columbia University
radius = 5000
LIMIT = 100
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, radius, LIMIT)

In [66]:
results = requests.get(url).json()

In [44]:
# Define a function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']                   

In [45]:
# Collect the nearby venues from the Columbia University surrounds
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  after removing the cwd from sys.path.


Unnamed: 0,name,categories,lat,lng
0,Riverside Park,Park,40.806809,-73.968651
1,Levain Bakery,Bakery,40.804897,-73.955098
2,The Winery,Wine Shop,40.804334,-73.955138
3,Absolute Bagels,Bagel Shop,40.802494,-73.96745
4,Morningside Park Farmers Market,Farmers Market,40.8014,-73.959606


### Step 2: Neighbourhood Analysis

In [46]:
# Define a get nearby venues function

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [47]:
# Consolidate the nearby venues in Manhattan

venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

manhattan_venues = getNearbyVenues(names=manhattan_data['Neighborhood'],
                                   latitudes=manhattan_data['Latitude'],
                                   longitudes=manhattan_data['Longitude']
                                  )

  """


Marble Hill
Chinatown
Washington Heights
Inwood
Hamilton Heights
Manhattanville
Central Harlem
East Harlem
Upper East Side
Yorkville
Lenox Hill
Roosevelt Island
Upper West Side
Lincoln Square
Clinton
Midtown
Murray Hill
Chelsea
Greenwich Village
East Village
Lower East Side
Tribeca
Little Italy
Soho
West Village
Manhattan Valley
Morningside Heights
Gramercy
Battery Park City
Financial District
Carnegie Hill
Noho
Civic Center
Midtown South
Sutton Place
Turtle Bay
Tudor City
Stuyvesant Town
Flatiron
Hudson Yards


### Step 3: Venue Analysis

In [48]:
# one hot encoding
manhattan_onehot = pd.get_dummies(manhattan_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
manhattan_onehot['Neighborhood'] = manhattan_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [manhattan_onehot.columns[-1]] + list(manhattan_onehot.columns[:-1])
manhattan_onehot = manhattan_onehot[fixed_columns]

manhattan_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,American Restaurant,Antique Shop,Arcade,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auditorium,Australian Restaurant,Austrian Restaurant,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Court,Basketball Stadium,Beer Bar,Beer Garden,Beer Store,Bike Rental / Bike Share,Bike Shop,Bike Trail,Bistro,Board Shop,Boat or Ferry,Bookstore,Boutique,Boxing Gym,Brazilian Restaurant,Breakfast Spot,Bridal Shop,Bridge,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Butcher,Cafeteria,Café,Cambodian Restaurant,Camera Store,Candy Store,Cantonese Restaurant,Caribbean Restaurant,Caucasian Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Circus,Climbing Gym,Clothing Store,Club House,Cocktail Bar,Coffee Shop,College Academic Building,College Arts Building,College Bookstore,College Cafeteria,College Theater,Comedy Club,Community Center,Concert Hall,Convenience Store,Cooking School,Cosmetics Shop,Coworking Space,Creperie,Cuban Restaurant,Cultural Center,Cupcake Shop,Cycle Studio,Czech Restaurant,Dance Studio,Daycare,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Doctor's Office,Dog Run,Donut Shop,Drugstore,Dry Cleaner,Dumpling Restaurant,Duty-free Shop,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Event Service,Event Space,Exhibit,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Financial or Legal Service,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,German Restaurant,Gift Shop,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,Heliport,High School,Historic Site,History Museum,Hobby Shop,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hotpot Restaurant,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Irish Pub,Israeli Restaurant,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Karaoke Bar,Kids Store,Kitchen Supply Store,Korean Restaurant,Kosher Restaurant,Latin American Restaurant,Laundry Service,Lebanese Restaurant,Library,Lingerie Store,Liquor Store,Lounge,Mac & Cheese Joint,Malay Restaurant,Market,Martial Arts Dojo,Massage Studio,Mattress Store,Medical Center,Mediterranean Restaurant,Memorial Site,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Mini Golf,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Museum,Music School,Music Venue,Nail Salon,New American Restaurant,Newsstand,Nightclub,Non-Profit,Noodle House,North Indian Restaurant,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Outdoor Sculpture,Outdoor Supply Store,Outdoors & Recreation,Paella Restaurant,Paper / Office Supplies Store,Park,Pedestrian Plaza,Performing Arts Venue,Persian Restaurant,Peruvian Restaurant,Pet Café,Pet Service,Pet Store,Pharmacy,Photography Studio,Physical Therapist,Pie Shop,Pier,Pilates Studio,Pizza Place,Playground,Plaza,Poke Place,Pool,Pub,Public Art,Ramen Restaurant,Record Shop,Rental Car Location,Residential Building (Apartment / Condo),Resort,Rest Area,Restaurant,River,Rock Club,Roof Deck,Russian Restaurant,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,School,Sculpture Garden,Seafood Restaurant,Shanghai Restaurant,Shipping Store,Shoe Store,Shopping Mall,Skate Park,Smoke Shop,Snack Place,Soba Restaurant,Soccer Field,Soup Place,South American Restaurant,South Indian Restaurant,Southern / Soul Food Restaurant,Spa,Spanish Restaurant,Speakeasy,Spiritual Center,Sporting Goods Shop,Sports Bar,Sports Club,Sri Lankan Restaurant,Stables,Steakhouse,Street Art,Strip Club,Supermarket,Supplement Shop,Sushi Restaurant,Swiss Restaurant,Szechuan Restaurant,Taco Place,Tailor Shop,Taiwanese Restaurant,Tapas Restaurant,Tattoo Parlor,Tea Room,Tech Startup,Temple,Tennis Court,Tennis Stadium,Thai Restaurant,Theater,Theme Park Ride / Attraction,Thrift / Vintage Store,Tiki Bar,Tourist Information Center,Toy / Game Store,Track,Trail,Train Station,Turkish Restaurant,Udon Restaurant,Used Bookstore,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Volleyball Court,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [49]:
# Group by Manhattan neighbourhoods and calculate the mean of the frequency of each category
manhattan_grouped = manhattan_onehot.groupby('Neighborhood').mean().reset_index()
manhattan_grouped

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,American Restaurant,Antique Shop,Arcade,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auditorium,Australian Restaurant,Austrian Restaurant,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Court,Basketball Stadium,Beer Bar,Beer Garden,Beer Store,Bike Rental / Bike Share,Bike Shop,Bike Trail,Bistro,Board Shop,Boat or Ferry,Bookstore,Boutique,Boxing Gym,Brazilian Restaurant,Breakfast Spot,Bridal Shop,Bridge,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Butcher,Cafeteria,Café,Cambodian Restaurant,Camera Store,Candy Store,Cantonese Restaurant,Caribbean Restaurant,Caucasian Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Circus,Climbing Gym,Clothing Store,Club House,Cocktail Bar,Coffee Shop,College Academic Building,College Arts Building,College Bookstore,College Cafeteria,College Theater,Comedy Club,Community Center,Concert Hall,Convenience Store,Cooking School,Cosmetics Shop,Coworking Space,Creperie,Cuban Restaurant,Cultural Center,Cupcake Shop,Cycle Studio,Czech Restaurant,Dance Studio,Daycare,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Doctor's Office,Dog Run,Donut Shop,Drugstore,Dry Cleaner,Dumpling Restaurant,Duty-free Shop,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Event Service,Event Space,Exhibit,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Financial or Legal Service,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,German Restaurant,Gift Shop,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,Heliport,High School,Historic Site,History Museum,Hobby Shop,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hotpot Restaurant,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Irish Pub,Israeli Restaurant,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Karaoke Bar,Kids Store,Kitchen Supply Store,Korean Restaurant,Kosher Restaurant,Latin American Restaurant,Laundry Service,Lebanese Restaurant,Library,Lingerie Store,Liquor Store,Lounge,Mac & Cheese Joint,Malay Restaurant,Market,Martial Arts Dojo,Massage Studio,Mattress Store,Medical Center,Mediterranean Restaurant,Memorial Site,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Mini Golf,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Museum,Music School,Music Venue,Nail Salon,New American Restaurant,Newsstand,Nightclub,Non-Profit,Noodle House,North Indian Restaurant,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Outdoor Sculpture,Outdoor Supply Store,Outdoors & Recreation,Paella Restaurant,Paper / Office Supplies Store,Park,Pedestrian Plaza,Performing Arts Venue,Persian Restaurant,Peruvian Restaurant,Pet Café,Pet Service,Pet Store,Pharmacy,Photography Studio,Physical Therapist,Pie Shop,Pier,Pilates Studio,Pizza Place,Playground,Plaza,Poke Place,Pool,Pub,Public Art,Ramen Restaurant,Record Shop,Rental Car Location,Residential Building (Apartment / Condo),Resort,Rest Area,Restaurant,River,Rock Club,Roof Deck,Russian Restaurant,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,School,Sculpture Garden,Seafood Restaurant,Shanghai Restaurant,Shipping Store,Shoe Store,Shopping Mall,Skate Park,Smoke Shop,Snack Place,Soba Restaurant,Soccer Field,Soup Place,South American Restaurant,South Indian Restaurant,Southern / Soul Food Restaurant,Spa,Spanish Restaurant,Speakeasy,Spiritual Center,Sporting Goods Shop,Sports Bar,Sports Club,Sri Lankan Restaurant,Stables,Steakhouse,Street Art,Strip Club,Supermarket,Supplement Shop,Sushi Restaurant,Swiss Restaurant,Szechuan Restaurant,Taco Place,Tailor Shop,Taiwanese Restaurant,Tapas Restaurant,Tattoo Parlor,Tea Room,Tech Startup,Temple,Tennis Court,Tennis Stadium,Thai Restaurant,Theater,Theme Park Ride / Attraction,Thrift / Vintage Store,Tiki Bar,Tourist Information Center,Toy / Game Store,Track,Trail,Train Station,Turkish Restaurant,Udon Restaurant,Used Bookstore,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Volleyball Court,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Battery Park City,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015873,0.015873,0.0,0.0,0.015873,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015873,0.0,0.0,0.0,0.0,0.015873,0.0,0.031746,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015873,0.015873,0.015873,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015873,0.0,0.0,0.0,0.0,0.0,0.0,0.079365,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015873,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015873,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.031746,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015873,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.031746,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015873,0.0,0.0,0.0,0.0,0.0,0.063492,0.015873,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015873,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015873,0.047619,0.0,0.0,0.015873,0.0,0.0,0.0,0.0,0.0,0.0,0.015873,0.0,0.015873,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.015873,0.0,0.0,0.0,0.0,0.015873,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.031746,0.031746,0.0,0.0,0.015873,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015873,0.0,0.015873,0.0,0.0,0.0,0.0,0.0,0.0,0.031746,0.0,0.015873,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015873,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015873,0.031746,0.0,0.0,0.0
1,Carnegie Hill,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.011765,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.023529,0.0,0.035294,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035294,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.035294,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023529,0.094118,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.023529,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.023529,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.011765,0.0,0.035294,0.035294,0.023529,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023529,0.0,0.035294,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.047059,0.0,0.0,0.0,0.0,0.023529,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.011765,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023529,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023529,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023529,0.0,0.0,0.0,0.011765,0.035294,0.0,0.0,0.035294
2,Central Harlem,0.0,0.0,0.0,0.065217,0.043478,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.021739,0.0,0.0,0.043478,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.021739,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.021739,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Chelsea,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.07,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.04,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0
4,Chinatown,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.04,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.04,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.02,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.02,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Civic Center,0.0,0.0,0.0,0.0,0.045455,0.011364,0.0,0.0,0.0,0.0,0.0,0.0,0.011364,0.0,0.0,0.011364,0.0,0.0,0.011364,0.011364,0.011364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011364,0.011364,0.011364,0.0,0.0,0.0,0.0,0.0,0.011364,0.011364,0.011364,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011364,0.0,0.022727,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011364,0.0,0.0,0.0,0.0,0.011364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011364,0.011364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.056818,0.0,0.0,0.011364,0.0,0.0,0.0,0.0,0.0,0.0,0.011364,0.0,0.0,0.0,0.011364,0.0,0.0,0.022727,0.034091,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.022727,0.0,0.0,0.011364,0.0,0.0,0.0,0.0,0.011364,0.0,0.011364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011364,0.0,0.0,0.011364,0.0,0.0,0.0,0.011364,0.011364,0.0,0.0,0.0,0.011364,0.0,0.0,0.011364,0.0,0.0,0.011364,0.0,0.0,0.0,0.0,0.0,0.0,0.011364,0.011364,0.011364,0.0,0.0,0.0,0.0,0.0,0.011364,0.011364,0.0,0.011364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.011364,0.0,0.0,0.0,0.0,0.0,0.0,0.011364,0.0,0.0,0.034091,0.0,0.0,0.011364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011364,0.0,0.0,0.0,0.011364,0.0,0.0,0.0,0.022727,0.022727,0.0,0.0,0.034091
6,Clinton,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.05,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.04,0.05,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.01,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.03,0.0,0.0,0.0
7,East Harlem,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.095238,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.02381,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.119048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,East Village,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.04,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.06,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.03,0.01,0.01,0.0,0.03,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.03,0.01,0.0,0.0,0.0
9,Financial District,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.03,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.01,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.04,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.01


In [50]:
# Load the top 10 venues by neighbourhood clusters into a dataframe
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [51]:
# Sort the clusters by neighbourhood
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = manhattan_grouped['Neighborhood']

for ind in np.arange(manhattan_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(manhattan_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Battery Park City,Park,Coffee Shop,Hotel,Memorial Site,Gym,Boat or Ferry,Gourmet Shop,Food Court,Shopping Mall,Wine Shop
1,Carnegie Hill,Coffee Shop,Pizza Place,Yoga Studio,Gym,Wine Shop,Bar,Bookstore,Café,Japanese Restaurant,Grocery Store
2,Central Harlem,African Restaurant,Fried Chicken Joint,American Restaurant,Bar,Cosmetics Shop,French Restaurant,Seafood Restaurant,Chinese Restaurant,Café,Tapas Restaurant
3,Chelsea,Art Gallery,Coffee Shop,Café,Ice Cream Shop,American Restaurant,Market,Seafood Restaurant,Boutique,Cupcake Shop,Cycle Studio
4,Chinatown,Chinese Restaurant,Cocktail Bar,Bubble Tea Shop,Bakery,Coffee Shop,Salon / Barbershop,Vietnamese Restaurant,Optical Shop,American Restaurant,Spa


### Step 4: Cluster Neighbourhoods

In [52]:
# set number of clusters
kclusters = 3

manhattan_grouped_clustering = manhattan_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(manhattan_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 0, 0, 1, 0, 1, 1, 0, 0, 1])

In [53]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

manhattan_merged = manhattan_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
manhattan_merged = manhattan_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

manhattan_merged.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Manhattan,Marble Hill,40.876551,-73.91066,1,Sandwich Place,Coffee Shop,Gym,Ice Cream Shop,Department Store,Pharmacy,Diner,Discount Store,Donut Shop,Bank
1,Manhattan,Chinatown,40.715618,-73.994279,0,Chinese Restaurant,Cocktail Bar,Bubble Tea Shop,Bakery,Coffee Shop,Salon / Barbershop,Vietnamese Restaurant,Optical Shop,American Restaurant,Spa
2,Manhattan,Washington Heights,40.851903,-73.9369,0,Café,Bakery,Pizza Place,Mobile Phone Shop,Chinese Restaurant,Grocery Store,Latin American Restaurant,Donut Shop,Sandwich Place,Supplement Shop
3,Manhattan,Inwood,40.867684,-73.92121,0,Mexican Restaurant,Café,Lounge,Pizza Place,Restaurant,Deli / Bodega,Chinese Restaurant,Spanish Restaurant,Caribbean Restaurant,Park
4,Manhattan,Hamilton Heights,40.823604,-73.949688,0,Pizza Place,Coffee Shop,Café,Deli / Bodega,Mexican Restaurant,Cocktail Bar,Indian Restaurant,Sushi Restaurant,Park,Yoga Studio


### Create map with cluster and Columbia University markers

In [54]:
# create map
ny_map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

folium.features.Circle(
    [latitude, longitude],
    radius=1000,
    color='steelblue',
    popup='Columbia University',
    fill = True,
    fill_color = 'steelblue',
    fill_opacity = 0.5
).add_to(ny_map_clusters)

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(manhattan_merged['Latitude'], manhattan_merged['Longitude'], manhattan_merged['Neighborhood'], manhattan_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(ny_map_clusters)
       
ny_map_clusters

### Examine Manhattan Cluster 1

In [55]:
manhattan_merged.loc[manhattan_merged['Cluster Labels'] == 0, manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Chinatown,Chinese Restaurant,Cocktail Bar,Bubble Tea Shop,Bakery,Coffee Shop,Salon / Barbershop,Vietnamese Restaurant,Optical Shop,American Restaurant,Spa
2,Washington Heights,Café,Bakery,Pizza Place,Mobile Phone Shop,Chinese Restaurant,Grocery Store,Latin American Restaurant,Donut Shop,Sandwich Place,Supplement Shop
3,Inwood,Mexican Restaurant,Café,Lounge,Pizza Place,Restaurant,Deli / Bodega,Chinese Restaurant,Spanish Restaurant,Caribbean Restaurant,Park
4,Hamilton Heights,Pizza Place,Coffee Shop,Café,Deli / Bodega,Mexican Restaurant,Cocktail Bar,Indian Restaurant,Sushi Restaurant,Park,Yoga Studio
5,Manhattanville,Coffee Shop,Seafood Restaurant,Italian Restaurant,Deli / Bodega,Park,Sushi Restaurant,Mexican Restaurant,Café,Bike Trail,Boutique
6,Central Harlem,African Restaurant,Fried Chicken Joint,American Restaurant,Bar,Cosmetics Shop,French Restaurant,Seafood Restaurant,Chinese Restaurant,Café,Tapas Restaurant
7,East Harlem,Mexican Restaurant,Bakery,Latin American Restaurant,Thai Restaurant,Deli / Bodega,Restaurant,Beer Bar,French Restaurant,Liquor Store,Steakhouse
8,Upper East Side,Italian Restaurant,Bakery,Gym / Fitness Center,Spa,American Restaurant,Exhibit,Hotel,Juice Bar,Yoga Studio,Wine Shop
9,Yorkville,Italian Restaurant,Coffee Shop,Gym,Sushi Restaurant,Bar,Deli / Bodega,Mexican Restaurant,Japanese Restaurant,Diner,Wine Shop
10,Lenox Hill,Coffee Shop,Italian Restaurant,Pizza Place,Café,Sushi Restaurant,Cocktail Bar,Gym / Fitness Center,Gym,Burger Joint,Salad Place


### Examine Manhattan Cluster 2

In [56]:
manhattan_merged.loc[manhattan_merged['Cluster Labels'] == 1, manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Marble Hill,Sandwich Place,Coffee Shop,Gym,Ice Cream Shop,Department Store,Pharmacy,Diner,Discount Store,Donut Shop,Bank
11,Roosevelt Island,Playground,Park,Gym,Dry Cleaner,Greek Restaurant,Coffee Shop,Outdoors & Recreation,Sandwich Place,Scenic Lookout,School
13,Lincoln Square,Italian Restaurant,Café,Plaza,Gym / Fitness Center,Theater,Performing Arts Venue,Concert Hall,Wine Shop,French Restaurant,Grocery Store
14,Clinton,Theater,Coffee Shop,Gym / Fitness Center,Hotel,Gym,Italian Restaurant,Wine Shop,Sandwich Place,Spa,American Restaurant
15,Midtown,Coffee Shop,Hotel,Theater,Pizza Place,Cuban Restaurant,Bakery,Japanese Restaurant,Clothing Store,Cosmetics Shop,Spa
16,Murray Hill,Hotel,Sandwich Place,Coffee Shop,Gym / Fitness Center,Steakhouse,Japanese Restaurant,Pizza Place,Chinese Restaurant,Indian Restaurant,Juice Bar
17,Chelsea,Art Gallery,Coffee Shop,Café,Ice Cream Shop,American Restaurant,Market,Seafood Restaurant,Boutique,Cupcake Shop,Cycle Studio
21,Tribeca,Italian Restaurant,Park,Wine Bar,Café,Spa,Hotel,Coffee Shop,Bakery,Steakhouse,Greek Restaurant
28,Battery Park City,Park,Coffee Shop,Hotel,Memorial Site,Gym,Boat or Ferry,Gourmet Shop,Food Court,Shopping Mall,Wine Shop
29,Financial District,Coffee Shop,Hotel,Pizza Place,American Restaurant,Sandwich Place,Salad Place,Gym / Fitness Center,Juice Bar,Café,Cocktail Bar


### Examine Manhattan Cluster 3

In [57]:
manhattan_merged.loc[manhattan_merged['Cluster Labels'] == 2, manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
37,Stuyvesant Town,Park,Playground,Gas Station,Gym,Gym / Fitness Center,Baseball Field,Cocktail Bar,Harbor / Marina,German Restaurant,Bistro


# Analyzing the Manhattan Crime Rate

In [58]:
# Load Manhattan crime statistics - 2006 to 2017 [Dataset A] into a dataframe
manhattan_crim = pd.read_csv('NYPD_Complaint_Data_Historic.csv')
manhattan_crim.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,CMPLNT_NUM,CMPLNT_FR_DT,CMPLNT_FR_TM,CMPLNT_TO_DT,CMPLNT_TO_TM,ADDR_PCT_CD,RPT_DT,KY_CD,OFNS_DESC,PD_CD,PD_DESC,CRM_ATPT_CPTD_CD,LAW_CAT_CD,BORO_NM,LOC_OF_OCCUR_DESC,PREM_TYP_DESC,JURIS_DESC,JURISDICTION_CODE,PARKS_NM,HADEVELOPT,HOUSING_PSA,X_COORD_CD,Y_COORD_CD,SUSP_AGE_GROUP,SUSP_RACE,SUSP_SEX,TRANSIT_DISTRICT,Latitude,Longitude,Lat_Lon,PATROL_BORO,STATION_NAME,VIC_AGE_GROUP,VIC_RACE,VIC_SEX
0,876525802,04/10/2008,19:10:00,,,73.0,04/10/2008,341.0,PETIT LARCENY,321.0,"LARCENY,PETIT FROM AUTO",COMPLETED,MISDEMEANOR,BROOKLYN,,STREET,N.Y. POLICE DEPT,0.0,,,,1008495.0,183171.0,,,,,40.669414,-73.912603,"(40.669413836, -73.91260308)",PATROL BORO BKLYN NORTH,,18-24,BLACK,M
1,402693528,06/03/2007,15:23:00,,,28.0,06/03/2007,236.0,DANGEROUS WEAPONS,782.0,"WEAPONS, POSSESSION, ETC",COMPLETED,MISDEMEANOR,MANHATTAN,,STREET,N.Y. POLICE DEPT,0.0,,,,999336.0,231461.0,,,,,40.801978,-73.945511,"(40.801978284, -73.945511151)",PATROL BORO MAN NORTH,,,UNKNOWN,E
2,832030069,02/16/2010,20:50:00,02/16/2010,20:50:00,102.0,02/16/2010,105.0,ROBBERY,375.0,"ROBBERY,PHARMACY",COMPLETED,FELONY,QUEENS,INSIDE,GROCERY/BODEGA,N.Y. POLICE DEPT,0.0,,,,1031117.0,194344.0,,,,,40.69999,-73.830978,"(40.699990268, -73.830977746)",PATROL BORO QUEENS SOUTH,,,UNKNOWN,D
3,538308771,11/10/2009,16:35:00,11/10/2009,16:45:00,79.0,11/10/2009,341.0,PETIT LARCENY,333.0,"LARCENY,PETIT FROM STORE-SHOPL",COMPLETED,MISDEMEANOR,BROOKLYN,INSIDE,FOOD SUPERMARKET,N.Y. POLICE DEPT,0.0,,,,996722.0,187385.0,,,,,40.681005,-73.955035,"(40.681004729, -73.955034577)",PATROL BORO BKLYN NORTH,,,UNKNOWN,D
4,965693192,04/11/2006,09:30:00,04/11/2006,10:00:00,123.0,04/25/2006,112.0,THEFT-FRAUD,739.0,"FRAUD,UNCLASSIFIED-FELONY",COMPLETED,FELONY,STATEN ISLAND,,COMMERCIAL BUILDING,N.Y. POLICE DEPT,0.0,,,,,,,,,,,,,PATROL BORO STATEN ISLAND,,25-44,WHITE,M


In [59]:
# Transform and load the data onto a dataframe
# We need to match our police precinct json file [Dataset B] with the NY crime statistics file [Dataset A] by the precinct column
# Again, we need to remember to convert the Precinct column to STRING, as Folium cannot key on integers.

manhattan_crim = manhattan_crim.groupby(['ADDR_PCT_CD','BORO_NM']).count()
manhattan_crim = manhattan_crim.reset_index()
manhattan_crim = manhattan_crim.rename(columns={'ADDR_PCT_CD':'Precinct','BORO_NM':'Borough','CMPLNT_NUM':'Count'})
manhattan_crim = manhattan_crim.loc[:,'Precinct':'Count']
manhattan_crim = manhattan_crim.astype({"Precinct":'int',"Borough":'str',"Count":'int64'}) 
manhattan_crim = manhattan_crim.astype({"Precinct":'str'})
manhattan_crim.head()

Unnamed: 0,Precinct,Borough,Count
0,1,MANHATTAN,13658
1,5,BROOKLYN,1
2,5,MANHATTAN,9763
3,6,BROOKLYN,1
4,6,MANHATTAN,12245


In [60]:
manhattan_crim.dtypes

Precinct    object
Borough     object
Count        int64
dtype: object

### Plot the consolidated University of Melbourne map

In [61]:
manhattan_geo = 'Police Precincts.geojson'

manhattan_crim_map = folium.Map(location=[40.8079, -73.9618], zoom_start=12)
manhattan_crim_map.choropleth(
    geo_data=manhattan_geo,
    data=manhattan_crim,
    columns=['Precinct','Count'],
    key_on='feature.properties.precinct',
    fill_color='YlOrRd', 
    fill_opacity=0.7, 
    line_opacity=0.2,
    threshold_scale=[0,6000,12000,18000,24000,30000],
    legend_name='Crime Rate in New York City',
    reset=True
)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Add Columbia University to the map
# To further refine our search for ideal neighbourhood clusters within walking distance from our university, 
# we mark out a 1-km (0.62 mile) radius from the university by using the .Circle method and setting radius to 1000
folium.features.Circle(
    [40.8079, -73.9618],
    radius=1000,
    color='steelblue',
    popup='Columbia University',
    fill = True,
    fill_color = 'steelblue',
    fill_opacity = 0.5
).add_to(manhattan_crim_map)

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(manhattan_merged['Latitude'], manhattan_merged['Longitude'], manhattan_merged['Neighborhood'], manhattan_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(manhattan_crim_map)

# display map
manhattan_crim_map

# Thanks for your time, and have a great week! :)