# The Battle of Neighbourhoods

## Importing libraries for webscraping

In [2]:
import bs4 as bs
import urllib.request
from bs4 import BeautifulSoup
import requests
import pandas as pd

## Scraping table from Wikipedia

In [3]:
url='https://en.wikipedia.org/wiki/Parishes_of_Barbados'

source = urllib.request.urlopen(url).read()
soup = bs.BeautifulSoup(source,'html.parser')

table = soup.find('table', 'wikitable sortable')
table_rows = table.find_all('tr')

list_html = []

for tr in table_rows:
    td = tr.find_all('td')
    row = [tr.text.strip() for tr in td if tr.text.strip()]
    if row:
        list_html.append(row)

## Convert table to dataframe

In [4]:
columns = ['Nr.', 'Parish', 'Official Long Name', 'Capitals', 'Land Area', 'Population (2010 Census)', 'Density', 'Historic Vestry']
df = pd.DataFrame(list_html, columns=columns)
df.set_index('Nr.')

Unnamed: 0_level_0,Parish,Official Long Name,Capitals,Land Area,Population (2010 Census),Density,Historic Vestry
Nr.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,Christ Church,The Parish of Christ Church[6],Oistins,57.0,54336,868.4,Lead church of the Parish.
2,St. Andrew,The Parish of Saint Andrew[7],Greenland,36.0,5139,145.9,Lead church of the Parish.
3,St. George,The Parish of Saint George,Bulkeley,44.0,19767,406.1,
4,St. James,The Parish of Saint James[7],Holetown,31.0,28498,733.6,
5,St. John,The Parish of Saint John,Four Roads,34.0,8963,261.0,Lead church of the Parish.
6,St. Joseph,The Parish of Saint Joseph[7],Bathsheba,26.0,6620,261.7,Lead church of the Parish.
7,St. Lucy,The Parish of Saint Lucy[7],Crab Hill,36.0,9758,259.1,
8,St. Michael,The Parish of Saint Michael[7],Bridgetown,39.0,88529,2145.7,
9,St. Peter,The Parish of Saint Peter,Speightstown,34.0,11300,314.7,Lead church of the Parish.
10,St. Philip,The Parish of Saint Philip[7],Crane,60.0,30662,342.3,


In [5]:
df.shape

(12, 8)

## Cleaning the Dataframe

### Dropping Unnecessary Columns

In [6]:
df = df.drop(['Nr.', 'Official Long Name', 'Land Area', 'Density', 'Historic Vestry'], axis=1)
df

Unnamed: 0,Parish,Capitals,Population (2010 Census)
0,Christ Church,Oistins,54336
1,St. Andrew,Greenland,5139
2,St. George,Bulkeley,19767
3,St. James,Holetown,28498
4,St. John,Four Roads,8963
5,St. Joseph,Bathsheba,6620
6,St. Lucy,Crab Hill,9758
7,St. Michael,Bridgetown,88529
8,St. Peter,Speightstown,11300
9,St. Philip,Crane,30662


### Dropping Bottom Row 

In [7]:
df = df.drop([11])
df

Unnamed: 0,Parish,Capitals,Population (2010 Census)
0,Christ Church,Oistins,54336
1,St. Andrew,Greenland,5139
2,St. George,Bulkeley,19767
3,St. James,Holetown,28498
4,St. John,Four Roads,8963
5,St. Joseph,Bathsheba,6620
6,St. Lucy,Crab Hill,9758
7,St. Michael,Bridgetown,88529
8,St. Peter,Speightstown,11300
9,St. Philip,Crane,30662


### Importing libraries for Segmenting and Clustering

In [8]:
from geopy.geocoders import Nominatim
import numpy as np
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium
import json

### Retrieving the Longitudes & Latitudes of Each Parish

In [9]:
lat_list= []
long_list= []

for parish in zip(df['Parish']):
    Parish = '{}, Barbados'.format(parish)
    geolocate = Nominatim(user_agent="bdos_botn")
    it_locate= geolocate.geocode(Parish)
    lati= it_locate.latitude
    longi = it_locate.longitude
    lat_list.append(lati)
    long_list.append(longi)

In [10]:
ll = np.array([lat_list, long_list]).transpose()
ll

array([[ 13.1500331 , -59.5250305 ],
       [ 13.24890535, -59.57571419],
       [ 13.1469035 , -59.5478922 ],
       [ 13.1878895 , -59.62810396],
       [ 13.15791345, -59.50431584],
       [ 13.19763575, -59.54542302],
       [ 13.30422945, -59.61431954],
       [ 13.1180295 , -59.60098579],
       [ 13.2614031 , -59.61996712],
       [ 13.1500331 , -59.5250305 ],
       [ 13.1793733 , -59.58682352]])

In [11]:
columns_ll = ['Latitude', 'Longitude']
ll_df = pd.DataFrame(ll, columns = columns_ll)
ll_df

Unnamed: 0,Latitude,Longitude
0,13.150033,-59.52503
1,13.248905,-59.575714
2,13.146904,-59.547892
3,13.18789,-59.628104
4,13.157913,-59.504316
5,13.197636,-59.545423
6,13.304229,-59.61432
7,13.118029,-59.600986
8,13.261403,-59.619967
9,13.150033,-59.52503


### Merging Longitude & Latitude Dataframe with Main Dataframe

In [12]:
df_n = pd.concat([df, ll_df], axis = 1)
df_n

Unnamed: 0,Parish,Capitals,Population (2010 Census),Latitude,Longitude
0,Christ Church,Oistins,54336,13.150033,-59.52503
1,St. Andrew,Greenland,5139,13.248905,-59.575714
2,St. George,Bulkeley,19767,13.146904,-59.547892
3,St. James,Holetown,28498,13.18789,-59.628104
4,St. John,Four Roads,8963,13.157913,-59.504316
5,St. Joseph,Bathsheba,6620,13.197636,-59.545423
6,St. Lucy,Crab Hill,9758,13.304229,-59.61432
7,St. Michael,Bridgetown,88529,13.118029,-59.600986
8,St. Peter,Speightstown,11300,13.261403,-59.619967
9,St. Philip,Crane,30662,13.150033,-59.52503


### Correcting Errors in Latitude & Longitude Columns

In [13]:
df_n.iloc[0,3] = 13.06667 # Christ Church's Lat
df_n.iloc[0,4] = -59.53333 # Christ Church's Lon
df_n.iloc[9,3] = 13.122874 # St. Philip's Lat
df_n.iloc[9,4] = -59.469571 # St. Philip's Lon
df_n

Unnamed: 0,Parish,Capitals,Population (2010 Census),Latitude,Longitude
0,Christ Church,Oistins,54336,13.06667,-59.53333
1,St. Andrew,Greenland,5139,13.248905,-59.575714
2,St. George,Bulkeley,19767,13.146904,-59.547892
3,St. James,Holetown,28498,13.18789,-59.628104
4,St. John,Four Roads,8963,13.157913,-59.504316
5,St. Joseph,Bathsheba,6620,13.197636,-59.545423
6,St. Lucy,Crab Hill,9758,13.304229,-59.61432
7,St. Michael,Bridgetown,88529,13.118029,-59.600986
8,St. Peter,Speightstown,11300,13.261403,-59.619967
9,St. Philip,Crane,30662,13.122874,-59.469571


## Exploratory Analysis

### Use geopy library to get the latitude and longitude values of Barbados

In [14]:
Bdos = 'Barbados'

geolocator = Nominatim(user_agent="bdos_bon")
location = geolocator.geocode(Bdos)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Barbados are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Barbados are 13.1500331, -59.5250305.


### Creating Map with Folium Library

In [15]:
Bdos_map = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, cap, par in zip(df_n['Latitude'], df_n['Longitude'], df_n['Capitals'], df_n['Parish']):
    label = '{}, {}'.format(cap, par)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(Bdos_map)  
    
Bdos_map

### Define Foursquare Credentials and Version

In [16]:
CLIENT_ID = 'TL2XEQBNMUHYC4KWR2Q2HVYLTIIP4BMSTXG5QKRR3V0ACZPH' 
CLIENT_SECRET = 'TPMNE0ECMDR0JYJWBCPA0RW0Q1HGQQZ22JN1OI0MUIRR3GDJ' 
VERSION = '20180605'

### Connecting to Foursuare

In [17]:
LIMIT = 100 
radius = 17000 

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    latitude, 
    longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=TL2XEQBNMUHYC4KWR2Q2HVYLTIIP4BMSTXG5QKRR3V0ACZPH&client_secret=TPMNE0ECMDR0JYJWBCPA0RW0Q1HGQQZ22JN1OI0MUIRR3GDJ&v=20180605&ll=13.1500331,-59.5250305&radius=17000&limit=100'

In [18]:
results = requests.get(url).json()

### Creating Function that Searches for Venue Categories

In [19]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [20]:
# clean the json and structure it into a pandas dataframe.
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,The Village Bar,BBQ Joint,13.169778,-59.526597
1,Bathsheba,Surf Spot,13.21243,-59.520481
2,Bushy Park,Racetrack,13.137394,-59.467449
3,Mr. Delicious Snack Bar,Food Truck,13.078572,-59.528693
4,Hunte's Gardens,Garden,13.193272,-59.550569


### Creating a Function to input Venue Information into Dataframe

In [21]:
# Explore Neighborhoods
def getNearbyVenues(names, latitudes, longitudes, radius=5000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Parish', 
                  'Parish Latitude', 
                  'Parish Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [22]:
# get venues in Barbados
barbados_venues = getNearbyVenues(names=df_n['Parish'],
                                   latitudes=df_n['Latitude'],
                                   longitudes=df_n['Longitude']
                                  )

barbados_venues.head(10)

Christ Church
St. Andrew
St. George
St. James
St. John
St. Joseph
St. Lucy
St. Michael
St. Peter
St. Philip
St. Thomas


Unnamed: 0,Parish,Parish Latitude,Parish Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Christ Church,13.06667,-59.53333,Enterprise/Miami Beach,13.060394,-59.539852,Beach
1,Christ Church,13.06667,-59.53333,Oistins Fish Fry,13.063553,-59.542639,Seafood Restaurant
2,Christ Church,13.06667,-59.53333,Pat's Place,13.063469,-59.54263,Caribbean Restaurant
3,Christ Church,13.06667,-59.53333,Fred's Bar,13.06356,-59.542452,Caribbean Restaurant
4,Christ Church,13.06667,-59.53333,Mr. Delicious Snack Bar,13.078572,-59.528693,Food Truck
5,Christ Church,13.06667,-59.53333,Cafe Luna,13.059421,-59.538004,Restaurant
6,Christ Church,13.06667,-59.53333,chillin & grillin,13.063441,-59.542947,Seafood Restaurant
7,Christ Church,13.06667,-59.53333,Oistins Fish Market,13.063556,-59.542722,Fish Market
8,Christ Church,13.06667,-59.53333,Oistins Bay Garden,13.063511,-59.542559,Other Nightlife
9,Christ Church,13.06667,-59.53333,Uncle George's Fish Net Grill,13.063589,-59.542981,Seafood Restaurant


In [23]:
barbados_venues.loc[:, 'Venue Category'] # Examining all the Venue Categories pulled from Foursquare

0                     Beach
1        Seafood Restaurant
2      Caribbean Restaurant
3      Caribbean Restaurant
4                Food Truck
               ...         
416                  Market
417                     Bar
418                  Garden
419       Electronics Store
420    Fast Food Restaurant
Name: Venue Category, Length: 421, dtype: object

### Pulling Information of Interest from Data pulled from Foursquare

In [24]:
ics_df = barbados_venues.loc[barbados_venues['Venue Category'] == 'Ice Cream Shop']
ics_df

Unnamed: 0,Parish,Parish Latitude,Parish Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
266,St. Michael,13.118029,-59.600986,Chilly Moos Ice Cream Treatery,13.075227,-59.589527,Ice Cream Shop


### Searching for data from Popular Restaurant Chain "Chefette" known for its Icecream

In [35]:
chef = barbados_venues.loc[barbados_venues['Venue'] == 'Chefette']
chef

Unnamed: 0,Parish,Parish Latitude,Parish Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
62,Christ Church,13.06667,-59.53333,Chefette,13.064235,-59.543791,Fast Food Restaurant
241,St. Michael,13.118029,-59.600986,Chefette,13.091495,-59.585205,Fast Food Restaurant
344,St. Peter,13.261403,-59.619967,Chefette,13.249799,-59.641827,Burger Joint


In [36]:
chef = chef.drop([62, 344])

### Concatenating the Two Dataframes containing Information about Ice Cream Shops

In [47]:
ics = pd.concat([ics_df, chef])
ics

Unnamed: 0,Parish,Parish Latitude,Parish Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
266,St. Michael,13.118029,-59.600986,Chilly Moos Ice Cream Treatery,13.075227,-59.589527,Ice Cream Shop
241,St. Michael,13.118029,-59.600986,Chefette,13.091495,-59.585205,Fast Food Restaurant


### Reading in data manually scraped from Foursquare and Google Maps

In [29]:
MsN_V = pd.read_csv('MsN_V.csv')
MsN_V

Unnamed: 0,Parish,Parish Latitude,Parish Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Christ Church,13.06667,-59.53333,Chefette,13.074406,-59.588523,Ice Cream Shop
1,St. James,13.18789,-59.628104,Chefette,13.200587,-59.615367,Ice Cream Shop
2,St. Philip,13.122874,-59.469571,Chefette,13.11716,-59.47618,Ice Cream Shop
3,St. Michael,13.118029,-59.600986,Chefette,13.117091,-59.617039,Ice Cream Shop
4,St. Michael,13.118029,-59.600986,Chefette,13.09697,-59.615466,Ice Cream Shop
5,St. Thomas,13.179373,-59.586824,Chefette,13.154219,-59.611567,Ice Cream Shop
6,St. George,13.146904,-59.547892,Chefette,13.129086,-59.569922,Ice Cream Shop
7,Christ Church,13.06667,-59.53333,Chefette,13.080162,-59.487813,Ice Cream Shop
8,St. James,13.18789,-59.628104,Chillz Delight Treats,13.162094,-59.636862,Ice Cream Shop
9,Christ Church,13.06667,-59.53333,Cafe de Paris,13.065909,-59.563326,Ice Cream Shop


### Concatenating the manually scraped data with "ics" Dataframe

In [48]:
ic_shops = pd.concat([ics, MsN_V])
ic_shops

Unnamed: 0,Parish,Parish Latitude,Parish Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
266,St. Michael,13.118029,-59.600986,Chilly Moos Ice Cream Treatery,13.075227,-59.589527,Ice Cream Shop
241,St. Michael,13.118029,-59.600986,Chefette,13.091495,-59.585205,Fast Food Restaurant
0,Christ Church,13.06667,-59.53333,Chefette,13.074406,-59.588523,Ice Cream Shop
1,St. James,13.18789,-59.628104,Chefette,13.200587,-59.615367,Ice Cream Shop
2,St. Philip,13.122874,-59.469571,Chefette,13.11716,-59.47618,Ice Cream Shop
3,St. Michael,13.118029,-59.600986,Chefette,13.117091,-59.617039,Ice Cream Shop
4,St. Michael,13.118029,-59.600986,Chefette,13.09697,-59.615466,Ice Cream Shop
5,St. Thomas,13.179373,-59.586824,Chefette,13.154219,-59.611567,Ice Cream Shop
6,St. George,13.146904,-59.547892,Chefette,13.129086,-59.569922,Ice Cream Shop
7,Christ Church,13.06667,-59.53333,Chefette,13.080162,-59.487813,Ice Cream Shop


### Changing Venue Categories to 'Ice Cream Shop'

In [51]:
ic_shops['Venue Category'] = 'Ice Cream Shop'
ic_shops

Unnamed: 0,Parish,Parish Latitude,Parish Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
266,St. Michael,13.118029,-59.600986,Chilly Moos Ice Cream Treatery,13.075227,-59.589527,Ice Cream Shop
241,St. Michael,13.118029,-59.600986,Chefette,13.091495,-59.585205,Ice Cream Shop
0,Christ Church,13.06667,-59.53333,Chefette,13.074406,-59.588523,Ice Cream Shop
1,St. James,13.18789,-59.628104,Chefette,13.200587,-59.615367,Ice Cream Shop
2,St. Philip,13.122874,-59.469571,Chefette,13.11716,-59.47618,Ice Cream Shop
3,St. Michael,13.118029,-59.600986,Chefette,13.117091,-59.617039,Ice Cream Shop
4,St. Michael,13.118029,-59.600986,Chefette,13.09697,-59.615466,Ice Cream Shop
5,St. Thomas,13.179373,-59.586824,Chefette,13.154219,-59.611567,Ice Cream Shop
6,St. George,13.146904,-59.547892,Chefette,13.129086,-59.569922,Ice Cream Shop
7,Christ Church,13.06667,-59.53333,Chefette,13.080162,-59.487813,Ice Cream Shop


### Concatenating the accumulated data with the original Foursquare

In [52]:
barbados_venues_n = pd.concat([barbados_venues, ic_shops])
barbados_venues_n.drop_duplicates()

Unnamed: 0,Parish,Parish Latitude,Parish Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Christ Church,13.066670,-59.533330,Enterprise/Miami Beach,13.060394,-59.539852,Beach
1,Christ Church,13.066670,-59.533330,Oistins Fish Fry,13.063553,-59.542639,Seafood Restaurant
2,Christ Church,13.066670,-59.533330,Pat's Place,13.063469,-59.542630,Caribbean Restaurant
3,Christ Church,13.066670,-59.533330,Fred's Bar,13.063560,-59.542452,Caribbean Restaurant
4,Christ Church,13.066670,-59.533330,Mr. Delicious Snack Bar,13.078572,-59.528693,Food Truck
...,...,...,...,...,...,...,...
6,St. George,13.146904,-59.547892,Chefette,13.129086,-59.569922,Ice Cream Shop
7,Christ Church,13.066670,-59.533330,Chefette,13.080162,-59.487813,Ice Cream Shop
8,St. James,13.187890,-59.628104,Chillz Delight Treats,13.162094,-59.636862,Ice Cream Shop
9,Christ Church,13.066670,-59.533330,Cafe de Paris,13.065909,-59.563326,Ice Cream Shop


### Preparing the Data for Clustering

In [53]:
# analyze each neighborhood

# one hot encoding
bdosV_onehot = pd.get_dummies(barbados_venues_n[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
bdosV_onehot['Parish'] = barbados_venues_n['Parish'] 

# move neighborhood column to the first column
fixed_columns = [bdosV_onehot.columns[-1]] + list(bdosV_onehot.columns[:-1])
bdosV_onehot = bdosV_onehot[fixed_columns]

bdosV_onehot.head()

Unnamed: 0,Parish,Airport Terminal,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Automotive Shop,BBQ Joint,Bakery,...,Spa,Sports Bar,Steakhouse,Supermarket,Surf Spot,Taco Place,Tea Room,Trail,Vegetarian / Vegan Restaurant,Zoo
0,Christ Church,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Christ Church,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Christ Church,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Christ Church,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Christ Church,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [54]:
bdos_grouped = bdosV_onehot.groupby('Parish').mean().reset_index()
bdos_grouped

Unnamed: 0,Parish,Airport Terminal,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Automotive Shop,BBQ Joint,Bakery,...,Spa,Sports Bar,Steakhouse,Supermarket,Surf Spot,Taco Place,Tea Room,Trail,Vegetarian / Vegan Restaurant,Zoo
0,Christ Church,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,...,0.0,0.010101,0.010101,0.030303,0.020202,0.0,0.0,0.0,0.0,0.0
1,St. Andrew,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.1
2,St. George,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.133333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,St. James,0.0,0.028986,0.0,0.014493,0.014493,0.0,0.0,0.0,0.014493,...,0.0,0.0,0.0,0.028986,0.0,0.014493,0.0,0.0,0.0,0.0
4,St. John,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,St. Joseph,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,...,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.05,0.0,0.0
6,St. Lucy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.1
7,St. Michael,0.0,0.009615,0.0,0.0,0.019231,0.0,0.009615,0.019231,0.038462,...,0.0,0.019231,0.0,0.019231,0.0,0.009615,0.009615,0.009615,0.009615,0.0
8,St. Peter,0.0,0.0,0.033333,0.0,0.0,0.033333,0.0,0.0,0.033333,...,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.033333
9,St. Philip,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.038462,0.038462,0.0,0.0,0.0,0.0,0.0


In [55]:
bdos_grouped_ic = bdos_grouped[['Parish', 'Ice Cream Shop']]
bdos_grouped_ic

Unnamed: 0,Parish,Ice Cream Shop
0,Christ Church,0.040404
1,St. Andrew,0.0
2,St. George,0.066667
3,St. James,0.028986
4,St. John,0.0
5,St. Joseph,0.0
6,St. Lucy,0.0
7,St. Michael,0.048077
8,St. Peter,0.0
9,St. Philip,0.038462


### Setting K-Mean Clusters

In [56]:
# set number of clusters
kclusters = 5

bdos_grouped_clustering = bdos_grouped_ic.drop('Parish', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(bdos_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 0, 2, 3, 0, 0, 0, 4, 0, 1])

In [57]:
bdos_grouped_clustering

Unnamed: 0,Ice Cream Shop
0,0.040404
1,0.0
2,0.066667
3,0.028986
4,0.0
5,0.0
6,0.0
7,0.048077
8,0.0
9,0.038462


In [58]:
# add clustering labels
bdos_grouped_ic.insert(0, 'Cluster Labels', kmeans.labels_)

bdos_ic = df_n

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
bdos_ic = bdos_ic.join(bdos_grouped_ic.set_index('Parish'), on='Parish')


In [59]:
bdos_ic

Unnamed: 0,Parish,Capitals,Population (2010 Census),Latitude,Longitude,Cluster Labels,Ice Cream Shop
0,Christ Church,Oistins,54336,13.06667,-59.53333,1,0.040404
1,St. Andrew,Greenland,5139,13.248905,-59.575714,0,0.0
2,St. George,Bulkeley,19767,13.146904,-59.547892,2,0.066667
3,St. James,Holetown,28498,13.18789,-59.628104,3,0.028986
4,St. John,Four Roads,8963,13.157913,-59.504316,0,0.0
5,St. Joseph,Bathsheba,6620,13.197636,-59.545423,0,0.0
6,St. Lucy,Crab Hill,9758,13.304229,-59.61432,0,0.0
7,St. Michael,Bridgetown,88529,13.118029,-59.600986,4,0.048077
8,St. Peter,Speightstown,11300,13.261403,-59.619967,0,0.0
9,St. Philip,Crane,30662,13.122874,-59.469571,1,0.038462


In [60]:
ic_shops['Parish'] = bdos_grouped_ic['Parish']

In [61]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster, popl in zip(bdos_ic['Latitude'], bdos_ic['Longitude'], bdos_ic['Parish'], bdos_ic['Cluster Labels'], bdos_ic['Population (2010 Census)']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster) + ' Population: ' + str(popl), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Cluster 1

In [62]:
bdos_ic.loc[bdos_ic['Cluster Labels'] == 0, bdos_ic.columns[[1] + list(range(5, bdos_ic.shape[1]))]]

Unnamed: 0,Capitals,Cluster Labels,Ice Cream Shop
1,Greenland,0,0.0
4,Four Roads,0,0.0
5,Bathsheba,0,0.0
6,Crab Hill,0,0.0
8,Speightstown,0,0.0


### Cluster 2

In [63]:
bdos_ic.loc[bdos_ic['Cluster Labels'] == 1, bdos_ic.columns[[1] + list(range(5, bdos_ic.shape[1]))]]

Unnamed: 0,Capitals,Cluster Labels,Ice Cream Shop
0,Oistins,1,0.040404
9,Crane,1,0.038462


### Cluster 3

In [64]:
bdos_ic.loc[bdos_ic['Cluster Labels'] == 2, bdos_ic.columns[[1] + list(range(5, bdos_ic.shape[1]))]]

Unnamed: 0,Capitals,Cluster Labels,Ice Cream Shop
2,Bulkeley,2,0.066667


### Cluster 4

In [65]:
bdos_ic.loc[bdos_ic['Cluster Labels'] == 3, bdos_ic.columns[[1] + list(range(5, bdos_ic.shape[1]))]]

Unnamed: 0,Capitals,Cluster Labels,Ice Cream Shop
3,Holetown,3,0.028986
10,Hillaby,3,0.023256


### Cluster 5

In [66]:
bdos_ic.loc[bdos_ic['Cluster Labels'] == 4, bdos_ic.columns[[1] + list(range(5, bdos_ic.shape[1]))]]

Unnamed: 0,Capitals,Cluster Labels,Ice Cream Shop
7,Bridgetown,4,0.048077
