# STONA YOGA - SEATTLE, WASHINGTON
**This notebook will be used to explore the feasibility of openig a Stona Yoga Studio in Seattle, Washington - and if feasible, which neighborhood would be best to do so.**

In [1]:
#Install and import libraries
import pandas as pd
import numpy as np

!conda install -c conda-forge folium=0.5.0 --yes
import folium

!conda install -c conda-forge geopy --yes 
!conda install -c conda-forge geocoder --yes
from geopy.geocoders import Nominatim 
import geocoder

import json 
import requests 
from pandas.io.json import json_normalize 

from sklearn.cluster import KMeans

print("Libraries Installed.")

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2020.6.20          |   py36h9f0ad1d_0         151 KB  conda-forge
    ca-certificates-2020.6.20  |       hecda079_0         145 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    openssl-1.1.1g             |       h516909a_0         2.1 MB  conda-forge
    altair-4.1.0               |             py_1         614 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    branca-0.4.1               |             py_0          26 KB  conda-forge
    ------------------------------------------------------------
                       

## PART I - Data Gathering and Cleaning

## Neighbourhood Data
**Get Seattle North and Central Neighborhood information - Areas, Neigborhoods, Coordinates**

In [2]:
#Import list of neighborhoods in Seattle, WA
url = 'https://en.wikipedia.org/wiki/List_of_neighborhoods_in_Seattle'
df = pd.read_html(url,header=0)
df

[     Unnamed: 0                                  Neighborhood name  \
 0             1                                      North Seattle   
 1             2                                          Broadview   
 2             3                                        Bitter Lake   
 3             4                           North Beach / Blue Ridge   
 4             5                                         Crown Hill   
 5             6                                          Greenwood   
 6             7                                          Northgate   
 7             8                                        Haller Lake   
 8             9                                          Pinehurst   
 9            10                North College Park (Licton Springs)   
 10           11                                         Maple Leaf   
 11           12                                          Lake City   
 12           13                                         Cedar Park   
 13   

In [3]:
#Extract required info from import
sdf = df[0]
sdf = sdf[['Neighborhood name', 'Within larger district']]
sdf.head()

Unnamed: 0,Neighborhood name,Within larger district
0,North Seattle,Seattle
1,Broadview,North Seattle[42]
2,Bitter Lake,North Seattle[42]
3,North Beach / Blue Ridge,North Seattle[42]
4,Crown Hill,North Seattle[42]


In [4]:
#Rename columns for ease of reference
sdf.rename(columns = {'Neighborhood name':'Neighborhood'}, inplace = True)
sdf.rename(columns = {'Within larger district':'District'}, inplace = True)
sdf.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


Unnamed: 0,Neighborhood,District
0,North Seattle,Seattle
1,Broadview,North Seattle[42]
2,Bitter Lake,North Seattle[42]
3,North Beach / Blue Ridge,North Seattle[42]
4,Crown Hill,North Seattle[42]


In [5]:
#Get neighborhoods in Central and North Seattle
cseattle = sdf[sdf.District.str.contains('Central')]
nseattle = sdf[sdf.District.str.contains('North')]
ncseattle = nseattle.append(cseattle).reset_index()
ncseattle = ncseattle.drop('index',1)
ncseattle.head()

Unnamed: 0,Neighborhood,District
0,Broadview,North Seattle[42]
1,Bitter Lake,North Seattle[42]
2,North Beach / Blue Ridge,North Seattle[42]
3,Crown Hill,North Seattle[42]
4,Greenwood,North Seattle[42]


In [6]:
#See how many unique districts are in the df, should be only 3 - North Seattle, Northgate and Central Seattle
ncseattle.District.unique()

array(['North Seattle[42]', 'Northgate[50]',
       'Windermere[42] / North Seattle',
       'North Seattle / Central Seattle', 'Central Seattle[42]',
       'Capitol Hill[94] / Central Seattle',
       'Capitol Hill / Stevens[94] / Central Seattle', 'Central Seattle',
       'Central Area[120]'], dtype=object)

In [7]:
#Get all 'District' values into same formats
ncseattle['District'] = ncseattle['District'].replace(['North Seattle[42]', 'Windermere[42] / North Seattle','North Seattle / Central Seattle'], 'North Seattle')
ncseattle['District'] = ncseattle['District'].replace(['Northgate[50]'], 'Northgate')
ncseattle['District'] = ncseattle['District'].replace(['Central Seattle[42]', 'Capitol Hill[94] / Central Seattle', 'Capitol Hill / Stevens[94] / Central Seattle', 'Central Area[120]'], 'Central Seattle')
ncseattle.head()

Unnamed: 0,Neighborhood,District
0,Broadview,North Seattle
1,Bitter Lake,North Seattle
2,North Beach / Blue Ridge,North Seattle
3,Crown Hill,North Seattle
4,Greenwood,North Seattle


In [8]:
#Clean up Special characters and brackets in Neighborhood column
ncseattle['Neighborhood'] = ncseattle.Neighborhood.str.replace('Licton Springs', '')
ncseattle['Neighborhood'] = ncseattle.Neighborhood.str.replace('U District', '')
ncseattle['Neighborhood'] = ncseattle.Neighborhood.str.replace('CD', '')
ncseattle['Neighborhood'] = ncseattle.Neighborhood.str.replace('120', '')
ncseattle['Neighborhood'] = ncseattle.Neighborhood.str.replace('[', '')
ncseattle['Neighborhood'] = ncseattle.Neighborhood.str.replace(']', '')
ncseattle['Neighborhood'] = ncseattle.Neighborhood.str.replace('(', '')
ncseattle['Neighborhood'] = ncseattle.Neighborhood.str.replace(')', '')
ncseattle['Neighborhood'] = ncseattle.Neighborhood.str.replace('"', '')
ncseattle.head()

Unnamed: 0,Neighborhood,District
0,Broadview,North Seattle
1,Bitter Lake,North Seattle
2,North Beach / Blue Ridge,North Seattle
3,Crown Hill,North Seattle
4,Greenwood,North Seattle


In [9]:
#Clean up names in the Neighborhoods column
ncseattle['Neighborhood'] = ncseattle.Neighborhood.str.replace('North Beach / Blue Ridge', 'North Beach')
ncseattle['Neighborhood'] = ncseattle.Neighborhood.str.replace('Central Area / Central District', 'Central Area')
ncseattle['Neighborhood'] = ncseattle.Neighborhood.str.replace('Harrison / Denny-Blaine', 'Harrison')
ncseattle.head()

Unnamed: 0,Neighborhood,District
0,Broadview,North Seattle
1,Bitter Lake,North Seattle
2,North Beach,North Seattle
3,Crown Hill,North Seattle
4,Greenwood,North Seattle


### Get the coordinates for each neighborhood

In [10]:
# create empty lists
list_lat = []   
list_long = []

for index, row in ncseattle.iterrows(): # iterate over rows in dataframe

    Neighborhood = row['Neighborhood']
    City = 'Seattle'
    State = 'WA'       
    query = str(Neighborhood)+', '+str(City)+', '+str(State)
    
    geolocator = Nominatim(user_agent="wa_explorer")
    location = geolocator.geocode(query)
    
    if location == None:
        lat = "none"
        long = 'none'
    
    else:
        lat = location.latitude
        long = location.longitude
    
    list_lat.append(lat)
    list_long.append(long)

# create new columns from lists
ncseattle['Latitude'] = list_lat   
ncseattle['Longitude'] = list_long

ncseattle.head()

Unnamed: 0,Neighborhood,District,Latitude,Longitude
0,Broadview,North Seattle,47.72232,-122.360407
1,Bitter Lake,North Seattle,47.726236,-122.348764
2,North Beach,North Seattle,47.69621,-122.392362
3,Crown Hill,North Seattle,47.694715,-122.371459
4,Greenwood,North Seattle,47.690981,-122.354877


**Drop the rows without coordinates**

In [11]:
ncseattle = ncseattle.drop(ncseattle[ncseattle.Latitude == 'none'].index)
ncseattle = ncseattle.reset_index()
ncseattle = ncseattle.drop('index',1)
ncseattle.head()
print(ncseattle.shape)

(43, 4)


  result = method(y)


**Draw a map of all Neighbourhoods**

In [12]:
address = 'North Seattle, WA'

geolocator = Nominatim(user_agent="seattle_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of North Seattle are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of North Seattle are 47.6607729, -122.29149712901432.


In [13]:
# create map of North Seattle using latitude and longitude values
map_seattle = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, neighborhood in zip(ncseattle['Latitude'], ncseattle['Longitude'], ncseattle['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_seattle)  
    
map_seattle

Because neighborhoods are different sizes, a lot of duplicate venues will show up for different neighborhoods, thus create clusters of neighborhoods

In [14]:
# set number of clusters
kclusters = 25
ncs_drop = ncseattle.drop('Neighborhood', 1)
ncs_clustering = ncs_drop.drop('District', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(ncs_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([10, 10,  2, 15, 20,  4,  4,  3, 19, 18], dtype=int32)

In [15]:
# add clustering labels
ncseattle.insert(0, 'Cluster Labels', kmeans.labels_)

ncseattle.head()

Unnamed: 0,Cluster Labels,Neighborhood,District,Latitude,Longitude
0,10,Broadview,North Seattle,47.72232,-122.360407
1,10,Bitter Lake,North Seattle,47.726236,-122.348764
2,2,North Beach,North Seattle,47.69621,-122.392362
3,15,Crown Hill,North Seattle,47.694715,-122.371459
4,20,Greenwood,North Seattle,47.690981,-122.354877


get the midpoint lat and long for each cluster

In [16]:
ncs_clus_coord = ncseattle[['Cluster Labels','Latitude','Longitude']]
ncs_clus_coord.head()

Unnamed: 0,Cluster Labels,Latitude,Longitude
0,10,47.72232,-122.360407
1,10,47.726236,-122.348764
2,2,47.69621,-122.392362
3,15,47.694715,-122.371459
4,20,47.690981,-122.354877


In [17]:
ncs_clus_coord.rename(columns = {'Latitude':'Cluster Latitude'}, inplace = True)
ncs_clus_coord.rename(columns = {'Longitude':'Cluster Longitude'}, inplace = True)
ncs_clus_coord.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


Unnamed: 0,Cluster Labels,Cluster Latitude,Cluster Longitude
0,10,47.72232,-122.360407
1,10,47.726236,-122.348764
2,2,47.69621,-122.392362
3,15,47.694715,-122.371459
4,20,47.690981,-122.354877


In [18]:
ncs_clus = ncs_clus_coord.groupby('Cluster Labels').mean().reset_index()
ncs_clus

Unnamed: 0,Cluster Labels,Cluster Latitude,Cluster Longitude
0,0,47.639919,-122.335558
1,1,47.690253,-122.290811
2,2,47.69621,-122.392362
3,3,47.602372,-122.331063
4,4,47.716451,-122.327491
5,5,47.63593,-122.280196
6,6,47.646811,-122.399489
7,7,47.678284,-122.338549
8,8,47.611081,-122.292902
9,9,47.676646,-122.263052


In [58]:
# create map of North Seattle using latitude and longitude values
map_clust = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, cluster in zip(ncs_clus['Cluster Latitude'], ncs_clus['Cluster Longitude'], ncs_clus['Cluster Labels']):
    label = '{}'.format(cluster)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_clust)  
    
map_clust

## PART II - Exploratory Data Analysis

## Location Data

In [20]:
#save a back up df
test_df = ncseattle
test_df.head()

Unnamed: 0,Cluster Labels,Neighborhood,District,Latitude,Longitude
0,10,Broadview,North Seattle,47.72232,-122.360407
1,10,Bitter Lake,North Seattle,47.726236,-122.348764
2,2,North Beach,North Seattle,47.69621,-122.392362
3,15,Crown Hill,North Seattle,47.694715,-122.371459
4,20,Greenwood,North Seattle,47.690981,-122.354877


**Define Foursquare Credentials and Version**

In [21]:
CLIENT_ID = 'QJSF02SQUDXX1BIVDCTAVMNL1RTZSFHQGSDM5G34EGVXW2KU' # your Foursquare ID
CLIENT_SECRET = 'TKTRXHW4CBP4JLLKCQ2G5F23JASZUIRD0FFHJ0AXW15CVKRG' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: QJSF02SQUDXX1BIVDCTAVMNL1RTZSFHQGSDM5G34EGVXW2KU
CLIENT_SECRET:TKTRXHW4CBP4JLLKCQ2G5F23JASZUIRD0FFHJ0AXW15CVKRG


**Define a function to get location data for each cluster from Foursquare**

Perform a search query for venues that are good to use when looking at opening stona yoga

Yoga, Dispensary, Cannabis, Organic, Fitness, Gym, Health Food



Define a function that will run the search for these types of venues

In [22]:
def SearchVenues(names, latitudes, longitudes):
    
    search_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
                    
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            lat, 
            lng, 
            VERSION, 
            search_query, 
            radius, 
            LIMIT)
                
        # make the GET request
        results = requests.get(url).json()["response"]['venues']
        
        # return only relevant information for each nearby venue
        search_list.append([(
            name, 
            lat, 
            lng, 
            v['name'], 
            v['location']['lat'], 
            v['location']['lng'],  
            search_query) for v in results])

    search_venues = pd.DataFrame([venue for search_list in search_list for venue in search_list])
    search_venues.columns = ['Cluser', 
                  'Cluster Latitude', 
                  'Cluster Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Search']
    
    return(search_venues)

### Search for the type of venues per Cluster

Search for **YOGA** studios

In [23]:
#Set default values for search
LIMIT = 15 
radius = 500
search_query = 'yoga'

In [24]:
yoga_search = SearchVenues(names=ncs_clus['Cluster Labels'],
                                   latitudes=ncs_clus['Cluster Latitude'],
                                   longitudes=ncs_clus['Cluster Longitude']
                                  )
print(yoga_search.shape)
yoga_search.sort_values('Venue')

(27, 7)


Unnamed: 0,Cluser,Cluster Latitude,Cluster Longitude,Venue,Venue Latitude,Venue Longitude,Search
26,23,47.659463,-122.334342,Aditi Yoga,47.658396,-122.342085,yoga
0,3,47.602372,-122.331063,Aum Yoga Therapy,47.60217,-122.333915,yoga
24,22,47.650453,-122.349986,Barefoot Yoga Co.,47.646661,-122.350864,yoga
18,22,47.650453,-122.349986,Bikram Yoga Seattle,47.64905,-122.344261,yoga
7,14,47.624183,-122.325858,CorePower Yoga,47.623323,-122.321225,yoga
8,14,47.624183,-122.325858,Corepower Yoga,47.621707,-122.326055,yoga
14,19,47.668213,-122.294562,Dahn Yoga,47.667296,-122.30132,yoga
17,20,47.690981,-122.354877,In & Out Yoga,47.687494,-122.352306,yoga
4,11,47.669302,-122.31348,Inside Out Yoga,47.670158,-122.313217,yoga
2,3,47.602372,-122.331063,Laura's Yoga Laboratory,47.604333,-122.334278,yoga


In [25]:
yoga_dup = yoga_search[yoga_search.Venue.duplicated() == True]
yoga_dup.sort_values('Venue')

Unnamed: 0,Cluser,Cluster Latitude,Cluster Longitude,Venue,Venue Latitude,Venue Longitude,Search
21,22,47.650453,-122.349986,Sweat Hot Yoga,47.651043,-122.350239,yoga


In [26]:
# create map of yoga studios in North Seattle using latitude and longitude values
yoga_map = map_clust

# add markers to map
for lat, lng, venue in zip(yoga_search['Venue Latitude'], yoga_search['Venue Longitude'], yoga_search['Venue']):
    label = '{}'.format(venue)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=0.4,
        parse_html=False).add_to(yoga_map)  
    
yoga_map

Search for **FITNESS** centres and gyms

In [35]:
#Set default values for search
LIMIT = 15 
radius = 450
search_query = 'fitness,gym'

In [36]:
fit_search = SearchVenues(names=ncs_clus['Cluster Labels'],
                                   latitudes=ncs_clus['Cluster Latitude'],
                                   longitudes=ncs_clus['Cluster Longitude']
                                  )
print(fit_search.shape)
fit_search.sort_values('Venue')

(65, 7)


Unnamed: 0,Cluser,Cluster Latitude,Cluster Longitude,Venue,Venue Latitude,Venue Longitude,Search
14,3,47.602372,-122.331063,1001 4th Avenue Plaza Fitness Center,47.605316,-122.334541,"fitness,gym"
23,11,47.669302,-122.313480,AVA U District Gym,47.665112,-122.314114,"fitness,gym"
16,3,47.602372,-122.331063,Addison on Fourth Apartments,47.599684,-122.328714,"fitness,gym"
50,22,47.650453,-122.349986,Anytime Fitness,47.649190,-122.348250,"fitness,gym"
59,22,47.650453,-122.349986,Cambio Fitness,47.651546,-122.352531,"fitness,gym"
60,22,47.650453,-122.349986,Center for Holistic Health & Fitness,47.651760,-122.353450,"fitness,gym"
25,13,47.719162,-122.295494,City North Apartments Gym,47.719563,-122.294780,"fitness,gym"
24,11,47.669302,-122.313480,Community Fitness,47.673202,-122.317312,"fitness,gym"
27,13,47.719162,-122.295494,Cormaster Fitness,47.719611,-122.294551,"fitness,gym"
12,3,47.602372,-122.331063,Courtyard Seattle Fitness Center,47.602943,-122.332634,"fitness,gym"


In [38]:
fit_dup = fit_search[fit_search.Venue.duplicated() == True]
fit_dup.sort_values('Venue')

Unnamed: 0,Cluser,Cluster Latitude,Cluster Longitude,Venue,Venue Latitude,Venue Longitude,Search
63,22,47.650453,-122.349986,IanFitness,47.648373,-122.355263,"fitness,gym"
64,22,47.650453,-122.349986,IanFitness,47.648373,-122.355263,"fitness,gym"
6,3,47.602372,-122.331063,Seattle Executive Fitness,47.604802,-122.329945,"fitness,gym"


In [39]:
# add markers to map
fit_map = map_clust

for lat, lng, venue in zip(fit_search['Venue Latitude'], fit_search['Venue Longitude'], fit_search['Venue']):
    label = '{}'.format(venue)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='orange',
        fill=True,
        fill_color='orange',
        fill_opacity=0.4,
        parse_html=False).add_to(fit_map)  
    
fit_map

Search for **CANNABIS DISPENSARY**

In [41]:
#Set default values for search
LIMIT = 15 
radius = 650
search_query = 'cannabis,dispensary'

In [42]:
canna_search = SearchVenues(names=ncs_clus['Cluster Labels'],
                                   latitudes=ncs_clus['Cluster Latitude'],
                                   longitudes=ncs_clus['Cluster Longitude']
                                  )
print(canna_search.shape)
canna_search.sort_values('Venue')

(8, 7)


Unnamed: 0,Cluser,Cluster Latitude,Cluster Longitude,Venue,Venue Latitude,Venue Longitude,Search
6,22,47.650453,-122.349986,Bastyr Dispensary,47.652327,-122.342193,"cannabis,dispensary"
0,3,47.602372,-122.331063,"C3 Law Group, PLLC | Cannabis Law Firm",47.6037,-122.33447,"cannabis,dispensary"
3,14,47.624183,-122.325858,Cannabis Hemp Store,47.621819,-122.317162,"cannabis,dispensary"
2,11,47.669302,-122.31348,Cannabis Training Institute,47.668458,-122.313174,"cannabis,dispensary"
4,20,47.690981,-122.354877,Dockside Cannabis Recreational Dispensary - 85...,47.690238,-122.344989,"cannabis,dispensary"
7,22,47.650453,-122.349986,Hashtag Recreational Cannabis,47.650415,-122.34249,"cannabis,dispensary"
1,8,47.611081,-122.292902,Ponder,47.612715,-122.300765,"cannabis,dispensary"
5,22,47.650453,-122.349986,Satori Recreational Cannabis - Seattle Dispensary,47.6525,-122.354721,"cannabis,dispensary"


In [43]:
canna_dup = canna_search[canna_search.Venue.duplicated() == True]
canna_dup.sort_values('Venue')

Unnamed: 0,Cluser,Cluster Latitude,Cluster Longitude,Venue,Venue Latitude,Venue Longitude,Search


In [44]:
# add markers to map
canna_map = map_clust

for lat, lng, venue in zip(canna_search['Venue Latitude'], canna_search['Venue Longitude'], canna_search['Venue']):
    label = '{}'.format(venue)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='green',
        fill=True,
        fill_color='green',
        fill_opacity=0.4,
        parse_html=False).add_to(canna_map)  
    
canna_map

Search for **HEALTH/ORGANIC FOOD**

In [50]:
#Set default values for search
LIMIT = 15 
radius = 450
search_query = 'organic,food'

In [51]:
health_search = SearchVenues(names=ncs_clus['Cluster Labels'],
                                   latitudes=ncs_clus['Cluster Latitude'],
                                   longitudes=ncs_clus['Cluster Longitude']
                                  )
print(health_search.shape)
health_search.sort_values('Venue')

(41, 7)


Unnamed: 0,Cluser,Cluster Latitude,Cluster Longitude,Venue,Venue Latitude,Venue Longitude,Search
9,8,47.611081,-122.292902,'s Food,47.610294,-122.295235,"organic,food"
31,22,47.650453,-122.349986,123 Thai Food,47.648456,-122.35508,"organic,food"
10,9,47.676646,-122.263052,Athena's Food Truck,47.674796,-122.261269,"organic,food"
32,22,47.650453,-122.349986,Bigboys Filipino Food Truck,47.650316,-122.353751,"organic,food"
19,14,47.624183,-122.325858,Boo's Food & Booze!,47.621114,-122.323763,"organic,food"
4,3,47.602372,-122.331063,Celesto Espresso Food Truck,47.598116,-122.330394,"organic,food"
24,20,47.690981,-122.354877,Chaco Canyon Organic Café,47.690106,-122.355137,"organic,food"
6,3,47.602372,-122.331063,Cherry Street Food Bank,47.605325,-122.326767,"organic,food"
37,23,47.659463,-122.334342,Chow Foods Main Office,47.661519,-122.332908,"organic,food"
7,3,47.602372,-122.331063,Columbia Tower Club,47.604507,-122.330484,"organic,food"


In [52]:
health_dup = health_search[health_search.Venue.duplicated() == True]
health_dup.sort_values('Venue')

Unnamed: 0,Cluser,Cluster Latitude,Cluster Longitude,Venue,Venue Latitude,Venue Longitude,Search
18,14,47.624183,-122.325858,QFC,47.622597,-122.321378,"organic,food"
38,23,47.659463,-122.334342,QFC,47.661661,-122.336213,"organic,food"


In [53]:
# add markers to map
health_map = map_clust

for lat, lng, venue in zip(health_search['Venue Latitude'], health_search['Venue Longitude'], health_search['Venue']):
    label = '{}'.format(venue)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='yellow',
        fill=True,
        fill_color='yellow',
        fill_opacity=0.4,
        parse_html=False).add_to(health_map)  
    
health_map

Create a map with all location data on it: **Yoga, Fitness, Dispensary & Health Food**

In [59]:
consolidated_map = map_clust

#Add Health markers
for lat, lng, venue in zip(health_search['Venue Latitude'], health_search['Venue Longitude'], health_search['Venue']):
    label = '{}'.format(venue)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='yellow',
        fill=True,
        fill_color='yellow',
        fill_opacity=0.4,
        parse_html=False).add_to(consolidated_map)

#Add Fitness markers    
for lat, lng, venue in zip(fit_search['Venue Latitude'], fit_search['Venue Longitude'], fit_search['Venue']):
    label = '{}'.format(venue)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='orange',
        fill=True,
        fill_color='orange',
        fill_opacity=0.4,
        parse_html=False).add_to(consolidated_map)

#Add Yoga markers    
for lat, lng, venue in zip(yoga_search['Venue Latitude'], yoga_search['Venue Longitude'], yoga_search['Venue']):
    label = '{}'.format(venue)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=0.4,
        parse_html=False).add_to(consolidated_map)

#Add Dispensary markers    
for lat, lng, venue in zip(canna_search['Venue Latitude'], canna_search['Venue Longitude'], canna_search['Venue']):
    label = '{}'.format(venue)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='green',
        fill=True,
        fill_color='green',
        fill_opacity=0.4,
        parse_html=False).add_to(consolidated_map)
    
consolidated_map

## Best and Worst Neighborhoods

**Green and Yellow:**

**Only Green:**

**Only Yellow:**

**No Red or Orange:** 2, 6, 7, 16, 21, 24

**Orange and Yellow:** 1, 9, 18

**Only Orange:** 0, 4, 5, 10, 17

**Only Red:** 12, 15, 20(G), 23

**Orange and Red:** 3(G), 8(G), 11(G), 13, 14(G), 19, 22

In [62]:
ncseattle.sort_values('Cluster Labels').head()

Unnamed: 0,Cluster Labels,Neighborhood,District,Latitude,Longitude
26,0,Lake Union,North Seattle,47.639919,-122.335558
34,0,Lake Union,North Seattle,47.639919,-122.335558
11,1,Wedgwood,North Seattle,47.690253,-122.290811
2,2,North Beach,North Seattle,47.69621,-122.392362
35,3,Downtown,Central Seattle,47.604872,-122.333458


### Best Neighborhoods

In [66]:
best = ncseattle[ncseattle['Cluster Labels'].isin([2, 6, 7, 16, 21, 24])]
best

Unnamed: 0,Cluster Labels,Neighborhood,District,Latitude,Longitude
2,2,North Beach,North Seattle,47.69621,-122.392362
12,16,View Ridge,North Seattle,47.679543,-122.274014
16,16,Bryant,North Seattle,47.675233,-122.283493
18,16,Hawthorne Hills,North Seattle,47.672043,-122.275403
19,16,Laurelhurst,North Seattle,47.663432,-122.27707
22,7,Green Lake,North Seattle,47.678284,-122.338549
24,24,Phinney Ridge,North Seattle,47.672131,-122.354031
27,6,Magnolia,Central Seattle,47.646811,-122.399489
31,21,Montlake,Central Seattle,47.641408,-122.303044


### Average Neighborhoods

In [64]:
average = ncseattle[ncseattle['Cluster Labels'].isin([1, 9, 18,0, 4, 5, 10, 17])]
average

Unnamed: 0,Cluster Labels,Neighborhood,District,Latitude,Longitude
0,10,Broadview,North Seattle,47.72232,-122.360407
1,10,Bitter Lake,North Seattle,47.726236,-122.348764
5,4,Northgate,North Seattle,47.713153,-122.321231
6,4,Haller Lake,Northgate,47.719748,-122.333751
9,18,Maple Leaf,Northgate,47.693987,-122.322905
11,1,Wedgwood,North Seattle,47.690253,-122.290811
13,9,Sand Point,North Seattle,47.682359,-122.264312
17,9,Windermere,North Seattle,47.670932,-122.261792
25,17,Ballard,North Seattle,47.676507,-122.386223
26,0,Lake Union,North Seattle,47.639919,-122.335558


### Worst Neighborhoods

In [65]:
worst = ncseattle[ncseattle['Cluster Labels'].isin([12, 15, 20, 23, 3, 8, 11, 13, 14, 19, 22])]
worst

Unnamed: 0,Cluster Labels,Neighborhood,District,Latitude,Longitude
3,15,Crown Hill,North Seattle,47.694715,-122.371459
4,20,Greenwood,North Seattle,47.690981,-122.354877
7,3,Pinehurst,Northgate,47.603832,-122.330062
8,19,North College Park,Northgate,47.660773,-122.291497
10,13,Lake City,North Seattle,47.719162,-122.295494
14,11,Roosevelt,North Seattle,47.677305,-122.313807
15,19,Ravenna,North Seattle,47.675654,-122.297626
20,11,University District,North Seattle,47.661298,-122.313152
21,23,Wallingford,North Seattle,47.659463,-122.334342
23,22,Fremont,North Seattle,47.650453,-122.349986


# PART III - Check type of venues in each of best neighborhoods

In [67]:
LIMIT = 100 # limit of number of venues returned by Foursquare API

In [68]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [69]:
best_venues = getNearbyVenues(names=best['Neighborhood'],
                                   latitudes=best['Latitude'],
                                   longitudes=best['Longitude']
                                  )
best_venues.head()

North Beach
View Ridge
Bryant
Hawthorne Hills
Laurelhurst
Green Lake
Phinney Ridge
Magnolia
Montlake


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,North Beach,47.69621,-122.392362,North Beach Beach Club,47.698958,-122.394127,Beach
1,North Beach,47.69621,-122.392362,Blue Ridge Beach Park,47.699011,-122.394135,Beach
2,North Beach,47.69621,-122.392362,North Beach Park,47.694529,-122.387501,Park
3,North Beach,47.69621,-122.392362,Kayla Kaka,47.694631,-122.397847,Scenic Lookout
4,View Ridge,47.679543,-122.274014,View Ridge Area,47.679516,-122.271654,Park


In [70]:
best_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bryant,2,2,2,2,2,2
Green Lake,8,8,8,8,8,8
Hawthorne Hills,6,6,6,6,6,6
Laurelhurst,5,5,5,5,5,5
Magnolia,4,4,4,4,4,4
Montlake,14,14,14,14,14,14
North Beach,4,4,4,4,4,4
Phinney Ridge,32,32,32,32,32,32
View Ridge,3,3,3,3,3,3


In [71]:
print(best_venues.shape)
print('There are {} uniques categories.'.format(len(best_venues['Venue Category'].unique())))

(78, 7)
There are 44 uniques categories.


In [72]:
# one hot encoding
best_onehot = pd.get_dummies(best_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
best_onehot['Neighborhood'] = best_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [best_onehot.columns[-1]] + list(best_onehot.columns[:-1])
best_onehot = best_onehot[fixed_columns]

best_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Asian Restaurant,Automotive Shop,Bank,Bar,Beach,Bike Shop,Boutique,Bus Line,...,Salon / Barbershop,Scenic Lookout,Tennis Court,Theater,Theme Park Ride / Attraction,Trail,Video Store,Wine Shop,Zoo,Zoo Exhibit
0,North Beach,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,North Beach,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,North Beach,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,North Beach,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
4,View Ridge,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [73]:
best_grouped = best_onehot.groupby('Neighborhood').mean().reset_index()
best_grouped

Unnamed: 0,Neighborhood,American Restaurant,Asian Restaurant,Automotive Shop,Bank,Bar,Beach,Bike Shop,Boutique,Bus Line,...,Salon / Barbershop,Scenic Lookout,Tennis Court,Theater,Theme Park Ride / Attraction,Trail,Video Store,Wine Shop,Zoo,Zoo Exhibit
0,Bryant,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0
1,Green Lake,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,...,0.0,0.0,0.125,0.125,0.0,0.125,0.0,0.0,0.0,0.0
2,Hawthorne Hills,0.333333,0.0,0.166667,0.166667,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Laurelhurst,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Magnolia,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0
5,Montlake,0.071429,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.071429,...,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,North Beach,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,...,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Phinney Ridge,0.0,0.03125,0.0,0.0,0.03125,0.0,0.0,0.03125,0.0,...,0.03125,0.0,0.0,0.0,0.03125,0.03125,0.03125,0.0,0.03125,0.3125
8,View Ridge,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [74]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [75]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
best_venues_sorted = pd.DataFrame(columns=columns)
best_venues_sorted['Neighborhood'] = best_grouped['Neighborhood']

for ind in np.arange(best_grouped.shape[0]):
    best_venues_sorted.iloc[ind, 1:] = return_most_common_venues(best_grouped.iloc[ind, :], num_top_venues)

best_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bryant,Wine Shop,Organic Grocery,Zoo Exhibit,Café,Hardware Store,Grocery Store,Farm,Dessert Shop,Convenience Store,Construction & Landscaping
1,Green Lake,Park,Lake,Trail,Theater,Tennis Court,Beach,Zoo Exhibit,Café,Farm,Dessert Shop
2,Hawthorne Hills,American Restaurant,Bank,Hardware Store,Café,Automotive Shop,Bar,Ice Cream Shop,Asian Restaurant,Grocery Store,Farm
3,Laurelhurst,Pharmacy,Café,Park,Bus Stop,Chinese Restaurant,Hardware Store,Grocery Store,Farm,Dessert Shop,Convenience Store
4,Magnolia,Pool,Bus Stop,Video Store,Coffee Shop,Hardware Store,Grocery Store,Farm,Dessert Shop,Convenience Store,Construction & Landscaping
5,Montlake,Bus Stop,Grocery Store,American Restaurant,Salon / Barbershop,Library,Farm,Park,Coffee Shop,Bus Line,Italian Restaurant
6,North Beach,Beach,Scenic Lookout,Park,Zoo Exhibit,Chinese Restaurant,Hardware Store,Grocery Store,Farm,Dessert Shop,Convenience Store
7,Phinney Ridge,Zoo Exhibit,Pizza Place,Park,Asian Restaurant,Bar,Boutique,Café,Chinese Restaurant,Convenience Store,Dessert Shop
8,View Ridge,Park,Construction & Landscaping,Zoo Exhibit,Italian Restaurant,Hardware Store,Grocery Store,Farm,Dessert Shop,Convenience Store,Coffee Shop
