# First find out which venue categories are most commonly present in proximity to restaurants.
# Start with a query by using the GPS coordinates with a large radius and restaurants.
# Then start another query with each of the identified restaurants to get closeby venues
# 
# Then analyze the neighborhoods and make a dataframe with relative frequencies and transform that into a frame of the 
# 5 most common venue categories in that neighborhood.
# Add a line with the restaurant's most common venue neighbors and use k-means to find out which neighborhood it clusters with.
# This should be the most favorable neighborhood based on the data I fed in.

# Analyze restaurant surroundings:

In [None]:
import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

# !conda install -c conda-forge folium=0.5.0 --yes
# import folium # plotting library

print('Folium installed')
print('Libraries imported.')

In [3]:
CLIENT_ID = '5KDQHQJYMX55H4M4MUTD31PREA33CGFDE4H50VU3CRVFGRWI' # your Foursquare ID
CLIENT_SECRET = 'RRUGEO0ZXPQUW1UCVPNU2Y1CVBUHZIDZVDGSLMCXXIEX4VRE' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 5KDQHQJYMX55H4M4MUTD31PREA33CGFDE4H50VU3CRVFGRWI
CLIENT_SECRET:RRUGEO0ZXPQUW1UCVPNU2Y1CVBUHZIDZVDGSLMCXXIEX4VRE


In [4]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Restaurant name', 
                  'Restaurant Latitude', 
                  'Restaurant Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [5]:
def getLatLong (address):
    #returns latitude and longitude using geopy.geocoders.Nominatim
    geolocator = Nominatim(user_agent="foursquare_agent") # Nominatim has to be instantialized
    location = geolocator.geocode(address)
    latitude = location.latitude
    longitude = location.longitude
    return latitude, longitude

In [56]:
latidude, longitude = getLatLong ("San Leandro, CA")
print (latitude, longitude)

37.7249296 -122.1560768


In [25]:
search_query = 'restaurant'
radius = 2500
LIMIT = 50

In [26]:
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, LIMIT)
url

'https://api.foursquare.com/v2/venues/search?client_id=5KDQHQJYMX55H4M4MUTD31PREA33CGFDE4H50VU3CRVFGRWI&client_secret=RRUGEO0ZXPQUW1UCVPNU2Y1CVBUHZIDZVDGSLMCXXIEX4VRE&ll=37.7249296,-122.1560768&v=20180605&query=restaurant&radius=2500&limit=50'

In [27]:
# results = requests.get(url).json() #Turn off once data frame is done to reduce Foursquare usage, skip and load from csv
results

{'meta': {'code': 200, 'requestId': '5d5723c52b274a002c8b1c97'},
 'response': {'venues': [{'id': '4bb0fdedf964a520dc703ce3',
    'name': 'New Hong Kong Restaurant',
    'location': {'address': '1750 E 14th St',
     'crossStreet': 'Cross with Williams St.',
     'lat': 37.721591384996906,
     'lng': -122.15109431227279,
     'labeledLatLngs': [{'label': 'display',
       'lat': 37.721591384996906,
       'lng': -122.15109431227279}],
     'distance': 574,
     'postalCode': '94577',
     'cc': 'US',
     'city': 'San Leandro',
     'state': 'CA',
     'country': 'United States',
     'formattedAddress': ['1750 E 14th St (Cross with Williams St.)',
      'San Leandro, CA 94577',
      'United States']},
    'categories': [{'id': '4bf58dd8d48988d145941735',
      'name': 'Chinese Restaurant',
      'pluralName': 'Chinese Restaurants',
      'shortName': 'Chinese',
      'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/asian_',
       'suffix': '.png'},
      'primary': Tr

#### Get relevant part of JSON and transform it into a *pandas* dataframe

In [28]:
# assign relevant part of JSON to venues
venues = results['response']['venues']

# tranform venues into a dataframe
dataframe = json_normalize(venues)
dataframe.head()

Unnamed: 0,id,name,categories,referralId,hasPerk,location.address,location.crossStreet,location.lat,location.lng,location.labeledLatLngs,...,location.country,location.formattedAddress,location.neighborhood,delivery.id,delivery.url,delivery.provider.name,delivery.provider.icon.prefix,delivery.provider.icon.sizes,delivery.provider.icon.name,venuePage.id
0,4bb0fdedf964a520dc703ce3,New Hong Kong Restaurant,"[{'id': '4bf58dd8d48988d145941735', 'name': 'C...",v-1565991877,False,1750 E 14th St,Cross with Williams St.,37.721591,-122.151094,"[{'label': 'display', 'lat': 37.72159138499690...",...,United States,"[1750 E 14th St (Cross with Williams St.), San...",,,,,,,,
1,5a908e1d89b06a235a330a00,Xiao Long Bao Restaurant,"[{'id': '4bf58dd8d48988d108941735', 'name': 'D...",v-1565991877,False,1668 E 14th St,,37.722464,-122.152311,"[{'label': 'display', 'lat': 37.72246446500505...",...,United States,"[1668 E 14th St, San Leandro, CA 94577, United...",Downtown San Leandro,,,,,,,
2,4bba2a5fb35776b05f9bca01,Hidden Wok Restaurant,"[{'id': '4bf58dd8d48988d145941735', 'name': 'C...",v-1565991877,False,145 Pelton Center Way,,37.722506,-122.153698,"[{'label': 'display', 'lat': 37.722506, 'lng':...",...,United States,"[145 Pelton Center Way, San Leandro, CA 94577,...",,321121.0,https://www.grubhub.com/restaurant/hidden-wok-...,grubhub,https://fastly.4sqi.net/img/general/cap/,"[40, 50]",/delivery_provider_grubhub_20180129.png,
3,4c33f3627cc0c9b6f7b4f29a,Chang's Gourmet Restaurant,"[{'id': '4bf58dd8d48988d145941735', 'name': 'C...",v-1565991877,False,1057 MacArthur Blvd.,,37.731969,-122.139652,"[{'label': 'display', 'lat': 37.73196940062547...",...,United States,"[1057 MacArthur Blvd., Oakland, CA 94610, Unit...",,,,,,,,
4,4bda4cf33904a5938718469e,Lee's Garden Restaurant,"[{'id': '4bf58dd8d48988d145941735', 'name': 'C...",v-1565991877,False,21 Thornton St,,37.72176,-122.152144,"[{'label': 'display', 'lat': 37.72175969929383...",...,United States,"[21 Thornton St, San Leandro, CA 94577, United...",,1257315.0,https://www.grubhub.com/restaurant/lees-garden...,grubhub,https://fastly.4sqi.net/img/general/cap/,"[40, 50]",/delivery_provider_grubhub_20180129.png,


#### Define information of interest and filter dataframe

In [29]:
# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in dataframe.columns if col.startswith('location.')] + ['id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
dataframe_filtered.columns = [column.split('.')[-1] for column in dataframe_filtered.columns]

dataframe_filtered

Unnamed: 0,name,categories,address,crossStreet,lat,lng,labeledLatLngs,distance,postalCode,cc,city,state,country,formattedAddress,neighborhood,id
0,New Hong Kong Restaurant,Chinese Restaurant,1750 E 14th St,Cross with Williams St.,37.721591,-122.151094,"[{'label': 'display', 'lat': 37.72159138499690...",574,94577.0,US,San Leandro,CA,United States,"[1750 E 14th St (Cross with Williams St.), San...",,4bb0fdedf964a520dc703ce3
1,Xiao Long Bao Restaurant,Dumpling Restaurant,1668 E 14th St,,37.722464,-122.152311,"[{'label': 'display', 'lat': 37.72246446500505...",430,94577.0,US,San Leandro,CA,United States,"[1668 E 14th St, San Leandro, CA 94577, United...",Downtown San Leandro,5a908e1d89b06a235a330a00
2,Hidden Wok Restaurant,Chinese Restaurant,145 Pelton Center Way,,37.722506,-122.153698,"[{'label': 'display', 'lat': 37.722506, 'lng':...",341,94577.0,US,San Leandro,CA,United States,"[145 Pelton Center Way, San Leandro, CA 94577,...",,4bba2a5fb35776b05f9bca01
3,Chang's Gourmet Restaurant,Chinese Restaurant,1057 MacArthur Blvd.,,37.731969,-122.139652,"[{'label': 'display', 'lat': 37.73196940062547...",1644,94610.0,US,Oakland,CA,United States,"[1057 MacArthur Blvd., Oakland, CA 94610, Unit...",,4c33f3627cc0c9b6f7b4f29a
4,Lee's Garden Restaurant,Chinese Restaurant,21 Thornton St,,37.72176,-122.152144,"[{'label': 'display', 'lat': 37.72175969929383...",494,94577.0,US,San Leandro,CA,United States,"[21 Thornton St, San Leandro, CA 94577, United...",,4bda4cf33904a5938718469e
5,Ana Rosa's Mexican Restaurant,Mexican Restaurant,2089 E 14th St,,37.719217,-122.148588,"[{'label': 'display', 'lat': 37.71921662406495...",916,94577.0,US,San Leandro,CA,United States,"[2089 E 14th St, San Leandro, CA 94577, United...",,4bccb294cc8cd13a2969c1cf
6,Benny's Restaurant,Food,794 E 14th St,,37.729187,-122.15923,"[{'label': 'display', 'lat': 37.729187, 'lng':...",549,94577.0,US,San Leandro,CA,United States,"[794 E 14th St, San Leandro, CA 94577, United ...",,4f325e3919836c91c7d0e81c
7,El Novillo Restaurant,Food,2089 E 14th St,,37.719799,-122.149002,"[{'label': 'display', 'lat': 37.719799, 'lng':...",845,94577.0,US,San Leandro,CA,United States,"[2089 E 14th St, San Leandro, CA 94577, United...",,4f3203d219833175d609dd2b
8,Tsuru Sushi,Sushi Restaurant,1427 E 14th St,,37.724021,-122.154729,"[{'label': 'display', 'lat': 37.72402140861661...",155,94577.0,US,San Leandro,CA,United States,"[1427 E 14th St, San Leandro, CA 94577, United...",,4c3e67dadb3b1b8d16746595
9,Italian & Indian Restaurant,Indian Restaurant,,,37.716157,-122.162057,"[{'label': 'display', 'lat': 37.71615685636459...",1109,,US,San Leandro,CA,United States,"[San Leandro, CA, United States]",,531bc77811d2e8eeb2dd2c5c


In [37]:
proximityFrame = getNearbyVenues(names=dataframe_filtered["name"],
                                   latitudes=dataframe_filtered["lat"],
                                   longitudes=dataframe_filtered["lng"]
                                  )
proximityFrame


New Hong Kong Restaurant
Xiao Long Bao Restaurant
Hidden Wok Restaurant
Chang's Gourmet Restaurant
Lee's Garden Restaurant
Ana Rosa's Mexican Restaurant
Benny's Restaurant
El Novillo Restaurant
Tsuru Sushi
Italian & Indian Restaurant
El Yogurt California's Restaurant
San Vicente Restaurant
Paradiso
Reed's Cajun Creole Restaurant
Ploughman's
Vallarta Mexican Restaurant
Saigon Restaurant #2
Los Cantaros Taqueria
Yummy Yummy
Rositas
21st Amendment Brewery
Sweet Fingers
East Village Seafood Restaurant
Blossom Chinese Restaurant
La Piñata #2
Szechwan Restaurant
Marina Restaurant
Stan's Restaurant
Caliente Mexican Restaurant
Images International Restaurant
Vila Cereja


Unnamed: 0,Restaurant name,Restaurant Latitude,Restaurant Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,New Hong Kong Restaurant,37.721591,-122.151094,The Vine Wine & Tapas,37.722054,-122.153484,Tapas Restaurant
1,New Hong Kong Restaurant,37.721591,-122.151094,Xiao Long Bao Restaurant,37.722464,-122.152311,Dumpling Restaurant
2,New Hong Kong Restaurant,37.721591,-122.151094,Taqueria Los Pericos,37.722982,-122.153389,Mexican Restaurant
3,New Hong Kong Restaurant,37.721591,-122.151094,i-Tea,37.722776,-122.154268,Bubble Tea Shop
4,New Hong Kong Restaurant,37.721591,-122.151094,The Cooler,37.723258,-122.153738,Beer Bar
5,New Hong Kong Restaurant,37.721591,-122.151094,Joaquin Deli,37.722679,-122.153673,Deli / Bodega
6,New Hong Kong Restaurant,37.721591,-122.151094,Washington Club,37.720775,-122.153430,Bar
7,New Hong Kong Restaurant,37.721591,-122.151094,Le Soleil Vietnamese Noodles & Grill,37.723279,-122.153711,Vietnamese Restaurant
8,New Hong Kong Restaurant,37.721591,-122.151094,Ana Rosa's Mexican Restaurant,37.719217,-122.148588,Mexican Restaurant
9,New Hong Kong Restaurant,37.721591,-122.151094,Sushi Delight,37.723774,-122.154916,Sushi Restaurant


In [6]:
# proximityFrame.to_csv("proximityFrame") #Save to csv to save Foursquare calls if I need to load the data again
proximityFrame = pd.read_csv("proximityFrame")

In [7]:
proximityFrame["Venue Category"].value_counts()

Mexican Restaurant               41
Pizza Place                      30
Sushi Restaurant                 27
Bar                              22
Chinese Restaurant               22
                                 ..
Big Box Store                     1
Department Store                  1
Shop & Service                    1
Neighborhood                      1
Vegetarian / Vegan Restaurant     1
Name: Venue Category, Length: 98, dtype: int64

In [20]:
# Replace all categories that are specialized restaurants (i.e.: burger joint, mexican restaurant) with just Restaurant
proximityFrame.loc[proximityFrame["Venue Category"].str.contains("Food"), "Venue Category"] = "Restaurant"
proximityFrame.loc[proximityFrame["Venue Category"].str.contains("Restaurant"), "Venue Category"] = "Restaurant"
proximityFrame.loc[proximityFrame["Venue Category"].str.contains("Pizza Place"), "Venue Category"] = "Restaurant"
proximityFrame.loc[proximityFrame["Venue Category"].str.contains("Burger Joint"), "Venue Category"] = "Restaurant"
proximityFrame

Unnamed: 0.1,Unnamed: 0,Restaurant name,Restaurant Latitude,Restaurant Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,0,New Hong Kong Restaurant,37.721591,-122.151094,The Vine Wine & Tapas,37.722054,-122.153484,Restaurant
1,1,New Hong Kong Restaurant,37.721591,-122.151094,Xiao Long Bao Restaurant,37.722464,-122.152311,Restaurant
2,2,New Hong Kong Restaurant,37.721591,-122.151094,Taqueria Los Pericos,37.722982,-122.153389,Restaurant
3,3,New Hong Kong Restaurant,37.721591,-122.151094,i-Tea,37.722776,-122.154268,Bubble Tea Shop
4,4,New Hong Kong Restaurant,37.721591,-122.151094,The Cooler,37.723258,-122.153738,Beer Bar
5,5,New Hong Kong Restaurant,37.721591,-122.151094,Joaquin Deli,37.722679,-122.153673,Deli / Bodega
6,6,New Hong Kong Restaurant,37.721591,-122.151094,Washington Club,37.720775,-122.153430,Bar
7,7,New Hong Kong Restaurant,37.721591,-122.151094,Le Soleil Vietnamese Noodles & Grill,37.723279,-122.153711,Restaurant
8,8,New Hong Kong Restaurant,37.721591,-122.151094,Ana Rosa's Mexican Restaurant,37.719217,-122.148588,Restaurant
9,9,New Hong Kong Restaurant,37.721591,-122.151094,Sushi Delight,37.723774,-122.154916,Restaurant


In [8]:
# Find out which venue categories are in close physical proximity to restaurants:
proximityFrame["Venue Category"].value_counts()

Restaurant          244
Bar                  22
Grocery Store        22
Pharmacy             18
Coffee Shop          17
                   ... 
Department Store      1
Vineyard              1
Cosmetics Shop        1
Shop & Service        1
Neighborhood          1
Name: Venue Category, Length: 71, dtype: int64

# Get neighborhood statistics

In [9]:
import csv
with open('San Leandro neighborhoods.txt', 'r') as file:
    reader = csv.reader(file)
    neighborhoods = list(reader)[0]
neighborhoods

['Bay-O-Vista',
 ' Farrelly Pond',
 ' Broadmoor',
 ' Mulford Gardens',
 ' Floresta Gardens',
 ' Washington Manor',
 ' Heron Bay',
 ' Estudillo Estates',
 ' Marina Faire']

In [11]:
# get GPS coordinates for each neighborhood and make list of truples to easily convert to data frame
neighborhoodsWithGPS = list()
for neighborhood in neighborhoods:
    lat, lng = getLatLong(neighborhood)
    neighborhoodsWithGPS.append((neighborhood, lat, lng))
neighborhoodsWithGPS

[('Bay-O-Vista', 37.72717495, -122.132386314574),
 (' Farrelly Pond', 39.879514, -74.175452),
 (' Broadmoor', 51.36839555, -0.77879162927407),
 (' Mulford Gardens', 37.7046526, -122.1796885),
 (' Floresta Gardens', 37.9873511, 14.9097135),
 (' Washington Manor', 39.73432955, -104.979029975045),
 (' Heron Bay', 30.3546444, -88.1302777),
 (' Estudillo Estates', 37.7171421, -122.136648138689),
 (' Marina Faire', -17.54433565, -149.565402644154)]

In [13]:
# neighborhoodsDF = pd.DataFrame(neighborhoodsWithGPS, columns=["Neighborhood", "Latitude", "Longitude"])
# neighborhoodsDF.to_csv("neighborhoodsDF") #using saved csv file, far quicker than re-aquiring GPS data
neighborhoodsDF = pd.read_csv("neighborhoodsDF")
neighborhoodsDF


Unnamed: 0.1,Unnamed: 0,Neighborhood,Latitude,Longitude
0,0,Bay-O-Vista,37.727175,-122.132386
1,1,Farrelly Pond,37.730799,-122.16366
2,2,Broadmoor,37.735208,-122.157487
3,3,Mulford Gardens,37.704653,-122.179688
4,4,Floresta Gardens,37.701668,-122.146796
5,5,Washington Manor,37.685459,-122.150921
6,6,Heron Bay,37.682137,-122.161047
7,7,Estudillo Estates,37.717142,-122.136648
8,8,Marina Faire,37.694906,-122.176441


In [15]:
LIMIT = 50
# Read SL_venues from csv to save foursquare hits
# SL_venues = getNearbyVenues(names=neighborhoodsDF['Neighborhood'],
#                                    latitudes=neighborhoodsDF['Latitude'],
#                                    longitudes=neighborhoodsDF['Longitude']
#                                   )
# SL_venues.to_csv("SL_venues")
SL_venues = pd.from_csv("SL_venues")
SL_venues


 Bay-O-Vista
 Farrelly Pond
 Broadmoor
 Mulford Gardens
 Floresta Gardens
 Washington Manor
 Heron Bay
 Estudillo Estates
 Marina Faire


Unnamed: 0,Restaurant name,Restaurant Latitude,Restaurant Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Bay-O-Vista,37.727175,-122.132386,Bay-O-Vista Swim & Tennis Club,37.727056,-122.127006,Pool
1,Bay-O-Vista,37.727175,-122.132386,Chabot Park,37.731065,-122.131040,Park
2,Bay-O-Vista,37.727175,-122.132386,Low Chabot,37.730426,-122.131245,Park
3,Bay-O-Vista,37.727175,-122.132386,Peets Coffee,37.723745,-122.130102,Café
4,Bay-O-Vista,37.727175,-122.132386,Chabot Disc Golf,37.731366,-122.131472,Disc Golf
5,Farrelly Pond,37.730799,-122.163660,Bluebird Pizzeria,37.731800,-122.160904,Pizza Place
6,Farrelly Pond,37.730799,-122.163660,Mike's Feed & Pets,37.729896,-122.159447,Pet Store
7,Farrelly Pond,37.730799,-122.163660,Sweet Fingers,37.731967,-122.161156,Caribbean Restaurant
8,Farrelly Pond,37.730799,-122.163660,Evergreen Nursery,37.730327,-122.167807,Garden Center
9,Farrelly Pond,37.730799,-122.163660,8-Twelve Oriental Market,37.730889,-122.160345,Vietnamese Restaurant


In [25]:
#change header
SL_venues.rename(columns={"Restaurant name":"Neighborhood", "Restaurant Latitude":"Neighborhood Latitude", "Restaurant Longitude":"Neighborhood Longitude"}, inplace=True)
SL_venues

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Bay-O-Vista,37.727175,-122.132386,Bay-O-Vista Swim & Tennis Club,37.727056,-122.127006,Pool
1,Bay-O-Vista,37.727175,-122.132386,Chabot Park,37.731065,-122.131040,Park
2,Bay-O-Vista,37.727175,-122.132386,Low Chabot,37.730426,-122.131245,Park
3,Bay-O-Vista,37.727175,-122.132386,Peets Coffee,37.723745,-122.130102,Café
4,Bay-O-Vista,37.727175,-122.132386,Chabot Disc Golf,37.731366,-122.131472,Disc Golf
5,Farrelly Pond,37.730799,-122.163660,Bluebird Pizzeria,37.731800,-122.160904,Restaurant
6,Farrelly Pond,37.730799,-122.163660,Mike's Feed & Pets,37.729896,-122.159447,Pet Store
7,Farrelly Pond,37.730799,-122.163660,Sweet Fingers,37.731967,-122.161156,Restaurant
8,Farrelly Pond,37.730799,-122.163660,Evergreen Nursery,37.730327,-122.167807,Garden Center
9,Farrelly Pond,37.730799,-122.163660,8-Twelve Oriental Market,37.730889,-122.160345,Restaurant


In [None]:
# Replace all categories that are specialized restaurants (i.e.: burger joint, mexican restaurant) with just Restaurant
SL_venues.loc[SL_venues["Venue Category"].str.contains("Food"), "Venue Category"] = "Restaurant"
SL_venues.loc[SL_venues["Venue Category"].str.contains("Restaurant"), "Venue Category"] = "Restaurant"
SL_venues.loc[SL_venues["Venue Category"].str.contains("Pizza Place"), "Venue Category"] = "Restaurant"
SL_venues.loc[SL_venues["Venue Category"].str.contains("Burger Joint"), "Venue Category"] = "Restaurant"
SL_venues

# In order to find locations most similar to where restaurants are located (and therefore at some level succeed),
# I will treat all restaurants as one neighborhood. Therefore I will rename all restaurants to just restaurant and use
# the data from all restaurants as one neighborhood: Restaurant. I will add that neighborhood profile into the data frame
# from actual neighborhoods and then see which neighborhood it clusters with which should be the most similar one.


In [32]:
proximityFrame["Restaurant name"] = "Restaurant"
proximityFrame.rename(columns={"Restaurant name":"Neighborhood", "Restaurant Latitude":"Neighborhood Latitude", "Restaurant Longitude":"Neighborhood Longitude"}, inplace=True)
proximityFrame.head(2)

Unnamed: 0.1,Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,0,Restaurant,37.721591,-122.151094,The Vine Wine & Tapas,37.722054,-122.153484,Restaurant
1,1,Restaurant,37.721591,-122.151094,Xiao Long Bao Restaurant,37.722464,-122.152311,Restaurant


In [37]:
# drop all columns except neighborhood name in both data frames
proximityFrame = proximityFrame[["Neighborhood","Venue Category"]]
SL_venues = SL_venues[["Neighborhood","Venue Category"]]

In [38]:
# merge both data frames
mergedDF = pd.concat([SL_venues, proximityFrame])
mergedDF

Unnamed: 0,Neighborhood,Venue Category
0,Bay-O-Vista,Pool
1,Bay-O-Vista,Park
2,Bay-O-Vista,Park
3,Bay-O-Vista,Café
4,Bay-O-Vista,Disc Golf
5,Farrelly Pond,Restaurant
6,Farrelly Pond,Pet Store
7,Farrelly Pond,Restaurant
8,Farrelly Pond,Garden Center
9,Farrelly Pond,Restaurant


In [46]:
# one hot encoding
SL_onehot = pd.get_dummies(mergedDF[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
SL_onehot['Neighborhood'] = mergedDF['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [SL_onehot.columns[-1]] + list(SL_onehot.columns[:-1])
SL_onehot = SL_onehot[fixed_columns]

SL_onehot

Unnamed: 0,Yoga Studio,ATM,Auto Dealership,Auto Garage,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,...,Shop & Service,Shopping Mall,Spa,Sporting Goods Shop,Steakhouse,Supplement Shop,Theater,Trail,Video Game Store,Vineyard
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [47]:
SL_grouped = SL_onehot.groupby("Neighborhood").mean().reset_index()
SL_grouped

Unnamed: 0,Neighborhood,Yoga Studio,ATM,Auto Dealership,Auto Garage,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,Bank,...,Shop & Service,Shopping Mall,Spa,Sporting Goods Shop,Steakhouse,Supplement Shop,Theater,Trail,Video Game Store,Vineyard
0,Bay-O-Vista,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Broadmoor,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Estudillo Estates,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0
3,Farrelly Pond,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Floresta Gardens,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Heron Bay,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0
6,Marina Faire,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Mulford Gardens,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Restaurant,0.003268,0.011438,0.001634,0.011438,0.004902,0.009804,0.003268,0.026144,0.017974,...,0.001634,0.003268,0.006536,0.001634,0.003268,0.006536,0.0,0.0,0.009804,0.001634


In [49]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 5

SL_grouped_clustering = SL_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(SL_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([4, 3, 2, 1, 1, 0, 3, 1, 1], dtype=int32)

In [50]:
# add clustering labels
SL_grouped.insert(0, 'Cluster Labels', kmeans.labels_)

SL_grouped

Unnamed: 0,Cluster Labels,Neighborhood,Yoga Studio,ATM,Auto Dealership,Auto Garage,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,...,Shop & Service,Shopping Mall,Spa,Sporting Goods Shop,Steakhouse,Supplement Shop,Theater,Trail,Video Game Store,Vineyard
0,4,Bay-O-Vista,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,3,Broadmoor,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,Estudillo Estates,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0
3,1,Farrelly Pond,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1,Floresta Gardens,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0,Heron Bay,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0
6,3,Marina Faire,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,1,Mulford Gardens,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,1,Restaurant,0.003268,0.011438,0.001634,0.011438,0.004902,0.009804,0.003268,0.026144,...,0.001634,0.003268,0.006536,0.001634,0.003268,0.006536,0.0,0.0,0.009804,0.001634
