# Exploring restaurants in LA neighbourhoods for new restaurant set up

In [70]:
#!pip install geopy ~uncomment this if not installed 
#!pip install folium ~uncomment this if not installed
#!pip install wget ~uncomment this if not installed
#!pip install yellowbrick ~uncomment this if not installed

import pandas as pd #allows data analysis by creating dataframe
import numpy as np #allows mathematical operations on arrays
import wget #importing dataset given url
pd.set_option('display.max_columns', None) #to view maximum cols in pandas if huge dataframe
pd.set_option('display.max_rows', None) #to view maximum rows in pandas if huge dataframe

from geopy.geocoders import Nominatim #to create user agent required for geocoding process 
import folium #map rendering library

import requests # library to handle requests
import json #to handle json files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

#get all plotting libraries
import matplotlib
import matplotlib.cm as cm
import matplotlib.colors as colors
import seaborn as sns
import matplotlib.pyplot as plt

#import clustering(KMeans) libraries
import sklearn
import yellowbrick
from sklearn.cluster import KMeans
from yellowbrick.cluster import KElbowVisualizer

from sklearn.preprocessing import StandardScaler #for scaling features, normalise data 

print('Libraries imported')


Libraries imported


In [5]:
#import data from below given url, has all information about LA neighbourhoods
la_data = wget.download('https://usc.data.socrata.com/resource/9utn-waje.json')
print('Data downloaded!')

Data downloaded!


In [6]:
with open(la_data) as json_data:
    la_data = json.load(json_data)

In [7]:
df = pd.DataFrame(la_data)
df.head()

Unnamed: 0,set,slug,the_geom,kind,external_i,name,display_na,sqmi,type,latitude,longitude,location
0,L.A. County Neighborhoods (Current),acton,"{'type': 'MultiPolygon', 'coordinates': [[[[-1...",L.A. County Neighborhood (Current),acton,Acton,Acton L.A. County Neighborhood (Current),39.3391089485,unincorporated-area,-118.16981019229348,34.49735523924085,POINT(34.497355239240846 -118.16981019229348)
1,L.A. County Neighborhoods (Current),adams-normandie,"{'type': 'MultiPolygon', 'coordinates': [[[[-1...",L.A. County Neighborhood (Current),adams-normandie,Adams-Normandie,Adams-Normandie L.A. County Neighborhood (Curr...,0.805350187789,segment-of-a-city,-118.30020800000013,34.03146149912416,POINT(34.031461499124156 -118.30020800000011)
2,L.A. County Neighborhoods (Current),agoura-hills,"{'type': 'MultiPolygon', 'coordinates': [[[[-1...",L.A. County Neighborhood (Current),agoura-hills,Agoura Hills,Agoura Hills L.A. County Neighborhood (Current),8.14676029818,standalone-city,-118.75988450000015,34.146736499122795,POINT(34.146736499122795 -118.75988450000015)
3,L.A. County Neighborhoods (Current),agua-dulce,"{'type': 'MultiPolygon', 'coordinates': [[[[-1...",L.A. County Neighborhood (Current),agua-dulce,Agua Dulce,Agua Dulce L.A. County Neighborhood (Current),31.4626319451,unincorporated-area,-118.3171036690717,34.50492699979684,POINT(34.504926999796837 -118.3171036690717)
4,L.A. County Neighborhoods (Current),alhambra,"{'type': 'MultiPolygon', 'coordinates': [[[[-1...",L.A. County Neighborhood (Current),alhambra,Alhambra,Alhambra L.A. County Neighborhood (Current),7.62381430605,standalone-city,-118.1365120000002,34.08553899912357,POINT(34.085538999123571 -118.13651200000021)


In [8]:
#remove non-important cols from the dataframe w.r.t to the project requirements
df.drop(columns=['display_na', 'external_i', 'kind', 'location','set','slug','the_geom','type'], axis = 1, inplace = True)
df.head()

Unnamed: 0,name,sqmi,latitude,longitude
0,Acton,39.3391089485,-118.16981019229348,34.49735523924085
1,Adams-Normandie,0.805350187789,-118.30020800000013,34.03146149912416
2,Agoura Hills,8.14676029818,-118.75988450000015,34.146736499122795
3,Agua Dulce,31.4626319451,-118.3171036690717,34.50492699979684
4,Alhambra,7.62381430605,-118.1365120000002,34.08553899912357


In [9]:
df.shape

(272, 4)

In [10]:
len(df.name.unique())

272

In [11]:
#latitude and longitude cols are flipped here so need to fix this 
df.rename(columns = {'latitude' : 'Longitude','longitude' : 'Latitude', 'name': 'Neighbourhood'}, inplace = True)
df.head()

Unnamed: 0,Neighbourhood,sqmi,Longitude,Latitude
0,Acton,39.3391089485,-118.16981019229348,34.49735523924085
1,Adams-Normandie,0.805350187789,-118.30020800000013,34.03146149912416
2,Agoura Hills,8.14676029818,-118.75988450000015,34.146736499122795
3,Agua Dulce,31.4626319451,-118.3171036690717,34.50492699979684
4,Alhambra,7.62381430605,-118.1365120000002,34.08553899912357


In [12]:
df.dtypes

Neighbourhood    object
sqmi             object
Longitude        object
Latitude         object
dtype: object

In [13]:
df['Latitude'] = df['Latitude'].astype(float)
df['Longitude'] = df['Longitude'].astype(float)
df['sqmi'] = df['sqmi'].astype(float)
df.dtypes

Neighbourhood     object
sqmi             float64
Longitude        float64
Latitude         float64
dtype: object

In [14]:
address ='Los Angeles, California'

geolocator = Nominatim(user_agent = 'LA_Explorer')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

print('LA geographical coordinates:= Latitude:{}, Longitude:{}'.format(latitude, longitude))

LA geographical coordinates:= Latitude:34.0536909, Longitude:-118.242766


In [15]:
#create map of LA marking all the neighbourhoods in it using folium 
map_LA = folium.Map(location= [latitude, longitude], zoom_start = 10 )

for label, lat, long in zip(df['Neighbourhood'], df['Latitude'], df['Longitude']):
    label = folium.Popup(label, parse_html = True)
    
    folium.CircleMarker(
        [lat,long],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_LA)
    
map_LA

In [16]:
#we want to focus only on some neighbourhoods which are within some certain area limit(radius), hence we will define a 
#function that can return the distance of each neighbourhood from LA centroid for that purpose 

R = 6373.0 #earth radius approx. in km

def dist_from_la(lat, long):
    lat_la = np.radians(latitude)
    long_la = np.radians(longitude)
    lat_neigh = np.radians(lat)
    long_neigh = np.radians(long)
    
    dlat = lat_neigh - lat_la
    dlong = long_neigh - long_la
    
    a = np.sin(dlat / 2)**2 + np.cos(lat_la) * np.cos(lat_neigh) * np.sin(dlong / 2)**2
    c = 2 * np.arctan(np.sqrt(a)/ np.sqrt(1 - a))

    distance = R * c
    return distance

In [17]:
#Keeping neighborhoods only within a given radius (given by rad) around the LA centroid  
rad = 30
dist = dist_from_la(df['Latitude'], df['Longitude'])
df_trunc = df[dist<=rad]
df_trunc.shape

(199, 4)

In [18]:
#create map of LA marking the neighbourhoods within radius 30 using folium 
map_LA = folium.Map(location= [latitude, longitude], zoom_start = 10 )

for label, lat, long in zip(df_trunc['Neighbourhood'], df_trunc['Latitude'], df_trunc['Longitude']):
    label = folium.Popup(label, parse_html = True)
    
    folium.CircleMarker(
        [lat,long],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_LA)
    
map_LA

In [19]:
#now we will obtain nearby venues in each neighborhood by using the FourSquare API

CLIENT_ID = 'VSNQS55IP3RD5F5K4NA5TOICT4ENYU1XTUDBVHU2ROH4HOE1' #Foursquare ID
CLIENT_SECRET = 'G4VAQYSKTGJR10VQXEV2NDDWLVLDYYBAXJ53BN3FSIQGYMQN' #Foursquare Secret
VERSION = '20200624'
LIMIT = 70
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: VSNQS55IP3RD5F5K4NA5TOICT4ENYU1XTUDBVHU2ROH4HOE1
CLIENT_SECRET:G4VAQYSKTGJR10VQXEV2NDDWLVLDYYBAXJ53BN3FSIQGYMQN


In [20]:
#defining a function to return the nearby venues of each neighbourhood mentioned in the df_trunc dataframe

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [21]:
LA_venues = getNearbyVenues(names=df_trunc['Neighbourhood'],
                                   latitudes=df_trunc['Latitude'],
                                   longitudes=df_trunc['Longitude']
                                  )

Adams-Normandie
Alhambra
Alondra Park
Artesia
Altadena
Arcadia
Arleta
Arlington Heights
Athens
Atwater Village
Avocado Heights
Vermont-Slauson
Baldwin Hills/Crenshaw
Baldwin Park
Bel-Air
Bellflower
Bell Gardens
Bell
Beverly Crest
Beverly Grove
Burbank
Koreatown
Beverly Hills
Beverlywood
Boyle Heights
Bradbury
Brentwood
Broadway-Manchester
Carson
Carthay
Central-Alameda
Century City
Cerritos
Chesterfield Square
Cheviot Hills
Chinatown
Commerce
Compton
Cypress Park
La Mirada
Cudahy
Culver City
Del Aire
Del Rey
Downey
Downtown
Duarte
Eagle Rock
East Compton
East Hollywood
East La Mirada
East Los Angeles
East Pasadena
East San Gabriel
Echo Park
El Monte
El Segundo
El Sereno
Elysian Park
Elysian Valley
Vermont Square
Encino
Exposition Park
Fairfax
Florence-Firestone
Florence
Gardena
Glassell Park
Green Meadows
Vermont Vista
Glendale
Gramercy Park
Griffith Park
Hacienda Heights
Hancock Park
Hansen Dam
Harbor City
Harbor Gateway
Harvard Heights
Harvard Park
Walnut Park
Hawaiian Gardens
Hawtho

In [22]:
print(LA_venues.shape)
LA_venues.head()

(2674, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Adams-Normandie,34.031461,-118.300208,Orange Door Sushi,34.032485,-118.299368,Sushi Restaurant
1,Adams-Normandie,34.031461,-118.300208,7-Eleven,34.033027,-118.29996,Convenience Store
2,Adams-Normandie,34.031461,-118.300208,Shell,34.033095,-118.300025,Gas Station
3,Adams-Normandie,34.031461,-118.300208,Little Xian,34.032292,-118.299465,Sushi Restaurant
4,Adams-Normandie,34.031461,-118.300208,Sushi Delight,34.032501,-118.299454,Sushi Restaurant


In [26]:
#Saved Foursquare data locally in case number of calls for a day are exhausted
path='F:/Desktop/Datasets/'
LA_venues.to_csv(path+'LA_venues.csv', index=False)
LA_venues = pd.read_csv(path+'LA_venues.csv')
LA_venues.head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Adams-Normandie,34.031461,-118.300208,Orange Door Sushi,34.032485,-118.299368,Sushi Restaurant
1,Adams-Normandie,34.031461,-118.300208,7-Eleven,34.033027,-118.29996,Convenience Store
2,Adams-Normandie,34.031461,-118.300208,Shell,34.033095,-118.300025,Gas Station
3,Adams-Normandie,34.031461,-118.300208,Little Xian,34.032292,-118.299465,Sushi Restaurant
4,Adams-Normandie,34.031461,-118.300208,Sushi Delight,34.032501,-118.299454,Sushi Restaurant


In [28]:
ven_category = LA_venues['Venue Category'].unique().tolist()

In [29]:
#fetch only the restaurant venues

ven_rest = []
for item in ven_category:
    if item.split()[-1] == 'Restaurant':
        ven_rest.append(item)
ven_rest

['Sushi Restaurant',
 'Latin American Restaurant',
 'Mexican Restaurant',
 'Fast Food Restaurant',
 'Indian Restaurant',
 'Taiwanese Restaurant',
 'Asian Restaurant',
 'Chinese Restaurant',
 'Vegetarian / Vegan Restaurant',
 'Thai Restaurant',
 'Vietnamese Restaurant',
 'Japanese Restaurant',
 'Shabu-Shabu Restaurant',
 'Korean Restaurant',
 'Mediterranean Restaurant',
 'Italian Restaurant',
 'Seafood Restaurant',
 'Dumpling Restaurant',
 'Udon Restaurant',
 'Southern / Soul Food Restaurant',
 'Satay Restaurant',
 'American Restaurant',
 'Doner Restaurant',
 'Cuban Restaurant',
 'Korean BBQ Restaurant',
 'New American Restaurant',
 'Brazilian Restaurant',
 'Ramen Restaurant',
 'Restaurant',
 'Indonesian Restaurant',
 'Ethiopian Restaurant',
 'French Restaurant',
 'Cajun / Creole Restaurant',
 'Dim Sum Restaurant',
 'Filipino Restaurant',
 'Greek Restaurant',
 'Middle Eastern Restaurant',
 'Falafel Restaurant',
 'South American Restaurant',
 'Spanish Restaurant',
 'Peruvian Restaurant',

In [30]:
LA_ven_rest = LA_venues[LA_venues['Venue Category'].isin(ven_rest)]
print(LA_ven_rest.shape)

(667, 7)


In [31]:
LA_ven_rest.reset_index(inplace = True)

In [36]:
LA_ven_rest.head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Adams-Normandie,34.031461,-118.300208,Orange Door Sushi,34.032485,-118.299368,Sushi Restaurant
1,Adams-Normandie,34.031461,-118.300208,Little Xian,34.032292,-118.299465,Sushi Restaurant
2,Adams-Normandie,34.031461,-118.300208,Sushi Delight,34.032501,-118.299454,Sushi Restaurant
3,Adams-Normandie,34.031461,-118.300208,El Rincon Hondureño,34.032527,-118.29886,Latin American Restaurant
4,Alhambra,34.085539,-118.136512,Manny's Tacos,34.087148,-118.135275,Mexican Restaurant


In [40]:
#We have fetched all the relevant data as required. We now just have to perform clustering on our datasets so as to gain useful insights.
#we will perform clustering in 2 stages; cluster firstly based on density and secondly based on the kind of restaurants. We will then 
#try to find intersection between the 2 results so as to decide which can be the best place for setting up a new specific kind of restaurant.

#But first of all we need to determine the densities of each neighborhoods by finding the total no of restaurants in that area.

#one-hot encoding for restaurants
LA_onehot = pd.get_dummies(LA_ven_rest[['Venue Category']], prefix='',prefix_sep='')

# add neighborhood column back to dataframe
LA_onehot['Neighbourhood'] = LA_ven_rest['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [LA_onehot.columns[-1]] + list(LA_onehot.columns[:-1])
LA_onehot = LA_onehot[fixed_columns]


LA_onehot.head()

Unnamed: 0,Neighbourhood,American Restaurant,Argentinian Restaurant,Asian Restaurant,Australian Restaurant,Brazilian Restaurant,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,Cuban Restaurant,Dim Sum Restaurant,Donburi Restaurant,Doner Restaurant,Dongbei Restaurant,Dumpling Restaurant,Eastern European Restaurant,English Restaurant,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant,Filipino Restaurant,French Restaurant,Greek Restaurant,Hawaiian Restaurant,Indian Restaurant,Indonesian Restaurant,Italian Restaurant,Japanese Restaurant,Korean BBQ Restaurant,Korean Restaurant,Kosher Restaurant,Latin American Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,New American Restaurant,Persian Restaurant,Peruvian Restaurant,Ramen Restaurant,Restaurant,Russian Restaurant,Satay Restaurant,Seafood Restaurant,Shabu-Shabu Restaurant,South American Restaurant,Southern / Soul Food Restaurant,Spanish Restaurant,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Thai Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,Adams-Normandie,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
1,Adams-Normandie,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
2,Adams-Normandie,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
3,Adams-Normandie,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Alhambra,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [43]:
LA_grouped = LA_onehot.groupby('Neighbourhood').sum().reset_index()

LA_grouped.head()

Unnamed: 0,Neighbourhood,American Restaurant,Argentinian Restaurant,Asian Restaurant,Australian Restaurant,Brazilian Restaurant,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,Cuban Restaurant,Dim Sum Restaurant,Donburi Restaurant,Doner Restaurant,Dongbei Restaurant,Dumpling Restaurant,Eastern European Restaurant,English Restaurant,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant,Filipino Restaurant,French Restaurant,Greek Restaurant,Hawaiian Restaurant,Indian Restaurant,Indonesian Restaurant,Italian Restaurant,Japanese Restaurant,Korean BBQ Restaurant,Korean Restaurant,Kosher Restaurant,Latin American Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,New American Restaurant,Persian Restaurant,Peruvian Restaurant,Ramen Restaurant,Restaurant,Russian Restaurant,Satay Restaurant,Seafood Restaurant,Shabu-Shabu Restaurant,South American Restaurant,Southern / Soul Food Restaurant,Spanish Restaurant,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Thai Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,Adams-Normandie,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0
1,Alhambra,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Arcadia,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0
3,Arlington Heights,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0
4,Artesia,0,0,1,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,2


In [44]:
LA_grouped['Total_rest'] = LA_grouped.sum(axis=1)
LA_grouped.head()

  LA_grouped['Total_rest'] = LA_grouped.sum(axis=1)


Unnamed: 0,Neighbourhood,American Restaurant,Argentinian Restaurant,Asian Restaurant,Australian Restaurant,Brazilian Restaurant,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,Cuban Restaurant,Dim Sum Restaurant,Donburi Restaurant,Doner Restaurant,Dongbei Restaurant,Dumpling Restaurant,Eastern European Restaurant,English Restaurant,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant,Filipino Restaurant,French Restaurant,Greek Restaurant,Hawaiian Restaurant,Indian Restaurant,Indonesian Restaurant,Italian Restaurant,Japanese Restaurant,Korean BBQ Restaurant,Korean Restaurant,Kosher Restaurant,Latin American Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,New American Restaurant,Persian Restaurant,Peruvian Restaurant,Ramen Restaurant,Restaurant,Russian Restaurant,Satay Restaurant,Seafood Restaurant,Shabu-Shabu Restaurant,South American Restaurant,Southern / Soul Food Restaurant,Spanish Restaurant,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Thai Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Total_rest
0,Adams-Normandie,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,4
1,Alhambra,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2
2,Arcadia,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,6
3,Arlington Heights,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,11
4,Artesia,0,0,1,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,2,18


In [46]:
LA_grouped['area'] = [df_trunc[df_trunc['Neighbourhood']==neigh]['sqmi'].values[0] for neigh in LA_grouped['Neighbourhood']]
LA_grouped.head()

Unnamed: 0,Neighbourhood,American Restaurant,Argentinian Restaurant,Asian Restaurant,Australian Restaurant,Brazilian Restaurant,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,Cuban Restaurant,Dim Sum Restaurant,Donburi Restaurant,Doner Restaurant,Dongbei Restaurant,Dumpling Restaurant,Eastern European Restaurant,English Restaurant,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant,Filipino Restaurant,French Restaurant,Greek Restaurant,Hawaiian Restaurant,Indian Restaurant,Indonesian Restaurant,Italian Restaurant,Japanese Restaurant,Korean BBQ Restaurant,Korean Restaurant,Kosher Restaurant,Latin American Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,New American Restaurant,Persian Restaurant,Peruvian Restaurant,Ramen Restaurant,Restaurant,Russian Restaurant,Satay Restaurant,Seafood Restaurant,Shabu-Shabu Restaurant,South American Restaurant,Southern / Soul Food Restaurant,Spanish Restaurant,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Thai Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Total_rest,area
0,Adams-Normandie,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,4,0.80535
1,Alhambra,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,7.623814
2,Arcadia,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,6,11.150797
3,Arlington Heights,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,11,1.031415
4,Artesia,0,0,1,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,2,18,1.632204


In [47]:
LA_grouped['density'] = LA_grouped['Total_rest']/LA_grouped['area']
LA_grouped.head()

Unnamed: 0,Neighbourhood,American Restaurant,Argentinian Restaurant,Asian Restaurant,Australian Restaurant,Brazilian Restaurant,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,Cuban Restaurant,Dim Sum Restaurant,Donburi Restaurant,Doner Restaurant,Dongbei Restaurant,Dumpling Restaurant,Eastern European Restaurant,English Restaurant,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant,Filipino Restaurant,French Restaurant,Greek Restaurant,Hawaiian Restaurant,Indian Restaurant,Indonesian Restaurant,Italian Restaurant,Japanese Restaurant,Korean BBQ Restaurant,Korean Restaurant,Kosher Restaurant,Latin American Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,New American Restaurant,Persian Restaurant,Peruvian Restaurant,Ramen Restaurant,Restaurant,Russian Restaurant,Satay Restaurant,Seafood Restaurant,Shabu-Shabu Restaurant,South American Restaurant,Southern / Soul Food Restaurant,Spanish Restaurant,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Thai Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Total_rest,area,density
0,Adams-Normandie,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,4,0.80535,4.966783
1,Alhambra,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,7.623814,0.262336
2,Arcadia,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,6,11.150797,0.538078
3,Arlington Heights,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,11,1.031415,10.664958
4,Artesia,0,0,1,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,2,18,1.632204,11.028032


In [48]:
LA_density = LA_grouped[['Neighbourhood','Total_rest','area','density']]
LA_density.head()

Unnamed: 0,Neighbourhood,Total_rest,area,density
0,Adams-Normandie,4,0.80535,4.966783
1,Alhambra,2,7.623814,0.262336
2,Arcadia,6,11.150797,0.538078
3,Arlington Heights,11,1.031415,10.664958
4,Artesia,18,1.632204,11.028032


## First Clustering 

In [72]:
#First scaling the features and then using the elbow method to find the optimum k for KMeans()
X = LA_density.drop('Neighbourhood',1)[['Total_rest','density']].values
X = StandardScaler().fit_transform(X)
#model = KElbowVisualizer(KMeans(), k=10)
#model.fit(X)
#model.show()
#we get elbow at k=5 so we can consider 5 clusters for kmeans

  X = LA_density.drop('Neighbourhood',1)[['Total_rest','density']].values


In [74]:
k_clusters = 5

k_means = KMeans(n_clusters=k_clusters, random_state=0)