<h1>Import libraries</h1>

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

In [16]:
!pip install folium



In [3]:
import folium

In [5]:
!pip install geopy
from geopy.geocoders import Nominatim

Collecting geopy
[?25l  Downloading https://files.pythonhosted.org/packages/80/93/d384479da0ead712bdaf697a8399c13a9a89bd856ada5a27d462fb45e47b/geopy-1.20.0-py2.py3-none-any.whl (100kB)
[K     |████████████████████████████████| 102kB 19.2MB/s ta 0:00:01
[?25hCollecting geographiclib<2,>=1.49 (from geopy)
  Downloading https://files.pythonhosted.org/packages/8b/62/26ec95a98ba64299163199e95ad1b0e34ad3f4e176e221c40245f211e425/geographiclib-1.50-py3-none-any.whl
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-1.50 geopy-1.20.0


In [6]:
#importing libraries
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="foursquare_agent")

In [7]:
import requests

In [8]:
from sklearn.cluster import KMeans

In [10]:
#loading the data on LA Neighborhoods
LA_data = pd.read_csv("la_neighborhoods.csv")
LA_data.head(1)

Unnamed: 0,set,slug,the_geom,kind,external_i,name,display_na,sqmi,type,name_1,slug_1,latitude,longitude,location
0,L.A. County Neighborhoods (Current),acton,MULTIPOLYGON (((-118.20261747920541 34.5389897...,L.A. County Neighborhood (Current),acton,Acton,Acton L.A. County Neighborhood (Current),39.339109,unincorporated-area,,,-118.16981,34.497355,POINT(34.497355239240846 -118.16981019229348)


In [11]:
#selecting the necessary columns
LA_data = LA_data[['name', 'longitude', 'latitude']]

In [12]:
LA_data.rename (columns = {'name':'Neighborhood'}, inplace = True)
LA_data

Unnamed: 0,Neighborhood,longitude,latitude
0,Acton,34.497355,-118.169810
1,Adams-Normandie,34.031461,-118.300208
2,Agoura Hills,34.146736,-118.759885
3,Agua Dulce,34.504927,-118.317104
4,Alhambra,34.085539,-118.136512
...,...,...,...
267,Willowbrook,33.915711,-118.252312
268,Wilmington,33.791294,-118.259187
269,Windsor Square,34.069108,-118.319909
270,Winnetka,34.210459,-118.575220


In [13]:
#we see that longitude and latitude values are mixed up
# let's fix it
LA_data.rename(columns = {'longitude':'latitude', 'latitude':'longitude'}, inplace = True)

In [14]:
LA_data

Unnamed: 0,Neighborhood,latitude,longitude
0,Acton,34.497355,-118.169810
1,Adams-Normandie,34.031461,-118.300208
2,Agoura Hills,34.146736,-118.759885
3,Agua Dulce,34.504927,-118.317104
4,Alhambra,34.085539,-118.136512
...,...,...,...
267,Willowbrook,33.915711,-118.252312
268,Wilmington,33.791294,-118.259187
269,Windsor Square,34.069108,-118.319909
270,Winnetka,34.210459,-118.575220


In [15]:
#getting coordinates of Los Angeles and visualising the Neighborhoods
address = 'Los Angeles, Ca'
location = geolocator.geocode("{}, La, Ca".format(address))
print(location.latitude, location.longitude)
LA = [location.latitude,location.longitude]

34.0536909 -118.2427666


In [24]:
#create the map
# create map of LA using latitude and longitude values
map_LA = folium.Map(location=[LA[0], LA[1]], zoom_start=9)

# add markers to map
for lat, lng, label in zip(LA_data['latitude'], LA_data['longitude'], LA_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=3,
        popup=label,
        color='grey',
        fill=True,
        fill_color='grey',
        fill_opacity=0.7,
        parse_html=False).add_to(map_LA)  
    
map_LA

<h1>Clustering using additional Foursquare data</h1>

In [25]:
#Define Foursquare Credentials and Version
CLIENT_ID = '5UHQPSVP1JJB3J35A4FLLBW4J512AVWBOSEIWUUKLR0KV0BZ' # your Foursquare ID
CLIENT_SECRET = 'A2EQBFHX2X44RL0QJQMUOUU4YH4SOTB12ZMTZZAQVYPF2YG5' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 5UHQPSVP1JJB3J35A4FLLBW4J512AVWBOSEIWUUKLR0KV0BZ
CLIENT_SECRET:A2EQBFHX2X44RL0QJQMUOUU4YH4SOTB12ZMTZZAQVYPF2YG5


In [26]:
# a fuction for exploring all Neighborhoods and surrounding venues in LA
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [27]:
# lets explore the Neighborhood from the data frame
LIMIT = 100
LA_venues = getNearbyVenues(names=LA_data['Neighborhood'],
                                   latitudes=LA_data['latitude'],
                                   longitudes=LA_data['longitude']
                                  )

Acton
Adams-Normandie
Agoura Hills
Agua Dulce
Alhambra
Alondra Park
Artesia
Altadena
Angeles Crest
Arcadia
Arleta
Arlington Heights
Athens
Atwater Village
Avalon
Avocado Heights
Azusa
Vermont-Slauson
Baldwin Hills/Crenshaw
Baldwin Park
Bel-Air
Bellflower
Bell Gardens
Green Valley
Bell
Beverly Crest
Beverly Grove
Burbank
Koreatown
Beverly Hills
Beverlywood
Boyle Heights
Bradbury
Brentwood
Broadway-Manchester
Calabasas
Canoga Park
Carson
Carthay
Castaic Canyons
Chatsworth
Castaic
Central-Alameda
Century City
Cerritos
Charter Oak
Chatsworth Reservoir
Chesterfield Square
Cheviot Hills
Chinatown
Citrus
Claremont
Northridge
Commerce
Compton
Cypress Park
La Mirada
Covina
Cudahy
Culver City
Del Aire
Del Rey
Desert View Highlands
Diamond Bar
Downey
Downtown
Duarte
Eagle Rock
East Compton
East Hollywood
East La Mirada
Elizabeth Lake
East Los Angeles
East Pasadena
East San Gabriel
Echo Park
El Monte
El Segundo
El Sereno
Elysian Park
Elysian Valley
Vermont Square
Encino
Exposition Park
Fairfax
Flo

In [28]:
#shape of the data
LA_venues.shape

(2987, 7)

In [29]:
LA_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Acton,34.497355,-118.16981,Epik Engineering,34.498718,-118.168046,Construction & Landscaping
1,Acton,34.497355,-118.16981,Alma Gardening Co.,34.494762,-118.17255,Construction & Landscaping
2,Adams-Normandie,34.031461,-118.300208,Orange Door Sushi,34.032485,-118.299368,Sushi Restaurant
3,Adams-Normandie,34.031461,-118.300208,Shell,34.033095,-118.300025,Gas Station
4,Adams-Normandie,34.031461,-118.300208,Sushi Delight,34.032445,-118.299525,Sushi Restaurant


In [50]:
# number of unique categories
print('There are {} uniques categories.'.format(len(LA_venues['Venue Category'].unique())))

There are 321 uniques categories.


In [53]:
#unique categories
LA_venues['Venue Category'].unique()

array(['Construction & Landscaping', 'Sushi Restaurant', 'Gas Station',
       'Taco Place', 'Latin American Restaurant', 'Locksmith',
       'Playground', 'Grocery Store', 'Park', 'Fast Food Restaurant',
       'Indian Restaurant', 'Bakery', 'Café', 'Breakfast Spot',
       'Restaurant', 'BBQ Joint', 'Brewery', 'American Restaurant',
       'Multiplex', 'Burger Joint', 'Thai Restaurant', 'Deli / Bodega',
       'Chinese Restaurant', 'Lounge', 'Mexican Restaurant',
       'Shipping Store', 'Hotel', 'Sporting Goods Shop', 'Airport',
       'Pizza Place', 'Video Store', 'Convenience Store',
       'Hardware Store', 'Bagel Shop', 'Pet Store',
       'Health & Beauty Service', 'Baseball Field',
       'Gym / Fitness Center', 'Frozen Yogurt Shop', 'Salon / Barbershop',
       'Taiwanese Restaurant', 'Sandwich Place', 'Asian Restaurant',
       'Shopping Mall', 'Vegetarian / Vegan Restaurant', 'Gift Shop',
       'Bubble Tea Shop', 'Vietnamese Restaurant', 'Boutique', 'Tea Room',
       'Pha

Let's check how many venues were returned for each neighborhood

In [30]:
LA_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Acton,2,2,2,2,2,2
Adams-Normandie,10,10,10,10,10,10
Agoura Hills,28,28,28,28,28,28
Agua Dulce,1,1,1,1,1,1
Alhambra,14,14,14,14,14,14
...,...,...,...,...,...,...
Willowbrook,4,4,4,4,4,4
Wilmington,12,12,12,12,12,12
Windsor Square,4,4,4,4,4,4
Winnetka,12,12,12,12,12,12


<h2>The number of unique categories</h2>

In [31]:
#the number of unique categories
print('There are {} uniques categories.'.format(len(LA_venues['Venue Category'].unique())))

There are 321 uniques categories.


<h2>One hot encoding</h2>

In [32]:
# one hot encoding
LA_onehot = pd.get_dummies(LA_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
LA_onehot['Neighborhood'] = LA_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [LA_onehot.columns[-1]] + list(LA_onehot.columns[:-1])
LA_onehot = LA_onehot[fixed_columns]

LA_onehot.head()

Unnamed: 0,Yoga Studio,ATM,Accessories Store,Airport,Airport Lounge,Airport Terminal,Alternative Healer,American Restaurant,Amphitheater,Antique Shop,...,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Watch Shop,Weight Loss Center,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [33]:
LA_grouped = LA_onehot.groupby('Neighborhood').mean().reset_index()
LA_grouped

Unnamed: 0,Neighborhood,Yoga Studio,ATM,Accessories Store,Airport,Airport Lounge,Airport Terminal,Alternative Healer,American Restaurant,Amphitheater,...,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Watch Shop,Weight Loss Center,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,Acton,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.000000,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Adams-Normandie,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.000000,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Agoura Hills,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.035714,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Agua Dulce,0.0,0.0,0.0,1.0,0.00,0.0,0.0,0.000000,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Alhambra,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.000000,0.0,...,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
233,Willowbrook,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.000000,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
234,Wilmington,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.000000,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
235,Windsor Square,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.000000,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
236,Winnetka,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.000000,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Let's print each neighborhood along with the top 5 most common venues

In [34]:
num_top_venues = 5

for hood in LA_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = LA_grouped[LA_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Acton----
                        venue  freq
0  Construction & Landscaping   1.0
1                 Yoga Studio   0.0
2              Nightlife Spot   0.0
3             Other Nightlife   0.0
4        Other Great Outdoors   0.0


----Adams-Normandie----
                       venue  freq
0           Sushi Restaurant   0.3
1  Latin American Restaurant   0.1
2                 Playground   0.1
3                  Locksmith   0.1
4              Grocery Store   0.1


----Agoura Hills----
                  venue  freq
0  Fast Food Restaurant  0.14
1        Breakfast Spot  0.07
2    Chinese Restaurant  0.07
3      Sushi Restaurant  0.07
4   Sporting Goods Shop  0.04


----Agua Dulce----
                  venue  freq
0               Airport   1.0
1           Yoga Studio   0.0
2        Nightlife Spot   0.0
3       Other Nightlife   0.0
4  Other Great Outdoors   0.0


----Alhambra----
                  venue  freq
0     Convenience Store  0.29
1   Sporting Goods Shop  0.07
2           Video Sto

#### Let's put that into a *pandas* dataframe

In [35]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Now let's create the new dataframe and display the top 10 venues for each neighborhood.

In [36]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to cnumber of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = LA_grouped['Neighborhood']

for ind in np.arange(LA_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(LA_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head(5)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Acton,Construction & Landscaping,Women's Store,Fabric Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service
1,Adams-Normandie,Sushi Restaurant,Playground,Gas Station,Latin American Restaurant,Taco Place,Park,Grocery Store,Locksmith,Event Service,Ethiopian Restaurant
2,Agoura Hills,Fast Food Restaurant,Breakfast Spot,Chinese Restaurant,Sushi Restaurant,Brewery,Gas Station,Bakery,Lounge,Sporting Goods Shop,Burger Joint
3,Agua Dulce,Airport,Women's Store,Fabric Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service
4,Alhambra,Convenience Store,Bagel Shop,Pizza Place,Breakfast Spot,Health & Beauty Service,Mexican Restaurant,Hardware Store,Video Store,Fast Food Restaurant,Sporting Goods Shop


## 4. Cluster Neighborhoods

Run *k*-means to cluster the neighborhood into 5 clusters.

In [37]:
# set number of clusters
kclusters = 5

LA_grouped_clustering = LA_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(LA_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([4, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [38]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

LA_merged = LA_data

# merge LA_grouped with LA_data to add latitude/longitude for each neighborhood
LA_merged = LA_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

LA_merged['Cluster Labels'].value_counts() # check the last columns!

0.0    212
3.0     10
1.0      6
4.0      6
2.0      4
Name: Cluster Labels, dtype: int64

<h2>Visualizing Clusters</h2>

In [39]:
LA_merged = LA_merged.dropna()
LA_merged.head(5)

Unnamed: 0,Neighborhood,latitude,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Acton,34.497355,-118.16981,4.0,Construction & Landscaping,Women's Store,Fabric Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service
1,Adams-Normandie,34.031461,-118.300208,0.0,Sushi Restaurant,Playground,Gas Station,Latin American Restaurant,Taco Place,Park,Grocery Store,Locksmith,Event Service,Ethiopian Restaurant
2,Agoura Hills,34.146736,-118.759885,0.0,Fast Food Restaurant,Breakfast Spot,Chinese Restaurant,Sushi Restaurant,Brewery,Gas Station,Bakery,Lounge,Sporting Goods Shop,Burger Joint
3,Agua Dulce,34.504927,-118.317104,0.0,Airport,Women's Store,Fabric Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service
4,Alhambra,34.085539,-118.136512,0.0,Convenience Store,Bagel Shop,Pizza Place,Breakfast Spot,Health & Beauty Service,Mexican Restaurant,Hardware Store,Video Store,Fast Food Restaurant,Sporting Goods Shop


In [41]:
# create map
map_clusters = folium.Map(location=[LA[0], LA[1]], zoom_start=9)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(LA_merged['latitude'], LA_merged['longitude'], LA_merged['Neighborhood'], LA_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=3,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

<h1>Examining and exporting clusters for presentation to the business team</h1>

In [81]:
#Examining and exporting the clusters for presentation
LA_merged.loc[LA_merged['Cluster Labels'] == 0, LA_merged.columns[[0]+[4] + list(range(5, LA_merged.shape[1]))]].to_csv('cluster_0.csv')
LA_merged.loc[LA_merged['Cluster Labels'] == 0, LA_merged.columns[[0]+[4] + list(range(5, LA_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Adams-Normandie,Sushi Restaurant,Playground,Gas Station,Latin American Restaurant,Taco Place,Park,Grocery Store,Locksmith,Event Service,Ethiopian Restaurant
2,Agoura Hills,Fast Food Restaurant,Breakfast Spot,Chinese Restaurant,Sushi Restaurant,Brewery,Gas Station,Bakery,Lounge,Sporting Goods Shop,Burger Joint
3,Agua Dulce,Airport,Women's Store,Fabric Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service
4,Alhambra,Convenience Store,Bagel Shop,Pizza Place,Breakfast Spot,Health & Beauty Service,Mexican Restaurant,Hardware Store,Video Store,Fast Food Restaurant,Sporting Goods Shop
5,Alondra Park,Park,Baseball Field,Gym / Fitness Center,Women's Store,Ethiopian Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service
...,...,...,...,...,...,...,...,...,...,...,...
267,Willowbrook,Breakfast Spot,Convenience Store,Park,Grocery Store,Women's Store,Ethiopian Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant
268,Wilmington,Fast Food Restaurant,Pizza Place,Sandwich Place,Discount Store,Mexican Restaurant,Convenience Store,Shoe Store,Museum,Park,Dumpling Restaurant
269,Windsor Square,Cosmetics Shop,Airport Lounge,Dog Run,Gym / Fitness Center,Women's Store,Event Service,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant
270,Winnetka,Convenience Store,Latin American Restaurant,Filipino Restaurant,Pizza Place,Health & Beauty Service,Ice Cream Shop,Bar,Mexican Restaurant,South American Restaurant,Fried Chicken Joint


In [82]:
#Examining and exporting the clusters for presentation
LA_merged.loc[LA_merged['Cluster Labels'] == 1, LA_merged.columns[[0]+[4] + list(range(5, LA_merged.shape[1]))]].to_csv('cluster_1.csv')
LA_merged.loc[LA_merged['Cluster Labels'] == 1, LA_merged.columns[[0]+[4] + list(range(5, LA_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
91,Glendale,Trail,Women's Store,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant
158,Monrovia,Trail,Women's Store,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant
161,Mount Washington,Trail,Park,Garden,Women's Store,Entertainment Service,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant
191,Rancho Palos Verdes,Trail,Women's Store,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant
235,Tujunga,Lake,Trail,Women's Store,Ethiopian Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service
241,Unincorporated Santa Monica Mountains,Trail,Women's Store,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant


In [83]:
#Examining and exporting the clusters for presentation
LA_merged.loc[LA_merged['Cluster Labels'] == 2, LA_merged.columns[[0]+[4] + list(range(5, LA_merged.shape[1]))]].to_csv('cluster_2.csv')
LA_merged.loc[LA_merged['Cluster Labels'] == 2, LA_merged.columns[[0]+[4] + list(range(5, LA_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
29,Beverly Hills,Food Truck,Sake Bar,Women's Store,Ethiopian Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service
33,Brentwood,Food Truck,Women's Store,Event Service,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant
44,Cerritos,Food Truck,Korean Restaurant,Women's Store,Event Service,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service
196,Rowland Heights,Food Truck,Mexican Restaurant,Women's Store,Fabric Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant


In [84]:
#Examining and exporting the clusters for presentation
LA_merged.loc[LA_merged['Cluster Labels'] == 3, LA_merged.columns[[0]+[4] + list(range(5, LA_merged.shape[1]))]].to_csv('cluster_3.csv')
LA_merged.loc[LA_merged['Cluster Labels'] == 3, LA_merged.columns[[0]+[4] + list(range(5, LA_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
40,Chatsworth,Park,Women's Store,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant
96,Hacienda Heights,Park,Women's Store,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant
122,La Crescenta-Montrose,Park,Women's Store,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant
135,La Verne,Park,Women's Store,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant
160,Montecito Heights,Lake,Park,Drugstore,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant
184,Porter Ranch,Park,Home Service,Women's Store,Ethiopian Restaurant,Drugstore,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant
216,South El Monte,Mexican Restaurant,Park,Women's Store,Event Service,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant
218,South Park,Park,Women's Store,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant
262,West San Dimas,Other Repair Shop,Park,Women's Store,Ethiopian Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service
266,Whittier Narrows,Lake,Park,Drugstore,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant


In [85]:
#Examining and exporting the clusters for presentation
LA_merged.loc[LA_merged['Cluster Labels'] == 4, LA_merged.columns[[0]+[4] + list(range(5, LA_merged.shape[1]))]].to_csv('cluster_4.csv')
LA_merged.loc[LA_merged['Cluster Labels'] == 4, LA_merged.columns[[0]+[4] + list(range(5, LA_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Acton,Construction & Landscaping,Women's Store,Fabric Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service
51,Claremont,Construction & Landscaping,Women's Store,Fabric Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service
73,East Pasadena,Construction & Landscaping,Women's Store,Fabric Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service
94,Granada Hills,Home Service,Construction & Landscaping,Women's Store,Event Service,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service
104,Hasley Canyon,Construction & Landscaping,Women's Store,Fabric Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service
128,Lake Los Angeles,Construction & Landscaping,Women's Store,Fabric Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service


<h1>Neighborhoods by the number of Chinese restaurants</h1>

In [59]:
LA_ch_r = LA_venues[LA_venues['Venue Category']=='Chinese Restaurant']
LA_ch_r = LA_ch_r[['Neighborhood','Venue Category']]
LA_ch_r.groupby('Neighborhood').count()
num_ch_r = LA_ch_r.groupby('Neighborhood').count()
num_ch_r = num_ch_r.sort_values(by = "Venue Category", ascending=False)
num_ch_r.to_csv('Chinese restaurants.csv')
num_ch_r.head(10)

Unnamed: 0_level_0,Venue Category
Neighborhood,Unnamed: 1_level_1
Chinatown,12
Diamond Bar,5
Artesia,4
La Puente,3
Lomita,2
Gardena,2
Downey,2
Agoura Hills,2
Century City,2
Beverly Grove,2


<h2>Visualize 10 neighborhoods with the most number of chinese restaurants</h2>

In [60]:
num_ch_r_merged = LA_data
num_ch_r_merged

Unnamed: 0,Neighborhood,latitude,longitude
0,Acton,34.497355,-118.169810
1,Adams-Normandie,34.031461,-118.300208
2,Agoura Hills,34.146736,-118.759885
3,Agua Dulce,34.504927,-118.317104
4,Alhambra,34.085539,-118.136512
...,...,...,...
267,Willowbrook,33.915711,-118.252312
268,Wilmington,33.791294,-118.259187
269,Windsor Square,34.069108,-118.319909
270,Winnetka,34.210459,-118.575220


In [61]:
num_ch_r_merged = num_ch_r_merged.merge(num_ch_r, on=['Neighborhood'])
num_ch_r_merged

Unnamed: 0,Neighborhood,latitude,longitude,Venue Category
0,Agoura Hills,34.146736,-118.759885,2
1,Artesia,33.866896,-118.080101,4
2,Arcadia,34.13323,-118.030419,1
3,Arlington Heights,34.04491,-118.323408,1
4,Vermont-Slauson,33.983914,-118.290358,1
5,Bellflower,33.888013,-118.129032,1
6,Beverly Grove,34.076633,-118.376102,2
7,Century City,34.055326,-118.415083,2
8,Chinatown,34.06351,-118.237006,12
9,Northridge,34.238805,-118.527969,1


In [62]:
num_ch_r_merged['Venue Category'].max()

12

In [66]:
# create map
map_clusters = folium.Map(location=[LA[0], LA[1]], zoom_start=9)

# set color scheme for the clusters
x = np.arange(num_ch_r_merged['Venue Category'].max())
ys = [i + x + (i*x)**2 for i in range(num_ch_r_merged['Venue Category'].max())]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(num_ch_r_merged['latitude'], num_ch_r_merged['longitude'], num_ch_r_merged['Neighborhood'], num_ch_r_merged['Venue Category']):
    label = folium.Popup(str(poi) + ' Number of Chinese restaurants ' + str(cluster), parse_html=True)
    if cluster == 12:
        color = 'Red'
    elif cluster == 5:
        color = '#ff6600'
    elif cluster == 4:
        color = 'Orange'
    elif cluster == 3:
        color = '#ffcc00'
    elif cluster == 2:
        color = 'yellow'
    elif cluster == 1:
        color = 'white'
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [67]:
LA_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Acton,34.497355,-118.16981,Epik Engineering,34.498718,-118.168046,Construction & Landscaping
1,Acton,34.497355,-118.16981,Alma Gardening Co.,34.494762,-118.17255,Construction & Landscaping
2,Adams-Normandie,34.031461,-118.300208,Orange Door Sushi,34.032485,-118.299368,Sushi Restaurant
3,Adams-Normandie,34.031461,-118.300208,Shell,34.033095,-118.300025,Gas Station
4,Adams-Normandie,34.031461,-118.300208,Sushi Delight,34.032445,-118.299525,Sushi Restaurant


In [74]:
Ch_r_map = LA_venues

In [75]:
Ch_r_map = Ch_r_map [Ch_r_map['Venue Category']== 'Chinese Restaurant' ]
Ch_r_map =Ch_r_map [['Venue','Venue Latitude', 'Venue Longitude']]
Ch_r_map.head(5)

Unnamed: 0,Venue,Venue Latitude,Venue Longitude
29,Mandarin Lotus,34.144848,-118.762822
35,Panda Express,34.146469,-118.758639
68,Great Seafood Harbor,33.865016,-118.082561
76,Yummy Boba,33.864985,-118.08329
80,Shanghai Style Restaurant,33.865233,-118.082987


In [77]:
#saving the table to csv for later presentation
Ch_r_map.to_csv('all_chisese_restaurants_in_la.csv')

In [72]:
#create the map
# create map of LA using latitude and longitude values
map_LA = folium.Map(location=[LA[0], LA[1]], zoom_start=9)

# add markers to map
for lat, lng, label in zip(Ch_r_map['Venue Latitude'], Ch_r_map['Venue Longitude'], Ch_r_map['Venue']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=3,
        popup=label,
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=0.7,
        parse_html=False).add_to(map_LA)  
    
map_LA