# Exploring restaurants in LA neighbourhoods for new restaurant set up

In [7]:
#!pip install geopy ~uncomment this if not installed 
#!pip install folium ~uncomment this if not installed
#!pip install wget ~uncomment this if not installed
#!pip install yellowbrick ~uncomment this if not installed

import pandas as pd #allows data analysis by creating dataframe
import numpy as np #allows mathematical operations on arrays
import wget #importing dataset given url
pd.set_option('display.max_columns', None) #to view maximum cols in pandas if huge dataframe
pd.set_option('display.max_rows', None) #to view maximum rows in pandas if huge dataframe

from geopy.geocoders import Nominatim #to create user agent required for geocoding process 
import folium #map rendering library

import requests # library to handle requests
import json #to handle json files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

#get all plotting libraries
import matplotlib
import matplotlib.cm as cm
import matplotlib.colors as colors
import seaborn as sns
import matplotlib.pyplot as plt

#import clustering(KMeans) libraries
from sklearn.cluster import KMeans
from yellowbrick.cluster import KElbowVisualizer

from sklearn.preprocessing import StandardScaler #for scaling features, normalise data 

print('Libraries imported')


Libraries imported


In [8]:
#import data from below given url, has all information about LA neighbourhoods
la_data = wget.download('https://usc.data.socrata.com/resource/9utn-waje.json')
print('Data downloaded!')

Data downloaded!


In [9]:
with open(la_data) as json_data:
    la_data = json.load(json_data)

In [10]:
df = pd.DataFrame(la_data)
df.head()

Unnamed: 0,set,slug,the_geom,kind,external_i,name,display_na,sqmi,type,latitude,longitude,location
0,L.A. County Neighborhoods (Current),acton,"{'type': 'MultiPolygon', 'coordinates': [[[[-1...",L.A. County Neighborhood (Current),acton,Acton,Acton L.A. County Neighborhood (Current),39.3391089485,unincorporated-area,-118.16981019229348,34.49735523924085,POINT(34.497355239240846 -118.16981019229348)
1,L.A. County Neighborhoods (Current),adams-normandie,"{'type': 'MultiPolygon', 'coordinates': [[[[-1...",L.A. County Neighborhood (Current),adams-normandie,Adams-Normandie,Adams-Normandie L.A. County Neighborhood (Curr...,0.805350187789,segment-of-a-city,-118.30020800000013,34.03146149912416,POINT(34.031461499124156 -118.30020800000011)
2,L.A. County Neighborhoods (Current),agoura-hills,"{'type': 'MultiPolygon', 'coordinates': [[[[-1...",L.A. County Neighborhood (Current),agoura-hills,Agoura Hills,Agoura Hills L.A. County Neighborhood (Current),8.14676029818,standalone-city,-118.75988450000015,34.146736499122795,POINT(34.146736499122795 -118.75988450000015)
3,L.A. County Neighborhoods (Current),agua-dulce,"{'type': 'MultiPolygon', 'coordinates': [[[[-1...",L.A. County Neighborhood (Current),agua-dulce,Agua Dulce,Agua Dulce L.A. County Neighborhood (Current),31.4626319451,unincorporated-area,-118.3171036690717,34.50492699979684,POINT(34.504926999796837 -118.3171036690717)
4,L.A. County Neighborhoods (Current),alhambra,"{'type': 'MultiPolygon', 'coordinates': [[[[-1...",L.A. County Neighborhood (Current),alhambra,Alhambra,Alhambra L.A. County Neighborhood (Current),7.62381430605,standalone-city,-118.1365120000002,34.08553899912357,POINT(34.085538999123571 -118.13651200000021)


In [11]:
#remove non-important cols from the dataframe w.r.t to the project requirements
df.drop(columns=['display_na', 'external_i', 'kind', 'location','set','slug','the_geom','type'], axis = 1, inplace = True)
df.head()

Unnamed: 0,name,sqmi,latitude,longitude
0,Acton,39.3391089485,-118.16981019229348,34.49735523924085
1,Adams-Normandie,0.805350187789,-118.30020800000013,34.03146149912416
2,Agoura Hills,8.14676029818,-118.75988450000015,34.146736499122795
3,Agua Dulce,31.4626319451,-118.3171036690717,34.50492699979684
4,Alhambra,7.62381430605,-118.1365120000002,34.08553899912357


In [12]:
df.shape

(272, 4)

In [13]:
len(df.name.unique())

272

In [14]:
#latitude and longitude cols are flipped here so need to fix this 
df.rename(columns = {'latitude' : 'Longitude','longitude' : 'Latitude', 'name': 'Neighbourhood'}, inplace = True)
df.head()

Unnamed: 0,Neighbourhood,sqmi,Longitude,Latitude
0,Acton,39.3391089485,-118.16981019229348,34.49735523924085
1,Adams-Normandie,0.805350187789,-118.30020800000013,34.03146149912416
2,Agoura Hills,8.14676029818,-118.75988450000015,34.146736499122795
3,Agua Dulce,31.4626319451,-118.3171036690717,34.50492699979684
4,Alhambra,7.62381430605,-118.1365120000002,34.08553899912357


In [15]:
df.dtypes

Neighbourhood    object
sqmi             object
Longitude        object
Latitude         object
dtype: object

In [16]:
df['Latitude'] = df['Latitude'].astype(float)
df['Longitude'] = df['Longitude'].astype(float)
df['sqmi'] = df['sqmi'].astype(float)
df.dtypes

Neighbourhood     object
sqmi             float64
Longitude        float64
Latitude         float64
dtype: object

In [17]:
address ='Los Angeles, California'

geolocator = Nominatim(user_agent = 'LA_Explorer')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

print('LA geographical coordinates:= Latitude:{}, Longitude:{}'.format(latitude, longitude))

LA geographical coordinates:= Latitude:34.0536909, Longitude:-118.242766


In [18]:
#create map of LA marking all the neighbourhoods in it using folium 
map_LA = folium.Map(location= [latitude, longitude], zoom_start = 10 )

for label, lat, long in zip(df['Neighbourhood'], df['Latitude'], df['Longitude']):
    label = folium.Popup(label, parse_html = True)
    
    folium.CircleMarker(
        [lat,long],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_LA)
    
map_LA

In [19]:
#we want to focus only on some neighbourhoods which are within some certain area limit(radius), hence we will define a 
#function that can return the distance of each neighbourhood from LA centroid for that purpose 

R = 6373.0 #earth radius approx. in km

def dist_from_la(lat, long):
    lat_la = np.radians(latitude)
    long_la = np.radians(longitude)
    lat_neigh = np.radians(lat)
    long_neigh = np.radians(long)
    
    dlat = lat_neigh - lat_la
    dlong = long_neigh - long_la
    
    a = np.sin(dlat / 2)**2 + np.cos(lat_la) * np.cos(lat_neigh) * np.sin(dlong / 2)**2
    c = 2 * np.arctan(np.sqrt(a)/ np.sqrt(1 - a))

    distance = R * c
    return distance

In [20]:
#Keeping neighborhoods only within a given radius (given by rad) around the LA centroid  
rad = 30
dist = dist_from_la(df['Latitude'], df['Longitude'])
df_trunc = df[dist<=rad]
df_trunc.shape

(199, 4)

In [21]:
#create map of LA marking the neighbourhoods within radius 30 using folium 
map_LA = folium.Map(location= [latitude, longitude], zoom_start = 10 )

for label, lat, long in zip(df_trunc['Neighbourhood'], df_trunc['Latitude'], df_trunc['Longitude']):
    label = folium.Popup(label, parse_html = True)
    
    folium.CircleMarker(
        [lat,long],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_LA)
    
map_LA

In [22]:
#now we will obtain nearby venues in each neighborhood by using the FourSquare API

CLIENT_ID = '4GWYF4APL4TC4UHUU3RDXXWEXTHFAJW3DVGGBWMPT2UTNRLR' #Foursquare ID
CLIENT_SECRET = 'BXCTIXOIAMNQATA0XOXTOBOGSLYCKY5IIMMNJXP3O5YQLYNS' #Foursquare Secret
ACCESS_TOKEN = 'R2QBR51ASCHNWV230OHN2HPS5NX0HC5U3PVWHBQ1Y1SUBSJ3' #FourSquare Access Token
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 4GWYF4APL4TC4UHUU3RDXXWEXTHFAJW3DVGGBWMPT2UTNRLR
CLIENT_SECRET:BXCTIXOIAMNQATA0XOXTOBOGSLYCKY5IIMMNJXP3O5YQLYNS


In [25]:
#defining a function to return the nearby venues of each neighbourhood mentioned in the df_trunc dataframe

def get_nearby_venues(names, latitudes, longitudes, radius=500):
    
    venues_list = []
    for name, lat, long in zip(names, latitudes, longitudes):
        print(name)
        
        #create API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius,
        LIMIT)
        
        #make a get request
        results = requests.get(url).json()["response"]["groups"][0]["items"]
        
        venues_list.append([(
            name,
            lat,
            long,
            v["venue"]["name"],
            v["venue"]["location"]["lat"],
            v["venue"]["location"]["lng"],
            v["venue"]["categories"][0]["name"])for v in results])
        
        nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
        nearby_venues.columns = ['Neighbourhood',
                                'Neighbourhood Latitude',
                                'Neighbourhood Longitude',
                                'Venue',
                                'Venue Latitude',
                                'Venue Longitude',
                                'Venue Category']
    return nearby_venues

In [26]:
LA_venues = get_nearby_venues(names= df_trunc['Neighbourhood'],
                             latitudes= df_trunc['Latitude'],
                             longitudes= df_trunc['Longitude'])

Adams-Normandie


KeyError: 'groups'