### Import Libraries/Packages

In [97]:
import pandas as pd
import numpy as np
import folium
from bs4 import BeautifulSoup
#import geocoder
import requests
import geopy
from geopy.geocoders import Nominatim
from sklearn.cluster import KMeans

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

### Get Neighborhood List from Wikipedia

In [2]:
wikiURL = 'https://en.wikipedia.org/wiki/List_of_Austin_neighborhoods'
#wikiURL = 'https://en.wikipedia.org/wiki/Category:Neighborhoods_in_Austin,_Texas'
WikiData = requests.get(wikiURL).text

#I needed to install lxml package for this to work, not entirely sure why
AustinWikiPage  = BeautifulSoup(WikiData, 'lxml')

In [3]:
#because of the way the Wiki page is set up, tons of other nonimportant information is listed in the same way as the info we need
#so I am going to gather the info and then remove everything after our last item
AustinNeighborhoods = []

for row in AustinWikiPage.find_all('li', class_=''):
    AustinNeighborhoods.append(row.text)
    
#now we will decide what is the last value we want (shown in Wikipedia)
lastValue = AustinNeighborhoods.index('Woodstone Village')
#we will remove everything after the lastValue since it's unnecessary information
AustinNeighborhoods = AustinNeighborhoods[:lastValue+1]
print('Austin has', len(AustinNeighborhoods), 'neighborhoods according to Wikipedia')

Austin has 95 neighborhoods according to Wikipedia


### Getting Coordinates for Each Neighborhood

In [4]:
# # define a function to get coordinates
# def get_latlng(neighborhood):
#     # initialize your variable to None
#     lat_lng_coords = None
#     # loop until you get the coordinates
#     while(lat_lng_coords is None):
#         g = geocoder.arcgis('{}, Austin, Texas'.format(neighborhood))
#         lat_lng_coords = g.latlng
#     return lat_lng_coords

In [5]:
# define a function to get coordinates
locator = Nominatim(user_agent='myGeocoder')

def get_latlng(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    try:
        while(lat_lng_coords is None):
            location = locator.geocode('{}, Austin, Texas'.format(neighborhood)) 
            lat_lng_coords = (location.latitude, location.longitude)
            #print(neighborhood) #used to debug
        return lat_lng_coords
    except:
        pass #I know this isn't great but some of the neighborhoods don't "exist"

#coords = [get_latlng(neighborhood) for neighborhood in test['Neighborhoods'].tolist()]

In [6]:
#running the function with the neighborhoods and storing them as a list
coords = [get_latlng(neighborhood) for neighborhood in AustinNeighborhoods]

In [7]:
#goes through the list and checks which values are None (the ones that gave an error)
#it then changes None to (0,0) so I can then remove them later in dataframe
coords = [(0,0) if value is None else value for value in coords]

#splitting the list into two separate lists, one for longitude and the other for latitude
lat, long = map(list, zip(*coords)) 

In [8]:
#creating a new dataframe that shows each neighborhood with their respective long/lat
AustinDF = pd.DataFrame(
    {'Neighborhoods': AustinNeighborhoods,
     'Latitude': lat,
     'Longitude': long
    })

#removes the entries where long/lat = 0
#I am using both just to be extra safe
AustinDF = AustinDF[(AustinDF['Latitude']!= 0) &
                    (AustinDF['Longitude']!= 0)]

### Create Folium Map

In [9]:
AustinMap = folium.Map(location=[30.2672, -97.7431], 
                       zoom_start=11) #those are the coordinates to Austin

# add markers to map
for lat, long, neighborhood in zip(AustinDF['Latitude'], AustinDF['Longitude'], AustinDF['Neighborhoods']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius=5,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(AustinMap)  
    
AustinMap

### Use FourSquare to get Food Truck Info

In [144]:
# define Foursquare Credentials and Version
CLIENT_ID = 'Client ID' # your Foursquare ID
CLIENT_SECRET = 'Client Secret' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100
radius = 2000

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: Client ID
CLIENT_SECRET:Client Secret


In [33]:
venues = []

for lat, long, neighborhood in zip(AustinDF['Latitude'], AustinDF['Longitude'], AustinDF['Neighborhoods']):
    
    # create the API request URL
    url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [116]:
# convert the venues list into a new DataFrame
AustinEatery = pd.DataFrame(venues, columns = ['Neighborhoods', 
                                            'Latitude', 'Longitude', 
                                            'VenueName', 'VenueLatitude', 
                                            'VenueLongitude', 'Category'])

print(AustinEatery.shape)
AustinEatery.head()

(6790, 7)


Unnamed: 0,Neighborhoods,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,Category
0,Bryker Woods,30.305246,-97.754585,Kerbey Lane Café,30.30803,-97.75047,Café
1,Bryker Woods,30.305246,-97.754585,Anderson's Coffee Co,30.308382,-97.750355,Coffee Shop
2,Bryker Woods,30.305246,-97.754585,Tiny Boxwoods,30.306058,-97.749789,American Restaurant
3,Bryker Woods,30.305246,-97.754585,Tiny's Milk And Cookies,30.305971,-97.74995,Bakery
4,Bryker Woods,30.305246,-97.754585,Brykerwood Veterinary Clinic,30.305978,-97.749611,Veterinarian


### Now We Look at the Neighborhoods

In [117]:
#do not filter for the category you want just yet because you will only get the areas where the food truck is in
#but we want to see the relation of where are they vs where aren't they
#FoodTrucks = AustinEatery[AustinEatery['Category'] == 'Food Truck']

In [118]:
# one hot encoding
AustinOneHot = pd.get_dummies(AustinEatery[['Category']], prefix='', prefix_sep='')
#if you don't include prefix and prefix_sep, it looks weird and it adds text to the column names so it's best to leave

#create Neighborhood column and move it to the first column position
AustinOneHot.insert(0, 'Neighborhoods', AustinEatery['Neighborhoods'])

In [119]:
#look at the frequency of each category in each neighborhood
AustinCatGrouped = AustinOneHot.groupby(['Neighborhoods']).mean().reset_index()

In [120]:
#NOW we can only look at Food Truck since we have the frequency of food trucks per neighborhood
AustinFoodTruck = AustinCatGrouped[['Neighborhoods', 'Food Truck']]
AustinFoodTruck.head()

Unnamed: 0,Neighborhoods,Food Truck
0,Allandale,0.03
1,Balcones Woods,0.0
2,Barrington Oaks,0.0
3,Barton Creek,0.03
4,Barton Hills,0.010204


### Start K-Means Clustering to Create Cluster/Groups

In [121]:
#decide how many clusters you want -- typically 3 is good
clusterCount = 4

#for some reason I need to drop the column to pass it into the kmeans below
OnlyFoodTruck = AustinFoodTruck.drop(['Neighborhoods'], 1)
#if I try to pass AustinFoodTruck['Food Truck'] instead of OnlyFoodTruck, I get an error
kmeans = KMeans(n_clusters=clusterCount, random_state=0).fit(OnlyFoodTruck)

In [122]:
#now we can combine the cluster results back into the AustinFoodTruck df
AustinFoodTruck['Cluster'] = kmeans.labels_

#merge the coordinates from AustinDF
AustinFoodTruck = AustinFoodTruck.merge(AustinDF, on='Neighborhoods', how='left')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  AustinFoodTruck['Cluster'] = kmeans.labels_


In [123]:
#sort by Cluster
AustinFoodTruck.sort_values(['Cluster'], inplace=True)

In [130]:
# create map
FoodTruckMap = folium.Map(location=[30.2672, -97.7431], 
                       zoom_start=11) #those are the coordinates to Austin

# set color scheme for the clusters
x = np.arange(clusterCount)
ys = [i+x+(i*x)**2 for i in range(clusterCount)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(AustinFoodTruck['Latitude'], 
                                  AustinFoodTruck['Longitude'], 
                                  AustinFoodTruck['Neighborhoods'], 
                                  AustinFoodTruck['Cluster']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(FoodTruckMap)
       
FoodTruckMap

In [143]:
AustinFoodTruck[AustinFoodTruck['Cluster']==0].head()

Unnamed: 0,Neighborhoods,Food Truck,Cluster,Latitude,Longitude
0,Allandale,0.03,0,30.339309,-97.746972
60,South Lamar,0.02,0,30.236389,-97.782647
23,French Place,0.02,0,30.286138,-97.719715
24,Galindo,0.03,0,30.235578,-97.768515
25,Govalle,0.03,0,30.258676,-97.701216


### Conclusion

Initially, when looking at the cluster map I only added 3 clusters. When looking at the result, there were too many overlaps so I ended up increasing the cluster to 4 (0-3). Just from the data, we can see where Food Trucks are lacking, such as in Cluster 3. From my personal research into the city, I can infer that the areas Cluster 3 belong to are residential areas which, typically, aren't great areas for Food Trucks as there isn't as much foot traffic. Cluster 2 is an anomaly where only one instance exists with a Food Truck concentration of roughly 16%. Cluster 0 appears to be a great fit for Food Trucks as those are the areas with the least amount of Food Trucks per venue category. Though I make the recommendation that new Food Trucks should go to areas in Cluster 0, this is not taking into account the local ordinances regarding Food Truck regulation/laws/permits. Purely from a concentration of Food Trucks to other business, Cluster 0 is the best fit, especially in Downtown Austin.