# Clustering Fort Worth neighborhoods

## Imports/Installs

In [1]:
import numpy as np
import pandas as pd
import json
import geocoder
from geopy.geocoders import Nominatim
import requests
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium

## Pre-processing

In [2]:
#Bring in csv of neighborhoods and create initial dataframe
hoods = pd.read_csv('C:/Users/Administrator.Sergio-PC/Desktop/Fort Worth Neighborhoods.csv')
hoods.columns = ['Neighborhood']

In [3]:
#Split on backslash, keep first name 
hoods_split = pd.DataFrame(hoods['Neighborhood'].str.split("\\",1))
hoods_split['Neighborhood'] = hoods_split['Neighborhood'].str[0]

In [4]:
#Remove words 'Greater' and 'Near' from neighborhood names so they can be located
hoods_split['Neighborhood'] = hoods_split['Neighborhood'].str.replace('Greater','')
hoods_split['Neighborhood'] = hoods_split['Neighborhood'].str.replace('Near','')
#hoods_split

In [5]:
#initialize user agent for geolocator
geolocator = Nominatim(user_agent = 'foros_explorer')
# Create a test function to get latitude and longitude

def getcoords(name):
    address = '{},Fort Worth'.format(name)
    location = geolocator.geocode(address)
    lat = location.latitude
    long = location.longitude
    print(lat,long)

In [6]:
# delete neighborhoods from dataframe that we cannot find latitude and longitude for
List = ['Far South','Fort Worth Nature Center','Far West','Greater 287 Corridor','Harriet Creek Ranch',"Historic Randol's Mill",'Historic Southside']

hoods_split = hoods_split[~hoods_split['Neighborhood'].isin(List)]

hoods_split.reset_index(drop=True,inplace=True)

hoods_split

Unnamed: 0,Neighborhood
0,Alliance Gateway
1,Benbrook Lake
2,Centreport
3,Clearfork
4,Diamond Hill-Jarvis
5,Eagle Mountain
6,Fossil Creek
7,Garden Acres
8,Gateway Park
9,287 Corridor


In [7]:
#Test to check for neighborhoods that can't be found
index=0
for hood in hoods_split['Neighborhood']:
    getcoords(hood)
    print('Index {}'.format(index))
    index+=1

32.9597161 -97.3098371
Index 0
32.6251539 -97.46958117898197
Index 1
32.8169726 -97.052651
Index 2
32.70799605 -97.39964903824261
Index 3
32.8084707 -97.33687884817545
Index 4
32.8934599 -97.4444646
Index 5
32.89313675 -97.29671143940243
Index 6
32.5945803 -97.3025153
Index 7
32.7585519 -97.27186768992246
Index 8
57.2304195 -111.4272347
Index 9
32.6408712 -97.38599811582664
Index 10
38.813932 -77.098963
Index 11
32.753177 -97.3327459
Index 12
32.72021675 -97.41073038861262
Index 13
32.75397915 -97.32953315949203
Index 14
32.6663647 -97.3318397
Index 15
32.729365 -97.31251196491371
Index 16
32.7886211 -97.3483917
Index 17
32.7321685 -97.28843178003808
Index 18
32.719586750000005 -97.41421914933309
Index 19
32.689791 -97.26180935
Index 20
32.6803248 -97.34822416089753
Index 21
32.753177 -97.3327459
Index 22
32.618375 -97.39772844110882
Index 23
32.70269385 -97.36855768763164
Index 24
32.61584015 -97.38471728541448
Index 25
32.7326315 -97.2180688
Index 26
32.902016399999994 -97.2654930581

In [8]:
#Define Variable that will become columns, and new function to append coordinates
Latitude = []
Longitude = []

def getcoords(name):
    address = '{},Fort Worth'.format(name)
    location = geolocator.geocode(address)
    lat = location.latitude
    long = location.longitude
    Latitude.append(lat)
    Longitude.append(long)

In [9]:
#iterate over all neighborhoods
for hood in hoods_split['Neighborhood']:
    getcoords(hood)

In [10]:
#append lat and long to datadrame
hoods_split['Latitude'] = Latitude
hoods_split['Longitude'] = Longitude
hoods_split

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Alliance Gateway,32.959716,-97.309837
1,Benbrook Lake,32.625154,-97.469581
2,Centreport,32.816973,-97.052651
3,Clearfork,32.707996,-97.399649
4,Diamond Hill-Jarvis,32.808471,-97.336879
5,Eagle Mountain,32.89346,-97.444465
6,Fossil Creek,32.893137,-97.296711
7,Garden Acres,32.59458,-97.302515
8,Gateway Park,32.758552,-97.271868
9,287 Corridor,57.23042,-111.427235


## Create a map of Fort Worth

In [11]:
fw = geolocator.geocode('Fort Worth, Texas')
fw_latitude = fw.latitude
fw_longitude = fw.longitude

#Create a map of Fort Worth using our coordinates

map_Fort_Worth = folium.Map(location = [fw_latitude,fw_longitude], zoom_start = 10)

map_Fort_Worth

## Use foursquare API to explore neighborhoods

Start by defining user credentials

In [12]:
CLIENT_ID = 'DVLMGSHW5ZXGOPKUJIMG0P4TH4CRUQ0K3AYQCFO3VEK4MOCK'
CLIENT_SECRET = 'RKEGR051LSDZEPTWUROZ3NPPSY5DXXWKGLYYW4EYPFGEDP03'
ACCESS_TOKEN = 'CQWUXEOI30MJUDWUD2Z2NMAUX3F22IPEWAREZPCJXI2OBRSF'
VERSION = '20180605'
LIMIT = 100
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET) 

Your credentails:
CLIENT_ID: DVLMGSHW5ZXGOPKUJIMG0P4TH4CRUQ0K3AYQCFO3VEK4MOCK
CLIENT_SECRET:RKEGR051LSDZEPTWUROZ3NPPSY5DXXWKGLYYW4EYPFGEDP03


Next, we define function(s) to extract venue category and to get nearby venues 

In [13]:
def getNearbyVenues(names,latitudes,longitudes,radius=500):
    venues_list = []
    for name,lat,lng in zip(names,latitudes,longitudes):
        print(name)
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID,CLIENT_SECRET,VERSION,lat,lng,radius,LIMIT)
        results = requests.get(url).json()['response']['groups'][0]['items']
        venues_list.append([(name,lat,lng,v['venue']['name'],v['venue']['location']['lat'],v['venue']['location']['lng'],v['venue']['categories'][0]['name']) for v in results])
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood','Neighborhood latitude','Neighborhood longitude','Venue','Venue latitude','Venue longitude','Venue category']
    return(nearby_venues)

Get the nearby venues for all of the neighborhoods in Fort Worth and store them in a new dataframe

In [14]:
fw_venues = getNearbyVenues(names=hoods_split['Neighborhood'],latitudes=hoods_split['Latitude'],longitudes=hoods_split['Longitude'])

fw_venues.head() 

Alliance Gateway
Benbrook Lake
Centreport
Clearfork
Diamond Hill-Jarvis
Eagle Mountain
Fossil Creek
Garden Acres
Gateway Park
 287 Corridor
 Candleridge
 Chapel Creek
 City View
 Como
 Downtown
 Hemphill
 Hillside
 Marine Creek
 Polytechnic
 Ridglea
 Riverside
 Seminary
 Southeast
 Summer Creek
 TCU
Hallmark
Handley
Heritage
Highland Hills
Lake Arlington
Lake Worth
Meacham
Meacham Airport
Meadowbrook
Mosier Valley
 Southside
 Westside
Northside
Oakridge Terrace
Park Glen
Rosemary Ridge
Ryanwood
Sendera Ranch
Seventeen Lakes
Stop Six
Summerfields
Tehama Ridge
Thomas Crossing
TMS
Walsh Ranch
Wedgwood
Western Hills
Wise County
Woodhaven


Unnamed: 0,Neighborhood,Neighborhood latitude,Neighborhood longitude,Venue,Venue latitude,Venue longitude,Venue category
0,Alliance Gateway,32.959716,-97.309837,Cabela's,32.96101,-97.308356,Sporting Goods Shop
1,Alliance Gateway,32.959716,-97.309837,Hill Overlooking Alliance,32.957735,-97.309287,Scenic Lookout
2,Alliance Gateway,32.959716,-97.309837,IHOP,32.957353,-97.308843,Breakfast Spot
3,Alliance Gateway,32.959716,-97.309837,SUBWAY,32.957701,-97.307488,Sandwich Place
4,Alliance Gateway,32.959716,-97.309837,Pilot Travel Centers,32.957734,-97.306777,Gas Station


##  Analyze the neighborhoods

In [15]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [16]:
#one hot encoding
fw_onehot = pd.get_dummies(fw_venues[['Venue category']],prefix="",prefix_sep="")

#add neighborhood column to our new onehot dataframe
fw_onehot['Neighborhood'] = fw_venues['Neighborhood']

In [17]:
#group rows by neighborhood and mean occurance of categories

fw_grouped = fw_onehot.groupby('Neighborhood').mean().reset_index()

#print shape to confirm changes
fw_grouped.shape

(47, 150)

## Get most common venues and put them into a dataframe

In [18]:
#Define a function that sorts venues in descending order

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [19]:
#create dataframe to display top 10 venues for each neighborhood

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = fw_grouped['Neighborhood']

for ind in np.arange(fw_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(fw_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,287 Corridor,Coffee Shop,Airport,Discount Store,Flea Market,Fast Food Restaurant,Fabric Shop,Ethiopian Restaurant,Electronics Store,Donut Shop,Dog Run
1,Candleridge,Park,Food,Dog Run,Lake,Construction & Landscaping,Convenience Store,Deli / Bodega,Department Store,Dessert Shop,Fondue Restaurant
2,Chapel Creek,Sporting Goods Shop,Mexican Restaurant,Vape Store,Rental Car Location,Park,Shipping Store,Dessert Shop,Ethiopian Restaurant,Electronics Store,Donut Shop
3,City View,Hotel,American Restaurant,Coffee Shop,Park,Sandwich Place,Steakhouse,Lounge,Seafood Restaurant,New American Restaurant,Beer Bar
4,Como,Discount Store,Clothing Store,Gym,Wine Bar,Fast Food Restaurant,Fabric Shop,Ethiopian Restaurant,Electronics Store,Donut Shop,Dog Run


## Cluster the neighborhoods

 Run K-Means and split the neighborhoods into 5 clusters

In [20]:
# set number of clusters
kclusters = 5

fw_grouped_clustering = fw_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(fw_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 0, 2, 2, 2, 2, 2, 2, 2, 2])

Create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [21]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

fw_merged = hoods_split

# merge Toronto_grouped with df_Toronto to add latitude/longitude for each neighborhood
fw_merged = fw_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

fw_merged.shape

(54, 14)

In [22]:
#Drop rows with Nan values
fw_merged.dropna(inplace=True)
fw_merged.reset_index(drop=True,inplace=True)
fw_merged.shape

(47, 14)

Finally, let's visualize the resulting clusters

In [24]:
# create map
map_clusters = folium.Map(location=[fw_latitude, fw_longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(fw_merged['Latitude'], fw_merged['Longitude'], fw_merged['Neighborhood'], fw_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examine clusters 

In [28]:
fw_merged.head()

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Alliance Gateway,32.959716,-97.309837,2.0,ATM,Gas Station,Sandwich Place,Scenic Lookout,Breakfast Spot,Fast Food Restaurant,Sporting Goods Shop,Grocery Store,Electronics Store,Comedy Club
1,Benbrook Lake,32.625154,-97.469581,4.0,Circus,Wine Bar,Discount Store,Fast Food Restaurant,Fabric Shop,Ethiopian Restaurant,Electronics Store,Donut Shop,Dog Run,Dessert Shop
2,Centreport,32.816973,-97.052651,1.0,Trail,Wine Bar,Dessert Shop,Fast Food Restaurant,Fabric Shop,Ethiopian Restaurant,Electronics Store,Donut Shop,Dog Run,Discount Store
3,Clearfork,32.707996,-97.399649,2.0,American Restaurant,Shopping Plaza,French Restaurant,Bowling Alley,Boutique,Dog Run,Movie Theater,Southern / Soul Food Restaurant,Leather Goods Store,Bar
4,Diamond Hill-Jarvis,32.808471,-97.336879,2.0,Discount Store,Recreation Center,Mexican Restaurant,Grocery Store,Convenience Store,Fried Chicken Joint,Wine Bar,Ethiopian Restaurant,Electronics Store,Donut Shop


In [40]:
cluster = input('Choose cluster: ')
fw_merged.loc[fw_merged['Cluster Labels'] == int(cluster), fw_merged.columns[[0] + list(range(4, fw_merged.shape[1]))]]

Choose cluster: 2


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Alliance Gateway,ATM,Gas Station,Sandwich Place,Scenic Lookout,Breakfast Spot,Fast Food Restaurant,Sporting Goods Shop,Grocery Store,Electronics Store,Comedy Club
3,Clearfork,American Restaurant,Shopping Plaza,French Restaurant,Bowling Alley,Boutique,Dog Run,Movie Theater,Southern / Soul Food Restaurant,Leather Goods Store,Bar
4,Diamond Hill-Jarvis,Discount Store,Recreation Center,Mexican Restaurant,Grocery Store,Convenience Store,Fried Chicken Joint,Wine Bar,Ethiopian Restaurant,Electronics Store,Donut Shop
6,Fossil Creek,Baseball Field,Pizza Place,Market,Wine Bar,Dog Run,Fast Food Restaurant,Fabric Shop,Ethiopian Restaurant,Electronics Store,Donut Shop
7,287 Corridor,Coffee Shop,Airport,Discount Store,Flea Market,Fast Food Restaurant,Fabric Shop,Ethiopian Restaurant,Electronics Store,Donut Shop,Dog Run
9,Chapel Creek,Sporting Goods Shop,Mexican Restaurant,Vape Store,Rental Car Location,Park,Shipping Store,Dessert Shop,Ethiopian Restaurant,Electronics Store,Donut Shop
10,City View,Hotel,American Restaurant,Coffee Shop,Park,Sandwich Place,Steakhouse,Lounge,Seafood Restaurant,New American Restaurant,Beer Bar
11,Como,Discount Store,Clothing Store,Gym,Wine Bar,Fast Food Restaurant,Fabric Shop,Ethiopian Restaurant,Electronics Store,Donut Shop,Dog Run
12,Downtown,Hotel,American Restaurant,Sandwich Place,Coffee Shop,Steakhouse,Lounge,Park,Seafood Restaurant,New American Restaurant,Beer Bar
13,Hemphill,Public Art,Gym,Grocery Store,Deli / Bodega,Gas Station,Discount Store,Fabric Shop,Ethiopian Restaurant,Electronics Store,Donut Shop
