# IBM Applied Data Science Capstone
## The Battle of Neighborhoods: Venue Comparison in the Dallas-Fort Worth Metroplex

In [1]:
# Import libaries

# Library to handle data in a vectorized manner
import numpy as np

# Library for data analysis
!conda install -c conda-forge geopy --yes
import pandas as pd

# Library to handle JSON files
import json 

# Tranform JSON file into a pandas dataframe
from pandas.io.json import json_normalize

# Convert an address into latitude and longitude values
!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

# Library to handle requests
import requests

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# Import k-means from clustering stage
from sklearn.cluster import KMeans

# Map rendering library
!conda install -c conda-forge folium=0.10.1 --yes 
import folium

Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-2.0.0                |     pyh9f0ad1d_0          63 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          97 KB

The following NEW packages will be INSTALLED:

  geographiclib      conda-forge/noarch::geographiclib-1.50-py_0
  geopy              conda-forge/noarch::geopy-2.0.0-pyh9f0ad1d_0



Downloading and Extracting Packages
geopy-2.0.0          | 63 KB     | ##################################### | 100% 
geographiclib-1.50   | 34 KB     | ################################

# Load the data into a .CSV from Opendatasoft 

In [2]:
# Create Dallas dataframe
csv_url_dallas = 'https://public.opendatasoft.com/explore/dataset/us-zip-code-latitude-and-longitude/download/?format=csv&q=dallas&refine.state=TX&timezone=America/Chicago&use_labels_for_header=true&csv_separator=%3B'
df_dallas = pd.read_csv(csv_url_dallas, sep = ';', engine = 'python')

# Drop unneeded columns
df_dallas.drop(['State', 'Timezone', 'Daylight savings time flag', 'geopoint'], axis = 1, inplace = True)

#Rename column
df_dallas.columns = ['Zipcode', 'City', 'Latitude', 'Longitude']
df_dallas.head()

Unnamed: 0,Zipcode,City,Latitude,Longitude
0,75294,Dallas,32.767268,-96.777626
1,75255,Dallas,32.669783,-96.614921
2,75374,Dallas,32.767268,-96.777626
3,75252,Dallas,32.998132,-96.79088
4,75275,Dallas,32.767268,-96.777626


In [3]:
# Eliminate duplicates

indexnames = df_dallas[df_dallas['Latitude'] == 32.767268].index
df_dallas.drop(indexnames, inplace = True)

indexnames = df_dallas[df_dallas['City'] == 'Lake Dallas'].index
# Drop those rows
df_dallas.drop(indexnames, inplace = True)

df_dallas.head()

Unnamed: 0,Zipcode,City,Latitude,Longitude
1,75255,Dallas,32.669783,-96.614921
3,75252,Dallas,32.998132,-96.79088
5,75202,Dallas,32.77988,-96.80502
10,75270,Dallas,32.78133,-96.80198
11,75220,Dallas,32.867977,-96.86306


In [4]:
print('The shape of the Dallas dataframe is', df_dallas.shape)

The shape of the Dallas dataframe is (52, 4)


In [5]:
# Create Fort Worth dataframe
csv_url_fortworth = 'https://public.opendatasoft.com/explore/dataset/us-zip-code-latitude-and-longitude/download/?format=csv&q=fort+worth&refine.state=TX&timezone=America/Chicago&use_labels_for_header=true&csv_separator=%3B'
df_fortworth = pd.read_csv(csv_url_fortworth, sep = ';', engine = 'python')

# Drop columns
df_fortworth.drop(['State', 'Timezone', 'Daylight savings time flag', 'geopoint'], axis = 1, inplace = True)

#Rename column
df_fortworth.columns = ['Zipcode', 'City', 'Latitude', 'Longitude']
df_fortworth.head()

Unnamed: 0,Zipcode,City,Latitude,Longitude
0,76107,Fort Worth,32.738481,-97.38424
1,76179,Fort Worth,32.876475,-97.41249
2,76137,Fort Worth,32.86814,-97.28566
3,76345,Fort Worth,32.38253,-98.404816
4,76177,Fort Worth,32.949819,-97.31406


In [6]:
# Get names of indexes where Latitude = 32.771419
indexnames = df_fortworth[df_fortworth['Latitude'] == 32.771419].index
# Drop those rows
df_fortworth.drop(indexnames, inplace = True)
df_fortworth.head()

Unnamed: 0,Zipcode,City,Latitude,Longitude
0,76107,Fort Worth,32.738481,-97.38424
1,76179,Fort Worth,32.876475,-97.41249
2,76137,Fort Worth,32.86814,-97.28566
3,76345,Fort Worth,32.38253,-98.404816
4,76177,Fort Worth,32.949819,-97.31406


In [7]:
print('The shape of the Fort Worth dataframe is', df_fortworth.shape)

The shape of the Fort Worth dataframe is (32, 4)


In [8]:
# Combine the Dallas and Fort Worth dataframes
df_dfw = pd.concat([df_dallas, df_fortworth], ignore_index = True)
print('The shape of the combined Dallas and Fort Worth dataframe is', df_dfw.shape)
df_dfw.head()

The shape of the combined Dallas and Fort Worth dataframe is (84, 4)


Unnamed: 0,Zipcode,City,Latitude,Longitude
0,75255,Dallas,32.669783,-96.614921
1,75252,Dallas,32.998132,-96.79088
2,75202,Dallas,32.77988,-96.80502
3,75270,Dallas,32.78133,-96.80198
4,75220,Dallas,32.867977,-96.86306


# Acquiring data through Foursquare API

In [9]:
# Client ID and Secret removed for sharing
CLIENT_ID = 'ZMOS5JBCPGLY0IJ1WYBR1SHNIV2DP2SMRVIEZHLF3IZ5U22M' # Foursquare ID
CLIENT_SECRET = 'O3K0YWMF535K5VJWICWUDAJZXPFQ0C5REELKD1TTTLGG320F' # Foursquare Secret
VERSION = '20180604'
print('Credentials:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Credentials:
CLIENT_ID: ZMOS5JBCPGLY0IJ1WYBR1SHNIV2DP2SMRVIEZHLF3IZ5U22M
CLIENT_SECRET:O3K0YWMF535K5VJWICWUDAJZXPFQ0C5REELKD1TTTLGG320F


In [10]:
# Limit of number of venues returned by Foursquare API
LIMIT = 150
# Define radius
radius = 1000
def getNearbyVenues(names, cities, latitudes, longitudes):
    
    venues_list = []
    for name, city, lat, lng in zip(names, cities, latitudes, longitudes):
        print(name)
            
        # Create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # Make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # Return only relevant information for each nearby venue
        venues_list.append([(
            city,
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['City',
                             'Zipcode', 
                             'Zipcode Latitude', 
                             'Zipcode Longitude', 
                             'Venue', 
                             'Venue Latitude', 
                             'Venue Longitude', 
                             'Venue Category']
    
    return(nearby_venues)

In [11]:
# Retrieving venues in Zipcode regions
dfw_venues = getNearbyVenues(names = df_dfw['Zipcode'],
                             cities = df_dfw['City'],
                             latitudes = df_dfw['Latitude'],
                             longitudes = df_dfw['Longitude']
                            )
print('dfw_venues shape', dfw_venues.shape)
dfw_venues

75255
75252
75202
75270
75220
75234
75215
75231
75251
75214
75210
75246
75247
75207
75212
75245
75204
75223
75287
75205
75230
75254
75217
75219
75226
75228
75233
75227
75211
75218
75203
75229
75209
75201
75221
75237
75249
75236
75248
75225
75208
75243
75224
75216
75238
75232
75240
75241
75244
75253
75206
75235
76107
76179
76137
76345
76177
76114
76103
76118
76110
76120
76115
76148
76102
76123
76153
76111
76112
76135
76134
76109
76105
76108
76116
76133
76131
76106
76104
76140
76119
76126
76155
76132
dfw_venues shape (2357, 8)


Unnamed: 0,City,Zipcode,Zipcode Latitude,Zipcode Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Dallas,75255,32.669783,-96.614921,Sid's Food Mart,32.669854,-96.614021,Deli / Bodega
1,Dallas,75255,32.669783,-96.614921,Compressors Unlimited International LLC,32.666720,-96.613780,Home Service
2,Dallas,75255,32.669783,-96.614921,Los Potrillos,32.669117,-96.609795,Mexican Restaurant
3,Dallas,75252,32.998132,-96.790880,Starbucks,32.998742,-96.794237,Coffee Shop
4,Dallas,75252,32.998132,-96.790880,Jamba Juice,32.998554,-96.794633,Juice Bar
...,...,...,...,...,...,...,...,...
2352,Fort Worth,76132,32.670345,-97.414300,Pizza Hut,32.676109,-97.410387,Pizza Place
2353,Fort Worth,76132,32.670345,-97.414300,Daylight Donuts,32.677814,-97.415108,Donut Shop
2354,Fort Worth,76132,32.670345,-97.414300,Papa Yun's Donuts,32.677814,-97.415108,Donut Shop
2355,Fort Worth,76132,32.670345,-97.414300,Albertsons Pharmacy,32.662846,-97.419465,Pharmacy


In [12]:
# Amount of unique categories 
print('There are {} uniques categories.'.format(len(dfw_venues['Venue Category'].unique())))

There are 275 uniques categories.


In [13]:
# Most frequent categories
dfw_venues.groupby('Venue Category').count().sort_values('Venue', ascending = False)

Unnamed: 0_level_0,City,Zipcode,Zipcode Latitude,Zipcode Longitude,Venue,Venue Latitude,Venue Longitude
Venue Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Mexican Restaurant,115,115,115,115,115,115,115
Fast Food Restaurant,100,100,100,100,100,100,100
Pizza Place,71,71,71,71,71,71,71
Convenience Store,66,66,66,66,66,66,66
Hotel,63,63,63,63,63,63,63
...,...,...,...,...,...,...,...
Hobby Shop,1,1,1,1,1,1,1
Health Food Store,1,1,1,1,1,1,1
Harbor / Marina,1,1,1,1,1,1,1
Halal Restaurant,1,1,1,1,1,1,1


In [14]:
# One hot encoding
dfw_venues_onehot = pd.get_dummies(dfw_venues[['Venue Category']], prefix = "", prefix_sep = "")

# Add Zipcode column back to dataframe
col1 = dfw_venues['City']
col2 = dfw_venues['Zipcode'] 
 
# Move Zipcode column to the first column
dfw_venues_onehot.insert(0, 'Zipcode', col2)
dfw_venues_onehot.insert(0, 'City', col1)

dfw_venues_onehot

Unnamed: 0,City,Zipcode,ATM,Adult Boutique,American Restaurant,Antique Shop,Aquarium,Arcade,Art Gallery,Art Museum,...,Warehouse Store,Water Park,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,Dallas,75255,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Dallas,75255,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Dallas,75255,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Dallas,75252,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Dallas,75252,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2352,Fort Worth,76132,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2353,Fort Worth,76132,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2354,Fort Worth,76132,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2355,Fort Worth,76132,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [15]:
# Group rows by Zipcode and take the mean of the frequency of occurrence of each category
dfw_venues_grouped = dfw_venues_onehot.groupby(['City', 'Zipcode']).mean().reset_index()
dfw_venues_grouped

Unnamed: 0,City,Zipcode,ATM,Adult Boutique,American Restaurant,Antique Shop,Aquarium,Arcade,Art Gallery,Art Museum,...,Warehouse Store,Water Park,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,Dallas,75201,0.0,0.0,0.040000,0.0,0.01,0.0,0.0,0.02,...,0.0,0.0,0.0,0.000000,0.0,0.00,0.000000,0.020000,0.0,0.0000
1,Dallas,75202,0.0,0.0,0.030000,0.0,0.01,0.0,0.0,0.01,...,0.0,0.0,0.0,0.000000,0.0,0.01,0.000000,0.000000,0.0,0.0000
2,Dallas,75203,0.0,0.0,0.000000,0.0,0.00,0.0,0.0,0.00,...,0.0,0.0,0.0,0.000000,0.0,0.00,0.000000,0.000000,0.0,0.0625
3,Dallas,75204,0.0,0.0,0.042254,0.0,0.00,0.0,0.0,0.00,...,0.0,0.0,0.0,0.028169,0.0,0.00,0.000000,0.042254,0.0,0.0000
4,Dallas,75205,0.0,0.0,0.000000,0.0,0.00,0.0,0.0,0.00,...,0.0,0.0,0.0,0.000000,0.0,0.00,0.083333,0.000000,0.0,0.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78,Fort Worth,76148,0.0,0.0,0.000000,0.0,0.00,0.0,0.0,0.00,...,0.0,0.0,0.0,0.000000,0.0,0.00,0.000000,0.000000,0.0,0.0000
79,Fort Worth,76153,0.0,0.0,0.000000,0.0,0.00,0.0,0.0,0.00,...,0.0,0.0,0.0,0.000000,0.0,0.00,0.000000,0.000000,0.0,0.0000
80,Fort Worth,76155,0.0,0.0,0.000000,0.0,0.00,0.0,0.0,0.00,...,0.0,0.0,0.0,0.000000,0.0,0.00,0.000000,0.000000,0.0,0.0000
81,Fort Worth,76177,0.0,0.0,0.000000,0.0,0.00,0.0,0.0,0.00,...,0.0,0.0,0.0,0.000000,0.0,0.00,0.000000,0.000000,0.0,0.0000


In [16]:
# View each Zipcode region with top 5 venues
num_top_venues = 5

for hood in dfw_venues_grouped['Zipcode']:
    print("----"+str(hood)+"----")
    temp = dfw_venues_grouped[dfw_venues_grouped['Zipcode'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending = False).reset_index(drop = True).head(num_top_venues))
    print('\n')

----75201----
                     venue      freq
0                  Zipcode  75201.00
1                    Hotel      0.08
2  New American Restaurant      0.06
3              Coffee Shop      0.05
4               Steakhouse      0.05


----75202----
                venue      freq
0             Zipcode  75202.00
1               Hotel      0.12
2  Mexican Restaurant      0.05
3               Plaza      0.05
4        Cocktail Bar      0.04


----75203----
                  venue      freq
0               Zipcode  75203.00
1    Light Rail Station      0.19
2             Gift Shop      0.12
3  Fast Food Restaurant      0.12
4           Bus Station      0.06


----75204----
                 venue      freq
0              Zipcode  75204.00
1          Coffee Shop      0.07
2                  Gym      0.06
3  American Restaurant      0.04
4          Yoga Studio      0.04


----75205----
            venue      freq
0         Zipcode  75205.00
1   Women's Store      0.08
2        Boutique     

In [17]:
# Sort the venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[2:]
    row_categories_sorted = row_categories.sort_values(ascending = False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [18]:
# Create new dataframe and display the top 10 venues for each neighborhood
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# Create columns according to number of top venues
columns = ['City', 'Zipcode']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind + 1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind + 1))

# Create a new dataframe
zipcode_venues_sorted = pd.DataFrame(columns = columns)
#zipcode_venues_sorted['City'] = dfw_venues_grouped['City']
zipcode_venues_sorted['City'] = dfw_venues_grouped['City']
zipcode_venues_sorted['Zipcode'] = dfw_venues_grouped['Zipcode']

for ind in np.arange(dfw_venues_grouped.shape[0]):
    zipcode_venues_sorted.iloc[ind, 2:] = return_most_common_venues(dfw_venues_grouped.iloc[ind, :], num_top_venues)

zipcode_venues_sorted.head()

Unnamed: 0,City,Zipcode,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Dallas,75201,Hotel,New American Restaurant,Coffee Shop,Steakhouse,American Restaurant,Food Truck,Performing Arts Venue,Japanese Restaurant,Cocktail Bar,Seafood Restaurant
1,Dallas,75202,Hotel,Mexican Restaurant,Plaza,Steakhouse,Cocktail Bar,Coffee Shop,History Museum,Bar,American Restaurant,Gym
2,Dallas,75203,Light Rail Station,Gift Shop,Fast Food Restaurant,Zoo Exhibit,Mexican Restaurant,Park,Paper / Office Supplies Store,Food,Bus Station,Taco Place
3,Dallas,75204,Coffee Shop,Gym,Fast Food Restaurant,Yoga Studio,American Restaurant,Mexican Restaurant,Bar,Convenience Store,Video Store,Pool
4,Dallas,75205,Clothing Store,Women's Store,Boutique,Bank,Social Club,Shopping Mall,Golf Course,Shoe Repair,Cheese Shop,Grocery Store


# k-means clustering

In [19]:
# Set number of clusters
kclusters = 8

dfw_grouped_clustering = dfw_venues_grouped.drop(['City','Zipcode'], 1)

# Run k-means clustering
kmeans = KMeans(n_clusters = kclusters, random_state = 0).fit(dfw_grouped_clustering)

# Check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)

In [20]:
# Add clustering labels
# Uncomment to add cluster labels
zipcode_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

dfw_merged = df_dfw

# Merge dfw_grouped with dfw_data to add latitude/longitude for each Zipcode
dfw_merged_all = dfw_merged.join(zipcode_venues_sorted.set_index(['City', 'Zipcode']), on = ['City', 'Zipcode'])

dfw_merged_all

# Drop rows with NaN values
dfw_merged_all.dropna(inplace = True)
dfw_merged_all['Cluster Labels'] = dfw_merged_all['Cluster Labels'].astype(int)
dfw_merged_all

Unnamed: 0,Zipcode,City,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,75255,Dallas,32.669783,-96.614921,0,Mexican Restaurant,Deli / Bodega,Home Service,Food Service,Flower Shop,Fondue Restaurant,Food,Food & Drink Shop,Food Court,Zoo Exhibit
1,75252,Dallas,32.998132,-96.790880,1,Mexican Restaurant,Nail Salon,Gym,Coffee Shop,Mediterranean Restaurant,Convenience Store,Sandwich Place,Bank,French Restaurant,Southern / Soul Food Restaurant
2,75202,Dallas,32.779880,-96.805020,1,Hotel,Mexican Restaurant,Plaza,Steakhouse,Cocktail Bar,Coffee Shop,History Museum,Bar,American Restaurant,Gym
3,75270,Dallas,32.781330,-96.801980,1,Hotel,Coffee Shop,Mexican Restaurant,Plaza,Cocktail Bar,Steakhouse,French Restaurant,Park,History Museum,Gym
4,75220,Dallas,32.867977,-96.863060,1,Mexican Restaurant,Grocery Store,Convenience Store,Pizza Place,Chinese Restaurant,Gas Station,Latin American Restaurant,Butcher,Optical Shop,Mobile Phone Shop
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79,76140,Fort Worth,32.630268,-97.271020,1,Donut Shop,Cable Car,Gas Station,BBQ Joint,Convenience Store,Construction & Landscaping,Mexican Restaurant,Food,Food & Drink Shop,Food Court
80,76119,Fort Worth,32.691033,-97.264790,7,Intersection,Construction & Landscaping,Burger Joint,Shopping Mall,Park,Zoo Exhibit,Food Court,Flower Shop,Fondue Restaurant,Food
81,76126,Fort Worth,32.649476,-97.491240,3,Garden Center,Bar,Zoo Exhibit,Food Truck,Fondue Restaurant,Food,Food & Drink Shop,Food Court,Food Service,Football Stadium
82,76155,Fort Worth,32.830932,-97.047780,1,Bakery,Food Court,Mexican Restaurant,Gym / Fitness Center,Gym,Snack Place,Shop & Service,Coffee Shop,Food Truck,Sporting Goods Shop


### Generate maps of each city, Dallas and Fort Worth

In [21]:
# Retrieve Dallas coordiantes
dallas_address = 'Dallas, TX'
geolocator = Nominatim(user_agent="Sue_myapp")
dallas_location = geolocator.geocode(dallas_address)
dallas_latitude = dallas_location.latitude
dallas_longitude = dallas_location.longitude
print('The geograpical coordinate of Dallas are {}, {}.'.format(dallas_latitude, dallas_longitude))

# Retrieve Fort Worth coordiantes
fortworth_address = 'Fort Worth, TX'
fortworth_location = geolocator.geocode(fortworth_address)
fortworth_latitude = fortworth_location.latitude
fortworth_longitude = fortworth_location.longitude
print('The geograpical coordinate of Fort Worth are {}, {}.'.format(fortworth_latitude, fortworth_longitude))

The geograpical coordinate of Dallas are 32.7762719, -96.7968559.
The geograpical coordinate of Fort Worth are 32.753177, -97.3327459.


In [22]:
dallas_merged_all = dfw_merged_all[dfw_merged_all['City'] == 'Dallas']
fortworth_merged_all = dfw_merged_all[dfw_merged_all['City'] == 'Fort Worth']

In [37]:
# Create Dallas map
kclusters = 8
map_clusters = folium.Map(location = [dallas_latitude, dallas_longitude], zoom_start = 11)

# Set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i * x) ** 2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
multi = [colors.rgb2hex(i) for i in colors_array]

# Add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(dallas_merged_all["Latitude"], dallas_merged_all["Longitude"],dallas_merged_all["Zipcode"], dallas_merged_all["Cluster Labels"]):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster + 1), parse_html = True)
    folium.CircleMarker(
        [lat, lon],
        radius = 5,
        popup = label,
        color = multi[cluster-1],
        fill = True,
        fill_color = multi[cluster-1],
        fill_opacity = 0.3).add_to(map_clusters)

map_clusters

In [35]:
# Create Fort Worth map
kclusters = 8
map_clusters = folium.Map(location = [fortworth_latitude, fortworth_longitude], zoom_start=11)

# Set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i * x) ** 2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
multi = [colors.rgb2hex(i) for i in colors_array]

# Add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(fortworth_merged_all['Latitude'], fortworth_merged_all['Longitude'], fortworth_merged_all['Zipcode'], fortworth_merged_all['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster + 1), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius = 5,
        popup = label,
        color = 'green',
        fill = True,
        fill_color = 'green',
        fill_opacity = 0.3).add_to(map_clusters)

map_clusters

In [25]:
#Cluster 1
dfw_merged_all.loc[dfw_merged_all['Cluster Labels'] == 0, dfw_merged_all.columns[[0] + [2] + list(range(5, dfw_merged_all.shape[1]))]]

Unnamed: 0,Zipcode,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,75255,32.669783,Mexican Restaurant,Deli / Bodega,Home Service,Food Service,Flower Shop,Fondue Restaurant,Food,Food & Drink Shop,Food Court,Zoo Exhibit
14,75212,32.78238,Home Service,Thrift / Vintage Store,Convenience Store,Discount Store,Mexican Restaurant,Food Court,Fast Food Restaurant,Flower Shop,Fondue Restaurant,Food
26,75233,32.704398,Storage Facility,Mexican Restaurant,Construction & Landscaping,Taco Place,Zoo Exhibit,Food Service,Fondue Restaurant,Food,Food & Drink Shop,Food Court
27,75227,32.77003,Mexican Restaurant,Bakery,BBQ Joint,Automotive Shop,Park,Arts & Crafts Store,Food Service,Food,Food & Drink Shop,Food Court
60,76110,32.706331,Mexican Restaurant,Grocery Store,Fast Food Restaurant,Laundromat,Thrift / Vintage Store,Pizza Place,Tailor Shop,Salon / Barbershop,Park,Video Store
62,76115,32.680333,Mexican Restaurant,Taco Place,Fast Food Restaurant,Bakery,Pharmacy,Tennis Court,Business Service,Football Stadium,Food Truck,French Restaurant
72,76105,32.724831,Mexican Restaurant,Seafood Restaurant,Flower Shop,Grocery Store,Furniture / Home Store,Fried Chicken Joint,Moving Target,Discount Store,Fondue Restaurant,Food


In [26]:
#Cluster 2
dfw_merged_all.loc[dfw_merged_all['Cluster Labels'] == 1, dfw_merged_all.columns[[0] + [2] + list(range(5, dfw_merged_all.shape[1]))]]

Unnamed: 0,Zipcode,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,75252,32.998132,Mexican Restaurant,Nail Salon,Gym,Coffee Shop,Mediterranean Restaurant,Convenience Store,Sandwich Place,Bank,French Restaurant,Southern / Soul Food Restaurant
2,75202,32.779880,Hotel,Mexican Restaurant,Plaza,Steakhouse,Cocktail Bar,Coffee Shop,History Museum,Bar,American Restaurant,Gym
3,75270,32.781330,Hotel,Coffee Shop,Mexican Restaurant,Plaza,Cocktail Bar,Steakhouse,French Restaurant,Park,History Museum,Gym
4,75220,32.867977,Mexican Restaurant,Grocery Store,Convenience Store,Pizza Place,Chinese Restaurant,Gas Station,Latin American Restaurant,Butcher,Optical Shop,Mobile Phone Shop
5,75234,32.925975,Soccer Field,Baseball Field,Pizza Place,Mexican Restaurant,Park,Big Box Store,Bank,Fried Chicken Joint,Sandwich Place,Grocery Store
...,...,...,...,...,...,...,...,...,...,...,...,...
77,76106,32.798429,Pizza Place,Mexican Restaurant,Food,Fast Food Restaurant,BBQ Joint,Seafood Restaurant,Taco Place,Grocery Store,Mobile Phone Shop,Supermarket
78,76104,32.730265,Convenience Store,Mexican Restaurant,Fast Food Restaurant,Gas Station,Pizza Place,Music Venue,New American Restaurant,Beer Bar,Thai Restaurant,Cocktail Bar
79,76140,32.630268,Donut Shop,Cable Car,Gas Station,BBQ Joint,Convenience Store,Construction & Landscaping,Mexican Restaurant,Food,Food & Drink Shop,Food Court
82,76155,32.830932,Bakery,Food Court,Mexican Restaurant,Gym / Fitness Center,Gym,Snack Place,Shop & Service,Coffee Shop,Food Truck,Sporting Goods Shop


In [27]:
#Cluster 3
dfw_merged_all.loc[dfw_merged_all['Cluster Labels'] == 2, dfw_merged_all.columns[[0] + [2] + list(range(5, dfw_merged_all.shape[1]))]]

Unnamed: 0,Zipcode,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,75245,32.922499,Construction & Landscaping,Park,Zoo Exhibit,Food Truck,Fondue Restaurant,Food,Food & Drink Shop,Food Court,Food Service,Football Stadium


In [28]:
#Cluster 4
dfw_merged_all.loc[dfw_merged_all['Cluster Labels'] == 3, dfw_merged_all.columns[[0] + [2] + list(range(5, dfw_merged_all.shape[1]))]]

Unnamed: 0,Zipcode,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
81,76126,32.649476,Garden Center,Bar,Zoo Exhibit,Food Truck,Fondue Restaurant,Food,Food & Drink Shop,Food Court,Food Service,Football Stadium


In [29]:
#Cluster 5
dfw_merged_all.loc[dfw_merged_all['Cluster Labels'] == 4, dfw_merged_all.columns[[0] + [2] + list(range(5, dfw_merged_all.shape[1]))]]

Unnamed: 0,Zipcode,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
47,75241,32.669383,Convenience Store,Home Service,Zoo Exhibit,Food Service,Flower Shop,Fondue Restaurant,Food,Food & Drink Shop,Food Court,Food Truck


In [30]:
#Cluster 6
dfw_merged_all.loc[dfw_merged_all['Cluster Labels'] == 5, dfw_merged_all.columns[[0] + [2] + list(range(5, dfw_merged_all.shape[1]))]]

Unnamed: 0,Zipcode,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
46,75240,32.938774,Intersection,Art Gallery,Pool,Zoo Exhibit,Food Service,Flower Shop,Fondue Restaurant,Food,Food & Drink Shop,Food Court


In [31]:
#Cluster 7
dfw_merged_all.loc[dfw_merged_all['Cluster Labels'] == 6, dfw_merged_all.columns[[0] + [2] + list(range(5, dfw_merged_all.shape[1]))]]

Unnamed: 0,Zipcode,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
56,76177,32.949819,Scenic Lookout,Breakfast Spot,Health & Beauty Service,Lawyer,Food Service,Flower Shop,Fondue Restaurant,Food,Food & Drink Shop,Food Court


In [32]:
#Cluster 8
dfw_merged_all.loc[dfw_merged_all['Cluster Labels'] == 7, dfw_merged_all.columns[[0] + [2] + list(range(5, dfw_merged_all.shape[1]))]]

Unnamed: 0,Zipcode,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
80,76119,32.691033,Intersection,Construction & Landscaping,Burger Joint,Shopping Mall,Park,Zoo Exhibit,Food Court,Flower Shop,Fondue Restaurant,Food
