# Exploring the Top Tourist Cities in the U.S.
#### Created by: Alexander Ptacek
Description: Travel websites can help tourists pick a city and plan their visit, but they often fail to paint a clear picture of the city's full offerings. This report will help tourists who are deciding between two or more cities for their next trip. This information will also be useful for travel sites and blogs that are trying to create a travel guide for U.S. cities.

In [49]:
import numpy as np
import pandas as pd

In [50]:
# Cities covered by this report.
city_index = (
    'New York City',
    'San Francisco',
    'New Orleans',
    'San Diego',
    'Chicago',
)

# Column layout of the city table. The 'Nighlife Rank' spelling is kept
# as-is because the cell that fills the table refers to this exact name.
city_columns = (
    'City',
    'Overall Rank',
    'Food Rank',
    'Nighlife Rank',
    'Latitude',
    'Longitude',
)

# Start from an empty frame that only carries the column headers.
citydf = pd.DataFrame(columns=city_columns)

In [51]:
# Fill in the city names first, then each ranking, one column at a time.
citydf['City'] = city_index
citydf['Overall Rank'] = (1, 2, 3, 4, 5)
citydf['Food Rank'] = (4, 1, 2, 12, 3)
citydf['Nighlife Rank'] = (4, 6, 3, 9, 7)
citydf

Unnamed: 0,City,Overall Rank,Food Rank,Nighlife Rank,Latitude,Longitude
0,New York City,1,4,4,,
1,San Francisco,2,1,6,,
2,New Orleans,3,2,3,,
3,San Diego,4,12,9,,
4,Chicago,5,3,7,,


In [52]:
!conda install -c conda-forge geopy --yes # runs on every execution; comment this line out once geopy is installed
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests

#Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

#Import k-means for clustering solutions
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line to install
import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Libraries imported.


In [7]:
# Full "city, state" strings handed to the geocoder, in the same order as the
# rows of the city table.
city_address=['New York City, New York','San Francisco, California','New Orleans, Louisiana','San Diego, California','Chicago, Illinois']

# One geocoder client is enough for every lookup.
geolocator = Nominatim(user_agent="to_explorer")

# Look up each address in turn and collect its (latitude, longitude) pair.
city_coords = []
for address in city_address:
    # The text before the first comma is the bare city name used in the message.
    city_name = address.split(',')[0]
    location = geolocator.geocode(address)
    if location is None:
        # geocode() yields None when nothing matches; fail with a clear
        # message instead of an AttributeError on the attribute access below.
        raise ValueError('Could not geocode address: {!r}'.format(address))
    city_coords.append((location.latitude, location.longitude))
    print('The geograpical coordinates of {} are {}, {}.'.format(
        city_name, location.latitude, location.longitude))

# Keep the per-city names that other cells of the notebook rely on.
nyc_address, sf_address, no_address, sd_address, chi_address = city_address
((nyclat, nyclng),
 (sflat, sflng),
 (nolat, nolng),
 (sdlat, sdlng),
 (chilat, chilng)) = city_coords

The geograpical coordinates of New York City are 40.7127281, -74.0060152.
The geograpical coordinates of San Francisco are 37.7790262, -122.4199061.
The geograpical coordinates of New Orleans are 29.9499323, -90.0701156.
The geograpical coordinates of San Diego are 32.7174202, -117.1627728.
The geograpical coordinates of Chicago are 41.8755616, -87.6244212.


In [8]:
# Store the geocoded coordinates, listed in the same city order as the table rows.
citydf['Latitude'] = (nyclat, sflat, nolat, sdlat, chilat)
citydf['Longitude'] = (nyclng, sflng, nolng, sdlng, chilng)
citydf

Unnamed: 0,City,Overall Rank,Food Rank,Nighlife Rank,Latitude,Longitude
0,New York City,1,4,4,40.712728,-74.006015
1,San Francisco,2,1,6,37.779026,-122.419906
2,New Orleans,3,2,3,29.949932,-90.070116
3,San Diego,4,12,9,32.71742,-117.162773
4,Chicago,5,3,7,41.875562,-87.624421


In [9]:
# Geocode the country as a whole; the result is used as the centre of the maps.
address = 'United States'
geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() yields None when nothing matches; fail with a clear message
    # instead of an AttributeError on the attribute access below.
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of United States are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of United States are 39.7837304, -100.4458825.


In [10]:
# Base map centred on the coordinates geocoded for the United States.
map_us = folium.Map(location=[latitude, longitude], zoom_start=5)

# Add one circle marker per city, with the city name as its popup text.
city_points = zip(citydf['Latitude'], citydf['Longitude'], citydf['City'])
for lat, lng, city in city_points:
    label = folium.Popup('{}'.format(city), parse_html=True)
    city_marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    city_marker.add_to(map_us)

map_us

In [11]:
import os

# NOTE(review): these Foursquare credentials are committed in plain text.
# They should be rotated and then supplied only through the
# FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET environment variables; the
# literals below remain as fallbacks solely so existing runs keep working
# until that happens.
CLIENT_ID = os.environ.get(
    'FOURSQUARE_CLIENT_ID',
    'OYBKAV3TR4YDJSKVRVGYO350ATXJWDUBQ51QY3XY0XIWWK5H')  # Foursquare API ID
CLIENT_SECRET = os.environ.get(
    'FOURSQUARE_CLIENT_SECRET',
    '1TSRPGF42HTIUMOFIAFHZKLO5HVPJJWUTYM0RW5GM2HK3V1Q')  # Foursquare Secret
VERSION = '20180605' # Foursquare API version used for this project
# Number of venues requested per call. The run recorded in this notebook got
# 100 venues back per city, so the API evidently caps results below this value.
LIMIT = 600

In [12]:
def getNearbyVenues(names, latitudes, longitudes, radius=100000):
    """Query Foursquare's ``venues/explore`` endpoint around each location.

    One request is made per (name, latitude, longitude) triple, using the
    module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT settings. Each
    name is printed before its request as a simple progress indicator.

    Args:
        names: Iterable of location labels; copied into the 'City' column.
        latitudes: Iterable of latitudes, parallel to ``names``.
        longitudes: Iterable of longitudes, parallel to ``names``.
        radius: Value sent as the API's ``radius`` parameter
            (presumably metres -- confirm against the Foursquare docs).

    Returns:
        A DataFrame with one row per returned venue and the columns
        'City', 'City Latitude', 'City Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The
        frame is empty, but still has these columns, when no venue is
        returned.

    Raises:
        requests.exceptions.HTTPError: If the API answers with an error status.
        requests.exceptions.RequestException: On connection problems or
            when a request exceeds the timeout.
    """
    column_names = ['City',
                    'City Latitude',
                    'City Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and escape the query string.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            # Without a timeout a stalled connection would hang forever.
            timeout=30,
        )
        # Surface API errors here rather than as a KeyError further down.
        response.raise_for_status()

        groups = response.json()['response']['groups']
        items = groups[0]['items'] if groups else []

        # Keep only the relevant information for each nearby venue.
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # A venue without categories gets a missing value instead
                # of raising IndexError.
                categories[0]['name'] if categories else None,
            ))

    # Passing the column names to the constructor also works for zero rows.
    return pd.DataFrame(venue_rows, columns=column_names)

In [13]:
# Collect the venues around every city in the city table.
UScity_venues = getNearbyVenues(
    names=citydf['City'],
    latitudes=citydf['Latitude'],
    longitudes=citydf['Longitude'],
)

New York City
San Francisco
New Orleans
San Diego
Chicago


In [14]:
# Preview the first rows of the venue table.
UScity_venues.head()

Unnamed: 0,City,City Latitude,City Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,New York City,40.712728,-74.006015,Aire Ancient Baths,40.718141,-74.004941,Spa
1,New York City,40.712728,-74.006015,The Bar Room at Temple Court,40.711448,-74.006802,Hotel Bar
2,New York City,40.712728,-74.006015,Crown Shy,40.706187,-74.00749,Restaurant
3,New York City,40.712728,-74.006015,9/11 Memorial North Pool,40.712077,-74.013187,Memorial Site
4,New York City,40.712728,-74.006015,The Rooftop @ Pier 17,40.705463,-74.001598,Music Venue


In [15]:
# (rows, columns) of the venue table: one row per venue across all cities.
UScity_venues.shape

(500, 7)

In [16]:
# Base map centred on the coordinates geocoded for the United States.
# Despite its name, this map shows the venues of every city in UScity_venues.
map_nyc = folium.Map(location=[latitude, longitude], zoom_start=5)

# Add one circle marker per venue, with the venue name as its popup text.
venue_points = zip(
    UScity_venues['Venue Latitude'],
    UScity_venues['Venue Longitude'],
    UScity_venues['Venue'],
)
for lat, lng, venue in venue_points:
    label = folium.Popup('{}'.format(venue), parse_html=True)
    venue_marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    venue_marker.add_to(map_nyc)

map_nyc

In [17]:
# Count the non-missing entries of each column per city, i.e. how many venues came back for each city.
UScity_venues.groupby('City').count()

Unnamed: 0_level_0,City Latitude,City Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Chicago,100,100,100,100,100,100
New Orleans,100,100,100,100,100,100
New York City,100,100,100,100,100,100
San Diego,100,100,100,100,100,100
San Francisco,100,100,100,100,100,100


In [18]:
# Distinct venue categories across all cities, in order of first appearance.
venue_cats=UScity_venues['Venue Category'].unique()
venue_cats

array(['Spa', 'Hotel Bar', 'Restaurant', 'Memorial Site', 'Music Venue',
       'Furniture / Home Store', 'Park', 'Bridge', 'Playground',
       'Salad Place', 'Thai Restaurant', 'Scenic Lookout', 'Garden',
       'Chocolate Shop', 'Asian Restaurant', 'Beach', 'Volleyball Court',
       'Ice Cream Shop', 'Bookstore', 'Wine Shop', 'Trail', 'Cheese Shop',
       'Pier', 'Movie Theater', 'Theater', 'Sandwich Place',
       'Gourmet Shop', 'Gym / Fitness Center', 'Steakhouse',
       'Dance Studio', 'Bakery', 'Juice Bar', 'Stationery Store',
       'New American Restaurant', 'Farmers Market', 'Yoga Studio',
       'Taco Place', 'Seafood Restaurant', 'Fish Market',
       'Athletics & Sports', 'Event Space', 'Art Gallery',
       'Indie Movie Theater', 'Bike Shop', 'Cocktail Bar',
       'Hot Dog Joint', 'Hotel', 'Coffee Shop', 'Italian Restaurant',
       'Israeli Restaurant', 'Pizza Place', 'Deli / Bodega',
       'Record Shop', 'Cuban Restaurant', 'Food & Drink Shop',
       'Eastern Eur

In [19]:
# Report how many distinct venue categories were returned.
print('There are {} uniques categories.'.format(len(UScity_venues['Venue Category'].unique())))

There are 162 uniques categories.


In [44]:
# One-hot encode the venue categories: one indicator column per category.
city_onehot = pd.get_dummies(UScity_venues[['Venue Category']], prefix="", prefix_sep="")

# Attach each venue's city so the rows can be grouped by it.
city_onehot['City'] = UScity_venues['City']

# Rotate the column order so the last column (the city) comes first.
onehot_columns = list(city_onehot.columns)
fixed_columns = onehot_columns[-1:] + onehot_columns[:-1]
city_onehot = city_onehot[fixed_columns]

# Average the indicators per city, giving the share of that city's venues
# that fall into each category.
city_means = city_onehot.groupby('City').mean()
city_grouped = city_means.reset_index()
city_grouped

Unnamed: 0,City,Accessories Store,Adult Boutique,American Restaurant,Amphitheater,Art Gallery,Art Museum,Asian Restaurant,Athletics & Sports,BBQ Joint,...,Trail,Vegetarian / Vegan Restaurant,Volleyball Court,Waterfront,Wine Bar,Wine Shop,Winery,Yoga Studio,Zoo,Zoo Exhibit
0,Chicago,0.0,0.0,0.01,0.01,0.01,0.01,0.0,0.0,0.0,...,0.04,0.01,0.0,0.03,0.0,0.0,0.0,0.01,0.01,0.0
1,New Orleans,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.02,...,0.0,0.0,0.0,0.0,0.03,0.01,0.01,0.0,0.0,0.0
2,New York City,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.01,0.0,...,0.01,0.0,0.01,0.0,0.0,0.02,0.0,0.01,0.0,0.0
3,San Diego,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.01,...,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.04
4,San Francisco,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.0,...,0.01,0.0,0.0,0.0,0.01,0.02,0.0,0.04,0.0,0.0


In [40]:
# Show the row labelled 1 as a one-row DataFrame (the list selector keeps it a frame).
city_grouped.loc[[1]]

Unnamed: 0,City,Accessories Store,Adult Boutique,American Restaurant,Amphitheater,Art Gallery,Art Museum,Asian Restaurant,Athletics & Sports,BBQ Joint,...,Trail,Vegetarian / Vegan Restaurant,Volleyball Court,Waterfront,Wine Bar,Wine Shop,Winery,Yoga Studio,Zoo,Zoo Exhibit
1,New Orleans,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.02,...,0.0,0.0,0.0,0.0,0.03,0.01,0.01,0.0,0.0,0.0


In [41]:
# Show the category frequencies with one column per city. The transposed
# view is display-only: city_grouped is deliberately not rebound, because
# the later cells select city_grouped['City'] and iterate over one row per
# city, which fails on the transposed frame.
city_grouped.transpose()

Unnamed: 0,0,1,2,3,4
City,Chicago,New Orleans,New York City,San Diego,San Francisco
Accessories Store,0,0,0,0.01,0
Adult Boutique,0,0,0,0,0.01
American Restaurant,0.01,0.02,0,0.01,0
Amphitheater,0.01,0,0,0.01,0
...,...,...,...,...,...
Wine Shop,0,0.01,0.02,0,0.02
Winery,0,0.01,0,0,0
Yoga Studio,0.01,0,0.01,0,0.04
Zoo,0.01,0,0,0.01,0


In [42]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first entry of ``row`` is skipped, the remaining entries are
    sorted by value in descending order, and the index labels of the
    first ``num_top_venues`` of them are returned as an array. Fewer
    labels come back when the row has fewer entries than requested.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    top_labels = ranked.index.values
    return top_labels[:num_top_venues]

In [45]:
# How many of the most frequent categories to list per city.
num_top_venues = 10

# Ordinal suffixes for ranks 1-3; every later rank uses 'th'.
indicators = ['st', 'nd', 'rd']

# Build the headers: 'City', then one "<rank> Most Common Venue" per rank.
columns = ['City']
for rank in range(1, num_top_venues + 1):
    # Explicit bounds check instead of catching an exception from the lookup.
    suffix = indicators[rank - 1] if rank <= len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(rank, suffix))

# Create the result frame with one row per city.
city_venues_sorted = pd.DataFrame(columns=columns)
city_venues_sorted['City'] = city_grouped['City']

# Fill every row with that city's top categories, most frequent first.
for ind in range(city_grouped.shape[0]):
    city_venues_sorted.iloc[ind, 1:] = return_most_common_venues(city_grouped.iloc[ind, :], num_top_venues)

city_venues_sorted.head()

Unnamed: 0,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Chicago,Hotel,Coffee Shop,Park,Grocery Store,Trail,Theater,Liquor Store,Waterfront,Spa,Seafood Restaurant
1,New Orleans,Hotel,Cajun / Creole Restaurant,Coffee Shop,Park,Sandwich Place,Italian Restaurant,French Restaurant,Wine Bar,Cocktail Bar,Jazz Club
2,New York City,Park,Bookstore,Gourmet Shop,Ice Cream Shop,Scenic Lookout,Movie Theater,Garden,Theater,Pizza Place,Pier
3,San Diego,Park,Beach,Zoo Exhibit,Brewery,Fast Food Restaurant,Farmers Market,Seafood Restaurant,Sushi Restaurant,Taco Place,Coffee Shop
4,San Francisco,Park,Coffee Shop,Bookstore,Bakery,Yoga Studio,Market,Brewery,Playground,Performing Arts Venue,Climbing Gym


In [46]:
# Number of clusters to split the cities into.
kclusters = 2

# Keep only the category-frequency columns as clustering features. The
# `columns=` keyword is used because recent pandas releases no longer accept
# the axis as a second positional argument to drop().
city_grouped_clustering = city_grouped.drop(columns='City')

# Run k-means; random_state is fixed so repeated runs give the same labels.
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(city_grouped_clustering)

# Show the cluster label assigned to each row (at most the first ten).
kmeans.labels_[0:10]

array([0, 0, 1, 0, 1], dtype=int32)

In [47]:
# Add the clustering labels as the first column of the top-venues table.
# NOTE(review): DataFrame.insert raises if 'Cluster Labels' already exists,
# so re-running this cell requires rebuilding city_venues_sorted first.
city_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [48]:
# Extend the city table with the cluster label and top-venue columns,
# matching rows on the city name. join() returns a new frame, so citydf
# itself is left unchanged.
venues_by_city = city_venues_sorted.set_index('City')
city_merged = citydf.join(venues_by_city, on='City')

city_merged.head()

Unnamed: 0,City,Overall Rank,Food Rank,Nighlife Rank,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,New York City,1,4,4,40.712728,-74.006015,1,Park,Bookstore,Gourmet Shop,Ice Cream Shop,Scenic Lookout,Movie Theater,Garden,Theater,Pizza Place,Pier
1,San Francisco,2,1,6,37.779026,-122.419906,1,Park,Coffee Shop,Bookstore,Bakery,Yoga Studio,Market,Brewery,Playground,Performing Arts Venue,Climbing Gym
2,New Orleans,3,2,3,29.949932,-90.070116,0,Hotel,Cajun / Creole Restaurant,Coffee Shop,Park,Sandwich Place,Italian Restaurant,French Restaurant,Wine Bar,Cocktail Bar,Jazz Club
3,San Diego,4,12,9,32.71742,-117.162773,0,Park,Beach,Zoo Exhibit,Brewery,Fast Food Restaurant,Farmers Market,Seafood Restaurant,Sushi Restaurant,Taco Place,Coffee Shop
4,Chicago,5,3,7,41.875562,-87.624421,0,Hotel,Coffee Shop,Park,Grocery Store,Trail,Theater,Liquor Store,Waterfront,Spa,Seafood Restaurant
