# Final Capstone Project

This notebook applies the skills and tools for working with location data to explore a geographical area. The capstone leaves room for creativity: the idea is to leverage Foursquare location data to explore or compare neighborhoods or cities, or to define a problem that the Foursquare location data can help solve.

The main goal of this project is to segment and cluster the most popular cities of India by analyzing their most popular venues and places.

In [2]:
import json
import os

import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests # library to handle requests
import folium # map rendering library
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
from sklearn.cluster import KMeans

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [42]:
# Notebook-level settings: the city under study and the country it belongs to.
city, country = "Mumbai", "India"

## The first step is to gather the location data for the two cities

In [9]:
# Resolve the configured city to coordinates with the Nominatim geocoder.
address = city
geolocator = Nominatim(user_agent='most_visited')
location = geolocator.geocode(address)

# geocode() returns None when nothing matches; fail with a clear message
# instead of an AttributeError on `location.latitude`.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude

# Use the configured city in the message rather than a hard-coded name.
print('The cordinates of {} are:'.format(city))
print('\n latitude: ', latitude)
print('longitude: ', longitude)

The cordinates of Mumbai are:

 latitude:  18.9387711
longitude:  72.8353355


In [19]:
# Single-row table holding the geocoded coordinates together with the
# city / country labels; built in one constructor call.
cities = pd.DataFrame({
    'Latitude': pd.Series(latitude),
    'Longitude': pd.Series(longitude),
    'City': pd.Series(city),
    'Country': pd.Series(country),
})
cities.head()

Unnamed: 0,Latitude,Longitude,City,Country
0,18.938771,72.835335,Mumbai,India


In [20]:
# Create map of the world
world_map = folium.Map(location=[0, 0], zoom_start=1)

# Add cities as markers.
# The loop variables carry a `marker_` prefix so that iterating does not
# overwrite the notebook-level `city` / `country` settings.
for marker_lat, marker_lng, marker_country, marker_city in zip(cities['Latitude'],
                                                               cities['Longitude'],
                                                               cities['Country'],
                                                               cities['City']):
    popup = folium.Popup('{}, {}'.format(marker_city, marker_country), parse_html=True)
    # `parse_html` is a Popup option only, so it is not passed to CircleMarker.
    folium.CircleMarker(
        [marker_lat, marker_lng],
        radius=5,
        popup=popup,
        color='red',
        fill=True,
        fill_color='#ff6464',
        fill_opacity=0.7).add_to(world_map)

world_map

### Foursquare Credentials

In [21]:

# Foursquare credentials are read from the environment so they are never
# stored in the notebook. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel. Any key that was ever
# saved inside a shared notebook should be regenerated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # Foursquare Secret
VERSION = '20180605' # Foursquare API version

if not CLIENT_ID or not CLIENT_SECRET:
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'Foursquare requests will fail.')

# Show only a short prefix of the ID and never echo the secret, so the saved
# cell output cannot leak the credentials.
print('Credentials:')
print('CLIENT_ID: ' + CLIENT_ID[:4] + '...')
print('CLIENT_SECRET: ' + ('********' if CLIENT_SECRET else '') + '...')

Credentials:
CLIENT_ID: <redacted>...
CLIENT_SECRET: <redacted>...


In [22]:
def get_category_type(row):
    """Return the name of the first category attached to a venue record.

    Parameters
    ----------
    row : mapping-like
        Object indexable by key. The category list is read from
        ``row['categories']``; if that key is missing, it is read from
        ``row['venue.categories']`` instead.

    Returns
    -------
    str or None
        ``name`` of the first category, or ``None`` when the category list
        is empty or ``None``.

    Raises
    ------
    KeyError
        If neither ``'categories'`` nor ``'venue.categories'`` is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a
        # real problem and is allowed to propagate.
        categories_list = row['venue.categories']

    if categories_list is None or len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [25]:
def getNearbyVenues(city, latitudes, longitudes, radius=30000, limit=100, timeout=30):
    """Query the Foursquare ``venues/explore`` endpoint around each location.

    Uses the module-level ``CLIENT_ID``, ``CLIENT_SECRET`` and ``VERSION``
    values for authentication.

    Parameters
    ----------
    city : iterable
        Labels for the locations; each label is printed and stored in the
        ``City`` column.
    latitudes, longitudes : iterable
        Coordinates of each location, aligned with ``city``.
    radius : int, default 30000
        Value sent as the ``radius`` query parameter.
    limit : int, default 100
        Value sent as the ``limit`` query parameter.
    timeout : float, default 30
        Seconds to wait for each HTTP request.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns ``City``, ``Latitude``,
        ``Longitude``, ``Venue``, ``Venue Latitude``, ``Venue Longitude`` and
        ``Venue Category``. The frame is empty (but keeps these columns)
        when no venue is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    columns = ['City',
               'Latitude',
               'Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    rows = []

    print('Querying venues from: ', end='')

    for name, lat, lng in zip(city, latitudes, longitudes):
        print(name)

        # Let requests build and escape the query string instead of
        # formatting the URL by hand.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'limit': limit,
                'radius': radius,
            },
            timeout=timeout)
        # Surface bad credentials / quota errors as an HTTP error rather than
        # as a KeyError while digging through the JSON payload.
        response.raise_for_status()

        groups = response.json().get('response', {}).get('groups', [])
        items = groups[0].get('items', []) if groups else []

        # Keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            # A venue may come back without any category; store None then.
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor also works for zero rows.
    return pd.DataFrame(rows, columns=columns)

In [28]:
# Fetch the venues around every location listed in `cities`.
mumbai = getNearbyVenues(cities['City'], cities['Latitude'], cities['Longitude'])

Querying venues from: Mumbai


In [29]:
# Preview the first rows of the venue table returned by getNearbyVenues.
mumbai.head()

Unnamed: 0,City,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Mumbai,18.938771,72.835335,Taj Mahal Palace & Tower,18.922306,72.833578,Hotel
1,Mumbai,18.938771,72.835335,Wankhede Stadium,18.938792,72.825944,Cricket Ground
2,Mumbai,18.938771,72.835335,Food for Thought,18.932031,72.831667,Café
3,Mumbai,18.938771,72.835335,Nariman Point,18.929183,72.822232,Scenic Lookout
4,Mumbai,18.938771,72.835335,Marine Drive,18.941221,72.823261,Scenic Lookout


In [33]:
# (number of rows, number of columns) of the venue table.
mumbai.shape

(100, 7)

In [36]:
# Number of venues recorded under each category.
mumbai.groupby('Venue Category')['Venue'].count()

Venue Category
American Restaurant              1
Asian Restaurant                 1
Bakery                           4
Bar                              2
Beach                            1
Bengali Restaurant               1
Brewery                          3
Burger Joint                     1
Café                             4
Chinese Restaurant               3
Clothing Store                   2
Coffee Shop                      4
Cricket Ground                   1
Cupcake Shop                     2
Deli / Bodega                    2
Dessert Shop                     3
Diner                            1
Donut Shop                       2
Fast Food Restaurant             3
Golf Course                      1
Gourmet Shop                     1
Grocery Store                    1
Gym / Fitness Center             1
History Museum                   1
Hotel                            8
Ice Cream Shop                   2
Indian Restaurant                6
Italian Restaurant               1
Juice

In [38]:
# `all_venues` is derived here from the `mumbai` venue table so that this
# cell also runs on a fresh kernel (Restart & Run All).
all_venues = mumbai.copy()

# Applying OH Encoding by 'creating dummies'.
# dtype=int keeps the indicator columns as 0/1 on every pandas version.
venues_one_hot = pd.get_dummies(all_venues[['Venue Category']], prefix="", prefix_sep="", dtype=int)

# Add the 'City' column back to dataset
venues_one_hot['City Name'] = all_venues['City']

# Move City column to the first column
fixed_columns = [venues_one_hot.columns[-1]] + list(venues_one_hot.columns[:-1])
venues_one_hot = venues_one_hot[fixed_columns]

venues_one_hot.head()

Unnamed: 0,City Name,American Restaurant,Asian Restaurant,Bakery,Bar,Beach,Bengali Restaurant,Brewery,Burger Joint,Café,Chinese Restaurant,Clothing Store,Coffee Shop,Cricket Ground,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner,Donut Shop,Fast Food Restaurant,Golf Course,Gourmet Shop,Grocery Store,Gym / Fitness Center,History Museum,Hotel,Ice Cream Shop,Indian Restaurant,Italian Restaurant,Juice Bar,Lounge,Mediterranean Restaurant,Monument / Landmark,Mughlai Restaurant,Multiplex,Music Venue,Park,Performing Arts Venue,Pizza Place,Playground,Pub,Restaurant,Salad Place,Scenic Lookout,Seafood Restaurant,Shopping Mall,Snack Place,South Indian Restaurant,Spa,Stadium,Thai Restaurant,Theater,Toy / Game Store,Vegetarian / Vegan Restaurant
0,Mumbai,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Mumbai,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Mumbai,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Mumbai,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
4,Mumbai,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0


In [39]:
# Per-city mean of every one-hot column, i.e. the share of the city's
# venues that falls into each category.
venues_grouped = (
    venues_one_hot
    .groupby('City Name')
    .mean()
    .reset_index()
)
venues_grouped.head()

Unnamed: 0,City Name,American Restaurant,Asian Restaurant,Bakery,Bar,Beach,Bengali Restaurant,Brewery,Burger Joint,Café,Chinese Restaurant,Clothing Store,Coffee Shop,Cricket Ground,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner,Donut Shop,Fast Food Restaurant,Golf Course,Gourmet Shop,Grocery Store,Gym / Fitness Center,History Museum,Hotel,Ice Cream Shop,Indian Restaurant,Italian Restaurant,Juice Bar,Lounge,Mediterranean Restaurant,Monument / Landmark,Mughlai Restaurant,Multiplex,Music Venue,Park,Performing Arts Venue,Pizza Place,Playground,Pub,Restaurant,Salad Place,Scenic Lookout,Seafood Restaurant,Shopping Mall,Snack Place,South Indian Restaurant,Spa,Stadium,Thai Restaurant,Theater,Toy / Game Store,Vegetarian / Vegan Restaurant
0,Mumbai,0.01,0.01,0.04,0.02,0.01,0.01,0.03,0.01,0.04,0.03,0.02,0.04,0.01,0.02,0.02,0.03,0.01,0.02,0.03,0.01,0.01,0.01,0.01,0.01,0.08,0.02,0.06,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.02,0.02,0.01,0.01,0.02,0.01,0.05,0.04,0.02,0.01,0.01,0.01,0.01,0.01,0.02,0.01,0.01


In [40]:
# How many of the most frequent venue categories to list per city.
num_top_venues = 5

for name in venues_grouped['City Name']:
    print("------------ "+name+" ------------")
    # Transpose the city's row so that every category becomes a row.
    city_freq = venues_grouped[venues_grouped['City Name'] == name].T.reset_index()
    city_freq.columns = ['Venue Category','Freq']
    # Drop the leading 'City Name' row. The explicit copy makes the column
    # assignment below write to an independent frame instead of a slice,
    # which avoids pandas' SettingWithCopyWarning.
    city_freq = city_freq.iloc[1:].copy()
    city_freq['Freq'] = city_freq['Freq'].astype(float)
    city_freq = city_freq.round({'Freq': 2})
    print(city_freq.sort_values('Freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

------------ Mumbai ------------
       Venue Category  Freq
0               Hotel  0.08
1   Indian Restaurant  0.06
2      Scenic Lookout  0.05
3              Bakery  0.04
4  Seafood Restaurant  0.04


