# Project - Final Phase

## Import needed packages

In [118]:
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium
import requests
from pandas.io.json import json_normalize

## Load the information from an excel sheet

In [119]:
riyadh_neighborhoods = pd.read_excel('Neighboorhood_Riyadh.xlsx', sheet_name='Sheet1')
riyadh_neighborhoods

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Umm Salim,24.633751,46.694103
1,Al Izdihar,24.780655,46.700005
2,At Taawun,24.773076,46.68197
3,Al Jazirah,24.664702,46.77897
4,As Sulimaniyah,24.698527,46.667436
5,As Suwaidi,24.590827,46.661607
6,As Sahafah,24.796546,46.602523
7,Al Olaya,24.692317,46.64848
8,Al Falah,24.797057,46.691701
9,Al Mursalat,24.748866,46.672379


## Locating the center of Riyadh, Saudi Arabia

In [120]:
address = 'Riyadh'
geolocator = Nominatim(user_agent="Mohammed-Aljarbou")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Riyadh are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Riyadh are 24.6319692, 46.7150648.


## Creating a map cetered at Riyadh with nieghborhoods highlighted

In [121]:
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)
for lat, lng, neighborhood in zip(riyadh_neighborhoods['Latitude'], riyadh_neighborhoods['Longitude'], riyadh_neighborhoods['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

## Initilizing my credential for FourSquare

In [122]:
CLIENT_ID = 'FMBS3BEYLVOH1ZLIJGEA5JUIWE25QYTX5FKMBJCGMH3H05FM' # your Foursquare ID
CLIENT_SECRET = 'OTOLEB5UTIQ55MX0LKUPVNT4W1XMMHXBJGRNMEDC253BQOYJ' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: FMBS3BEYLVOH1ZLIJGEA5JUIWE25QYTX5FKMBJCGMH3H05FM
CLIENT_SECRET:OTOLEB5UTIQ55MX0LKUPVNT4W1XMMHXBJGRNMEDC253BQOYJ


## A method that will return the venues associated with a given neighborhood

In [143]:
def getNearbyVenues(names, latitudes, longitudes, radius=1500 , LIMIT = 2000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
        results = requests.get(url).json()["response"]['groups'][0]['items']
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

## Requesting venues for each neighborhood

In [144]:
riyadh_venues = getNearbyVenues(names=riyadh_neighborhoods['Neighborhood'],
                                   latitudes=riyadh_neighborhoods['Latitude'],
                                   longitudes=riyadh_neighborhoods['Longitude']
                                  )

In [145]:
riyadh_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Al Falah,99,99,99,99,99,99
Al Izdihar,100,100,100,100,100,100
Al Jazirah,33,33,33,33,33,33
Al Maizilah,47,47,47,47,47,47
Al Malaz,100,100,100,100,100,100
Al Masani,52,52,52,52,52,52
Al Maseef,100,100,100,100,100,100
Al Mughrizat,100,100,100,100,100,100
Al Mursalat,100,100,100,100,100,100
Al Muruj,82,82,82,82,82,82


In [146]:
print('There are {} uniques categories.'.format(len(riyadh_venues['Venue Category'].unique())))

There are 216 uniques categories.


Cool we got a nice distribution of venues in overall.
Now let's break the datafram to get a deep look

In [147]:
riyadh_onehot = pd.get_dummies(riyadh_venues[['Venue Category']], prefix="", prefix_sep="")
riyadh_onehot['Neighborhood'] = riyadh_venues['Neighborhood']
fixed_columns = [riyadh_onehot.columns[-1]] + list(riyadh_onehot.columns[:-1])
riyadh_onehot = riyadh_onehot[fixed_columns]

In [148]:
matrix = riyadh_onehot.groupby('Neighborhood').sum()

In [149]:
riyadh_grouped = riyadh_onehot.groupby('Neighborhood').mean().reset_index()
riyadh_grouped

Unnamed: 0,Neighborhood,ATM,Accessories Store,Afghan Restaurant,American Restaurant,Amphitheater,Antique Shop,Arcade,Arepa Restaurant,Art Gallery,...,Track,Trail,Turkish Restaurant,Vacation Rental,Video Game Store,Watch Shop,Wedding Hall,Wings Joint,Women's Store,Yoga Studio
0,Al Falah,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Al Izdihar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0
2,Al Jazirah,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Al Maizilah,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.021277,0.0,0.0,0.0,0.021277,0.0,0.0,0.0
4,Al Malaz,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Al Masani,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.019231,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Al Maseef,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Al Mughrizat,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Al Mursalat,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0
9,Al Muruj,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.012195,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [150]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:num_top_venues]
def getBestBusinessNeighborhood(businessname):
    return matrix[businessname].idxmin()

## Let's get the top 10 venues for each neighborhood

In [151]:
num_top_venues = 10
indicators = ['st', 'nd', 'rd']
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = riyadh_grouped['Neighborhood']

for ind in np.arange(riyadh_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(riyadh_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Al Falah,Coffee Shop,Dessert Shop,Café,Falafel Restaurant,Donut Shop,Burger Joint,Pharmacy,Pizza Place,Supermarket,Park
1,Al Izdihar,Coffee Shop,Donut Shop,Gift Shop,Pizza Place,Restaurant,Ice Cream Shop,Pharmacy,Italian Restaurant,Plaza,Café
2,Al Jazirah,Furniture / Home Store,Fast Food Restaurant,Electronics Store,Coffee Shop,Department Store,Park,Italian Restaurant,Boxing Gym,Bookstore,Mobile Phone Shop
3,Al Maizilah,Gym / Fitness Center,Donut Shop,Dessert Shop,Café,Pharmacy,Bookstore,Market,Fast Food Restaurant,Park,Pizza Place
4,Al Malaz,Coffee Shop,Hotel,Asian Restaurant,Indian Restaurant,Fast Food Restaurant,Middle Eastern Restaurant,Convenience Store,Donut Shop,Ice Cream Shop,Bank
5,Al Masani,Middle Eastern Restaurant,Coffee Shop,Dessert Shop,Bakery,Supermarket,Pizza Place,Juice Bar,Ice Cream Shop,Mobile Phone Shop,Market
6,Al Maseef,Coffee Shop,Dessert Shop,Donut Shop,Juice Bar,Italian Restaurant,Pizza Place,Fast Food Restaurant,Grocery Store,Breakfast Spot,Furniture / Home Store
7,Al Mughrizat,Coffee Shop,Bakery,Juice Bar,Pharmacy,Burger Joint,Dessert Shop,Café,Jewelry Store,Supermarket,Cosmetics Shop
8,Al Mursalat,Coffee Shop,Middle Eastern Restaurant,Breakfast Spot,Café,Pizza Place,Cosmetics Shop,Ice Cream Shop,Pharmacy,Candy Store,Fried Chicken Joint
9,Al Muruj,Coffee Shop,Café,Middle Eastern Restaurant,Ice Cream Shop,Dessert Shop,Donut Shop,Gym,Supermarket,Bakery,Falafel Restaurant


## Let's also look at how neighborhoods come together when we cluster them into 10 means

In [160]:
kclusters = 10
riyadh_grouped_clustering = riyadh_grouped.drop('Neighborhood', 1)
kmeans = KMeans(n_clusters=kclusters, random_state=42).fit(riyadh_grouped_clustering)
label = kmeans.labels_.tolist()

In [161]:
riyadh_merged = riyadh_neighborhoods
riyadh_merged['Cluster Labels'] = label
riyadh_merged = riyadh_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
riyadh_merged.head()

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Umm Salim,24.633751,46.694103,9,Coffee Shop,Breakfast Spot,Middle Eastern Restaurant,Café,Park,Shopping Mall,Fast Food Restaurant,Market,Convenience Store,Falafel Restaurant
1,Al Izdihar,24.780655,46.700005,6,Coffee Shop,Donut Shop,Gift Shop,Pizza Place,Restaurant,Ice Cream Shop,Pharmacy,Italian Restaurant,Plaza,Café
2,At Taawun,24.773076,46.68197,1,Coffee Shop,Dessert Shop,Café,Pizza Place,Donut Shop,Restaurant,Ice Cream Shop,Bookstore,Breakfast Spot,Middle Eastern Restaurant
3,Al Jazirah,24.664702,46.77897,9,Furniture / Home Store,Fast Food Restaurant,Electronics Store,Coffee Shop,Department Store,Park,Italian Restaurant,Boxing Gym,Bookstore,Mobile Phone Shop
4,As Sulimaniyah,24.698527,46.667436,0,Coffee Shop,Dessert Shop,Jewelry Store,Burger Joint,Middle Eastern Restaurant,Clothing Store,Fast Food Restaurant,Art Gallery,Juice Bar,Gift Shop


In [162]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]
markers_colors = []
for lat, lon, poi, cluster in zip(riyadh_merged['Latitude'], riyadh_merged['Longitude'], riyadh_merged['Neighborhood'], riyadh_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Test our application

### What is the best neighborhood for a pizza place?

In [156]:
getBestBusinessNeighborhood('Pizza Place')

'Al Jazirah'

### What is the best neighborhood for a Coffee Shop?

In [163]:
getBestBusinessNeighborhood('Coffee Shop')

'An Nazim'

### What is the best neighborhood for a Donut Shop?

In [164]:
getBestBusinessNeighborhood('Donut Shop')

'Al Jazirah'

### What is the best neighborhood for a Fast Food Restaurant?

In [165]:
getBestBusinessNeighborhood('Fast Food Restaurant')

'Al Mursalat'

### What is the best neighborhood for an Electronics Store?

In [167]:
getBestBusinessNeighborhood('Electronics Store')

'Al Falah'