<h1 align=center><font size = 5>The battle of neighborhoods in Benin</font></h1>

#### Import the necessary Libraries

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

from bs4 import BeautifulSoup
import requests
import pandas as pd

print('Libraries imported.')

Libraries imported.


#### Scrape the information about Benin Department Borough and Region

In [2]:
source = requests.get('https://en.wikipedia.org/wiki/Benin').text

In [3]:
dep_com_table = pd.read_html(source, header=0, attrs={"class":"wikitable sortable"})[0]
dep_com_table

Unnamed: 0,Map key,Department,Capital[36] [37],Population (2013),Area (km2)[38],FormerDepartment,Region,Sub-Region
0,2,Alibori,Kandi,868046,26242,Borgou,North,North East
1,1,Atakora,Natitingou,769337,20499,Atakora,North,North West
2,10,Atlantique,Allada,1396548,3233,Atlantique,South,South Centre
3,4,Borgou,Parakou,1202095,25856,Borgou,North,North East
4,5,Collines,Dassa-Zoumé,716558,13931,Zou,North,North Centre
5,6,Kouffo,Aplahoué,741895,2404,Mono,South,South West
6,3,Donga,Djougou,542605,11126,Atakora,North,North West
7,11,Littoral,Cotonou,678874,79,Atlantique,South,South Centre
8,9,Mono,Lokossa,495307,1605,Mono,South,South West
9,12,Ouémé,Porto-Novo,1096850,1281,Ouémé,South,South East


We drop the columns we don't need.

In [4]:
dep_com_table.columns

Index(['Map key', 'Department', 'Capital[36] [37]', 'Population (2013)',
       'Area (km2)[38]', 'FormerDepartment', 'Region', 'Sub-Region'],
      dtype='object')

In [5]:
Benin_df = dep_com_table.drop(['Map key', 'Population (2013)','Area (km2)[38]', 'FormerDepartment', 'Region']
                        , axis=1)
Benin_df

Unnamed: 0,Department,Capital[36] [37],Sub-Region
0,Alibori,Kandi,North East
1,Atakora,Natitingou,North West
2,Atlantique,Allada,South Centre
3,Borgou,Parakou,North East
4,Collines,Dassa-Zoumé,North Centre
5,Kouffo,Aplahoué,South West
6,Donga,Djougou,North West
7,Littoral,Cotonou,South Centre
8,Mono,Lokossa,South West
9,Ouémé,Porto-Novo,South East


Define a new Benin dataframe with department, Borough, and Region

In [6]:
Benin_df.rename(columns={"Capital[36] [37]":"Borough", "Sub-Region":"Region"}, inplace=True)
Benin_df

Unnamed: 0,Department,Borough,Region
0,Alibori,Kandi,North East
1,Atakora,Natitingou,North West
2,Atlantique,Allada,South Centre
3,Borgou,Parakou,North East
4,Collines,Dassa-Zoumé,North Centre
5,Kouffo,Aplahoué,South West
6,Donga,Djougou,North West
7,Littoral,Cotonou,South Centre
8,Mono,Lokossa,South West
9,Ouémé,Porto-Novo,South East


In [7]:
print('The Benin_df dataframe has {} departments and {} boroughs.'.format(
        len(Benin_df['Department'].unique()), len(Benin_df['Borough'].unique()))
     )

The Benin_df dataframe has 12 departments and 12 boroughs.


### Creating a Geocoder Dataframe

We choose to build the dataframe with the sample csv file that has the geographical coordinates we obtain from https://www.geopostcodes.com/Benin. You can download it from there.

In [8]:
# Checking current working directory
os.getcwd()

'/resources/labs/DP0701EN'

In [9]:
# Changing to new working directory
os.chdir(r'/resources/labs/DP0701EN')

In [10]:
geo_coord_table=pd.read_csv("benin_places.csv", sep=';')
geo_coord_table.head()

Unnamed: 0,iso,country,language,id,region1,region2,region3,region4,locality,postcode,suburb,latitude,longitude,elevation,iso2,fips,nuts,hasc,stat,timezone,utc,dst
0,BJ,Bénin,FR,1000127461,Alibori,Banikoara,,,Banikoara,,,11.29397,2.439285,311,BJ-AL,BN07,,BJ.AL.BA,,Africa/Porto-Novo,+01:00,+01:00
1,BJ,Bénin,FR,1000127462,Alibori,Banikoara,,,Founougo,,,11.46599,2.539781,287,BJ-AL,BN07,,BJ.AL.BA,,Africa/Porto-Novo,+01:00,+01:00
2,BJ,Bénin,FR,1000127463,Alibori,Banikoara,,,Gomparou,,,11.328576,2.471542,291,BJ-AL,BN07,,BJ.AL.BA,,Africa/Porto-Novo,+01:00,+01:00
3,BJ,Bénin,FR,1000127464,Alibori,Banikoara,,,Goumori,,,11.178435,2.299982,304,BJ-AL,BN07,,BJ.AL.BA,,Africa/Porto-Novo,+01:00,+01:00
4,BJ,Bénin,FR,1000127465,Alibori,Banikoara,,,Kokey,,,11.183333,2.283333,308,BJ-AL,BN07,,BJ.AL.BA,,Africa/Porto-Novo,+01:00,+01:00


In [11]:
geo_coord_table.columns

Index(['iso', 'country', 'language', 'id', 'region1', 'region2', 'region3',
       'region4', 'locality', 'postcode', 'suburb', 'latitude', 'longitude',
       'elevation', 'iso2', 'fips', 'nuts', 'hasc', 'stat', 'timezone', 'utc',
       'dst'],
      dtype='object')

In [12]:
geo_coord_df = geo_coord_table.drop(['iso' ,'country', 'language', 'id', 'region3','region4','postcode', 'suburb',
                                     'elevation', 'iso2', 'fips', 'nuts', 'hasc', 'stat', 'timezone', 'utc','dst'], axis=1)
geo_coord_df.head()

Unnamed: 0,region1,region2,locality,latitude,longitude
0,Alibori,Banikoara,Banikoara,11.29397,2.439285
1,Alibori,Banikoara,Founougo,11.46599,2.539781
2,Alibori,Banikoara,Gomparou,11.328576,2.471542
3,Alibori,Banikoara,Goumori,11.178435,2.299982
4,Alibori,Banikoara,Kokey,11.183333,2.283333


Conform the columns names

In [13]:
geo_coord_df.columns

Index(['region1', 'region2', 'locality', 'latitude', 'longitude'], dtype='object')

In [14]:
geo_coord_df.rename(columns={"country":"Country", "region1":"Department","region2":"Region", "locality":"Neighborhood",
                            "latitude":"Latitude","longitude":"Longitude"}, inplace=True)
geo_coord_df.head()

Unnamed: 0,Department,Region,Neighborhood,Latitude,Longitude
0,Alibori,Banikoara,Banikoara,11.29397,2.439285
1,Alibori,Banikoara,Founougo,11.46599,2.539781
2,Alibori,Banikoara,Gomparou,11.328576,2.471542
3,Alibori,Banikoara,Goumori,11.178435,2.299982
4,Alibori,Banikoara,Kokey,11.183333,2.283333


In [15]:
geo_coord_df

Unnamed: 0,Department,Region,Neighborhood,Latitude,Longitude
0,Alibori,Banikoara,Banikoara,11.29397,2.439285
1,Alibori,Banikoara,Founougo,11.46599,2.539781
2,Alibori,Banikoara,Gomparou,11.328576,2.471542
3,Alibori,Banikoara,Goumori,11.178435,2.299982
4,Alibori,Banikoara,Kokey,11.183333,2.283333
5,Alibori,Banikoara,Kokiborou,11.202332,2.32093
6,Alibori,Banikoara,Ounet,11.227149,2.404817
7,Alibori,Banikoara,Sompérékou,11.322743,2.491331
8,Alibori,Banikoara,Soroko,11.327791,2.466952
9,Alibori,Banikoara,Toura,11.243611,2.383056


In [16]:
print('The table dataframe has {} departments {} boroughs and {} neighborhoods.'.format(
        len(geo_coord_df['Department'].unique()), len(geo_coord_df['Region'].unique()),
        geo_coord_df.shape[0])
     )

The table dataframe has 3 departments 17 boroughs and 100 neighborhoods.


In [17]:
geo_coord_df['Department'].unique()

array(['Alibori', 'Atacora', 'Atlantique'], dtype=object)

<h1 align=center><font size = 5>Segmenting and Clustering Neighborhoods of a region in the northern part of the country</font></h1>

Let's explore the region of Banikoara in the department of Alibori which is located in the north of Benin

In [18]:
banikoara_df = geo_coord_df[geo_coord_df['Region'] == "Banikoara"].reset_index(drop=True)
banikoara_df

Unnamed: 0,Department,Region,Neighborhood,Latitude,Longitude
0,Alibori,Banikoara,Banikoara,11.29397,2.439285
1,Alibori,Banikoara,Founougo,11.46599,2.539781
2,Alibori,Banikoara,Gomparou,11.328576,2.471542
3,Alibori,Banikoara,Goumori,11.178435,2.299982
4,Alibori,Banikoara,Kokey,11.183333,2.283333
5,Alibori,Banikoara,Kokiborou,11.202332,2.32093
6,Alibori,Banikoara,Ounet,11.227149,2.404817
7,Alibori,Banikoara,Sompérékou,11.322743,2.491331
8,Alibori,Banikoara,Soroko,11.327791,2.466952
9,Alibori,Banikoara,Toura,11.243611,2.383056


In [19]:
address = 'Banikoara, Benin'

geolocator = Nominatim(user_agent="ba_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Banikoara are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Banikoara are 11.32595885, 2.47303133750907.


In [20]:
# create map of Banikoara using latitude and longitude values
map_banikoara = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(banikoara_df['Latitude'], banikoara_df['Longitude'], banikoara_df['Region']
                                           , banikoara_df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_banikoara)  
    
map_banikoara

#### Define Foursquare Credentials and Version

In [21]:
# @hidden_cell
CLIENT_ID = 'NBFU034EIQA1KZN1031VY3KNP0ZWRDPTZTER3SJDAEAAN1GZ' # your Foursquare ID
CLIENT_SECRET = 'L402VNMJWLJHRI1DZBGORHSCORX4PRN3E3O0YXAMJUPSFSQZ' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

In [22]:
#Get the neighborhood's name.

banikoara_df.loc[0, 'Neighborhood']

'Banikoara'

In [23]:
#Get the neighborhood's latitude and longitude values.

neighborhood_latitude = banikoara_df.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = banikoara_df.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = banikoara_df.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Banikoara are 11.29397, 2.439285.


#### Now, let's get the top 100 venues that are in Banikoara within a radius of 50000 meters.

The choice of such big radius has been conditioned by the scarcity of data returned from Foursquare API. In fact, after many trials with small meters of radius, the radius of 50000 has given some consistent data with some inaccuracy due to interference with other regions. However we have approached this in our analysis

In [24]:
LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 50000 # define radius

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=NBFU034EIQA1KZN1031VY3KNP0ZWRDPTZTER3SJDAEAAN1GZ&client_secret=L402VNMJWLJHRI1DZBGORHSCORX4PRN3E3O0YXAMJUPSFSQZ&v=20180605&ll=11.29397,2.439285&radius=50000&limit=100'

In [25]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5c9e00044c1f672903828ee1'},
 'response': {'headerLocation': 'Current map view',
  'headerFullLocation': 'Current map view',
  'headerLocationGranularity': 'unknown',
  'totalResults': 4,
  'suggestedBounds': {'ne': {'lat': 11.74397045000045,
    'lng': 2.897314870077757},
   'sw': {'lat': 10.84396954999955, 'lng': 1.981255129922243}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '5c20cb71bfc6d0002c78907d',
       'name': 'Marché Central De Banikoara',
       'location': {'lat': 11.295302,
        'lng': 2.4309196,
        'labeledLatLngs': [{'label': 'display',
          'lat': 11.295302,
          'lng': 2.4309196}],
        'distance': 925,
        'cc': 'BJ',
        'city': 'Banikoara',
        'state': 'Alibori',
        'count

In [26]:
# function that extracts the category of the venue

def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [27]:
# Here we select the appropriate venue with regard to our region from the result of our querry 

venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Marché Central De Banikoara,Market,11.295302,2.43092
1,Mercearia Costa e Silva,Food & Drink Shop,11.048706,2.393841
2,Бирина,Food & Drink Shop,11.616608,2.456364
3,Dikmen Caddesi,Plaza,11.362971,2.082923


In [28]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

4 venues were returned by Foursquare.


In [29]:
# Function that returns only relevant information about the nearbyvenues of our chosen neighborhood
def getNearbyVenues(names, latitudes, longitudes, radius=50000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [30]:
banikoara_venues = getNearbyVenues(names=banikoara_df['Neighborhood'],
                                   latitudes=banikoara_df['Latitude'],
                                   longitudes=banikoara_df['Longitude']
                                  )

Banikoara
Founougo
Gomparou
Goumori
Kokey
Kokiborou
Ounet
Sompérékou
Soroko
Toura


In [31]:
print(banikoara_venues.shape)
banikoara_venues.head()

(52, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Banikoara,11.29397,2.439285,Marché Central De Banikoara,11.295302,2.43092,Market
1,Banikoara,11.29397,2.439285,Mercearia Costa e Silva,11.048706,2.393841,Food & Drink Shop
2,Banikoara,11.29397,2.439285,Бирина,11.616608,2.456364,Food & Drink Shop
3,Banikoara,11.29397,2.439285,Dikmen Caddesi,11.362971,2.082923,Plaza
4,Founougo,11.46599,2.539781,Бирина,11.616608,2.456364,Food & Drink Shop


In [32]:
#Let's check how many venues were returned for each neighborhood

banikoara_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Banikoara,4,4,4,4,4,4
Founougo,6,6,6,6,6,6
Gomparou,5,5,5,5,5,5
Goumori,3,3,3,3,3,3
Kokey,6,6,6,6,6,6
Kokiborou,7,7,7,7,7,7
Ounet,6,6,6,6,6,6
Sompérékou,4,4,4,4,4,4
Soroko,5,5,5,5,5,5
Toura,6,6,6,6,6,6


In [33]:
print('There are {} uniques categories.'.format(len(banikoara_venues['Venue Category'].unique())))

There are 9 uniques categories.


## Analyze Each Neighborhood

In [34]:
# one hot encoding
banikoara_onehot = pd.get_dummies(banikoara_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
banikoara_onehot['Neighborhood'] = banikoara_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [banikoara_onehot.columns[-1]] + list(banikoara_onehot.columns[:-1])
banikoara_onehot = banikoara_onehot[fixed_columns]

banikoara_onehot.head()

Unnamed: 0,Neighborhood,Botanical Garden,Breakfast Spot,Bus Stop,Café,Food & Drink Shop,Market,Plaza,Seafood Restaurant,Steakhouse
0,Banikoara,0,0,0,0,0,1,0,0,0
1,Banikoara,0,0,0,0,1,0,0,0,0
2,Banikoara,0,0,0,0,1,0,0,0,0
3,Banikoara,0,0,0,0,0,0,1,0,0
4,Founougo,0,0,0,0,1,0,0,0,0


In [35]:
banikoara_onehot.shape

(52, 10)

In [36]:
#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category# 

banikoara_grouped = banikoara_onehot.groupby('Neighborhood').mean().reset_index()
banikoara_grouped

Unnamed: 0,Neighborhood,Botanical Garden,Breakfast Spot,Bus Stop,Café,Food & Drink Shop,Market,Plaza,Seafood Restaurant,Steakhouse
0,Banikoara,0.0,0.0,0.0,0.0,0.5,0.25,0.25,0.0,0.0
1,Founougo,0.166667,0.0,0.166667,0.0,0.333333,0.166667,0.0,0.166667,0.0
2,Gomparou,0.0,0.0,0.2,0.0,0.4,0.2,0.2,0.0,0.0
3,Goumori,0.0,0.333333,0.0,0.0,0.0,0.0,0.333333,0.0,0.333333
4,Kokey,0.0,0.166667,0.0,0.166667,0.166667,0.166667,0.166667,0.0,0.166667
5,Kokiborou,0.0,0.142857,0.0,0.142857,0.285714,0.142857,0.142857,0.0,0.142857
6,Ounet,0.0,0.166667,0.0,0.166667,0.333333,0.166667,0.166667,0.0,0.0
7,Sompérékou,0.0,0.0,0.0,0.0,0.5,0.25,0.25,0.0,0.0
8,Soroko,0.0,0.0,0.2,0.0,0.4,0.2,0.2,0.0,0.0
9,Toura,0.0,0.166667,0.0,0.0,0.333333,0.166667,0.166667,0.0,0.166667


In [37]:
banikoara_grouped.shape

(10, 10)

In [38]:
##### Let's print each neighborhood along with the top 5 most common venues

num_top_venues = 5

for hood in banikoara_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = banikoara_grouped[banikoara_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Banikoara----
               venue  freq
0  Food & Drink Shop  0.50
1             Market  0.25
2              Plaza  0.25
3   Botanical Garden  0.00
4     Breakfast Spot  0.00


----Founougo----
                venue  freq
0   Food & Drink Shop  0.33
1    Botanical Garden  0.17
2            Bus Stop  0.17
3              Market  0.17
4  Seafood Restaurant  0.17


----Gomparou----
               venue  freq
0  Food & Drink Shop   0.4
1           Bus Stop   0.2
2             Market   0.2
3              Plaza   0.2
4   Botanical Garden   0.0


----Goumori----
              venue  freq
0    Breakfast Spot  0.33
1             Plaza  0.33
2        Steakhouse  0.33
3  Botanical Garden  0.00
4          Bus Stop  0.00


----Kokey----
               venue  freq
0     Breakfast Spot  0.17
1               Café  0.17
2  Food & Drink Shop  0.17
3             Market  0.17
4              Plaza  0.17


----Kokiborou----
               venue  freq
0  Food & Drink Shop  0.29
1     Breakfast Spot  0.14

In [39]:
# A function to sort the venues in descending order.

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [40]:
#Now let's create the new dataframe and display the top 10 venues for each neighborhood.

num_top_venues = 9

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = banikoara_grouped['Neighborhood']

for ind in np.arange(banikoara_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(banikoara_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue
0,Banikoara,Food & Drink Shop,Plaza,Market,Steakhouse,Seafood Restaurant,Café,Bus Stop,Breakfast Spot,Botanical Garden
1,Founougo,Food & Drink Shop,Seafood Restaurant,Market,Bus Stop,Botanical Garden,Steakhouse,Plaza,Café,Breakfast Spot
2,Gomparou,Food & Drink Shop,Plaza,Market,Bus Stop,Steakhouse,Seafood Restaurant,Café,Breakfast Spot,Botanical Garden
3,Goumori,Steakhouse,Plaza,Breakfast Spot,Seafood Restaurant,Market,Food & Drink Shop,Café,Bus Stop,Botanical Garden
4,Kokey,Steakhouse,Plaza,Market,Food & Drink Shop,Café,Breakfast Spot,Seafood Restaurant,Bus Stop,Botanical Garden


## Cluster Neighborhoods

Run *k*-means to cluster the neighborhood into 2 clusters.

In [41]:
# set number of clusters
kclusters = 2

banikoara_grouped_clustering = banikoara_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(banikoara_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 1, 0, 0, 0, 0, 1, 1, 0], dtype=int32)

In [42]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

banikoara_merged = banikoara_df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
banikoara_merged = banikoara_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

banikoara_merged.head() # check the last columns!

Unnamed: 0,Department,Region,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue
0,Alibori,Banikoara,Banikoara,11.29397,2.439285,1,Food & Drink Shop,Plaza,Market,Steakhouse,Seafood Restaurant,Café,Bus Stop,Breakfast Spot,Botanical Garden
1,Alibori,Banikoara,Founougo,11.46599,2.539781,1,Food & Drink Shop,Seafood Restaurant,Market,Bus Stop,Botanical Garden,Steakhouse,Plaza,Café,Breakfast Spot
2,Alibori,Banikoara,Gomparou,11.328576,2.471542,1,Food & Drink Shop,Plaza,Market,Bus Stop,Steakhouse,Seafood Restaurant,Café,Breakfast Spot,Botanical Garden
3,Alibori,Banikoara,Goumori,11.178435,2.299982,0,Steakhouse,Plaza,Breakfast Spot,Seafood Restaurant,Market,Food & Drink Shop,Café,Bus Stop,Botanical Garden
4,Alibori,Banikoara,Kokey,11.183333,2.283333,0,Steakhouse,Plaza,Market,Food & Drink Shop,Café,Breakfast Spot,Seafood Restaurant,Bus Stop,Botanical Garden


In [43]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(banikoara_merged['Latitude'], banikoara_merged['Longitude'], banikoara_merged['Neighborhood'], banikoara_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [44]:
banikoara_merged.loc[banikoara_merged['Cluster Labels'] == 0, banikoara_merged.columns[[2] + list(range(5, 9))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
3,Goumori,0,Steakhouse,Plaza,Breakfast Spot
4,Kokey,0,Steakhouse,Plaza,Market
5,Kokiborou,0,Food & Drink Shop,Steakhouse,Plaza
6,Ounet,0,Food & Drink Shop,Plaza,Market
9,Toura,0,Food & Drink Shop,Steakhouse,Plaza


In [45]:
banikoara_merged.loc[banikoara_merged['Cluster Labels'] == 1, banikoara_merged.columns[[2] + list(range(5, 9))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
0,Banikoara,1,Food & Drink Shop,Plaza,Market
1,Founougo,1,Food & Drink Shop,Seafood Restaurant,Market
2,Gomparou,1,Food & Drink Shop,Plaza,Market
7,Sompérékou,1,Food & Drink Shop,Plaza,Market
8,Soroko,1,Food & Drink Shop,Plaza,Market


With less quality data we can infer here that in Banikoara a region situtated in the northern part of Benin that the 3
first common venue of interest are : Food & Drink Shop, Plaza and Market The result is quite understandable because this part of the country abunds of farming land as mentioned in the introduction due to the remoted character of the region. And moreover for clarification this venues are full of small business that helps families survive and covers the daily business. 

Let's now explore the region of Abomey-Calavi in the department of Atlantique at the south of the country

<h1 align=center><font size = 5>Segmenting and Clustering Neighborhoods of a region in the southern part of the country</font></h1>

In [46]:
AbomeyCalavi_df = geo_coord_df[geo_coord_df['Region'] == "Abomey-Calavi"].reset_index(drop=True)
AbomeyCalavi_df

Unnamed: 0,Department,Region,Neighborhood,Latitude,Longitude
0,Atlantique,Abomey-Calavi,Abomey-Calavi,6.45,2.35
1,Atlantique,Abomey-Calavi,Akassato,6.507371,2.365643
2,Atlantique,Abomey-Calavi,Glo-Djigbé,6.506988,2.331544
3,Atlantique,Abomey-Calavi,Godomey,6.55,2.366667
4,Atlantique,Abomey-Calavi,Hévié,6.416667,2.25
5,Atlantique,Abomey-Calavi,Kpanroun,6.509167,2.327778
6,Atlantique,Abomey-Calavi,Ouédo,6.452429,2.267099
7,Atlantique,Abomey-Calavi,Togba,6.465136,2.304864
8,Atlantique,Abomey-Calavi,Zinvié,6.616667,2.35


In [47]:
address = 'Abomey-Calavi, Benin'

geolocator = Nominatim(user_agent="ab_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Abomey-Calavi are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Abomey-Calavi are 6.4153695, 2.30622836336264.


In [48]:
# create map of New York using latitude and longitude values
map_AbomeyCalavi = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(AbomeyCalavi_df['Latitude'], AbomeyCalavi_df['Longitude'], 
                                           AbomeyCalavi_df['Region'], AbomeyCalavi_df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_AbomeyCalavi)  
    
map_AbomeyCalavi

In [49]:
#Get the neighborhood's name.
AbomeyCalavi_df.loc[0, 'Neighborhood']

'Abomey-Calavi'

In [50]:
neighborhood_latitude = AbomeyCalavi_df.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = AbomeyCalavi_df.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = AbomeyCalavi_df.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Abomey-Calavi are 6.45, 2.35.


#### Now, let's get the top 100 venues that are in Abomey-Calavi within a radius of 50000 meters.

In [51]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
#2000
radius = 50000 # define radius

url2 = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url2

'https://api.foursquare.com/v2/venues/explore?&client_id=NBFU034EIQA1KZN1031VY3KNP0ZWRDPTZTER3SJDAEAAN1GZ&client_secret=L402VNMJWLJHRI1DZBGORHSCORX4PRN3E3O0YXAMJUPSFSQZ&v=20180605&ll=6.45,2.35&radius=50000&limit=100'

In [52]:
results2 = requests.get(url2).json()
results2

{'meta': {'code': 200, 'requestId': '5c9e00086a607153469ad335'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Current map view',
  'headerFullLocation': 'Current map view',
  'headerLocationGranularity': 'unknown',
  'totalResults': 37,
  'suggestedBounds': {'ne': {'lat': 6.900000450000451,
    'lng': 2.802021423138588},
   'sw': {'lat': 5.99999954999955, 'lng': 1.8979785768614121}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4d984cacc19fb60c4e40ab65',
       'name': 'Eldorado Beach Club',
       'location': {'address': 'Rue 1764',
        'lat': 6.3573960448784,
        'lng': 2.4636912230545502,
        'labeledLatLngs': [{'label': 'display',
          'lat': 6.3573960448

In [53]:
AbomeyCalavi_venues = getNearbyVenues(names=AbomeyCalavi_df['Neighborhood'],
                                   latitudes=AbomeyCalavi_df['Latitude'],
                                   longitudes=AbomeyCalavi_df['Longitude']
                                  )

Abomey-Calavi
Akassato
Glo-Djigbé
Godomey
Hévié
Kpanroun
Ouédo
Togba
Zinvié


In [54]:
# Here we select the appropriate venue with regard to our region from the result of our querry 

venues = results2['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues

Unnamed: 0,name,categories,lat,lng
0,Eldorado Beach Club,Beach,6.357396,2.463691
1,Code Bar,Lounge,6.351882,2.386449
2,La Roche,Hardware Store,6.362236,2.446222
3,L'Impala,Restaurant,6.353055,2.419738
4,Bangkok Terrasse,Thai Restaurant,6.353996,2.417403
5,Mojito,Music Venue,6.354585,2.436484
6,Ci Gusta,Ice Cream Shop,6.355329,2.417331
7,La Galette à Sucre,Bakery,6.353679,2.397257
8,Erevan,Shopping Mall,6.349273,2.386542
9,Festival Des Glaces,Dessert Shop,6.365025,2.431271


In [55]:
print(AbomeyCalavi_venues.shape)
AbomeyCalavi_venues.head()

(332, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Abomey-Calavi,6.45,2.35,Eldorado Beach Club,6.357396,2.463691,Beach
1,Abomey-Calavi,6.45,2.35,Code Bar,6.351882,2.386449,Lounge
2,Abomey-Calavi,6.45,2.35,La Roche,6.362236,2.446222,Hardware Store
3,Abomey-Calavi,6.45,2.35,L'Impala,6.353055,2.419738,Restaurant
4,Abomey-Calavi,6.45,2.35,Bangkok Terrasse,6.353996,2.417403,Thai Restaurant


In [56]:
AbomeyCalavi_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Abomey-Calavi,37,37,37,37,37,37
Akassato,37,37,37,37,37,37
Glo-Djigbé,37,37,37,37,37,37
Godomey,37,37,37,37,37,37
Hévié,36,36,36,36,36,36
Kpanroun,37,37,37,37,37,37
Ouédo,37,37,37,37,37,37
Togba,37,37,37,37,37,37
Zinvié,37,37,37,37,37,37


In [57]:
print('There are {} uniques categories.'.format(len(AbomeyCalavi_venues['Venue Category'].unique())))

There are 28 uniques categories.


## Analyze Each Neighborhood

In [58]:
# one hot encoding
AbomeyCalavi_onehot = pd.get_dummies(AbomeyCalavi_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
AbomeyCalavi_onehot['Neighborhood'] = AbomeyCalavi_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [AbomeyCalavi_onehot.columns[-1]] + list(AbomeyCalavi_onehot.columns[:-1])
AbomeyCalavi_onehot = AbomeyCalavi_onehot[fixed_columns]

AbomeyCalavi_onehot.sample(frac= 0.7).head(10)

Unnamed: 0,Neighborhood,African Restaurant,Airport Lounge,Bakery,Beach,Dessert Shop,Fast Food Restaurant,Food,Food Court,French Restaurant,Hardware Store,Hotel,Hotel Bar,Ice Cream Shop,Japanese Restaurant,Lounge,Middle Eastern Restaurant,Music Venue,Pier,Pizza Place,Plaza,Resort,Restaurant,Sandwich Place,Shopping Mall,Soccer Stadium,Tapas Restaurant,Thai Restaurant,Whisky Bar
107,Glo-Djigbé,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
89,Glo-Djigbé,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
233,Ouédo,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
251,Ouédo,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
173,Hévié,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
148,Hévié,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
223,Ouédo,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
50,Akassato,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
104,Glo-Djigbé,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
80,Glo-Djigbé,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [59]:
AbomeyCalavi_onehot.shape

(332, 29)

In [60]:
AbomeyCalavi_grouped = AbomeyCalavi_onehot.groupby('Neighborhood').mean().reset_index()
AbomeyCalavi_grouped

Unnamed: 0,Neighborhood,African Restaurant,Airport Lounge,Bakery,Beach,Dessert Shop,Fast Food Restaurant,Food,Food Court,French Restaurant,Hardware Store,Hotel,Hotel Bar,Ice Cream Shop,Japanese Restaurant,Lounge,Middle Eastern Restaurant,Music Venue,Pier,Pizza Place,Plaza,Resort,Restaurant,Sandwich Place,Shopping Mall,Soccer Stadium,Tapas Restaurant,Thai Restaurant,Whisky Bar
0,Abomey-Calavi,0.027027,0.027027,0.054054,0.027027,0.027027,0.027027,0.027027,0.027027,0.027027,0.027027,0.081081,0.027027,0.027027,0.027027,0.027027,0.054054,0.027027,0.027027,0.027027,0.054054,0.081081,0.027027,0.027027,0.081081,0.027027,0.027027,0.027027,0.027027
1,Akassato,0.027027,0.027027,0.054054,0.027027,0.027027,0.027027,0.027027,0.027027,0.027027,0.027027,0.081081,0.027027,0.027027,0.027027,0.027027,0.054054,0.027027,0.027027,0.027027,0.054054,0.081081,0.027027,0.027027,0.081081,0.027027,0.027027,0.027027,0.027027
2,Glo-Djigbé,0.027027,0.027027,0.054054,0.027027,0.027027,0.027027,0.027027,0.027027,0.027027,0.027027,0.081081,0.027027,0.027027,0.027027,0.027027,0.054054,0.027027,0.027027,0.027027,0.054054,0.081081,0.027027,0.027027,0.081081,0.027027,0.027027,0.027027,0.027027
3,Godomey,0.027027,0.027027,0.054054,0.027027,0.027027,0.027027,0.027027,0.027027,0.027027,0.027027,0.081081,0.027027,0.027027,0.027027,0.027027,0.054054,0.027027,0.027027,0.027027,0.054054,0.081081,0.027027,0.027027,0.081081,0.027027,0.027027,0.027027,0.027027
4,Hévié,0.027778,0.027778,0.055556,0.027778,0.027778,0.027778,0.027778,0.027778,0.027778,0.027778,0.055556,0.027778,0.027778,0.027778,0.027778,0.055556,0.027778,0.027778,0.027778,0.055556,0.083333,0.027778,0.027778,0.083333,0.027778,0.027778,0.027778,0.027778
5,Kpanroun,0.027027,0.027027,0.054054,0.027027,0.027027,0.027027,0.027027,0.027027,0.027027,0.027027,0.081081,0.027027,0.027027,0.027027,0.027027,0.054054,0.027027,0.027027,0.027027,0.054054,0.081081,0.027027,0.027027,0.081081,0.027027,0.027027,0.027027,0.027027
6,Ouédo,0.027027,0.027027,0.054054,0.027027,0.027027,0.027027,0.027027,0.027027,0.027027,0.027027,0.081081,0.027027,0.027027,0.027027,0.027027,0.054054,0.027027,0.027027,0.027027,0.054054,0.081081,0.027027,0.027027,0.081081,0.027027,0.027027,0.027027,0.027027
7,Togba,0.027027,0.027027,0.054054,0.027027,0.027027,0.027027,0.027027,0.027027,0.027027,0.027027,0.081081,0.027027,0.027027,0.027027,0.027027,0.054054,0.027027,0.027027,0.027027,0.054054,0.081081,0.027027,0.027027,0.081081,0.027027,0.027027,0.027027,0.027027
8,Zinvié,0.027027,0.027027,0.054054,0.027027,0.027027,0.027027,0.027027,0.027027,0.027027,0.027027,0.081081,0.027027,0.027027,0.027027,0.027027,0.054054,0.027027,0.027027,0.027027,0.054054,0.081081,0.027027,0.027027,0.081081,0.027027,0.027027,0.027027,0.027027


In [61]:
AbomeyCalavi_grouped.shape

(9, 29)

In [62]:
num_top_venues = 5

for hood in AbomeyCalavi_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = AbomeyCalavi_grouped[AbomeyCalavi_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Abomey-Calavi----
                       venue  freq
0              Shopping Mall  0.08
1                     Resort  0.08
2                      Hotel  0.08
3  Middle Eastern Restaurant  0.05
4                     Bakery  0.05


----Akassato----
                       venue  freq
0              Shopping Mall  0.08
1                     Resort  0.08
2                      Hotel  0.08
3  Middle Eastern Restaurant  0.05
4                     Bakery  0.05


----Glo-Djigbé----
                       venue  freq
0              Shopping Mall  0.08
1                     Resort  0.08
2                      Hotel  0.08
3  Middle Eastern Restaurant  0.05
4                     Bakery  0.05


----Godomey----
                       venue  freq
0              Shopping Mall  0.08
1                     Resort  0.08
2                      Hotel  0.08
3  Middle Eastern Restaurant  0.05
4                     Bakery  0.05


----Hévié----
                       venue  freq
0              Shopping Mall 

In [63]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = AbomeyCalavi_grouped['Neighborhood']

for ind in np.arange(AbomeyCalavi_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(AbomeyCalavi_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Abomey-Calavi,Shopping Mall,Resort,Hotel,Bakery,Plaza,Middle Eastern Restaurant,Whisky Bar,Hotel Bar,Airport Lounge,Beach
1,Akassato,Shopping Mall,Resort,Hotel,Bakery,Plaza,Middle Eastern Restaurant,Whisky Bar,Hotel Bar,Airport Lounge,Beach
2,Glo-Djigbé,Shopping Mall,Resort,Hotel,Bakery,Plaza,Middle Eastern Restaurant,Whisky Bar,Hotel Bar,Airport Lounge,Beach
3,Godomey,Shopping Mall,Resort,Hotel,Bakery,Plaza,Middle Eastern Restaurant,Whisky Bar,Hotel Bar,Airport Lounge,Beach
4,Hévié,Shopping Mall,Resort,Bakery,Plaza,Middle Eastern Restaurant,Hotel,Whisky Bar,Hotel Bar,Airport Lounge,Beach


## Cluster Neighborhoods

Run *k*-means to cluster the neighborhood into 2 clusters.

In [64]:
# set number of clusters
kclusters = 2

AbomeyCalavi_grouped_clustering = AbomeyCalavi_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(AbomeyCalavi_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 1, 0, 0, 0, 0], dtype=int32)

In [65]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

AbomeyCalavi_merged = AbomeyCalavi_df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
AbomeyCalavi_merged = AbomeyCalavi_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

AbomeyCalavi_merged.head() # check the last columns!

Unnamed: 0,Department,Region,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Atlantique,Abomey-Calavi,Abomey-Calavi,6.45,2.35,0,Shopping Mall,Resort,Hotel,Bakery,Plaza,Middle Eastern Restaurant,Whisky Bar,Hotel Bar,Airport Lounge,Beach
1,Atlantique,Abomey-Calavi,Akassato,6.507371,2.365643,0,Shopping Mall,Resort,Hotel,Bakery,Plaza,Middle Eastern Restaurant,Whisky Bar,Hotel Bar,Airport Lounge,Beach
2,Atlantique,Abomey-Calavi,Glo-Djigbé,6.506988,2.331544,0,Shopping Mall,Resort,Hotel,Bakery,Plaza,Middle Eastern Restaurant,Whisky Bar,Hotel Bar,Airport Lounge,Beach
3,Atlantique,Abomey-Calavi,Godomey,6.55,2.366667,0,Shopping Mall,Resort,Hotel,Bakery,Plaza,Middle Eastern Restaurant,Whisky Bar,Hotel Bar,Airport Lounge,Beach
4,Atlantique,Abomey-Calavi,Hévié,6.416667,2.25,1,Shopping Mall,Resort,Bakery,Plaza,Middle Eastern Restaurant,Hotel,Whisky Bar,Hotel Bar,Airport Lounge,Beach


In [66]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(AbomeyCalavi_merged['Latitude'], AbomeyCalavi_merged['Longitude'], AbomeyCalavi_merged['Neighborhood'], AbomeyCalavi_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [67]:
AbomeyCalavi_merged.loc[AbomeyCalavi_merged['Cluster Labels'] == 0, AbomeyCalavi_merged.columns[[2] + list(range(5, 9))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
0,Abomey-Calavi,0,Shopping Mall,Resort,Hotel
1,Akassato,0,Shopping Mall,Resort,Hotel
2,Glo-Djigbé,0,Shopping Mall,Resort,Hotel
3,Godomey,0,Shopping Mall,Resort,Hotel
5,Kpanroun,0,Shopping Mall,Resort,Hotel
6,Ouédo,0,Shopping Mall,Resort,Hotel
7,Togba,0,Shopping Mall,Resort,Hotel
8,Zinvié,0,Shopping Mall,Resort,Hotel


In [68]:
AbomeyCalavi_merged.loc[AbomeyCalavi_merged['Cluster Labels'] == 1, AbomeyCalavi_merged.columns[[2] + list(range(5, 9))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
4,Hévié,1,Shopping Mall,Resort,Bakery


We could notice here from the clustering of neighborhood in the southern part we have more of shopping mall, resort and hotels venue.
Which is understandable since this part of the country is in consistent development and very frequented by young people and foreigners. 