# Capstone: Grouping and Analyzing Cities in Africa

## Importing Dependencies 

In [155]:
from bs4 import BeautifulSoup #library for parsing html files (website source code) 
import requests as rq #library for making url requests (talking to foursquare API) 
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
from pandas.compat import StringIO as io #library for creating pandas dataframe from string 
from pandas.io.json import json_normalize #library for creating pandas dataframe from json file
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.cm as cm #
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium #map visualization library 
from geopy.geocoders import Nominatim #library for converting an address into latitude and longitude values
import numpy as np
import os #used for setting and changing working directory 
import dill #used for saving and loading variables 
import json

## Retrieving City Data 

In [156]:
#Retrieving website content

afrimain = 'http://www.hostels247.com/hostel_247blog_Countries__in__Africa/' 
header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}
afrimain_store = rq.get(afrimain, headers=header) #header prevents 403 Forbidden error  
afrimain_soup = BeautifulSoup(afrimain_store.text, 'html.parser')

#Sample
print(afrimain_soup.prettify()[0:1000],end='...')

<html>
 <head>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
  <meta content="List of the Countries in Africa, the Capital cities and major cities in Africa, Travel to Africa, Africa Hostels and Budget Hotels" name="keywords"/>
  <meta content="Hostels247.com List of Countries in Africa, the Capital cities and major cities in Africa from North, East, South and West Africa. Travel to Africa." name="description"/>
  <meta content="index,follow,all" name="robots"/>
  <meta content="index,follow" name="GOOGLEBOT"/>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
  <meta content="GENERAL" name="RATING"/>
  <meta content="2 DAYS" name="REVISIT-AFTER"/>
  <meta content="DOCUMENT" name="RESOURCE-TYPE"/>
  <meta content="GLOBAL" name="DISTRIBUTION"/>
  <title>
   List of the Countries in Africa, the Capital cities and major cities in Africa - Hostels247.com - Travel to Africa
  </title>
  <link .ico"="" href="/favicon.ico" rel="icon"/>
  <link hre

In [157]:
#Extracting website content between <td></td> tags

afrimain_soup_1 = afrimain_soup.find('td', class_='search_txt')

#Sample
print(afrimain_soup_1.prettify()[0:1000], end='...')

<td align="left" class="search_txt" valign="top" width="55%">
 <strong>
  Countries in Africa
 </strong>
 <br/>
 <p>
  <strong>
   <font color="#00ccff">
    List of the Countries in Africa,
   </font>
   <font color="#ffcc00">
    the Capital cities
   </font>
   <font color="#0000ff">
    and major cities in Africa.
   </font>
  </strong>
  <br/>
  <br/>
  All of these African Countries and Cities are top Business, Holiday, Travel and Cultural destinations.
  <br/>
  <br/>
  <div align="center">
   <font color="#0000ff">
    <strong>
     To Book Hostels &amp; Budget Hotels click the Button below.
    </strong>
   </font>
   <a href="javascript:void(0);/*1225736966037*/">
    <font color="#0000ff">
     <br/>
    </font>
   </a>
  </div>
  <p align="center">
   <a href="http://www.hostels247.com/">
    <img align="left" alt="" height="26" src="/userfiles/image/Book%20Now%20Button.png" width="119"/>
   </a>
   Africa has many Traditions and Cultures unique to each country and city, th

In [158]:
#Removing last <a></a> tag

afrimain_soup_1.find_all('a')[-1].decompose()

In [159]:
#Removing all a tags but keeping their content 

for links in afrimain_soup_1.find_all('a'):
    links.unwrap()

In [160]:
for fonts in afrimain_soup_1.find_all('font'):
    fonts.decompose()

In [161]:
for strongs in afrimain_soup_1.find_all('strong'):
    strongs.decompose()

In [162]:
#Sample

print(afrimain_soup_1.prettify()[0:1000],end='...')

<td align="left" class="search_txt" valign="top" width="55%">
 <br/>
 <p>
  <br/>
  <br/>
  All of these African Countries and Cities are top Business, Holiday, Travel and Cultural destinations.
  <br/>
  <br/>
  <div align="center">
  </div>
  <p align="center">
   <img align="left" alt="" height="26" src="/userfiles/image/Book%20Now%20Button.png" width="119"/>
   Africa has many Traditions and Cultures unique to each country and city, the people of Africa speak Hundreds of Languages and any visitor to Africa can expect a warm welcome no matter which country or city they choose to visit.
   <br/>
   <br/>
   Africa has a lot to offer from, North, South, East and West. From the well known tourist and business destinations like, Nigeria, the Heart Of Africa, the most populous country in Africa, Egypt, South Africa, Morocco, Tunisia, Mauritius and Kenya, to the hidden treasures that be found in every African country and city from Gambia, Botswana, Chad and Ghana to Burkina Faso, Cape Ver

In [163]:
#Turning beautiful soup object into string and then a list 

afrimain_list = afrimain_soup_1.text.split('\n\r\n')

In [164]:
afrimain_list[:10]

['\xa0\n ',
 'All of these African Countries and Cities are top Business, Holiday, Travel and Cultural destinations.\n\n ',
 'Africa has many Traditions and Cultures unique to each country and city, the people of Africa speak Hundreds of Languages and any visitor to Africa can expect a warm welcome no matter which country or city they choose to visit.',
 'Africa has a lot to offer from, North, South, East and West. From the well known tourist and business destinations like, Nigeria, the Heart Of Africa, the most populous country in Africa, Egypt, South Africa, Morocco, Tunisia, Mauritius and Kenya, to the hidden treasures that be found in every African country and city from Gambia, Botswana, Chad and Ghana to Burkina Faso, Cape Verde and Cameroon.\n',
 'Capital City in Algeria: Algiers. \r\nOther major cities in Algeria, Oran, Constantine, Batna, Bab Ezzouar, Annaba',
 'Capital City in Angola: Luanda. \r\nOther major cities in Angola, Huambo, Lobito, Benguela,, Kuito, Lubango',
 'Capit

In [175]:
#Delete every list element that does not have the words 'Capital City' in it

for x, text in enumerate(afrimain_list):
    if 'Capital City' not in text:
        del(afrimain_list[x])
    else:
        pass

In [176]:
afrimain_list

['Capital City in Algeria: Algiers. \r\nOther major cities in Algeria, Oran, Constantine, Batna, Bab Ezzouar, Annaba',
 'Capital City in Angola: Luanda. \r\nOther major cities in Angola, Huambo, Lobito, Benguela,, Kuito, Lubango',
 'Capital City in Benin: Porto-Novo. \r\nOther major cities in Benin, Cotonou, Parakou, Djougou, Bohicon, Kandi',
 'Capital City in Botswana: Gaborone\r\nOther major cities in Botswana, Francistown, Molepolole, Selebi-Phikwe, Maun',
 'Capital City in Burkina Faso: Ouagadougou. \r\nOther major cities in Burkina Faso, Bobo Dioulasso, Koudougou, Ouahigouya',
 'Capital City in Burundi: Bujumbura. \r\nOther major cities in Burundi, Muyinga, Ruyigi, Gitega, Ngozi, Rutana, Bururi',
 'Capital City in Cameroon: Yaounde. \r\nOther major cities in Cameroon, Douala, Garoua, Kousseri, Bamenda, Maroua',
 'Capital City Cape Verde: Praia. \r\nOther major cities in Cape Verde, Mindelo, Santa Maria, Santa Cruz, Sao Filipe, Assomada',
 'Capital City in Central African Republic:

In [177]:
#Further splitting list into sublists to separate unwanted text 

new_list = []

for x, cities in enumerate(afrimain_list):
    if ':' in cities:     
        new_list.append(afrimain_list[x].split(':'))
    else:
        new_list.append(afrimain_list[x].split('  '))

In [178]:
new_list[:10]

[['Capital City in Algeria',
  ' Algiers. \r\nOther major cities in Algeria, Oran, Constantine, Batna, Bab Ezzouar, Annaba'],
 ['Capital City in Angola',
  ' Luanda. \r\nOther major cities in Angola, Huambo, Lobito, Benguela,, Kuito, Lubango'],
 ['Capital City in Benin',
  ' Porto-Novo. \r\nOther major cities in Benin, Cotonou, Parakou, Djougou, Bohicon, Kandi'],
 ['Capital City in Botswana',
  ' Gaborone\r\nOther major cities in Botswana, Francistown, Molepolole, Selebi-Phikwe, Maun'],
 ['Capital City in Burkina Faso',
  ' Ouagadougou. \r\nOther major cities in Burkina Faso, Bobo Dioulasso, Koudougou, Ouahigouya'],
 ['Capital City in Burundi',
  ' Bujumbura. \r\nOther major cities in Burundi, Muyinga, Ruyigi, Gitega, Ngozi, Rutana, Bururi'],
 ['Capital City in Cameroon',
  ' Yaounde. \r\nOther major cities in Cameroon, Douala, Garoua, Kousseri, Bamenda, Maroua'],
 ['Capital City Cape Verde',
  ' Praia. \r\nOther major cities in Cape Verde, Mindelo, Santa Maria, Santa Cruz, Sao Filipe,

In [179]:
#Removing repeated phrases 

phrase_1 = 'Capital City in '
for x in range(len(new_list)):
    if phrase_1 in new_list[x][0]: 
        new_list[x][0] = new_list[x][0].replace(phrase_1,'')
    else:
        new_list[x][0] = new_list[x][0].replace('Capital City ','')

In [180]:
phrase_2 = '\r\nOther major cities in'
for x in range(len(new_list)):
    if new_list[x][0]== 'Cote D’Ivoire':
        new_list[x][1] = new_list[x][1].replace(phrase_2 + ' Cote D’lvoire', phrase_2 + ' Cote D’Ivoire') #Correcting spelling of Cote D'Ivoire in the string
    new_list[x][1] = new_list[x][1].replace(phrase_2 + ' ' + new_list[x][0],'')

In [181]:
new_list[:10]

[['Algeria', ' Algiers. , Oran, Constantine, Batna, Bab Ezzouar, Annaba'],
 ['Angola', ' Luanda. , Huambo, Lobito, Benguela,, Kuito, Lubango'],
 ['Benin', ' Porto-Novo. , Cotonou, Parakou, Djougou, Bohicon, Kandi'],
 ['Botswana', ' Gaborone, Francistown, Molepolole, Selebi-Phikwe, Maun'],
 ['Burkina Faso', ' Ouagadougou. , Bobo Dioulasso, Koudougou, Ouahigouya'],
 ['Burundi', ' Bujumbura. , Muyinga, Ruyigi, Gitega, Ngozi, Rutana, Bururi'],
 ['Cameroon', ' Yaounde. , Douala, Garoua, Kousseri, Bamenda, Maroua'],
 ['Cape Verde',
  ' Praia. , Mindelo, Santa Maria, Santa Cruz, Sao Filipe, Assomada'],
 ['Central African Republic', ' Bangui. , Bambari, Berberati, Bouar, Carnot'],
 ['Chad', ' N’Djamena. , Moundou, Sarh, Abeche, Kelo, Koumra, Pala']]

In [182]:
#Separating countries from cities 

afristring_list = []

for d in range(len(new_list)):
    afristring_list.extend([new_list[d][0], new_list[d][1]]) 

In [183]:
afristring_list[:10]

['Algeria',
 ' Algiers. , Oran, Constantine, Batna, Bab Ezzouar, Annaba',
 'Angola',
 ' Luanda. , Huambo, Lobito, Benguela,, Kuito, Lubango',
 'Benin',
 ' Porto-Novo. , Cotonou, Parakou, Djougou, Bohicon, Kandi',
 'Botswana',
 ' Gaborone, Francistown, Molepolole, Selebi-Phikwe, Maun',
 'Burkina Faso',
 ' Ouagadougou. , Bobo Dioulasso, Koudougou, Ouahigouya']

In [184]:
#Converting list elements back into a string 

afristring = '\n\n'.join(afristring_list)

In [185]:
afristring

'Algeria\n\n Algiers. , Oran, Constantine, Batna, Bab Ezzouar, Annaba\n\nAngola\n\n Luanda. , Huambo, Lobito, Benguela,, Kuito, Lubango\n\nBenin\n\n Porto-Novo. , Cotonou, Parakou, Djougou, Bohicon, Kandi\n\nBotswana\n\n Gaborone, Francistown, Molepolole, Selebi-Phikwe, Maun\n\nBurkina Faso\n\n Ouagadougou. , Bobo Dioulasso, Koudougou, Ouahigouya\n\nBurundi\n\n Bujumbura. , Muyinga, Ruyigi, Gitega, Ngozi, Rutana, Bururi\n\nCameroon\n\n Yaounde. , Douala, Garoua, Kousseri, Bamenda, Maroua\n\nCape Verde\n\n Praia. , Mindelo, Santa Maria, Santa Cruz, Sao Filipe, Assomada\n\nCentral African Republic\n\n Bangui. , Bambari, Berberati, Bouar, Carnot\n\nChad\n\n N’Djamena. , Moundou, Sarh, Abeche, Kelo, Koumra, Pala\n\nComoros\n\n Moroni.\n\nCongo, Dem Rep of\n\n Kinshasa. , Lubumbashi, Mbuji-Mayi, Kolwezi, Kananga\n\nCongo, Rep of\n\n Brazzaville. , Pointe-Noire\n\nCote D’Ivoire\n\n Yamoussoukro. , Abidjan, Bouake, Daloa, Korhogo, San-Pedro\n\nDjibouti\n\n Djibouti. , Ali Sabieh, Tadjoura, Ob

In [186]:
#Conforming string into a csv file 

afristring0 = afristring.replace('.','')
afristring1 = afristring0.replace(',','\n,')
afristring2 = afristring1.replace('\n\n ',',')
afristring3 = afristring2.replace('\n\n','\n')
afristring4 = afristring3.replace(' \n','\n')
afristring5 = afristring4.replace(', ',',')

afristring5 = afristring5[:-1]
afristring5 = afristring5.replace('Congo\n,Rep of','Republic of the Congo') 
afristring5 = afristring5.replace('Congo\n,Dem Rep of','Democratic Republic of the Congo')
afristring5

'Algeria,Algiers\n,Oran\n,Constantine\n,Batna\n,Bab Ezzouar\n,Annaba\nAngola,Luanda\n,Huambo\n,Lobito\n,Benguela\n,\n,Kuito\n,Lubango\nBenin,Porto-Novo\n,Cotonou\n,Parakou\n,Djougou\n,Bohicon\n,Kandi\nBotswana,Gaborone\n,Francistown\n,Molepolole\n,Selebi-Phikwe\n,Maun\nBurkina Faso,Ouagadougou\n,Bobo Dioulasso\n,Koudougou\n,Ouahigouya\nBurundi,Bujumbura\n,Muyinga\n,Ruyigi\n,Gitega\n,Ngozi\n,Rutana\n,Bururi\nCameroon,Yaounde\n,Douala\n,Garoua\n,Kousseri\n,Bamenda\n,Maroua\nCape Verde,Praia\n,Mindelo\n,Santa Maria\n,Santa Cruz\n,Sao Filipe\n,Assomada\nCentral African Republic,Bangui\n,Bambari\n,Berberati\n,Bouar\n,Carnot\nChad,N’Djamena\n,Moundou\n,Sarh\n,Abeche\n,Kelo\n,Koumra\n,Pala\nComoros,Moroni\nDemocratic Republic of the Congo,Kinshasa\n,Lubumbashi\n,Mbuji-Mayi\n,Kolwezi\n,Kananga\nRepublic of the Congo,Brazzaville\n,Pointe-Noire\nCote D’Ivoire,Yamoussoukro\n,Abidjan\n,Bouake\n,Daloa\n,Korhogo\n,San-Pedro\nDjibouti,Djibouti\n,Ali Sabieh\n,Tadjoura\n,Obock\n,Dikhil\n,Arta\n,Holhol\

In [187]:
#Turning string into a dataframe 

afrimain_df1 = pd.read_csv(io(afristring5), names=['Country','City'])

In [188]:
afrimain_df1.head(15)

Unnamed: 0,Country,City
0,Algeria,Algiers
1,,Oran
2,,Constantine
3,,Batna
4,,Bab Ezzouar
5,,Annaba
6,Angola,Luanda
7,,Huambo
8,,Lobito
9,,Benguela


## Retrieving City Coordinates

In [190]:
#Filtering out rows with no city 

afrimain_df2 = afrimain_df1[afrimain_df1['City'].notnull()]
afrimain_df2.head(15)

Unnamed: 0,Country,City
0,Algeria,Algiers
1,,Oran
2,,Constantine
3,,Batna
4,,Bab Ezzouar
5,,Annaba
6,Angola,Luanda
7,,Huambo
8,,Lobito
9,,Benguela


In [191]:
afrimain_df2.reset_index(drop=True, inplace=True)

In [192]:
afrimain_df2['Type']= np.nan

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [193]:
#Identifying capital cities (CCs) versus major cities (MCs) and putting relevant label in 'Type' column 

for i in range(281): 
    #print(i)
    try:
     if np.isnan(afrimain_df2['Country'][i]):
        afrimain_df2['Type'][i] = 'MC'
    except:
        afrimain_df2['Type'][i] = 'CC'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.loc[key] = value
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)


In [194]:
afrimain_df2.head(20)

Unnamed: 0,Country,City,Type
0,Algeria,Algiers,CC
1,,Oran,MC
2,,Constantine,MC
3,,Batna,MC
4,,Bab Ezzouar,MC
5,,Annaba,MC
6,Angola,Luanda,CC
7,,Huambo,MC
8,,Lobito,MC
9,,Benguela,MC


In [196]:
afrimain_df2.fillna(method='ffill',inplace=True)
afrimain_df2.head(15)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  downcast=downcast, **kwargs)


Unnamed: 0,Country,City,Type
0,Algeria,Algiers,CC
1,Algeria,Oran,MC
2,Algeria,Constantine,MC
3,Algeria,Batna,MC
4,Algeria,Bab Ezzouar,MC
5,Algeria,Annaba,MC
6,Angola,Luanda,CC
7,Angola,Huambo,MC
8,Angola,Lobito,MC
9,Angola,Benguela,MC


In [245]:
afrimain_df2['Latitude']=None 
afrimain_df2['Longitude']=None

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [None]:
#Getting cooridinates for each city 

geolocator = Nominatim(user_agent="cities",timeout=5) #To avoid HTTP 403 Forbidden error 

for city, country, row in zip(afrimain_df2['City'],afrimain_df2['Country'],afrimain_df2.index): 

    location = None
    i = 0

    while(location is None and i<6): #If coordinates not obtained first time, try 5 times before moving on to the next city city 

        location = geolocator.geocode(city+', '+country)

        print(city+', '+country)
        
        i+=1

    try:

        afrimain_df2['Latitude'][row]=location.latitude

        afrimain_df2['Longitude'][row]=location.longitude

    except:
        
        print('Failed: '+city+' '+country)

        afrimain_df2['Latitude'][row] = None

        afrimain_df2['Longitude'][row] = None

print("Done")

In [None]:
#Manually obtain coordinates for Addis. Retruned geolocator coordinates were wrong. 

location1 = geolocator.geocode('Addis Ababa, Ethiopia')

afrimain_df2['Latitude'][100]=location1.latitude

afrimain_df2['Longitude'][100]=location1.longitude

In [None]:
#Drop all cities that returned no coordinates  

afrimain_df3 = afrimain_df2[afrimain_df2['Longitude'].notnull()]

In [None]:
afrimain_df3.reset_index(drop=True,inplace=True)

In [9]:
#Get current working directory
os.getcwd()

#Change working directory
os.chdir('/Users/mbongeni/Documents/Career/Data Science/Applied Data Science/4_Capstone/Final Assignment')

In [22]:
#Retrieve saved version of afrimain_df3 variable, AfricaTours_lat&long.pkl, from working directory 

with open('AfricaTours_lat&long.pkl', 'rb') as file:
    afrimain_df3 = dill.load(file)

In [197]:
afrimain_df3.head(20)

Unnamed: 0,Country,City,Type,Latitude,Longitude
0,Algeria,Algiers,CC,28.0,2.99998
1,Algeria,Oran,MC,35.7033,-0.649298
2,Algeria,Constantine,MC,36.3645,6.60826
3,Algeria,Batna,MC,35.5544,6.17675
4,Algeria,Bab Ezzouar,MC,36.722,3.18567
5,Algeria,Annaba,MC,36.8982,7.75493
6,Angola,Luanda,CC,-8.82727,13.244
7,Angola,Huambo,MC,-12.7765,15.732
8,Angola,Lobito,MC,-12.3507,13.5464
9,Angola,Benguela,MC,-12.579,13.4037


In [256]:
#Correcting Algiers Coordinates 

afrimain_df3['Latitude'][0]= 36.7
afrimain_df3['Longitude'][0]= 3.15

## Retrieving City Venue Data

In [200]:
CLIENT_ID = 'EQ0IE0NJHFCKD5SXZX5BMLBW5ATD4ZWDO5FUCRGD3QCSBBHZ' 
CLIENT_SECRET = '01KHK4CNABQSQOI2VQL5J2ATJMO5IIYIEKV3BKQMW2XAOOEJ' 
VERSION = '20190107' 

#### Test: One City

In [201]:
#Test

city_lat =  36.7
city_long = 3.1
city_name = 'Algiers'

In [202]:
Limit = 100

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&limit={}'.format(
    CLIENT_ID,
    CLIENT_SECRET,
    VERSION,
    city_lat,
    city_long,
    Limit)

In [203]:
results1 = rq.get(url).json()

In [217]:
#Sample

results1['response']['groups'][0]['items'][0]

{'reasons': {'count': 0,
  'items': [{'summary': 'This spot is popular',
    'type': 'general',
    'reasonName': 'globalInteractionReason'}]},
 'venue': {'id': '4ef936d330f894d869ff6fd3',
  'name': "Jardin d'essais d'El Hamma",
  'location': {'address': 'Rue Hassiba Ben Bouali - B.P. 141, Hamma - El Anasser',
   'lat': 36.7487193385486,
   'lng': 3.0758285522460938,
   'labeledLatLngs': [{'label': 'display',
     'lat': 36.7487193385486,
     'lng': 3.0758285522460938}],
   'distance': 5836,
   'postalCode': '16000',
   'cc': 'DZ',
   'country': 'الجزائر',
   'formattedAddress': ['Rue Hassiba Ben Bouali - B.P. 141, Hamma - El Anasser',
    '16000',
    'الجزائر']},
  'categories': [{'id': '4bf58dd8d48988d163941735',
    'name': 'Park',
    'pluralName': 'Parks',
    'shortName': 'Park',
    'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/parks_outdoors/park_',
     'suffix': '.png'},
    'primary': True}],
  'photos': {'count': 0, 'groups': []}},
 'referralId': 'e-0-4ef936d

In [257]:
#Function for retrieving venue category (taken from lab)

def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [259]:
#Turn filtered JSON file into a dataframe

venues = results1['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

In [260]:
#Filter dataframe columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

#Get category for each venue
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

#Clean column headings
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

In [262]:
nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Jardin d'essais d'El Hamma,Park,36.748719,3.075829
1,Le Tantra,French Restaurant,36.741695,3.073533
2,Bad Buns,Burger Joint,36.738093,3.04546
3,HAVANA Said Hamdine,Diner,36.729668,3.031552
4,Piano Piano,Lounge,36.742123,3.076495


#### Retrieving Venues for All Cities 

In [None]:
#Function for repeating above steps for all cities while dropping cities that retrun fewer than 30 venues 

def getCityVenues(countries, cities, city_categories, latitudes, longitudes):
    
    LIMIT = 100
    
    venues_list=[]
    for country, city, cat, lat, lng in zip(countries, cities, city_categories, latitudes, longitudes):
        print(city)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng,  
            LIMIT)
        
        # make the GET request
        feedback = rq.get(url).json()
     
        total_results = feedback['response']['totalResults'] 
        places = feedback['response']['groups'][0]['items']
        
        #Filter countries with too few venues 
        if total_results >= 30:
        
            # return only relevant information for each nearby venue
            venues_list.append([(
                    country, 
                    city,
                    cat,
                    lat, 
                    lng, 
                    v['venue']['name'], 
                    v['venue']['location']['lat'], 
                    v['venue']['location']['lng'],  
                    v['venue']['categories'][0]['name']) for v in places])
        else:

    top_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    top_venues.columns = ['Country', 
                  'City', 
                  'Type', 
                  'Latitude', 
                  'Longitude', 
                  'Venue Name', 
                  'Venue Category']
    
    return(top_venues)

In [None]:
#Apply getCityVeneus 

city_venues = getCityVenues(afrimain_df3['Country'],afrimain_df3['City'],afrimain_df3['Type'],afrimain_df3['Latitude'],afrimain_df3['Longitude'])

In [265]:
city_venues.shape

(3901, 7)

In [277]:
print('{} countries are represented in the dataframe.'.format(len(city_venues['Country'].unique())))

30 countries are represented in the dataframe.


In [278]:
print('There are {} unique venues in the data.'.format(len(city_venues['Venue Category'].unique())))

There are 267 unique venues in the data.


In [276]:
print('{} cities met the threshold of containing more than 30 venues on foursquare.com.'.format(len(city_venues['City'].unique())))

57 cities met the threshold of containing more than 30 venues on foursquare.com.


In [4]:
#Saving variable city_venues as AfricaTours_venues.pkl  

with open('AfricaTours_venues.pkl', 'wb') as file:
    dill.dump(city_venues, file)

In [224]:
#Load city_venues variable

with open('AfricaTours_venues.pkl', 'rb') as file:
    city_venues = dill.load(file)

In [225]:
#Venue data frame. Each venue has its own row. 
city_venues.head(20)

Unnamed: 0,Country,City,Type,Latitude,Longitude,Venue Name,Venue Category
0,Algeria,Algiers,CC,36.7,3.15,Casbah İstanbul,Turkish Restaurant
1,Algeria,Algiers,CC,36.7,3.15,Best Night Hotel,Hotel
2,Algeria,Algiers,CC,36.7,3.15,Salad box Algerie,Salad Place
3,Algeria,Algiers,CC,36.7,3.15,Jardin d'essais d'El Hamma,Park
4,Algeria,Algiers,CC,36.7,3.15,The Crystal Lounge,Restaurant
5,Algeria,Algiers,CC,36.7,3.15,Taste Of India,Indian Restaurant
6,Algeria,Algiers,CC,36.7,3.15,Le Tantra,French Restaurant
7,Algeria,Algiers,CC,36.7,3.15,LEONARD,Restaurant
8,Algeria,Algiers,CC,36.7,3.15,Piano Piano,Lounge
9,Algeria,Algiers,CC,36.7,3.15,Cosmopolitain,Bar


In [229]:
city_venues.shape

(3901, 7)

In [230]:
city_venues.drop(['Latitude','Longitude'],inplace=True,axis=1)

In [222]:
city_venues.head(10)

Unnamed: 0,Country,City,Type,Venue Name,Venue Category
0,Algeria,Algiers,CC,Casbah İstanbul,Turkish Restaurant
1,Algeria,Algiers,CC,Best Night Hotel,Hotel
2,Algeria,Algiers,CC,Salad box Algerie,Salad Place
3,Algeria,Algiers,CC,Jardin d'essais d'El Hamma,Park
4,Algeria,Algiers,CC,The Crystal Lounge,Restaurant
5,Algeria,Algiers,CC,Taste Of India,Indian Restaurant
6,Algeria,Algiers,CC,Le Tantra,French Restaurant
7,Algeria,Algiers,CC,LEONARD,Restaurant
8,Algeria,Algiers,CC,Piano Piano,Lounge
9,Algeria,Algiers,CC,Cosmopolitain,Bar


## Clustering The Cities

In [226]:
#One hot encoding the venues so they can be grouped and put into the machine learning algorithm
city_venues_onehot = pd.get_dummies(city_venues[['Venue Category']], prefix="", prefix_sep="")

#Add the country & city columns back to dataframe
city_venues_onehot = city_venues[['Country','City']].join( city_venues_onehot)

#Group venues by city and get the average for how often each venue appears 
city_venues_grouped = city_venues_onehot.groupby('City', sort=False).mean().reset_index()

In [227]:
city_venues_grouped.head(15)

Unnamed: 0,City,Accessories Store,African Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Terminal,American Restaurant,Arcade,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,Athletics & Sports,Auto Dealership,BBQ Joint,Bagel Shop,Bakery,Bar,Baseball Stadium,Basketball Court,Basketball Stadium,Bathing Area,Beach,Beach Bar,Bed & Breakfast,Beer Garden,Big Box Store,Bistro,Board Shop,Boarding House,Boat or Ferry,Bookstore,Botanical Garden,Boutique,Bowling Alley,Brazilian Restaurant,Breakfast Spot,Brewery,Buffet,Building,Burger Joint,Burrito Place,Bus Station,Bus Stop,Butcher,Café,Cajun / Creole Restaurant,Campground,Casino,Castle,Caucasian Restaurant,Chinese Restaurant,City Hall,Clothing Store,Cocktail Bar,Coffee Shop,Comedy Club,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,Convention Center,Cosmetics Shop,Creperie,Cricket Ground,Cultural Center,Cupcake Shop,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Dive Bar,Dive Spot,Donut Shop,Duty-free Shop,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,English Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Fondue Restaurant,Food,Food & Drink Shop,Food Court,Food Truck,Football Stadium,Forest,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Garden Center,Gas Station,Gastropub,General Entertainment,German Restaurant,Gift Shop,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Halal Restaurant,Harbor / Marina,Hardware Store,Heliport,Herbs & Spices Store,Himalayan Restaurant,Historic Site,History Museum,Hookah Bar,Hostel,Hot Dog Joint,Hot Spring,Hotel,Hotel Bar,Hotel Pool,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Karaoke Bar,Kebab Restaurant,Kids Store,Korean Restaurant,Lake,Language School,Latin American Restaurant,Lebanese Restaurant,Library,Lighthouse,Liquor Store,Lounge,Luggage Store,Market,Massage Studio,Mediterranean Restaurant,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Mongolian Restaurant,Moroccan Restaurant,Mosque,Motel,Mountain,Movie Theater,Multiplex,Museum,Music School,Music Store,Music Venue,Nature Preserve,Neighborhood,New American Restaurant,Nightclub,Nightlife Spot,Office,Opera House,Optical Shop,Other Great Outdoors,Other Nightlife,Outdoors & Recreation,Palace,Paper / Office Supplies Store,Park,Pastry Shop,Pedestrian Plaza,Performing Arts Venue,Pharmacy,Photography Studio,Piano Bar,Pizza Place,Planetarium,Platform,Playground,Plaza,Pool,Pool Hall,Portuguese Restaurant,Pub,Public Art,Racetrack,Recreation Center,Resort,Rest Area,Restaurant,Roof Deck,Rugby Pitch,Salad Place,Sandwich Place,Scenic Lookout,Science Museum,Sculpture Garden,Seafood Restaurant,Shop & Service,Shopping Mall,Shopping Plaza,Snack Place,Soccer Field,Soccer Stadium,Social Club,Soup Place,Spa,Spanish Restaurant,Speakeasy,Sports Bar,Sports Club,Stables,Stadium,Steakhouse,Supermarket,Surf Spot,Sushi Restaurant,Swiss Restaurant,Syrian Restaurant,Tapas Restaurant,Tea Room,Tennis Court,Tennis Stadium,Tex-Mex Restaurant,Thai Restaurant,Theater,Theme Park,Theme Park Ride / Attraction,Theme Restaurant,Tiki Bar,Toll Plaza,Trail,Train Station,Travel & Transport,Turkish Restaurant,University,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Volcano,Water Park,Waterfront,Whisky Bar,Wings Joint,Zoo,Zoo Exhibit
0,Algiers,0.0,0.021739,0.0,0.0,0.021739,0.021739,0.021739,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.021739,0.0,0.0,0.0,0.0,0.021739,0.021739,0.0,0.0,0.0,0.0,0.195652,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.021739,0.0,0.021739,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.086957,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Oran,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.051282,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.102564,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.153846,0.0,0.0,0.076923,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.128205,0.0,0.0,0.0,0.025641,0.025641,0.0,0.0,0.025641,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Bab Ezzouar,0.0,0.027778,0.0,0.0,0.027778,0.027778,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.055556,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.055556,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Luanda,0.0,0.065574,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.032787,0.0,0.016393,0.032787,0.0,0.0,0.0,0.0,0.032787,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.032787,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.0,0.016393,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.0,0.0,0.0,0.04918,0.0,0.0,0.032787,0.0,0.0,0.0,0.0,0.032787,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.065574,0.0,0.0,0.0,0.032787,0.0,0.0,0.016393,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.081967,0.0,0.0,0.0,0.0,0.0,0.0,0.04918,0.0,0.0,0.0,0.0,0.0,0.0,0.147541,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04918,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016393,0.0,0.016393,0.0,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Cotonou,0.0,0.027778,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.027778,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.055556,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0
5,Gaborone,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.051282,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.051282,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.128205,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.025641,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.025641,0.179487,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Yaounde,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.142857,0.061224,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.020408,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.020408,0.020408,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.102041,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.040816,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.040816,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.040816,0.0,0.0,0.0,0.081633,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.061224,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.040816,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Douala,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.1,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.04,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.04,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Santa Maria,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.046875,0.046875,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.109375,0.078125,0.0,0.015625,0.0,0.0,0.0,0.0,0.046875,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015625,0.015625,0.0,0.0,0.0,0.15625,0.0,0.09375,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.09375,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.046875,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Kinshasa,0.0,0.029412,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.117647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.088235,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.176471,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.029412,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.088235,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [231]:
city_venues_grouped.shape

(57, 268)

In [232]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [233]:
#Return top 20 venues for each city

num_top_venues = 20

indicators = ['st', 'nd', 'rd']

#Create columns according to number of top venues
columns = ['City']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

#Create a new dataframe
city_venues_sorted = pd.DataFrame(columns=columns)
city_venues_sorted['City'] =city_venues_grouped['City']

for ind in np.arange(city_venues_grouped.shape[0]):
    city_venues_sorted.iloc[ind, 1:] = return_most_common_venues(city_venues_grouped.iloc[ind, :], num_top_venues)

city_venues_sorted.head(20)

Unnamed: 0,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
0,Algiers,Hotel,Restaurant,Mediterranean Restaurant,Coffee Shop,French Restaurant,Park,Cupcake Shop,Burger Joint,Salad Place,Recreation Center,Plaza,Convenience Store,Indian Restaurant,Department Store,Steakhouse,Lounge,Lighthouse,Lake,Italian Restaurant,Gym / Fitness Center
1,Oran,Hotel,Restaurant,Café,Ice Cream Shop,Boat or Ferry,Sandwich Place,Breakfast Spot,Shopping Mall,Burger Joint,Seafood Restaurant,Scenic Lookout,Indian Restaurant,Tea Room,Eastern European Restaurant,Pizza Place,Construction & Landscaping,Dessert Shop,Pedestrian Plaza,Big Box Store,Middle Eastern Restaurant
2,Bab Ezzouar,Hotel,Mediterranean Restaurant,Coffee Shop,Restaurant,Lake,Plaza,Recreation Center,Salad Place,Burger Joint,Shopping Mall,Metro Station,Park,French Restaurant,Lounge,Steakhouse,Beach,Indian Restaurant,Cupcake Shop,Airport Terminal,Turkish Restaurant
3,Luanda,Restaurant,Pizza Place,African Restaurant,Lounge,Hotel,Seafood Restaurant,Portuguese Restaurant,Ice Cream Shop,Italian Restaurant,Bar,Mediterranean Restaurant,Beach,BBQ Joint,Coffee Shop,Convenience Store,Supermarket,Fast Food Restaurant,Dessert Shop,Office,Movie Theater
4,Cotonou,Resort,Hotel,Restaurant,Shopping Mall,Hotel Bar,Bakery,Plaza,Middle Eastern Restaurant,Beach,Lounge,Music Venue,Tapas Restaurant,French Restaurant,Thai Restaurant,Food Court,Pizza Place,Soccer Stadium,Dessert Shop,Harbor / Marina,Hardware Store
5,Gaborone,Shopping Mall,Hotel,Restaurant,Coffee Shop,Café,Burger Joint,Portuguese Restaurant,Fast Food Restaurant,Bistro,Steakhouse,Miscellaneous Shop,Cocktail Bar,Middle Eastern Restaurant,Seafood Restaurant,Brazilian Restaurant,Shop & Service,Soccer Field,Mexican Restaurant,Food,Hotel Bar
6,Yaounde,Bakery,Hotel,Plaza,Bar,Restaurant,Pizza Place,Lounge,Nightclub,Shopping Mall,Café,Food & Drink Shop,Fast Food Restaurant,Bus Station,Pub,Casino,Tennis Court,Cocktail Bar,Coffee Shop,Comedy Club,Piano Bar
7,Douala,Bakery,Lounge,Restaurant,Hotel,Shopping Mall,Nightclub,Big Box Store,Diner,Pizza Place,French Restaurant,Ice Cream Shop,African Restaurant,Department Store,Food Court,Mediterranean Restaurant,Bed & Breakfast,Pharmacy,Plaza,Pub,Café
8,Santa Maria,Resort,Hotel,Seafood Restaurant,Restaurant,Hotel Bar,Beach,Beach Bar,Surf Spot,Italian Restaurant,Spa,African Restaurant,Lounge,Portuguese Restaurant,Bistro,Steakhouse,Cocktail Bar,Botanical Garden,English Restaurant,Pub,BBQ Joint
9,Kinshasa,Hotel,Café,Restaurant,Fast Food Restaurant,Lounge,Plaza,Resort,Furniture / Home Store,Market,Steakhouse,Cocktail Bar,Grocery Store,Pool,Bar,Italian Restaurant,Diner,Pizza Place,Shopping Mall,Breakfast Spot,Airport


In [234]:
#Add country and coordinates to city_venues_sorted dataframe 

countries_df = afrimain_df3[['Country','City','Latitude','Longitude']]
city_venues_final = countries_df.merge(city_venues_sorted, on='City')

In [235]:
city_venues_final.head()

Unnamed: 0,Country,City,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
0,Algeria,Algiers,28.0,2.99998,Hotel,Restaurant,Mediterranean Restaurant,Coffee Shop,French Restaurant,Park,Cupcake Shop,Burger Joint,Salad Place,Recreation Center,Plaza,Convenience Store,Indian Restaurant,Department Store,Steakhouse,Lounge,Lighthouse,Lake,Italian Restaurant,Gym / Fitness Center
1,Algeria,Oran,35.7033,-0.649298,Hotel,Restaurant,Café,Ice Cream Shop,Boat or Ferry,Sandwich Place,Breakfast Spot,Shopping Mall,Burger Joint,Seafood Restaurant,Scenic Lookout,Indian Restaurant,Tea Room,Eastern European Restaurant,Pizza Place,Construction & Landscaping,Dessert Shop,Pedestrian Plaza,Big Box Store,Middle Eastern Restaurant
2,Algeria,Bab Ezzouar,36.722,3.18567,Hotel,Mediterranean Restaurant,Coffee Shop,Restaurant,Lake,Plaza,Recreation Center,Salad Place,Burger Joint,Shopping Mall,Metro Station,Park,French Restaurant,Lounge,Steakhouse,Beach,Indian Restaurant,Cupcake Shop,Airport Terminal,Turkish Restaurant
3,Angola,Luanda,-8.82727,13.244,Restaurant,Pizza Place,African Restaurant,Lounge,Hotel,Seafood Restaurant,Portuguese Restaurant,Ice Cream Shop,Italian Restaurant,Bar,Mediterranean Restaurant,Beach,BBQ Joint,Coffee Shop,Convenience Store,Supermarket,Fast Food Restaurant,Dessert Shop,Office,Movie Theater
4,Benin,Cotonou,6.3677,2.42525,Resort,Hotel,Restaurant,Shopping Mall,Hotel Bar,Bakery,Plaza,Middle Eastern Restaurant,Beach,Lounge,Music Venue,Tapas Restaurant,French Restaurant,Thai Restaurant,Food Court,Pizza Place,Soccer Stadium,Dessert Shop,Harbor / Marina,Hardware Store


In [236]:
#Run rows in city_venues_grouped through K-means clustering algorithm  

#Set number of clusters
kclusters = 10

#Filter City_venues_grouped to create City_venues_grouped_clustering (cvgc)
cvgc_df = city_venues_grouped.iloc[:,1:]

#Run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(cvgc_df)

#Check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([9, 0, 9, 2, 7, 6, 3, 7, 1, 3], dtype=int32)

In [237]:
#Create new dataframe with cluster labels an ranked venues  

city_venues_merged = city_venues_final.iloc[:,:2]
city_venues_merged.head()

Unnamed: 0,Country,City
0,Algeria,Algiers
1,Algeria,Oran
2,Algeria,Bab Ezzouar
3,Angola,Luanda
4,Benin,Cotonou


In [238]:
#Add additional information to city_venues_merged
city_venues_merged = city_venues_merged.merge(afrimain_df3, on='City')
city_venues_merged.head()

Unnamed: 0,Country_x,City,Country_y,Type,Latitude,Longitude
0,Algeria,Algiers,Algeria,CC,28.0,2.99998
1,Algeria,Oran,Algeria,MC,35.7033,-0.649298
2,Algeria,Bab Ezzouar,Algeria,MC,36.722,3.18567
3,Angola,Luanda,Angola,CC,-8.82727,13.244
4,Benin,Cotonou,Benin,MC,6.3677,2.42525


In [239]:
city_venues_merged.rename(columns={'Country_x':'Country'}, inplace=True)

In [240]:
city_venues_merged.drop('Country_y', axis=1,inplace=True)

In [241]:
#Add cluster labels
city_venues_merged['Cluster Labels'] = kmeans.labels_
city_venues_merged.head()

Unnamed: 0,Country,City,Type,Latitude,Longitude,Cluster Labels
0,Algeria,Algiers,CC,28.0,2.99998,9
1,Algeria,Oran,MC,35.7033,-0.649298,0
2,Algeria,Bab Ezzouar,MC,36.722,3.18567,9
3,Angola,Luanda,CC,-8.82727,13.244,2
4,Benin,Cotonou,MC,6.3677,2.42525,7


In [242]:
#Add ranked venues for each city
city_venues_merged = city_venues_merged.join(city_venues_sorted.iloc[:,1:])

In [245]:
city_venues_merged.head(10)

Unnamed: 0,Country,City,Type,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
0,Algeria,Algiers,CC,28.0,2.99998,9,Hotel,Restaurant,Mediterranean Restaurant,Coffee Shop,French Restaurant,Park,Cupcake Shop,Burger Joint,Salad Place,Recreation Center,Plaza,Convenience Store,Indian Restaurant,Department Store,Steakhouse,Lounge,Lighthouse,Lake,Italian Restaurant,Gym / Fitness Center
1,Algeria,Oran,MC,35.7033,-0.649298,0,Hotel,Restaurant,Café,Ice Cream Shop,Boat or Ferry,Sandwich Place,Breakfast Spot,Shopping Mall,Burger Joint,Seafood Restaurant,Scenic Lookout,Indian Restaurant,Tea Room,Eastern European Restaurant,Pizza Place,Construction & Landscaping,Dessert Shop,Pedestrian Plaza,Big Box Store,Middle Eastern Restaurant
2,Algeria,Bab Ezzouar,MC,36.722,3.18567,9,Hotel,Mediterranean Restaurant,Coffee Shop,Restaurant,Lake,Plaza,Recreation Center,Salad Place,Burger Joint,Shopping Mall,Metro Station,Park,French Restaurant,Lounge,Steakhouse,Beach,Indian Restaurant,Cupcake Shop,Airport Terminal,Turkish Restaurant
3,Angola,Luanda,CC,-8.82727,13.244,2,Restaurant,Pizza Place,African Restaurant,Lounge,Hotel,Seafood Restaurant,Portuguese Restaurant,Ice Cream Shop,Italian Restaurant,Bar,Mediterranean Restaurant,Beach,BBQ Joint,Coffee Shop,Convenience Store,Supermarket,Fast Food Restaurant,Dessert Shop,Office,Movie Theater
4,Benin,Cotonou,MC,6.3677,2.42525,7,Resort,Hotel,Restaurant,Shopping Mall,Hotel Bar,Bakery,Plaza,Middle Eastern Restaurant,Beach,Lounge,Music Venue,Tapas Restaurant,French Restaurant,Thai Restaurant,Food Court,Pizza Place,Soccer Stadium,Dessert Shop,Harbor / Marina,Hardware Store
5,Botswana,Gaborone,CC,-24.6553,25.9087,6,Shopping Mall,Hotel,Restaurant,Coffee Shop,Café,Burger Joint,Portuguese Restaurant,Fast Food Restaurant,Bistro,Steakhouse,Miscellaneous Shop,Cocktail Bar,Middle Eastern Restaurant,Seafood Restaurant,Brazilian Restaurant,Shop & Service,Soccer Field,Mexican Restaurant,Food,Hotel Bar
6,Cameroon,Yaounde,CC,3.86899,11.5213,3,Bakery,Hotel,Plaza,Bar,Restaurant,Pizza Place,Lounge,Nightclub,Shopping Mall,Café,Food & Drink Shop,Fast Food Restaurant,Bus Station,Pub,Casino,Tennis Court,Cocktail Bar,Coffee Shop,Comedy Club,Piano Bar
7,Cameroon,Douala,MC,4.05374,9.66444,7,Bakery,Lounge,Restaurant,Hotel,Shopping Mall,Nightclub,Big Box Store,Diner,Pizza Place,French Restaurant,Ice Cream Shop,African Restaurant,Department Store,Food Court,Mediterranean Restaurant,Bed & Breakfast,Pharmacy,Plaza,Pub,Café
8,Cape Verde,Santa Maria,MC,16.5987,-22.905,1,Resort,Hotel,Seafood Restaurant,Restaurant,Hotel Bar,Beach,Beach Bar,Surf Spot,Italian Restaurant,Spa,African Restaurant,Lounge,Portuguese Restaurant,Bistro,Steakhouse,Cocktail Bar,Botanical Garden,English Restaurant,Pub,BBQ Joint
9,Democratic Republic of the Congo,Kinshasa,CC,-4.32171,15.3126,3,Hotel,Café,Restaurant,Fast Food Restaurant,Lounge,Plaza,Resort,Furniture / Home Store,Market,Steakhouse,Cocktail Bar,Grocery Store,Pool,Bar,Italian Restaurant,Diner,Pizza Place,Shopping Mall,Breakfast Spot,Airport


In [36]:
#Draw interactive map to visualize clusters. You can click on map circle markers for city and cluster information. 

afri_lat = -1.7832
afri_long = 20.5085

#Create map
map_clusters = folium.Map(location=[afri_lat, afri_long], zoom_start=3, tiles='Mapbox Bright')

#Set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

#Add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(city_venues_merged['Latitude'], city_venues_merged['Longitude'], city_venues_merged['City'], city_venues_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ', Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Analyzing and Classifying Clusters

### Level 1

In [246]:
#Create a dictionary of each cluster by filtering city_venues_merged according to each cluster lable from 0 to 9 

cluster_dict={}
for i in range(10):
    cluster_dict['cluster'+str(i+1)] = city_venues_merged.loc[city_venues_merged['Cluster Labels'] == i, city_venues_merged.columns[[0,1,2]+ list(range(6, city_venues_merged.shape[1]))]]
    

In [247]:
cluster_dict['cluster1']

Unnamed: 0,Country,City,Type,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
1,Algeria,Oran,MC,Hotel,Restaurant,Café,Ice Cream Shop,Boat or Ferry,Sandwich Place,Breakfast Spot,Shopping Mall,Burger Joint,Seafood Restaurant,Scenic Lookout,Indian Restaurant,Tea Room,Eastern European Restaurant,Pizza Place,Construction & Landscaping,Dessert Shop,Pedestrian Plaza,Big Box Store,Middle Eastern Restaurant
12,Egypt,Cairo,CC,Historic Site,Lounge,Café,Italian Restaurant,Egyptian Restaurant,Theater,Hotel,Middle Eastern Restaurant,Coffee Shop,Pastry Shop,Seafood Restaurant,Performing Arts Venue,Hotel Bar,Kebab Restaurant,Bookstore,Dessert Shop,Falafel Restaurant,Plaza,Art Gallery,Neighborhood
13,Egypt,Alexandria,MC,Café,Seafood Restaurant,Coffee Shop,Ice Cream Shop,Sandwich Place,Middle Eastern Restaurant,Historic Site,Restaurant,Egyptian Restaurant,Syrian Restaurant,Bar,Bakery,Italian Restaurant,Lebanese Restaurant,Plaza,Lounge,Juice Bar,Pizza Place,Hotel,Gym
29,Morocco,Rabat,CC,Café,Historic Site,Restaurant,Hotel,Moroccan Restaurant,French Restaurant,Diner,Coffee Shop,Dessert Shop,Pizza Place,Art Gallery,Falafel Restaurant,Movie Theater,Italian Restaurant,Tapas Restaurant,Department Store,Burger Joint,Scenic Lookout,Plaza,Seafood Restaurant
30,Morocco,Casablanca,MC,Café,Coffee Shop,Hotel,Fast Food Restaurant,French Restaurant,Burger Joint,Pub,Tapas Restaurant,Gastropub,Restaurant,Diner,Lounge,Ice Cream Shop,Mediterranean Restaurant,Seafood Restaurant,Bar,Italian Restaurant,Moroccan Restaurant,Chinese Restaurant,Plaza
33,Morocco,Agadir,MC,Restaurant,Resort,Hotel,Café,Moroccan Restaurant,Coffee Shop,Beach,Seafood Restaurant,Nightclub,Diner,Hotel Bar,Snack Place,Italian Restaurant,Surf Spot,Supermarket,Campground,Department Store,Spanish Restaurant,Lounge,Tapas Restaurant
34,Morocco,Tangiers,MC,Café,Hotel,Moroccan Restaurant,Coffee Shop,Italian Restaurant,Resort,Diner,Restaurant,Ice Cream Shop,Middle Eastern Restaurant,Fast Food Restaurant,Scenic Lookout,Pub,Seafood Restaurant,Shopping Mall,Beach,Hostel,Flea Market,Art Gallery,Cocktail Bar
35,Mozambique,Maputo,CC,Café,Hotel,Restaurant,Plaza,Pizza Place,Fast Food Restaurant,Italian Restaurant,Fried Chicken Joint,Indian Restaurant,Seafood Restaurant,Portuguese Restaurant,Coffee Shop,Park,Bar,Bakery,Museum,African Restaurant,Art Gallery,Lounge,Karaoke Bar
45,South Africa,Cape Town,MC,Coffee Shop,Café,Indian Restaurant,Restaurant,Italian Restaurant,Bar,Burger Joint,French Restaurant,Cocktail Bar,Hostel,Museum,Hotel,Pizza Place,Gastropub,Tapas Restaurant,Breakfast Spot,Seafood Restaurant,Steakhouse,Asian Restaurant,Ice Cream Shop
46,South Africa,Durban,MC,Hotel,Café,Restaurant,Coffee Shop,Indian Restaurant,Grocery Store,Fast Food Restaurant,Seafood Restaurant,Burger Joint,Shopping Mall,Italian Restaurant,Gastropub,Breakfast Spot,Gym,Steakhouse,Beach,Bakery,Theater,Japanese Restaurant,French Restaurant


In [248]:
#Convert each cluster dataframe into a series so that its venues can be counted 

cluster_series_dict = {}

for i in range(1,11): #So that clusters can be naemd from 1 to 10 instead of 0 to 9

    #Split each city and its venues into lists within a list 
    cluster_list = cluster_dict['cluster'+str(i)].iloc[:,3:].values.tolist()

    #Join each city's venues into one list
    cluster_list_joined = cluster_list[0]

    for venues in cluster_list[1:]:
        cluster_list_joined = cluster_list_joined + venues
    
    #Convert cluster list into a series 
    cluster_series_dict['cluster'+str(i)] = pd.Series(cluster_list_joined)

In [249]:
cluster_series_dict['cluster1'].head(15)

0                           Hotel
1                      Restaurant
2                            Café
3                  Ice Cream Shop
4                   Boat or Ferry
5                  Sandwich Place
6                  Breakfast Spot
7                   Shopping Mall
8                    Burger Joint
9              Seafood Restaurant
10                 Scenic Lookout
11              Indian Restaurant
12                       Tea Room
13    Eastern European Restaurant
14                    Pizza Place
dtype: object

In [319]:
#Count the venues in each cluster series and convert count into a dataframe  

cluster_counts = {}
cluster_dataframes = {}

for series, cluster in zip(list(cluster_series_dict.values()),list(cluster_series_dict.keys())):
    cluster_counts[cluster] = series.value_counts().head(20) #Returns top 20 venues that appear the most in each cluster
    cluster_dataframes[cluster] = cluster_counts[cluster].to_frame(cluster).reset_index() 


In [310]:
cluster_counts['cluster1']

Café                         14
Hotel                        14
Coffee Shop                  13
Seafood Restaurant           12
Restaurant                   12
Italian Restaurant           10
Fast Food Restaurant          8
Pizza Place                   8
Bar                           7
Burger Joint                  7
Indian Restaurant             7
Art Gallery                   6
Ice Cream Shop                6
Lounge                        6
Historic Site                 5
French Restaurant             5
Plaza                         5
Middle Eastern Restaurant     5
Shopping Mall                 5
Moroccan Restaurant           4
dtype: int64

In [43]:
cluster_dataframes['cluster1'].head(10)

Unnamed: 0,index,cluster1
0,Café,14
1,Hotel,14
2,Coffee Shop,13
3,Seafood Restaurant,12
4,Restaurant,12
5,Italian Restaurant,10
6,Fast Food Restaurant,8
7,Pizza Place,8
8,Bar,7
9,Burger Joint,7


In [251]:
#Merge each cluster venue count dataframe into one dataframe for easy comparison 

clusters_mrgd_new = cluster_dataframes['cluster1'].merge(cluster_dataframes['cluster2'],on='index',how='outer')

for frame in list(cluster_dataframes.values())[2:]:

    clusters_mrgd_new = clusters_mrgd_new.merge(frame,on='index',how='outer')

In [252]:
clusters_mrgd_new.rename(columns={'index':'Venues'},inplace=True)

clusters_mrgd_new.fillna('NaN',inplace=True)

In [46]:
clusters_mrgd_new

Unnamed: 0,Venues,cluster1,cluster2,cluster3,cluster4,cluster5,cluster6,cluster7,cluster8,cluster9,cluster10
0,Café,14.0,4.0,3.0,3.0,1.0,2.0,4.0,9.0,3.0,4.0
1,Hotel,14.0,5.0,5.0,3.0,1.0,2.0,5.0,12.0,,6.0
2,Coffee Shop,13.0,,3.0,1.0,1.0,,2.0,4.0,,4.0
3,Seafood Restaurant,12.0,5.0,,,1.0,,2.0,,2.0,
4,Restaurant,12.0,5.0,5.0,3.0,1.0,2.0,4.0,11.0,3.0,6.0
5,Italian Restaurant,10.0,5.0,,,1.0,,2.0,6.0,2.0,3.0
6,Fast Food Restaurant,8.0,2.0,5.0,3.0,1.0,1.0,5.0,8.0,3.0,3.0
7,Pizza Place,8.0,,4.0,3.0,1.0,,,7.0,3.0,3.0
8,Bar,7.0,3.0,4.0,3.0,,,,8.0,,
9,Burger Joint,7.0,,,,,,,,,4.0


### Level 2

We will now conduct a thorough analysis of each cluster. Paying particular attention to venues that might be of interest to tourists. Each cluster will then be classified based on the results. 

#### Cluster 1

In [253]:
#Shows all venues that appear in cluster 1 

clusters_mrgd_new[clusters_mrgd_new['cluster1']!='NaN']

Unnamed: 0,Venues,cluster1,cluster2,cluster3,cluster4,cluster5,cluster6,cluster7,cluster8,cluster9,cluster10
0,Café,14,4.0,3.0,3.0,1.0,2.0,4.0,9.0,3.0,4.0
1,Hotel,14,5.0,5.0,3.0,1.0,2.0,5.0,12.0,,6.0
2,Coffee Shop,13,,3.0,1.0,1.0,,2.0,4.0,,4.0
3,Seafood Restaurant,12,5.0,,,1.0,,2.0,,2.0,
4,Restaurant,12,5.0,5.0,3.0,1.0,2.0,4.0,11.0,3.0,6.0
5,Italian Restaurant,10,5.0,,,1.0,,2.0,6.0,2.0,3.0
6,Fast Food Restaurant,8,2.0,5.0,3.0,1.0,1.0,5.0,8.0,3.0,3.0
7,Pizza Place,8,,4.0,3.0,1.0,,,7.0,3.0,3.0
8,Bar,7,3.0,4.0,3.0,,,,8.0,,
9,Burger Joint,7,,,,,,,,,4.0


In [254]:
print('Countries in cluster 1: ', cluster_dict['cluster1']['City'].count())

Countries in cluster 1:  14


These are all the venues for which cluster 1 has a value. Observations:

1) General offerings
    - Greatest quantity and variety of restaurants, cafes, and hotels.
   
2) Moderately unique offerings (defined as being one of three or four clusters that contain these venues):
    - Restaurants:
        - French
        - Middle Eastern
    - Burger joints
    - Ice cream shops
    - Breakfast spots 
   
3) Highly unique offerings (defined as being one of two or less clusters that contain these venues): 
    - Historic sites
    - Art Galleries 

This cluster would be a cosmopolitan food lovers dream. It also has a lot to offer for people who consider themselves to be refined. It is the only cluster with a dense frequency of art galleries and one of two with a number of historic sites. Additionally, it has old countries like Egypt. Therefore, this will be classified as the Ancient Africa Cluster.    

In [255]:
#Renaming cluster 

clusters_mrgd_new.rename(columns={'cluster1':'Ancient Africa'},inplace=True)

#### Cluster 2

In [256]:
cluster_dict['cluster2']

Unnamed: 0,Country,City,Type,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
8,Cape Verde,Santa Maria,MC,Resort,Hotel,Seafood Restaurant,Restaurant,Hotel Bar,Beach,Beach Bar,Surf Spot,Italian Restaurant,Spa,African Restaurant,Lounge,Portuguese Restaurant,Bistro,Steakhouse,Cocktail Bar,Botanical Garden,English Restaurant,Pub,BBQ Joint
27,Mauritius,Triolet,MC,Beach,Resort,Hotel,Seafood Restaurant,Bar,Italian Restaurant,Restaurant,Shopping Mall,Cajun / Creole Restaurant,Nightclub,Café,Miscellaneous Shop,Buffet,Mediterranean Restaurant,French Restaurant,Lounge,Department Store,Indian Restaurant,Fish & Chips Shop,Garden
28,Mauritius,Goodlands,MC,Beach,Resort,Hotel,Bar,Shopping Mall,Seafood Restaurant,Italian Restaurant,Café,Nightclub,Restaurant,Bistro,Buffet,Fast Food Restaurant,Farmers Market,Indian Restaurant,Cajun / Creole Restaurant,Farm,Movie Theater,Pub,Portuguese Restaurant
43,Seychelles,Victoria,CC,Resort,Beach,Seafood Restaurant,Hotel,Restaurant,Italian Restaurant,Airport,Bar,Indian Restaurant,Café,Spa,Cajun / Creole Restaurant,French Restaurant,Historic Site,Cocktail Bar,Airport Lounge,Sandwich Place,Market,Boat or Ferry,Farmers Market
53,Tunisia,Bizerte,MC,Beer Garden,Beach,Resort,Seafood Restaurant,Diner,Italian Restaurant,Fast Food Restaurant,Café,Hotel,Restaurant,Tea Room,Plaza,Coffee Shop,Creperie,Mediterranean Restaurant,Cupcake Shop,Steakhouse,Pizza Place,Pedestrian Plaza,Toll Plaza


In [257]:
clusters_mrgd_new[clusters_mrgd_new['cluster2']!='NaN']

Unnamed: 0,Venues,Ancient Africa,cluster2,cluster3,cluster4,cluster5,cluster6,cluster7,cluster8,cluster9,cluster10
0,Café,14.0,4,3.0,3.0,1.0,2.0,4.0,9.0,3.0,4.0
1,Hotel,14.0,5,5.0,3.0,1.0,2.0,5.0,12.0,,6.0
3,Seafood Restaurant,12.0,5,,,1.0,,2.0,,2.0,
4,Restaurant,12.0,5,5.0,3.0,1.0,2.0,4.0,11.0,3.0,6.0
5,Italian Restaurant,10.0,5,,,1.0,,2.0,6.0,2.0,3.0
6,Fast Food Restaurant,8.0,2,5.0,3.0,1.0,1.0,5.0,8.0,3.0,3.0
8,Bar,7.0,3,4.0,3.0,,,,8.0,,
10,Indian Restaurant,7.0,3,2.0,,,,,,3.0,3.0
13,Lounge,6.0,2,2.0,3.0,,2.0,2.0,9.0,3.0,6.0
15,French Restaurant,5.0,2,,,,,,,,4.0


In [258]:
print('Countries in cluster 2: ', cluster_dict['cluster2']['City'].count())

Countries in cluster 2:  5


These are all the venues for which cluster 2 has a value. Observations:

1) General offerings: 
    - Nothing of note   
    
2) Moderately unique offerings: 
    - Restaurants:
        - Mediterranean
        - French
    - Cocktail bars
    - Bistros
    - Resorts
    
3) Highly unique offerings: 
    - Steakhouses
    - Nightclubs
    - Spas
    - Restaurants:
        - Portugese
        - Cajun/Creole
    - Beaches

This cluster has a number of unique offerings including the most beaches and resorts. Therefore, this will be classifed as the African Getaway Cluster.   

In [259]:
clusters_mrgd_new.rename(columns={'cluster2':'African Getaway'},inplace=True)

#### Cluster 3

In [260]:
cluster_dict['cluster3']

Unnamed: 0,Country,City,Type,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
3,Angola,Luanda,CC,Restaurant,Pizza Place,African Restaurant,Lounge,Hotel,Seafood Restaurant,Portuguese Restaurant,Ice Cream Shop,Italian Restaurant,Bar,Mediterranean Restaurant,Beach,BBQ Joint,Coffee Shop,Convenience Store,Supermarket,Fast Food Restaurant,Dessert Shop,Office,Movie Theater
16,Ghana,Kumasi,MC,African Restaurant,Fast Food Restaurant,Hotel,Chinese Restaurant,Indian Restaurant,Bar,Hotel Bar,Nightclub,Bus Station,Shopping Mall,Pizza Place,Shop & Service,English Restaurant,Food Court,Burrito Place,Café,Restaurant,Shopping Plaza,Snack Place,Brewery
19,Kenya,Nairobi,CC,Coffee Shop,Café,African Restaurant,Bar,Hotel,Fried Chicken Joint,Fast Food Restaurant,Performing Arts Venue,Italian Restaurant,Lounge,Indian Restaurant,Pool,Garden Center,Clothing Store,Chinese Restaurant,Pub,Movie Theater,Spa,Restaurant,Food
37,Nigeria,Abuja,CC,African Restaurant,Hotel,Pizza Place,Café,Restaurant,Department Store,Movie Theater,Fried Chicken Joint,Chinese Restaurant,Convenience Store,Pharmacy,Bar,Fast Food Restaurant,Arcade,Golf Course,Grocery Store,Snack Place,Shopping Mall,Burger Joint,Hotel Bar
50,Sudan,Omdurman,MC,African Restaurant,Market,Fast Food Restaurant,Pizza Place,Hotel,Ice Cream Shop,Concert Hall,Supermarket,Juice Bar,Coffee Shop,Clothing Store,Restaurant,Bus Stop,Bus Station,Shopping Mall,Accessories Store,Burger Joint,Steakhouse,Electronics Store,Pharmacy


In [261]:
clusters_mrgd_new[clusters_mrgd_new['cluster3']!='NaN']

Unnamed: 0,Venues,Ancient Africa,African Getaway,cluster3,cluster4,cluster5,cluster6,cluster7,cluster8,cluster9,cluster10
0,Café,14.0,4.0,3,3.0,1.0,2.0,4.0,9.0,3.0,4.0
1,Hotel,14.0,5.0,5,3.0,1.0,2.0,5.0,12.0,,6.0
2,Coffee Shop,13.0,,3,1.0,1.0,,2.0,4.0,,4.0
4,Restaurant,12.0,5.0,5,3.0,1.0,2.0,4.0,11.0,3.0,6.0
6,Fast Food Restaurant,8.0,2.0,5,3.0,1.0,1.0,5.0,8.0,3.0,3.0
7,Pizza Place,8.0,,4,3.0,1.0,,,7.0,3.0,3.0
8,Bar,7.0,3.0,4,3.0,,,,8.0,,
10,Indian Restaurant,7.0,3.0,2,,,,,,3.0,3.0
12,Ice Cream Shop,6.0,,2,,1.0,,,5.0,2.0,
13,Lounge,6.0,2.0,2,3.0,,2.0,2.0,9.0,3.0,6.0


In [262]:
print('Countries in cluster 3: ', cluster_dict['cluster3']['City'].count())

Countries in cluster 3:  5


These are all the venues for which cluster 3 has a value. Observations:

1) General offerings: 
    - Nothing of note  
2) Moderately unique offerings: 
    - Restaurants:
        - Chinese 
        - African 
    - Burger joints
3) Highly unique offerings: 
    - Bus stations
    - Fried chicken joints
    - Clothing stores
    - Movie theaters

What stands out about this cluster is its high number of African restaurants, as well as the fact that it has movie thaters and bus stations. The stations imply that it would be fairly easy to get around even if you don't have a car. And the theaters makes one think that this cluster is quite metropolitan. However, the presence of stations among the top venues also suggests that these places are not too westernized. Therefore, this cluster will be classified as the African Experience cluster. 

In [263]:
clusters_mrgd_new.rename(columns={'cluster3':'African Experience'},inplace=True)

#### Cluster 4

In [264]:
cluster_dict['cluster4']

Unnamed: 0,Country,City,Type,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
6,Cameroon,Yaounde,CC,Bakery,Hotel,Plaza,Bar,Restaurant,Pizza Place,Lounge,Nightclub,Shopping Mall,Café,Food & Drink Shop,Fast Food Restaurant,Bus Station,Pub,Casino,Tennis Court,Cocktail Bar,Coffee Shop,Comedy Club,Piano Bar
9,Democratic Republic of the Congo,Kinshasa,CC,Hotel,Café,Restaurant,Fast Food Restaurant,Lounge,Plaza,Resort,Furniture / Home Store,Market,Steakhouse,Cocktail Bar,Grocery Store,Pool,Bar,Italian Restaurant,Diner,Pizza Place,Shopping Mall,Breakfast Spot,Airport
10,Republic of the Congo,Brazzaville,CC,Hotel,Restaurant,Fast Food Restaurant,Café,Plaza,Airport,Lounge,Diner,Bakery,Resort,Pool,Market,Shopping Mall,Furniture / Home Store,French Restaurant,Bar,Cocktail Bar,Pizza Place,Grocery Store,Supermarket


In [265]:
clusters_mrgd_new[clusters_mrgd_new['cluster4']!='NaN']

Unnamed: 0,Venues,Ancient Africa,African Getaway,African Experience,cluster4,cluster5,cluster6,cluster7,cluster8,cluster9,cluster10
0,Café,14.0,4.0,3.0,3,1.0,2.0,4.0,9.0,3.0,4.0
1,Hotel,14.0,5.0,5.0,3,1.0,2.0,5.0,12.0,,6.0
2,Coffee Shop,13.0,,3.0,1,1.0,,2.0,4.0,,4.0
4,Restaurant,12.0,5.0,5.0,3,1.0,2.0,4.0,11.0,3.0,6.0
6,Fast Food Restaurant,8.0,2.0,5.0,3,1.0,1.0,5.0,8.0,3.0,3.0
7,Pizza Place,8.0,,4.0,3,1.0,,,7.0,3.0,3.0
8,Bar,7.0,3.0,4.0,3,,,,8.0,,
13,Lounge,6.0,2.0,2.0,3,,2.0,2.0,9.0,3.0,6.0
16,Plaza,5.0,,,3,1.0,1.0,,,,2.0
18,Shopping Mall,5.0,,3.0,3,1.0,,5.0,13.0,3.0,3.0


In [266]:
print('Countries in cluster 4: ', cluster_dict['cluster4']['City'].count())

Countries in cluster 4:  3


These are all the venues for which cluster 4 has a value. Observations:

1) General offerings: 
    - Nothing of note

2) Moderately unique offerings:
    - Nothinf of note 

3) Highly unique offerings: 
    - Nothing of note 

All of its venues are general, including its unique offerings. Therefore, this cluster will be omitted as it has nothing distinct to offer. 

In [267]:
clusters_mrgd_new.drop('cluster4', 1, inplace=True)

#### Cluster 5

In [268]:
cluster_dict['cluster5']

Unnamed: 0,Country,City,Type,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
22,Libya,Tripoli,CC,Café,Coffee Shop,Italian Restaurant,Bakery,Fast Food Restaurant,Hotel,Pizza Place,Shopping Mall,Seafood Restaurant,Ice Cream Shop,Restaurant,Plaza,Diner,Soccer Field,Middle Eastern Restaurant,Park,Fried Chicken Joint,Supermarket,Latin American Restaurant,Tea Room


In [269]:
clusters_mrgd_new[clusters_mrgd_new['cluster5']!='NaN']

Unnamed: 0,Venues,Ancient Africa,African Getaway,African Experience,cluster5,cluster6,cluster7,cluster8,cluster9,cluster10
0,Café,14.0,4.0,3.0,1,2.0,4.0,9.0,3.0,4.0
1,Hotel,14.0,5.0,5.0,1,2.0,5.0,12.0,,6.0
2,Coffee Shop,13.0,,3.0,1,,2.0,4.0,,4.0
3,Seafood Restaurant,12.0,5.0,,1,,2.0,,2.0,
4,Restaurant,12.0,5.0,5.0,1,2.0,4.0,11.0,3.0,6.0
5,Italian Restaurant,10.0,5.0,,1,,2.0,6.0,2.0,3.0
6,Fast Food Restaurant,8.0,2.0,5.0,1,1.0,5.0,8.0,3.0,3.0
7,Pizza Place,8.0,,4.0,1,,,7.0,3.0,3.0
12,Ice Cream Shop,6.0,,2.0,1,,,5.0,2.0,
16,Plaza,5.0,,,1,1.0,,,,2.0


In [270]:
print('Countries in cluster 5: ', cluster_dict['cluster5']['City'].count())

Countries in cluster 5:  1


These are all the venues for which Cluster 5 has a value. Observations:

1) General offerings:
     - Restaurants:
         1) Seafood 
         2) Italian 
         3) Fast Food
         4) Pizza
         
2) Moderately unique offerings: 
    - Bakeries
    - Diners
    - Middle Eastern Restaurants
    - Ice cream shops
    
3) Highly unique offerings 
    - Latin American Restaurants
    - Tea rooms
    - Fried chicken joints 

The most unique thing about this cluster is that it is the only one that has Latin American restaurants. This suggests that there is some Latin American influence on the culture. The fusion of Latin American and African culture could be quite an interesting thing to experience. This cluster will be classified as the Latin Fusion African Cluster. 

In [271]:
clusters_mrgd_new.rename(columns={'cluster5':'Latin Fusion'},inplace=True)

#### Cluster 6

In [272]:
cluster_dict['cluster6']

Unnamed: 0,Country,City,Type,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
31,Morocco,Fez,MC,Café,Hotel,Moroccan Restaurant,Historic Site,Diner,Coffee Shop,Bed & Breakfast,Restaurant,Resort,Tea Room,Fast Food Restaurant,Lounge,Castle,Spa,Sandwich Place,Molecular Gastronomy Restaurant,Plaza,Pub,Chinese Restaurant,Multiplex
32,Morocco,Marrakech,MC,Hotel,Moroccan Restaurant,Café,Bed & Breakfast,Historic Site,Restaurant,Spa,Cocktail Bar,Lounge,Resort,Bistro,French Restaurant,Museum,Garden,Sushi Restaurant,Boutique,Burger Joint,Snack Place,Tea Room,History Museum


In [273]:
clusters_mrgd_new[clusters_mrgd_new['cluster6']!='NaN']

Unnamed: 0,Venues,Ancient Africa,African Getaway,African Experience,Latin Fusion,cluster6,cluster7,cluster8,cluster9,cluster10
0,Café,14.0,4.0,3.0,1.0,2,4.0,9.0,3.0,4.0
1,Hotel,14.0,5.0,5.0,1.0,2,5.0,12.0,,6.0
4,Restaurant,12.0,5.0,5.0,1.0,2,4.0,11.0,3.0,6.0
6,Fast Food Restaurant,8.0,2.0,5.0,1.0,1,5.0,8.0,3.0,3.0
13,Lounge,6.0,2.0,2.0,,2,2.0,9.0,3.0,6.0
14,Historic Site,5.0,,,,2,,,1.0,
16,Plaza,5.0,,,1.0,1,,,,2.0
19,Moroccan Restaurant,4.0,,,,2,,,,
20,Resort,,5.0,,,2,,,1.0,
26,Bistro,,2.0,,,1,3.0,,,


In [274]:
print('Countries in cluster 6: ', cluster_dict['cluster6']['City'].count())

Countries in cluster 6:  2


These are the venues for which cluster 6 has a vlaue. Observations:

1) General offerings: 
    - Nothing of note 
    
2) Moderately unique offerings: 
    - French restaurants
    - Resorts
    - Bistros
    - Diners
    
3) Highly unique offerings: 
    - Historic sites
    - Spas
    - Tea rooms
    - Bed & Breakfasts
    - Moroccan restaurants
    - Museums
    - Gardens
    - Pubs
    - Molecular gastronomy restaurants
    - Sandwich places
    - Castles 

The most unique thing about this cluster is that it has castles, molecular gastronomy and moroccan restaurants, and museums among other things. Given that it has the most unique offerings out of all of the clusters we've examined, it will be classified as the One-of-a-kind cluster.   

In [275]:
clusters_mrgd_new.rename(columns={'cluster6':'One-of-a-kind'},inplace=True)

#### Cluster 7

In [276]:
cluster_dict['cluster7']

Unnamed: 0,Country,City,Type,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
5,Botswana,Gaborone,CC,Shopping Mall,Hotel,Restaurant,Coffee Shop,Café,Burger Joint,Portuguese Restaurant,Fast Food Restaurant,Bistro,Steakhouse,Miscellaneous Shop,Cocktail Bar,Middle Eastern Restaurant,Seafood Restaurant,Brazilian Restaurant,Shop & Service,Soccer Field,Mexican Restaurant,Food,Hotel Bar
21,Kenya,Kisumu,MC,Shopping Mall,Hotel,Flea Market,Beach,Bed & Breakfast,Lounge,Boat or Ferry,Soup Place,Food Court,Food,Speakeasy,Lake,Scenic Lookout,Fast Food Restaurant,Nightclub,Cocktail Bar,Coffee Shop,Diner,Bar,Department Store
36,Namibia,Windhoek,CC,Hotel,Shopping Mall,Restaurant,Steakhouse,Italian Restaurant,Portuguese Restaurant,Café,Hostel,Bed & Breakfast,Beer Garden,Bistro,Breakfast Spot,Fast Food Restaurant,German Restaurant,Museum,Chinese Restaurant,Pub,Convenience Store,Tapas Restaurant,Mediterranean Restaurant
55,Zambia,Lusaka,CC,Shopping Mall,Hotel,Restaurant,Lounge,Steakhouse,Café,Seafood Restaurant,Bistro,Italian Restaurant,Fast Food Restaurant,American Restaurant,Bar,Boarding House,Farmers Market,Middle Eastern Restaurant,Big Box Store,Bus Station,Supermarket,Movie Theater,Bed & Breakfast
56,Zimbabwe,Harare,CC,Shopping Mall,Restaurant,Hotel,Café,Convenience Store,Performing Arts Venue,Grocery Store,Golf Course,Cocktail Bar,Fast Food Restaurant,Supermarket,Food Court,Steakhouse,Chinese Restaurant,Liquor Store,Multiplex,Pizza Place,Mediterranean Restaurant,Bookstore,Clothing Store


In [277]:
clusters_mrgd_new[clusters_mrgd_new['cluster7']!='NaN']

Unnamed: 0,Venues,Ancient Africa,African Getaway,African Experience,Latin Fusion,One-of-a-kind,cluster7,cluster8,cluster9,cluster10
0,Café,14.0,4.0,3.0,1.0,2.0,4,9.0,3.0,4.0
1,Hotel,14.0,5.0,5.0,1.0,2.0,5,12.0,,6.0
2,Coffee Shop,13.0,,3.0,1.0,,2,4.0,,4.0
3,Seafood Restaurant,12.0,5.0,,1.0,,2,,2.0,
4,Restaurant,12.0,5.0,5.0,1.0,2.0,4,11.0,3.0,6.0
5,Italian Restaurant,10.0,5.0,,1.0,,2,6.0,2.0,3.0
6,Fast Food Restaurant,8.0,2.0,5.0,1.0,1.0,5,8.0,3.0,3.0
13,Lounge,6.0,2.0,2.0,,2.0,2,9.0,3.0,6.0
17,Middle Eastern Restaurant,5.0,,,1.0,,2,5.0,,
18,Shopping Mall,5.0,,3.0,1.0,,5,13.0,3.0,3.0


In [278]:
print('Countries in cluster 7: ', cluster_dict['cluster7']['City'].count())

Countries in cluster 7:  5


These are venues for which cluster 7 has values. Observations:

1) General offerings:
    - Variety of palces to eat 
    
2) Moderately unique offerings: 
    - Restaurants: 
        - Chinese
        - Mediterranean 
        - Middle Eastern
        - Bistros
        - Cocktail bars
        
3) Highly unique offerings:
    - Bed & Breakfasts
    - Steakhouses
    - Portugese restaurants 

This cluster doesnt have any unique venues that set it apart. Therefore, it will be omitted. 

In [279]:
clusters_mrgd_new.drop('cluster7',1,inplace=True)

#### Cluster 8

In [280]:
cluster_dict['cluster8']

Unnamed: 0,Country,City,Type,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
4,Benin,Cotonou,MC,Resort,Hotel,Restaurant,Shopping Mall,Hotel Bar,Bakery,Plaza,Middle Eastern Restaurant,Beach,Lounge,Music Venue,Tapas Restaurant,French Restaurant,Thai Restaurant,Food Court,Pizza Place,Soccer Stadium,Dessert Shop,Harbor / Marina,Hardware Store
7,Cameroon,Douala,MC,Bakery,Lounge,Restaurant,Hotel,Shopping Mall,Nightclub,Big Box Store,Diner,Pizza Place,French Restaurant,Ice Cream Shop,African Restaurant,Department Store,Food Court,Mediterranean Restaurant,Bed & Breakfast,Pharmacy,Plaza,Pub,Café
11,Cote D’Ivoire,Abidjan,MC,Ice Cream Shop,African Restaurant,Fast Food Restaurant,Bakery,Italian Restaurant,Nightclub,Restaurant,Shopping Mall,Plaza,Café,Lounge,Coffee Shop,Bus Station,Boutique,Steakhouse,Soccer Field,Middle Eastern Restaurant,Supermarket,Bar,Brewery
15,Ghana,Accra,CC,Hotel,African Restaurant,Bar,Fast Food Restaurant,Nightclub,Restaurant,Indian Restaurant,Lounge,Chinese Restaurant,Pizza Place,Shopping Mall,Thai Restaurant,Grocery Store,Sculpture Garden,Bed & Breakfast,Cocktail Bar,Asian Restaurant,Furniture / Home Store,Market,Snack Place
17,Ghana,Tema,MC,Shopping Mall,Fast Food Restaurant,Convenience Store,Golf Course,Fried Chicken Joint,Restaurant,Hotel,African Restaurant,American Restaurant,Breakfast Spot,Mediterranean Restaurant,Food Truck,Food Court,Food,Café,Flea Market,Casino,Fish Market,Airport Food Court,Chinese Restaurant
18,Ghana,Teshi,MC,Hotel,African Restaurant,Fast Food Restaurant,Shopping Mall,Café,Restaurant,American Restaurant,Cocktail Bar,Beach,Bakery,Burger Joint,Chinese Restaurant,Coffee Shop,Bar,Thai Restaurant,Nightclub,Middle Eastern Restaurant,Ice Cream Shop,Lounge,Furniture / Home Store
20,Kenya,Mombasa,MC,Café,Nightclub,Resort,African Restaurant,Shopping Mall,Lounge,Coffee Shop,Pizza Place,Beach,Hotel,Department Store,Italian Restaurant,Burger Joint,Scenic Lookout,Restaurant,Hotel Bar,Airport Terminal,Fast Food Restaurant,Seafood Restaurant,Bar
38,Nigeria,Kano,MC,Shopping Mall,Convenience Store,Middle Eastern Restaurant,Department Store,BBQ Joint,Airport,Hotel,Fried Chicken Joint,Gym / Fitness Center,Mobile Phone Shop,Movie Theater,Bakery,Pizza Place,Resort,Restaurant,Café,Shop & Service,Soccer Field,Nightclub,Indian Restaurant
39,Nigeria,Port Harcourt,MC,Multiplex,Department Store,Shopping Mall,African Restaurant,Hotel,Fast Food Restaurant,Mediterranean Restaurant,Boutique,Bar,Bed & Breakfast,Sports Bar,Movie Theater,Food Court,Karaoke Bar,Clothing Store,Nightclub,Dive Bar,Thai Restaurant,Deli / Bodega,Jewelry Store
40,Nigeria,Lagos,MC,Lounge,Hotel,African Restaurant,Café,Shopping Mall,Italian Restaurant,Pizza Place,Art Gallery,Market,Japanese Restaurant,Indian Restaurant,Bar,Mediterranean Restaurant,Park,Department Store,Nightclub,Restaurant,Diner,Ice Cream Shop,Gym / Fitness Center


In [281]:
clusters_mrgd_new[clusters_mrgd_new['cluster8']!='NaN']

Unnamed: 0,Venues,Ancient Africa,African Getaway,African Experience,Latin Fusion,One-of-a-kind,cluster8,cluster9,cluster10
0,Café,14.0,4.0,3.0,1.0,2.0,9,3.0,4.0
1,Hotel,14.0,5.0,5.0,1.0,2.0,12,,6.0
2,Coffee Shop,13.0,,3.0,1.0,,4,,4.0
4,Restaurant,12.0,5.0,5.0,1.0,2.0,11,3.0,6.0
5,Italian Restaurant,10.0,5.0,,1.0,,6,2.0,3.0
6,Fast Food Restaurant,8.0,2.0,5.0,1.0,1.0,8,3.0,3.0
7,Pizza Place,8.0,,4.0,1.0,,7,3.0,3.0
8,Bar,7.0,3.0,4.0,,,8,,
12,Ice Cream Shop,6.0,,2.0,1.0,,5,2.0,
13,Lounge,6.0,2.0,2.0,,2.0,9,3.0,6.0


In [282]:
print('Countries in cluster 8: ', cluster_dict['cluster8']['City'].count())

Countries in cluster 8:  13


These are the venues for which cluster 8 has a vlaue. Observations:

1) General offerings: 
    - The highest occurence of shopping malls by a significant margin
    - The highest occurence of lounges 
    - The second highest occurence of cafe and coffee shops

2) Moderately unique offerings: 
    - Ice Cream Shops
    - Restaurants:
        - Middle Eastern
        - African (most) 
    - Bakeries
    
3) Highly unique offerings:
    - Breakfast spots
    - Beaches
    - Night clubs
    - Thai restaurants 
    
This cluster has a lot to offer in terms of opportunities for shopping and night life. Not to mention things to do throughout the day since it has breakfast spots and coffe shops. What's also very unique about it is that it has the most African restaurants by a significant margin, while still having a fair amount of variety when it comes to places to eat. With a breadth of fun, luxurious, and culinary offerings, this cluster will be classified as the All-in-one cluster.   

In [283]:
clusters_mrgd_new.rename(columns={'cluster8':'All-in-one'},inplace=True)

#### Cluster 9

In [284]:
cluster_dict['cluster9']

Unnamed: 0,Country,City,Type,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
24,Mauritius,Port Louis,CC,Hotel,Chinese Restaurant,Café,Resort,Fast Food Restaurant,Shopping Mall,Indian Restaurant,Ice Cream Shop,Beach,Lounge,Seafood Restaurant,Historic Site,Italian Restaurant,Brewery,Japanese Restaurant,Asian Restaurant,Restaurant,Diner,Pizza Place,Coffee Shop
25,Mauritius,Curepipe,MC,Chinese Restaurant,Café,Indian Restaurant,Shopping Mall,Gym / Fitness Center,Grocery Store,Fast Food Restaurant,Pizza Place,Department Store,Irish Pub,Sculpture Garden,Volcano,Food Court,Restaurant,Lounge,Spa,Furniture / Home Store,Garden,Portuguese Restaurant,Movie Theater
26,Mauritius,Quatre Bornes,MC,Chinese Restaurant,Café,Shopping Mall,Indian Restaurant,Diner,Pizza Place,Park,Lounge,Restaurant,Italian Restaurant,Steakhouse,Ice Cream Shop,Gym / Fitness Center,Food Court,Fast Food Restaurant,Grocery Store,Seafood Restaurant,Zoo,Sandwich Place,Athletics & Sports


In [285]:
clusters_mrgd_new[clusters_mrgd_new['cluster9']!='NaN']

Unnamed: 0,Venues,Ancient Africa,African Getaway,African Experience,Latin Fusion,One-of-a-kind,All-in-one,cluster9,cluster10
0,Café,14.0,4.0,3.0,1.0,2.0,9.0,3,4.0
3,Seafood Restaurant,12.0,5.0,,1.0,,,2,
4,Restaurant,12.0,5.0,5.0,1.0,2.0,11.0,3,6.0
5,Italian Restaurant,10.0,5.0,,1.0,,6.0,2,3.0
6,Fast Food Restaurant,8.0,2.0,5.0,1.0,1.0,8.0,3,3.0
7,Pizza Place,8.0,,4.0,1.0,,7.0,3,3.0
10,Indian Restaurant,7.0,3.0,2.0,,,,3,3.0
12,Ice Cream Shop,6.0,,2.0,1.0,,5.0,2,
13,Lounge,6.0,2.0,2.0,,2.0,9.0,3,6.0
14,Historic Site,5.0,,,,2.0,,1,


In [286]:
print('Countries in cluster 9:', cluster_dict['cluster9']['City'].count())

Countries in cluster 9: 3


These are the venues for which cluster 9 has a vlaue. Observations:

1) General offerings:
    - Restaurants:
        - Seafood 
        - Italian
        - Fast food 
        - Indian 
    - Shopping Malls 
    
2) Moderatley unique offerings:
    - Ice Cream Shops
    - Chinese restaurants  
    - Diner
   
3) Highly unique offerings:
    - Movie theaters 
    - Gardens
    
This clsuter has no standout characteristics. Therefore it will be dropped.    

In [287]:
clusters_mrgd_new.drop('cluster9',1,inplace=True)

#### Cluster 10

In [288]:
cluster_dict['cluster10']

Unnamed: 0,Country,City,Type,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
0,Algeria,Algiers,CC,Hotel,Restaurant,Mediterranean Restaurant,Coffee Shop,French Restaurant,Park,Cupcake Shop,Burger Joint,Salad Place,Recreation Center,Plaza,Convenience Store,Indian Restaurant,Department Store,Steakhouse,Lounge,Lighthouse,Lake,Italian Restaurant,Gym / Fitness Center
2,Algeria,Bab Ezzouar,MC,Hotel,Mediterranean Restaurant,Coffee Shop,Restaurant,Lake,Plaza,Recreation Center,Salad Place,Burger Joint,Shopping Mall,Metro Station,Park,French Restaurant,Lounge,Steakhouse,Beach,Indian Restaurant,Cupcake Shop,Airport Terminal,Turkish Restaurant
14,Ethiopia,Addia Ababa,CC,Hotel,Ethiopian Restaurant,Italian Restaurant,Restaurant,Grocery Store,Pizza Place,Nightclub,Burger Joint,Café,Coffee Shop,Convenience Store,Cupcake Shop,Fast Food Restaurant,Spa,Middle Eastern Restaurant,Massage Studio,Lounge,Golf Course,Greek Restaurant,Gym / Fitness Center
23,Madagascar,Antananarivo,CC,Hotel,Shopping Mall,Department Store,Asian Restaurant,Restaurant,Pizza Place,Mediterranean Restaurant,African Restaurant,French Restaurant,Burger Joint,Tea Room,Sandwich Place,Soccer Field,Market,Lounge,Snack Place,Food Court,Salad Place,Café,Fast Food Restaurant
41,Rwanda,Kigali,CC,Hotel,Café,Nightclub,African Restaurant,Soccer Stadium,Italian Restaurant,Lounge,Coffee Shop,Indian Restaurant,Restaurant,Caucasian Restaurant,Pub,Department Store,Mexican Restaurant,Farmers Market,Bar,French Restaurant,Golf Course,Grocery Store,Japanese Restaurant
51,Tanzania,Dar Es Salaam,CC,Hotel,Bus Station,Resort,Restaurant,African Restaurant,Pizza Place,Lounge,Beach,Fast Food Restaurant,Casino,Café,Motel,BBQ Joint,Football Stadium,Middle Eastern Restaurant,Seafood Restaurant,Electronics Store,Bar,Shopping Mall,Asian Restaurant


In [289]:
clusters_mrgd_new[clusters_mrgd_new['cluster10']!='NaN']

Unnamed: 0,Venues,Ancient Africa,African Getaway,African Experience,Latin Fusion,One-of-a-kind,All-in-one,cluster10
0,Café,14.0,4.0,3.0,1.0,2.0,9.0,4
1,Hotel,14.0,5.0,5.0,1.0,2.0,12.0,6
2,Coffee Shop,13.0,,3.0,1.0,,4.0,4
4,Restaurant,12.0,5.0,5.0,1.0,2.0,11.0,6
5,Italian Restaurant,10.0,5.0,,1.0,,6.0,3
6,Fast Food Restaurant,8.0,2.0,5.0,1.0,1.0,8.0,3
7,Pizza Place,8.0,,4.0,1.0,,7.0,3
9,Burger Joint,7.0,,,,,,4
10,Indian Restaurant,7.0,3.0,2.0,,,,3
13,Lounge,6.0,2.0,2.0,,2.0,9.0,6


In [290]:
print('Countries in cluster 10: ', cluster_dict['cluster10']['City'].count())

Countries in cluster 10:  6


These are the venues for whcih cluster 10 had a value. Observations:

1) General offerings:
    - Restaurants:
        - Italian
        - Fast fOOD
        - Pizza 
        - Indian
    - Lounge 
    
2) Moderately unique offerings:
    - Restaurants
        - French
        - Mediterranean 
        - African 
        
3) Highly unique offerings:
    - Salad place
    - Cupcake shop
    
The most unique aspects of this cluster are that it appears to have restaurants that specifically serve salads, and cupcake shops. However, these aren't features that would be a big draw for tourists because they aren't substantial enough. For this reason, this cluster will be dropped.   

In [291]:
clusters_mrgd_new.drop('cluster10',1,inplace=True)

### Level 3

#### Final Cluster Table 



In [292]:
clusters_mrgd_new

Unnamed: 0,Venues,Ancient Africa,African Getaway,African Experience,Latin Fusion,One-of-a-kind,All-in-one
0,Café,14.0,4.0,3.0,1.0,2.0,9.0
1,Hotel,14.0,5.0,5.0,1.0,2.0,12.0
2,Coffee Shop,13.0,,3.0,1.0,,4.0
3,Seafood Restaurant,12.0,5.0,,1.0,,
4,Restaurant,12.0,5.0,5.0,1.0,2.0,11.0
5,Italian Restaurant,10.0,5.0,,1.0,,6.0
6,Fast Food Restaurant,8.0,2.0,5.0,1.0,1.0,8.0
7,Pizza Place,8.0,,4.0,1.0,,7.0
8,Bar,7.0,3.0,4.0,,,8.0
9,Burger Joint,7.0,,,,,


#### Creating Final Map

In [293]:
city_venues_merged.head(5)

Unnamed: 0,Country,City,Type,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
0,Algeria,Algiers,CC,28.0,2.99998,9,Hotel,Restaurant,Mediterranean Restaurant,Coffee Shop,French Restaurant,Park,Cupcake Shop,Burger Joint,Salad Place,Recreation Center,Plaza,Convenience Store,Indian Restaurant,Department Store,Steakhouse,Lounge,Lighthouse,Lake,Italian Restaurant,Gym / Fitness Center
1,Algeria,Oran,MC,35.7033,-0.649298,0,Hotel,Restaurant,Café,Ice Cream Shop,Boat or Ferry,Sandwich Place,Breakfast Spot,Shopping Mall,Burger Joint,Seafood Restaurant,Scenic Lookout,Indian Restaurant,Tea Room,Eastern European Restaurant,Pizza Place,Construction & Landscaping,Dessert Shop,Pedestrian Plaza,Big Box Store,Middle Eastern Restaurant
2,Algeria,Bab Ezzouar,MC,36.722,3.18567,9,Hotel,Mediterranean Restaurant,Coffee Shop,Restaurant,Lake,Plaza,Recreation Center,Salad Place,Burger Joint,Shopping Mall,Metro Station,Park,French Restaurant,Lounge,Steakhouse,Beach,Indian Restaurant,Cupcake Shop,Airport Terminal,Turkish Restaurant
3,Angola,Luanda,CC,-8.82727,13.244,2,Restaurant,Pizza Place,African Restaurant,Lounge,Hotel,Seafood Restaurant,Portuguese Restaurant,Ice Cream Shop,Italian Restaurant,Bar,Mediterranean Restaurant,Beach,BBQ Joint,Coffee Shop,Convenience Store,Supermarket,Fast Food Restaurant,Dessert Shop,Office,Movie Theater
4,Benin,Cotonou,MC,6.3677,2.42525,7,Resort,Hotel,Restaurant,Shopping Mall,Hotel Bar,Bakery,Plaza,Middle Eastern Restaurant,Beach,Lounge,Music Venue,Tapas Restaurant,French Restaurant,Thai Restaurant,Food Court,Pizza Place,Soccer Stadium,Dessert Shop,Harbor / Marina,Hardware Store


In [294]:
#Filter out dropped clusters 
y = city_venues_merged['Cluster Labels']

city_venues_final = city_venues_merged[(y!=3)&(y!=6)&(y!=9)]

In [295]:
city_venues_final.head(15)

Unnamed: 0,Country,City,Type,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
1,Algeria,Oran,MC,35.7033,-0.649298,0,Hotel,Restaurant,Café,Ice Cream Shop,Boat or Ferry,Sandwich Place,Breakfast Spot,Shopping Mall,Burger Joint,Seafood Restaurant,Scenic Lookout,Indian Restaurant,Tea Room,Eastern European Restaurant,Pizza Place,Construction & Landscaping,Dessert Shop,Pedestrian Plaza,Big Box Store,Middle Eastern Restaurant
3,Angola,Luanda,CC,-8.82727,13.244,2,Restaurant,Pizza Place,African Restaurant,Lounge,Hotel,Seafood Restaurant,Portuguese Restaurant,Ice Cream Shop,Italian Restaurant,Bar,Mediterranean Restaurant,Beach,BBQ Joint,Coffee Shop,Convenience Store,Supermarket,Fast Food Restaurant,Dessert Shop,Office,Movie Theater
4,Benin,Cotonou,MC,6.3677,2.42525,7,Resort,Hotel,Restaurant,Shopping Mall,Hotel Bar,Bakery,Plaza,Middle Eastern Restaurant,Beach,Lounge,Music Venue,Tapas Restaurant,French Restaurant,Thai Restaurant,Food Court,Pizza Place,Soccer Stadium,Dessert Shop,Harbor / Marina,Hardware Store
7,Cameroon,Douala,MC,4.05374,9.66444,7,Bakery,Lounge,Restaurant,Hotel,Shopping Mall,Nightclub,Big Box Store,Diner,Pizza Place,French Restaurant,Ice Cream Shop,African Restaurant,Department Store,Food Court,Mediterranean Restaurant,Bed & Breakfast,Pharmacy,Plaza,Pub,Café
8,Cape Verde,Santa Maria,MC,16.5987,-22.905,1,Resort,Hotel,Seafood Restaurant,Restaurant,Hotel Bar,Beach,Beach Bar,Surf Spot,Italian Restaurant,Spa,African Restaurant,Lounge,Portuguese Restaurant,Bistro,Steakhouse,Cocktail Bar,Botanical Garden,English Restaurant,Pub,BBQ Joint
11,Cote D’Ivoire,Abidjan,MC,5.40912,-4.04221,7,Ice Cream Shop,African Restaurant,Fast Food Restaurant,Bakery,Italian Restaurant,Nightclub,Restaurant,Shopping Mall,Plaza,Café,Lounge,Coffee Shop,Bus Station,Boutique,Steakhouse,Soccer Field,Middle Eastern Restaurant,Supermarket,Bar,Brewery
12,Egypt,Cairo,CC,30.0488,31.2437,0,Historic Site,Lounge,Café,Italian Restaurant,Egyptian Restaurant,Theater,Hotel,Middle Eastern Restaurant,Coffee Shop,Pastry Shop,Seafood Restaurant,Performing Arts Venue,Hotel Bar,Kebab Restaurant,Bookstore,Dessert Shop,Falafel Restaurant,Plaza,Art Gallery,Neighborhood
13,Egypt,Alexandria,MC,31.199,29.8944,0,Café,Seafood Restaurant,Coffee Shop,Ice Cream Shop,Sandwich Place,Middle Eastern Restaurant,Historic Site,Restaurant,Egyptian Restaurant,Syrian Restaurant,Bar,Bakery,Italian Restaurant,Lebanese Restaurant,Plaza,Lounge,Juice Bar,Pizza Place,Hotel,Gym
15,Ghana,Accra,CC,5.56001,-0.205744,7,Hotel,African Restaurant,Bar,Fast Food Restaurant,Nightclub,Restaurant,Indian Restaurant,Lounge,Chinese Restaurant,Pizza Place,Shopping Mall,Thai Restaurant,Grocery Store,Sculpture Garden,Bed & Breakfast,Cocktail Bar,Asian Restaurant,Furniture / Home Store,Market,Snack Place
16,Ghana,Kumasi,MC,6.69808,-1.62304,2,African Restaurant,Fast Food Restaurant,Hotel,Chinese Restaurant,Indian Restaurant,Bar,Hotel Bar,Nightclub,Bus Station,Shopping Mall,Pizza Place,Shop & Service,English Restaurant,Food Court,Burrito Place,Café,Restaurant,Shopping Plaza,Snack Place,Brewery


In [308]:
city_venues_final.shape

(43, 27)

In [296]:
#Split dataframe in order to add the 'New Cluster Label' column in between 'Cluster Labels' and '1st Most Common Venue' columns

city_venues_final_part2 = city_venues_final.iloc[:,6:] 

In [297]:
city_venues_final = city_venues_final.iloc[:,:6].reset_index(drop=True)

In [298]:
city_venues_final['New Cluster Label'] = None

In [299]:
city_venues_final.head(10)

Unnamed: 0,Country,City,Type,Latitude,Longitude,Cluster Labels,New Cluster Label
0,Algeria,Oran,MC,35.7033,-0.649298,0,
1,Angola,Luanda,CC,-8.82727,13.244,2,
2,Benin,Cotonou,MC,6.3677,2.42525,7,
3,Cameroon,Douala,MC,4.05374,9.66444,7,
4,Cape Verde,Santa Maria,MC,16.5987,-22.905,1,
5,Cote D’Ivoire,Abidjan,MC,5.40912,-4.04221,7,
6,Egypt,Cairo,CC,30.0488,31.2437,0,
7,Egypt,Alexandria,MC,31.199,29.8944,0,
8,Ghana,Accra,CC,5.56001,-0.205744,7,
9,Ghana,Kumasi,MC,6.69808,-1.62304,2,


In [300]:
#Fill in new cluster label names according to cluster label numebrs  

for d,label in enumerate(city_venues_final['Cluster Labels']):  

    if city_venues_final['Cluster Labels'][d] == 0:
        city_venues_final['New Cluster Label'][d] = 'Ancient Africa'
    elif city_venues_final['Cluster Labels'][d]==1:
        city_venues_final['New Cluster Label'][d] = 'African Getaway'
    elif city_venues_final['Cluster Labels'][d]==2:
        city_venues_final['New Cluster Label'][d] = 'African Experience'
    elif city_venues_final['Cluster Labels'][d]==4:
        city_venues_final['New Cluster Label'][d] = 'Latin Fusion'
    elif city_venues_final['Cluster Labels'][d]==5:
        city_venues_final['New Cluster Label'][d] = 'One-of-a-kind'
    else:
        city_venues_final['New Cluster Label'][d] = 'All-in-one'
   


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexin

In [301]:
city_venues_final.head(10)

Unnamed: 0,Country,City,Type,Latitude,Longitude,Cluster Labels,New Cluster Label
0,Algeria,Oran,MC,35.7033,-0.649298,0,Ancient Africa
1,Angola,Luanda,CC,-8.82727,13.244,2,African Experience
2,Benin,Cotonou,MC,6.3677,2.42525,7,All-in-one
3,Cameroon,Douala,MC,4.05374,9.66444,7,All-in-one
4,Cape Verde,Santa Maria,MC,16.5987,-22.905,1,African Getaway
5,Cote D’Ivoire,Abidjan,MC,5.40912,-4.04221,7,All-in-one
6,Egypt,Cairo,CC,30.0488,31.2437,0,Ancient Africa
7,Egypt,Alexandria,MC,31.199,29.8944,0,Ancient Africa
8,Ghana,Accra,CC,5.56001,-0.205744,7,All-in-one
9,Ghana,Kumasi,MC,6.69808,-1.62304,2,African Experience


In [302]:
city_venues_final_part2.reset_index(drop=True,inplace=True)

In [303]:
#Rejoin split data frames  

city_venues_final = city_venues_final.join(city_venues_final_part2)

In [304]:
city_venues_final

Unnamed: 0,Country,City,Type,Latitude,Longitude,Cluster Labels,New Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
0,Algeria,Oran,MC,35.7033,-0.649298,0,Ancient Africa,Hotel,Restaurant,Café,Ice Cream Shop,Boat or Ferry,Sandwich Place,Breakfast Spot,Shopping Mall,Burger Joint,Seafood Restaurant,Scenic Lookout,Indian Restaurant,Tea Room,Eastern European Restaurant,Pizza Place,Construction & Landscaping,Dessert Shop,Pedestrian Plaza,Big Box Store,Middle Eastern Restaurant
1,Angola,Luanda,CC,-8.82727,13.244,2,African Experience,Restaurant,Pizza Place,African Restaurant,Lounge,Hotel,Seafood Restaurant,Portuguese Restaurant,Ice Cream Shop,Italian Restaurant,Bar,Mediterranean Restaurant,Beach,BBQ Joint,Coffee Shop,Convenience Store,Supermarket,Fast Food Restaurant,Dessert Shop,Office,Movie Theater
2,Benin,Cotonou,MC,6.3677,2.42525,7,All-in-one,Resort,Hotel,Restaurant,Shopping Mall,Hotel Bar,Bakery,Plaza,Middle Eastern Restaurant,Beach,Lounge,Music Venue,Tapas Restaurant,French Restaurant,Thai Restaurant,Food Court,Pizza Place,Soccer Stadium,Dessert Shop,Harbor / Marina,Hardware Store
3,Cameroon,Douala,MC,4.05374,9.66444,7,All-in-one,Bakery,Lounge,Restaurant,Hotel,Shopping Mall,Nightclub,Big Box Store,Diner,Pizza Place,French Restaurant,Ice Cream Shop,African Restaurant,Department Store,Food Court,Mediterranean Restaurant,Bed & Breakfast,Pharmacy,Plaza,Pub,Café
4,Cape Verde,Santa Maria,MC,16.5987,-22.905,1,African Getaway,Resort,Hotel,Seafood Restaurant,Restaurant,Hotel Bar,Beach,Beach Bar,Surf Spot,Italian Restaurant,Spa,African Restaurant,Lounge,Portuguese Restaurant,Bistro,Steakhouse,Cocktail Bar,Botanical Garden,English Restaurant,Pub,BBQ Joint
5,Cote D’Ivoire,Abidjan,MC,5.40912,-4.04221,7,All-in-one,Ice Cream Shop,African Restaurant,Fast Food Restaurant,Bakery,Italian Restaurant,Nightclub,Restaurant,Shopping Mall,Plaza,Café,Lounge,Coffee Shop,Bus Station,Boutique,Steakhouse,Soccer Field,Middle Eastern Restaurant,Supermarket,Bar,Brewery
6,Egypt,Cairo,CC,30.0488,31.2437,0,Ancient Africa,Historic Site,Lounge,Café,Italian Restaurant,Egyptian Restaurant,Theater,Hotel,Middle Eastern Restaurant,Coffee Shop,Pastry Shop,Seafood Restaurant,Performing Arts Venue,Hotel Bar,Kebab Restaurant,Bookstore,Dessert Shop,Falafel Restaurant,Plaza,Art Gallery,Neighborhood
7,Egypt,Alexandria,MC,31.199,29.8944,0,Ancient Africa,Café,Seafood Restaurant,Coffee Shop,Ice Cream Shop,Sandwich Place,Middle Eastern Restaurant,Historic Site,Restaurant,Egyptian Restaurant,Syrian Restaurant,Bar,Bakery,Italian Restaurant,Lebanese Restaurant,Plaza,Lounge,Juice Bar,Pizza Place,Hotel,Gym
8,Ghana,Accra,CC,5.56001,-0.205744,7,All-in-one,Hotel,African Restaurant,Bar,Fast Food Restaurant,Nightclub,Restaurant,Indian Restaurant,Lounge,Chinese Restaurant,Pizza Place,Shopping Mall,Thai Restaurant,Grocery Store,Sculpture Garden,Bed & Breakfast,Cocktail Bar,Asian Restaurant,Furniture / Home Store,Market,Snack Place
9,Ghana,Kumasi,MC,6.69808,-1.62304,2,African Experience,African Restaurant,Fast Food Restaurant,Hotel,Chinese Restaurant,Indian Restaurant,Bar,Hotel Bar,Nightclub,Bus Station,Shopping Mall,Pizza Place,Shop & Service,English Restaurant,Food Court,Burrito Place,Café,Restaurant,Shopping Plaza,Snack Place,Brewery


In [305]:
#Make sure there is no number 8 in cluster labels

city_venues_final['Cluster Labels'].unique()

array([0, 2, 7, 1, 4, 8, 5])

In [306]:
#Specify colour for each cluster 

colors_dict = {'0':'red','2':'green','7':'purple','1':'blue','4':'brown','8':'orange','5':'grey'}

In [307]:
#Visualize final clusters 

afri_lat = -1.7832
afri_long = 20.5085

#Create map
map_clusters_fin = folium.Map(location=[afri_lat, afri_long], zoom_start=3, tiles='Mapbox Bright')

#Set color scheme for the clusters
x2 = np.arange(kclusters)
ys2 = [i+x+(i*x)**2 for i in range(kclusters)]

#Add markers to the map
for lat, lon, poi, cluster, label in zip(city_venues_final['Latitude'], city_venues_final['Longitude'], city_venues_final['City'], city_venues_final['Cluster Labels'], city_venues_final['New Cluster Label']):
    label2 = folium.Popup(str(poi) + ', ' + label, parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label2,
        color=colors_dict[str(cluster)],
        fill=True,
        fill_color=colors_dict[str(cluster)],
        fill_opacity=1).add_to(map_clusters_fin)
    
#Add image of legend to map 
from folium.plugins import FloatImage

image_file = '/Users/mbongeni/Documents/Career/Data Science/Applied Data Science/4_Capstone/Final Assignment/Legend4_Final.png'
FloatImage(image_file, bottom=0,left=71).add_to(map_clusters_fin)
map_clusters_fin.save('FinalClustersMap.html')
    
map_clusters_fin

### NB: Legend was not rendered in notebook and instead appears as float_image. See power point presentation for map with rendered legend. 

## End of Notebook 