# Google Map analysis of the supermarkets in five African cities
- Cape Town, South Africa
- Lomé, Togo
- Yaoundé, Cameroon
- Nairobi, Kenya
- Addis Ababa, Ethiopia

### 1. List all supermarkets of each city
- Collect latitude/longitude for each supermarket
- Filter out errors of scraping by cross-validation with lat/long

### 2. Scrape google map reviews for each supermarket
- Collect reviewer's name, rating, and body of the review
- Clean the data collected

### 3. Exploratory Data Analysis
- What is the mean rating of supermarkets for each city? 
- What is the mean number of reviews by supermarkets?
- What is the category of supermarket for each city?
- How many supermarkets propose delivery services in each city?
- How many are closed on sundays?
- Delivery services and/or sundays closed is it affecting supermarket rating?
- Can we identify spatial correlation of positive ratings? are there geographical clusters of good or bad reviews?

### 5. visualisation on maps
- pinpoint supermarket ratings on a map
- heatmap supermarket ratings

In [127]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np

import pysal # cross-platform library for geospatial data science
#from pysal import weights, esda 
from esda.moran import Moran, Moran_Local

import splot
from splot.esda import moran_scatterplot, plot_moran, lisa_cluster

#import googlemaps
#from selenium import webdriver
#from parsel import Selector
from random import randint
from time import sleep

#from geopy.geocoders import Nominatim
#from geopy.extra.rate_limiter import RateLimiter

import folium
from folium.plugins import HeatMap

import warnings
warnings.filterwarnings('ignore')
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")
warnings.filterwarnings("ignore", category=DeprecationWarning)

## Note: previously scraped urls for supermarkets in five case study cities

In [125]:
# googlemaps supermarkets urls from phantombuster
supermarkets = pd.read_csv('/Users/juliencarbonnell/Desktop/African Urban Research/datasets/GM_supermarkets.csv')

In [114]:
supermarkets

Unnamed: 0,placeurl,title,rating,reviewcount,category,attributes,address,wednesday,thursday,friday,saturday,sunday,monday,tuesday,info,imgurl,latitude,longitude,query
0,https://www.google.com/maps/place/S.E.+Superma...,S.E. Supermarkets,4.7,68.0,Supermarket,· In-store shopping,"52 Roodebloem Rd, Woodstock, Cape Town, 7915, ...",7:30AM–8PM,7:30AM–8PM,7:30AM–8PM,7:30AM–8PM,7:30AM–8PM,7:30AM–8PM,7:30AM–8PM,Hours or services may differ,https://lh5.googleusercontent.com/p/AF1QipOJZx...,-33.933313,18.451812,CapeTown
1,https://www.google.com/maps/place/R+%26+K+Supe...,R & K Supermarket,,,Grocery store,· In-store shopping,"Salt River, Cape Town, 7925, South Africa",,,,,,,,Hours or services may differ,https://www.gstatic.com/images/icons/material/...,-33.928938,18.462562,CapeTown
2,https://www.google.com/maps/place/Family+Super...,Family Supermarket,,,Grocery store,· In-store shopping,"Cape Town City Centre, Cape Town, 8000, South ...",,,,,,,,Hours or services may differ,https://lh5.googleusercontent.com/p/AF1QipNre6...,-33.919438,18.424813,CapeTown
3,https://www.google.com/maps/place/City+Cafe+%2...,City Cafe & Supermarkets,5.0,1.0,Grocery store,· In-store shopping,"Shop 6A, 31A Long St, Cape Town City Centre, C...",8AM–7PM,8AM–7PM,8AM–7PM,8AM–7PM,8AM–7PM,8AM–7PM,8AM–7PM,Hours or services may differ,https://lh6.googleusercontent.com/proxy/yEvgHH...,-33.920313,18.421813,CapeTown
4,https://www.google.com/maps/place/Best+Price+S...,Best Price Supermarket,,,Grocery store,,"Salt River, Cape Town, 7925, South Africa",,,,,,,,,https://maps.gstatic.com/tactile/pane/default_...,-33.928438,18.460437,CapeTown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1035,https://www.google.com/maps/place/Lomyad+Super...,Lomyad Supermarket,4.0,4.0,Shopping mall,,"2RCQ+F9C, Addis Ababa, Ethiopia",7:30a.m.–11p.m.,7:30a.m.–11p.m.,7:30a.m.–11p.m.,7:30a.m.–11p.m.,7:30a.m.–11p.m.,7:30a.m.–11p.m.,7:30a.m.–11p.m.,,https://maps.gstatic.com/tactile/pane/default_...,,,AddisAbaba
1036,https://www.google.com/maps/place/Safeway+Supe...,Safeway Supermarket,4.0,18.0,Shopping mall,,"2V82+6HR, Addis Ababa, Ethiopia",12a.m.–11p.m.,12a.m.–11p.m.,12a.m.–11p.m.,12a.m.–11p.m.,12a.m.–11p.m.,12a.m.–11p.m.,12a.m.–11p.m.,,https://www.gstatic.com/images/icons/material/...,,,AddisAbaba
1037,https://www.google.com/maps/place/Amanda+Super...,Amanda Supermarket,,,Shopping mall,,"2RCQ+FQR, Addis Ababa, Ethiopia",,,,,,,,,https://maps.gstatic.com/tactile/pane/default_...,,,AddisAbaba
1038,https://www.google.com/maps/place/Wina+Superma...,Wina Supermarket And Juice Shola Branch,4.5,4.0,Restaurant,· Takeout,"2PWW+WVF, Addis Ababa, Ethiopia",8a.m.–9p.m.,8a.m.–9p.m.,8a.m.–9p.m.,8a.m.–9p.m.,8a.m.–9p.m.,8a.m.–9p.m.,8a.m.–9p.m.,,https://lh5.googleusercontent.com/p/AF1QipPSlz...,,,AddisAbaba


In [4]:
# rename headers with lower case
supermarkets.columns= supermarkets.columns.str.lower()

In [126]:
# list columns
supermarkets.columns

Index(['placeurl', 'title', 'rating', 'reviewcount', 'category', 'attributes',
       'address', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday',
       'monday', 'tuesday', 'info', 'imgurl', 'latitude', 'longitude',
       'query'],
      dtype='object')

In [116]:
#check nulls
supermarkets.isna().sum()

placeurl         0
title            1
rating         229
reviewcount    229
category         3
attributes     105
address          2
wednesday      294
thursday       294
friday         294
saturday       294
sunday         294
monday         294
tuesday        294
info           905
imgurl           1
latitude       265
longitude      265
query            0
dtype: int64

In [7]:
# drop useless columns
supermarkets=supermarkets.drop(['unnamed: 0',
                                'website',
                                'pluscode',
                                'phonenumber',
                                'currentstatus',
                                'isclaimed',
                                'timestamp'
                               ], axis=1)

In [84]:
# count query values
supermarkets['query'].value_counts()

Nairobi       267
Lomé          200
AddisAbaba    197
CapeTown      192
Yaoundé       184
Name: query, dtype: int64

In [9]:
# rename query values with city names
cities = {'https://www.google.com/maps/search/supermarket/@-33.9236772,18.3865141,13z/data=!3m1!4b1': "CapeTown",
          'https://www.google.com/maps/search/supermarket/@6.1874341,1.1353437,11z/data=!3m1!4b1': "Lomé",
          'https://www.google.com/maps/search/supermarket/@3.873919,11.4754269,13z/data=!3m1!4b1': "Yaoundé",
          'https://www.google.com/maps/search/supermarket/@-1.3031622,36.5672034,10z/data=!3m1!4b1': "Nairobi",
          'https://www.google.com/maps/search/supermarket/@8.9630978,38.6380589,11z/data=!3m1!4b1': "AddisAbaba"
         }
supermarkets['query'] = supermarkets['query'].replace(cities)
# be careful : this brings errors. need to fix it

In [10]:
# how many supermarkets listed for each city?
supermarkets['query'].value_counts()

Nairobi       267
Lomé          200
AddisAbaba    197
CapeTown      192
Yaoundé       184
Name: query, dtype: int64

In [85]:
# is there null lat/long values ?
print(supermarkets['latitude'].isnull().groupby([supermarkets['query']]).sum())
print(supermarkets['longitude'].isnull().groupby([supermarkets['query']]).sum())

query
AddisAbaba    110
CapeTown        1
Lomé           42
Nairobi        39
Yaoundé        73
Name: latitude, dtype: int64
query
AddisAbaba    110
CapeTown        1
Lomé           42
Nairobi        39
Yaoundé        73
Name: longitude, dtype: int64


### get missing lat/long from address

In [27]:
# list missing lat/long in a new df
missinglat = supermarkets[supermarkets['latitude'].isnull()]
# drop null addresses
missinglat = missinglat[missinglat['address'].notna()]
# reset index
missinglat = missinglat.reset_index(drop=True)
missinglat

Unnamed: 0,placeurl,title,rating,reviewcount,category,attributes,address,wednesday,thursday,friday,saturday,sunday,monday,tuesday,info,imgurl,latitude,longitude,query
0,https://www.google.com/maps/place/Super+Source...,Super Sources,,,Grocery store,,"South Ln, Rondebosch, Cape Town, 8005, South A...",9AM–5PM,9AM–5PM,9AM–5PM,Closed,Closed,9AM–5PM,9AM–5PM,Hours or services may differ,https://www.gstatic.com/images/icons/material/...,,,CapeTown
1,https://www.google.com/maps/place/Elidec+Shopr...,Elidec Shoprite,,,Grocery store,· In-store shopping,"62J3+Q33, Dzodze, Ghana",,,,,,,,,https://www.gstatic.com/images/icons/material/...,,,Lomé
2,https://www.google.com/maps/place/Alice+Provis...,Alice Provision Store,,,Grocery store,· In-store shopping,"6XMW+R9V, Dzodze, Ghana",,,,,,,,,https://www.gstatic.com/images/icons/material/...,,,Lomé
3,https://www.google.com/maps/place/Kenecab/data...,Kenecab,3.9,14.0,Grocery store,· In-store shopping,"3V66+JC2, Abor, Ghana",7AM–9:30PM,7AM–9:30PM,7AM–9:30PM,7AM–9:30PM,7AM–9:30PM,7AM–9:30PM,7AM–9:30PM,,https://www.gstatic.com/images/icons/material/...,,,Lomé
4,https://www.google.com/maps/place/Adzodogu+Min...,Adzodogu Mini Market Aflaoga,5.0,1.0,Grocery store,,"45CP+9MF, Denu, Ghana",,,,,,,,,https://www.gstatic.com/images/icons/material/...,,,Lomé
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
258,https://www.google.com/maps/place/Lomyad+Super...,Lomyad Supermarket,4.0,4.0,Shopping mall,,"2RCQ+F9C, Addis Ababa, Ethiopia",7:30a.m.–11p.m.,7:30a.m.–11p.m.,7:30a.m.–11p.m.,7:30a.m.–11p.m.,7:30a.m.–11p.m.,7:30a.m.–11p.m.,7:30a.m.–11p.m.,,https://maps.gstatic.com/tactile/pane/default_...,,,AddisAbaba
259,https://www.google.com/maps/place/Safeway+Supe...,Safeway Supermarket,4.0,18.0,Shopping mall,,"2V82+6HR, Addis Ababa, Ethiopia",12a.m.–11p.m.,12a.m.–11p.m.,12a.m.–11p.m.,12a.m.–11p.m.,12a.m.–11p.m.,12a.m.–11p.m.,12a.m.–11p.m.,,https://www.gstatic.com/images/icons/material/...,,,AddisAbaba
260,https://www.google.com/maps/place/Amanda+Super...,Amanda Supermarket,,,Shopping mall,,"2RCQ+FQR, Addis Ababa, Ethiopia",,,,,,,,,https://maps.gstatic.com/tactile/pane/default_...,,,AddisAbaba
261,https://www.google.com/maps/place/Wina+Superma...,Wina Supermarket And Juice Shola Branch,4.5,4.0,Restaurant,· Takeout,"2PWW+WVF, Addis Ababa, Ethiopia",8a.m.–9p.m.,8a.m.–9p.m.,8a.m.–9p.m.,8a.m.–9p.m.,8a.m.–9p.m.,8a.m.–9p.m.,8a.m.–9p.m.,,https://lh5.googleusercontent.com/p/AF1QipPSlz...,,,AddisAbaba


In [33]:
# try geolocator for one value
geolocator = Nominatim(user_agent='julien.carbonnell@gmail.com')
location = geolocator.geocode(missinglat['address'][0])
location

Location(South Lane, Rondebosch, Cape Town Ward 59, Cape Town, City of Cape Town, Western Cape, CAPE TOWN, South Africa, (-33.9577131, 18.459655, 0.0))

In [34]:
print(location.latitude)
print(location.longitude)

-33.9577131
18.459655


In [42]:
location = geolocator.geocode(missinglat['address'][1])
location
print(missinglat['address'][1])

62J3+Q33, Dzodze, Ghana


### most address are recorded with weird code e.g. XQJF+2X8, and a lot are outside of the city targetted


In [47]:
# select rows only if address mentions the city targetted
validlocation = missinglat[missinglat['address'].str.contains("Cape Town")]
validlocation

Unnamed: 0,placeurl,title,rating,reviewcount,category,attributes,address,wednesday,thursday,friday,...,monday,tuesday,info,imgurl,latitude,longitude,query,location,point,altitude
0,https://www.google.com/maps/place/Super+Source...,Super Sources,,,Grocery store,,"South Ln, Rondebosch, Cape Town, 8005, South A...",9AM–5PM,9AM–5PM,9AM–5PM,...,9AM–5PM,9AM–5PM,Hours or services may differ,https://www.gstatic.com/images/icons/material/...,-33.957713,18.459655,CapeTown,"(South Lane, Rondebosch, Cape Town Ward 59, Ca...","(-33.9577131, 18.459655, 0.0)",0.0


In [52]:
#append the other cities
validlocation = validlocation.append(missinglat[missinglat['address'].str.contains("Lomé")])
validlocation = validlocation.append(missinglat[missinglat['address'].str.contains("Yaoundé")])
validlocation = validlocation.append(missinglat[missinglat['address'].str.contains("Nairobi")])
validlocation = validlocation.append(missinglat[missinglat['address'].str.contains("Addis Ababa")])
# reset index
validlocation = validlocation.reset_index(drop=True)
validlocation

Unnamed: 0,placeurl,title,rating,reviewcount,category,attributes,address,wednesday,thursday,friday,...,monday,tuesday,info,imgurl,latitude,longitude,query,location,point,altitude
0,https://www.google.com/maps/place/Super+Source...,Super Sources,,,Grocery store,,"South Ln, Rondebosch, Cape Town, 8005, South A...",9AM–5PM,9AM–5PM,9AM–5PM,...,9AM–5PM,9AM–5PM,Hours or services may differ,https://www.gstatic.com/images/icons/material/...,-33.957713,18.459655,CapeTown,"(South Lane, Rondebosch, Cape Town Ward 59, Ca...","(-33.9577131, 18.459655, 0.0)",0.0
1,https://www.google.com/maps/place/Supermarch%C...,Supermarché ALZARAA,3.0,11.0,Supermarket,· In-store shopping,"65PX+3XC, Lomé, Togo",8AM–9PM,8AM–9PM,8AM–9PM,...,8AM–9PM,8AM–9PM,,https://lh5.googleusercontent.com/p/AF1QipPnZi...,,,Lomé,,,
2,https://www.google.com/maps/place/GIONA+Supere...,GIONA Superette,3.5,4.0,Supermarket,· In-store shopping · In-store pick-up · ...,"646M+55X, Lomé, Togo",,,,...,,,,https://lh5.googleusercontent.com/p/AF1QipPQaN...,,,Lomé,,,
3,https://www.google.com/maps/place/Super+March%...,Super Marché Miséricordia Vrai,,,Supermarket,· In-store shopping,"658C+V77, Lomé, Togo",,,,...,,,,https://lh5.googleusercontent.com/p/AF1QipNAxM...,,,Lomé,,,
4,https://www.google.com/maps/place/CM+PETROLEUM...,CM PETROLEUM AKODESSEWA,,,Supermarket,· In-store shopping,"5749+62W, Lomé, Togo",8AM–10PM,8AM–10PM,8AM–10PM,...,8AM–10PM,8AM–10PM,,https://lh5.googleusercontent.com/p/AF1QipOVqs...,,,Lomé,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
216,https://www.google.com/maps/place/Lomyad+Super...,Lomyad Supermarket,4.0,4.0,Shopping mall,,"2RCQ+F9C, Addis Ababa, Ethiopia",7:30a.m.–11p.m.,7:30a.m.–11p.m.,7:30a.m.–11p.m.,...,7:30a.m.–11p.m.,7:30a.m.–11p.m.,,https://maps.gstatic.com/tactile/pane/default_...,,,AddisAbaba,,,
217,https://www.google.com/maps/place/Safeway+Supe...,Safeway Supermarket,4.0,18.0,Shopping mall,,"2V82+6HR, Addis Ababa, Ethiopia",12a.m.–11p.m.,12a.m.–11p.m.,12a.m.–11p.m.,...,12a.m.–11p.m.,12a.m.–11p.m.,,https://www.gstatic.com/images/icons/material/...,,,AddisAbaba,,,
218,https://www.google.com/maps/place/Amanda+Super...,Amanda Supermarket,,,Shopping mall,,"2RCQ+FQR, Addis Ababa, Ethiopia",,,,...,,,,https://maps.gstatic.com/tactile/pane/default_...,,,AddisAbaba,,,
219,https://www.google.com/maps/place/Wina+Superma...,Wina Supermarket And Juice Shola Branch,4.5,4.0,Restaurant,· Takeout,"2PWW+WVF, Addis Ababa, Ethiopia",8a.m.–9p.m.,8a.m.–9p.m.,8a.m.–9p.m.,...,8a.m.–9p.m.,8a.m.–9p.m.,,https://lh5.googleusercontent.com/p/AF1QipPSlz...,,,AddisAbaba,,,


In [134]:
# replace missinglat with validlocation
missinglat = validlocation
missinglat

Unnamed: 0,placeurl,title,rating,reviewcount,category,attributes,address,wednesday,thursday,friday,...,monday,tuesday,info,imgurl,latitude,longitude,query,location,point,altitude
0,https://www.google.com/maps/place/Super+Source...,Super Sources,,,Grocery store,,"South Ln, Rondebosch, Cape Town, 8005, South A...",9AM–5PM,9AM–5PM,9AM–5PM,...,9AM–5PM,9AM–5PM,Hours or services may differ,https://www.gstatic.com/images/icons/material/...,-33.957713,18.459655,CapeTown,"(South Lane, Rondebosch, Cape Town Ward 59, Ca...","(-33.9577131, 18.459655, 0.0)",0.0
1,https://www.google.com/maps/place/Supermarch%C...,Supermarché ALZARAA,3.0,11.0,Supermarket,· In-store shopping,"65PX+3XC, Lomé, Togo",8AM–9PM,8AM–9PM,8AM–9PM,...,8AM–9PM,8AM–9PM,,https://lh5.googleusercontent.com/p/AF1QipPnZi...,,,Lomé,,,
2,https://www.google.com/maps/place/GIONA+Supere...,GIONA Superette,3.5,4.0,Supermarket,· In-store shopping · In-store pick-up · ...,"646M+55X, Lomé, Togo",,,,...,,,,https://lh5.googleusercontent.com/p/AF1QipPQaN...,,,Lomé,,,
3,https://www.google.com/maps/place/Super+March%...,Super Marché Miséricordia Vrai,,,Supermarket,· In-store shopping,"658C+V77, Lomé, Togo",,,,...,,,,https://lh5.googleusercontent.com/p/AF1QipNAxM...,,,Lomé,,,
4,https://www.google.com/maps/place/CM+PETROLEUM...,CM PETROLEUM AKODESSEWA,,,Supermarket,· In-store shopping,"5749+62W, Lomé, Togo",8AM–10PM,8AM–10PM,8AM–10PM,...,8AM–10PM,8AM–10PM,,https://lh5.googleusercontent.com/p/AF1QipOVqs...,,,Lomé,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
216,https://www.google.com/maps/place/Lomyad+Super...,Lomyad Supermarket,4.0,4.0,Shopping mall,,"2RCQ+F9C, Addis Ababa, Ethiopia",7:30a.m.–11p.m.,7:30a.m.–11p.m.,7:30a.m.–11p.m.,...,7:30a.m.–11p.m.,7:30a.m.–11p.m.,,https://maps.gstatic.com/tactile/pane/default_...,,,AddisAbaba,,,
217,https://www.google.com/maps/place/Safeway+Supe...,Safeway Supermarket,4.0,18.0,Shopping mall,,"2V82+6HR, Addis Ababa, Ethiopia",12a.m.–11p.m.,12a.m.–11p.m.,12a.m.–11p.m.,...,12a.m.–11p.m.,12a.m.–11p.m.,,https://www.gstatic.com/images/icons/material/...,,,AddisAbaba,,,
218,https://www.google.com/maps/place/Amanda+Super...,Amanda Supermarket,,,Shopping mall,,"2RCQ+FQR, Addis Ababa, Ethiopia",,,,...,,,,https://maps.gstatic.com/tactile/pane/default_...,,,AddisAbaba,,,
219,https://www.google.com/maps/place/Wina+Superma...,Wina Supermarket And Juice Shola Branch,4.5,4.0,Restaurant,· Takeout,"2PWW+WVF, Addis Ababa, Ethiopia",8a.m.–9p.m.,8a.m.–9p.m.,8a.m.–9p.m.,...,8a.m.–9p.m.,8a.m.–9p.m.,,https://lh5.googleusercontent.com/p/AF1QipPSlz...,,,AddisAbaba,,,


In [54]:
# check if wrong location have disappeared
location = geolocator.geocode(missinglat['address'][1])
location
print(missinglat['address'][1])

65PX+3XC, Lomé, Togo


In [66]:
missinglat.isna().sum()
# too many missing values remain

placeurl         0
title            0
rating          77
reviewcount     77
category         0
attributes      35
address          0
wednesday       94
thursday        94
friday          94
saturday        94
sunday          94
monday          94
tuesday         94
info           220
imgurl           0
latitude       220
longitude      220
query            0
location       220
point          220
altitude       220
dtype: int64

In [117]:
# check what does placeurl looks like
missinglat['placeurl'][1]

'https://www.google.com/maps/place/Supermarch%C3%A9+ALZARAA/data=!4m5!3m4!1s0x1021581d9ebe81fd:0x7b848f3f4ce9898f!8m2!3d6.2351801!4d1.1998825'

In [137]:
# split the placeurl to get lat out of it
urlsplit = pd.concat([missinglat['placeurl'], missinglat['placeurl'].str.split('!3d', expand=True)], axis=1)
urlsplit.head()

Unnamed: 0,placeurl,0,1
0,https://www.google.com/maps/place/Super+Source...,https://www.google.com/maps/place/Super+Source...,-33.9577461!4d18.4598667
1,https://www.google.com/maps/place/Supermarch%C...,https://www.google.com/maps/place/Supermarch%C...,6.2351801!4d1.1998825
2,https://www.google.com/maps/place/GIONA+Supere...,https://www.google.com/maps/place/GIONA+Supere...,6.2104762!4d1.1329886
3,https://www.google.com/maps/place/Super+March%...,https://www.google.com/maps/place/Super+March%...,6.2171574!4d1.170659
4,https://www.google.com/maps/place/CM+PETROLEUM...,https://www.google.com/maps/place/CM+PETROLEUM...,6.1556064!4d1.2675667


In [138]:
# split the resulting collumn to get long out of it
urlsplit = pd.concat([urlsplit['placeurl'], urlsplit[1].str.split('!4d', expand=True)], axis=1)
urlsplit.head()

Unnamed: 0,placeurl,0,1
0,https://www.google.com/maps/place/Super+Source...,-33.9577461,18.4598667
1,https://www.google.com/maps/place/Supermarch%C...,6.2351801,1.1998825
2,https://www.google.com/maps/place/GIONA+Supere...,6.2104762,1.1329886
3,https://www.google.com/maps/place/Super+March%...,6.2171574,1.170659
4,https://www.google.com/maps/place/CM+PETROLEUM...,6.1556064,1.2675667


In [139]:
urlsplit.shape

(221, 3)

In [136]:
missinglat.shape

(221, 22)

In [140]:
# rename columns
urlsplit.rename(columns={0: 'latitude',
                       1: 'longitude'
                      }, inplace=True, errors='raise')

In [141]:
del urlsplit['placeurl']

In [143]:
# concat lat/long values from missing lat df by the urlsplit ones
missinglat = missinglat.join(urlsplit)
missinglat = missinglat.reset_index(drop=True)
missinglat.head()

Unnamed: 0,placeurl,title,rating,reviewcount,category,attributes,address,wednesday,thursday,friday,saturday,sunday,monday,tuesday,info,imgurl,query,latitude,longitude
0,https://www.google.com/maps/place/Super+Source...,Super Sources,,,Grocery store,,"South Ln, Rondebosch, Cape Town, 8005, South A...",9AM–5PM,9AM–5PM,9AM–5PM,Closed,Closed,9AM–5PM,9AM–5PM,Hours or services may differ,https://www.gstatic.com/images/icons/material/...,CapeTown,-33.9577461,18.4598667
1,https://www.google.com/maps/place/Supermarch%C...,Supermarché ALZARAA,3.0,11.0,Supermarket,· In-store shopping,"65PX+3XC, Lomé, Togo",8AM–9PM,8AM–9PM,8AM–9PM,8AM–9PM,"8:30AM–2PM, 3–7PM",8AM–9PM,8AM–9PM,,https://lh5.googleusercontent.com/p/AF1QipPnZi...,Lomé,6.2351801,1.1998825
2,https://www.google.com/maps/place/GIONA+Supere...,GIONA Superette,3.5,4.0,Supermarket,· In-store shopping · In-store pick-up · ...,"646M+55X, Lomé, Togo",,,,,,,,,https://lh5.googleusercontent.com/p/AF1QipPQaN...,Lomé,6.2104762,1.1329886
3,https://www.google.com/maps/place/Super+March%...,Super Marché Miséricordia Vrai,,,Supermarket,· In-store shopping,"658C+V77, Lomé, Togo",,,,,,,,,https://lh5.googleusercontent.com/p/AF1QipNAxM...,Lomé,6.2171574,1.170659
4,https://www.google.com/maps/place/CM+PETROLEUM...,CM PETROLEUM AKODESSEWA,,,Supermarket,· In-store shopping,"5749+62W, Lomé, Togo",8AM–10PM,8AM–10PM,8AM–10PM,8AM–10PM,8AM–10PM,8AM–10PM,8AM–10PM,,https://lh5.googleusercontent.com/p/AF1QipOVqs...,Lomé,6.1556064,1.2675667


In [118]:
missinglat['placeurl'][6]

'https://www.google.com/maps/place/FIRST+MARKET/data=!4m5!3m4!1s0x1021585d38bc30c9:0xbef15782355fe035!8m2!3d6.2034887!4d1.1916575'

In [119]:
supermarkets['placeurl'][6]

"https://www.google.com/maps/place/Harry's+Supermarket/data=!4m5!3m4!1s0x1dcc6724b86be857:0xf5db44505e608fcb!8m2!3d-33.9139759!4d18.3912139"

In [75]:
print(missinglat['latitude'][6])
print(missinglat['longitude'][6])

6.2034887
1.1916575


In [132]:
supermarkets.columns

Index(['placeurl', 'title', 'rating', 'reviewcount', 'category', 'attributes',
       'address', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday',
       'monday', 'tuesday', 'info', 'imgurl', 'latitude', 'longitude',
       'query'],
      dtype='object')

In [144]:
missinglat.columns

Index(['placeurl', 'title', 'rating', 'reviewcount', 'category', 'attributes',
       'address', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday',
       'monday', 'tuesday', 'info', 'imgurl', 'query', 'latitude',
       'longitude'],
      dtype='object')

In [145]:
# align columns in both dataframes
missinglat = missinglat[['placeurl', 'title', 'rating', 'reviewcount', 'category', 'attributes',
       'address', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday',
       'monday', 'tuesday', 'info', 'imgurl', 'latitude', 'longitude',
       'query']]

In [146]:
# delete null latitude rows in original file
supermarkets = supermarkets[supermarkets['latitude'].notna()]
print(supermarkets['longitude'].isnull().sum())

0


In [150]:
# append missinglat rows to original file
supermarkets = supermarkets.append(missinglat, ignore_index=True)
supermarkets

In [151]:
# how many lat/long keep missing ?
supermarkets.isna().sum()

placeurl         0
title            0
rating         219
reviewcount    219
category         2
attributes      97
address          0
wednesday      279
thursday       279
friday         279
saturday       279
sunday         279
monday         279
tuesday        279
info           862
imgurl           0
latitude         0
longitude        0
query            0
dtype: int64

In [152]:
supermarkets.shape

(996, 19)

In [153]:
# save file
#supermarkets.to_csv('GM_supermarkets.csv', index=False)

## Scrape the reviews for each supermarket

In [30]:
# scrape customer reviews based on urls
supermarkets['placeurl'][0]

'https://www.google.com/maps/place/S.E.+Supermarkets/data=!4m5!3m4!1s0x1dcc5d0aae05dc67:0x502209aedfc8c871!8m2!3d-33.933285!4d18.4518531'

In [38]:
supermarkets['placeurl'][421]

'https://www.google.com/maps/place/Fosak+Sarl/data=!4m5!3m4!1s0x108bcf007189df15:0x3eb9475e4fdea686!8m2!3d3.867464!4d11.5177656'

In [157]:
# start the browser
executable_path = '/usr/local/bin/chromedriver'
driver = webdriver.Chrome(executable_path)

In [34]:
# open reviews page
url = supermarkets['placeurl'][0]
driver.get(url)

In [35]:
# parse reviews
page_content = driver.page_source
response = Selector(page_content)

In [36]:
# iterate over the reviews
results = []

for el in response.xpath('//div/div[@data-review-id]/div[contains(@class, "content")]'):
    results.append({
        'title': el.xpath('.//div[contains(@class, "title")]/span/text()').extract_first(''),
        'rating': el.xpath('.//span[contains(@aria-label, "stars")]/@aria-label').extract_first('').replace('stars' ,'').strip(),
        'body': el.xpath('.//span[contains(@class, "text")]/text()').extract_first(''),
    })

print(results)

[{'title': 'Hair by Michelle and Beauty Parlour phala', 'rating': '5', 'body': "This is a go to supermarket, they go out of their way to satisfy clients. I've been going there for over a decade and always satisfied with their service."}, {'title': 'Jayan Smart', 'rating': '5', 'body': 'Simply the best corner store. Just make sure you are nice to Cloe!'}, {'title': 'Loretta Chan-Sam', 'rating': '5', 'body': "If you're looking for a consumer-friendly par excellence environment you're at the right place. Friendly, helpful and accommodating atmosphere with a good range of organic produce and a sure-fire bet to find that missing ingredient at the last minute ..."}]


In [None]:
# create empty column to fill
supermarkets['reviews'] = ''

In [None]:
# iterate through places in dataframe
count = 0

for i in range(supermarkets.shape[0]):
    print(count)
    try:
        driver.get(supermarkets['placeurl'][i])
        page_content = driver.page_source
        response = Selector(page_content)
        results = []
        for el in response.xpath('//div/div[@data-review-id]/div[contains(@class, "content")]'):
            results.append({
                'title': el.xpath('.//div[contains(@class, "title")]/span/text()').extract_first(''),
                'rating': el.xpath('.//span[contains(@aria-label, "stars")]/@aria-label').extract_first('').replace('stars' ,'').strip(),
                'body': el.xpath('.//span[contains(@class, "text")]/text()').extract_first(''),
            })
        supermarkets["reviews"][i] = results
        sleep(randint(10,100))
    except: pass
    count += 1

In [None]:
# check the result
supermarkets.tail()

In [174]:
# stop the browser before leaving
driver.quit

<bound method WebDriver.quit of <selenium.webdriver.chrome.webdriver.WebDriver (session="56fe66d079e19dfd91e0bf9022105484")>>

In [None]:
# save reviews dataframe
#supermarkets.to_csv('GM_reviews.csv', index=False)

## Note: reviews scraping has been done in two rounds. 
#### append the two files contains reviews

In [5]:
# import reviews files
reviews = pd.read_csv('/Users/juliencarbonnell/Desktop/African Urban Research/datasets/GM_reviews.csv')

In [6]:
reviews

Unnamed: 0,placeurl,placename,overall_rating,reviewcount,category,attributes,address,wednesday,thursday,friday,...,monday,tuesday,info,imgurl,latitude,longitude,query,reviewer,rating,review
0,https://www.google.com/maps/place/S.E.+Superma...,S.E. Supermarkets,4.7,68.0,Supermarket,· In-store shopping,"52 Roodebloem Rd, Woodstock, Cape Town, 7915, ...",7:30AM–8PM,7:30AM–8PM,7:30AM–8PM,...,7:30AM–8PM,7:30AM–8PM,Hours or services may differ,https://lh5.googleusercontent.com/p/AF1QipOJZx...,-33.933312,18.451812,Lomé,Hair by Michelle and Beauty Parlour phala,5.0,"This is a go to supermarket, they go out of th..."
1,https://www.google.com/maps/place/S.E.+Superma...,S.E. Supermarkets,4.7,68.0,Supermarket,· In-store shopping,"52 Roodebloem Rd, Woodstock, Cape Town, 7915, ...",7:30AM–8PM,7:30AM–8PM,7:30AM–8PM,...,7:30AM–8PM,7:30AM–8PM,Hours or services may differ,https://lh5.googleusercontent.com/p/AF1QipOJZx...,-33.933312,18.451812,Lomé,Jayan Smart,5.0,Simply the best corner store. Just make sure y...
2,https://www.google.com/maps/place/S.E.+Superma...,S.E. Supermarkets,4.7,68.0,Supermarket,· In-store shopping,"52 Roodebloem Rd, Woodstock, Cape Town, 7915, ...",7:30AM–8PM,7:30AM–8PM,7:30AM–8PM,...,7:30AM–8PM,7:30AM–8PM,Hours or services may differ,https://lh5.googleusercontent.com/p/AF1QipOJZx...,-33.933312,18.451812,Lomé,Loretta Chan-Sam,5.0,If youre looking for a consumer-friendly par e...
3,https://www.google.com/maps/place/City+Cafe+%2...,City Cafe & Supermarkets,5.0,1.0,Grocery store,· In-store shopping,"Shop 6A, 31A Long St, Cape Town City Centre, C...",8AM–7PM,8AM–7PM,8AM–7PM,...,8AM–7PM,8AM–7PM,Hours or services may differ,https://lh6.googleusercontent.com/proxy/yEvgHH...,-33.920313,18.421813,Lomé,محمود Mhmood,5.0,😉👍🏻
4,https://www.google.com/maps/place/Brothers+Sup...,Brothers Supermarket,4.0,9.0,Supermarket,· In-store shopping,"Corner House, Sea Point, Cape Town, 8005, Sout...",6:30AM–11:30PM,6:30AM–11:30PM,6:30AM–11:30PM,...,6:30AM–11:30PM,6:30AM–11:30PM,Hours or services may differ,https://www.gstatic.com/images/icons/material/...,-33.912937,18.391938,Lomé,ANTHONY.capetown,5.0,Amazingly friendly staff that make the neighbo...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1927,https://www.google.com/maps/place/Wina+Superma...,Wina Supermarket And Juice Shola Branch,4.5,4.0,Restaurant,· Takeout,"2PWW+WVF, Addis Ababa, Ethiopia",8a.m.–9p.m.,8a.m.–9p.m.,8a.m.–9p.m.,...,8a.m.–9p.m.,8a.m.–9p.m.,,https://lh5.googleusercontent.com/p/AF1QipPSlz...,9.047319,38.747162,AddisAbaba,anteneh alemu,5.0,"Amazing fresh vegetable, my favorite one from ..."
1928,https://www.google.com/maps/place/Wina+Superma...,Wina Supermarket And Juice Shola Branch,4.5,4.0,Restaurant,· Takeout,"2PWW+WVF, Addis Ababa, Ethiopia",8a.m.–9p.m.,8a.m.–9p.m.,8a.m.–9p.m.,...,8a.m.–9p.m.,8a.m.–9p.m.,,https://lh5.googleusercontent.com/p/AF1QipPSlz...,9.047319,38.747162,AddisAbaba,Philipous Girma,4.0,Good service and Good food
1929,https://www.google.com/maps/place/Ready+superm...,Ready supermarket | ረዲ ሱፐር ማርኬት,4.5,11.0,Shopping mall,,"XQJF+2X8, Addis Ababa, Ethiopia",6:30a.m.–1:30p.m.,6:30a.m.–1:30p.m.,6:30a.m.–1:30p.m.,...,6:30a.m.–1:30p.m.,6:30a.m.–1:30p.m.,,https://lh5.googleusercontent.com/p/AF1QipN5aq...,8.980050,38.774962,AddisAbaba,Josy Mulatu,5.0,They have good communication with their consum...
1930,https://www.google.com/maps/place/Ready+superm...,Ready supermarket | ረዲ ሱፐር ማርኬት,4.5,11.0,Shopping mall,,"XQJF+2X8, Addis Ababa, Ethiopia",6:30a.m.–1:30p.m.,6:30a.m.–1:30p.m.,6:30a.m.–1:30p.m.,...,6:30a.m.–1:30p.m.,6:30a.m.–1:30p.m.,,https://lh5.googleusercontent.com/p/AF1QipN5aq...,8.980050,38.774962,AddisAbaba,Yared Fekede,5.0,Best Super Market in Bole Michael


In [65]:
print(reviews.columns)
print(missinglat.columns)

Index(['placeurl', 'placename', 'overall_rating', 'reviewcount', 'category',
       'attributes', 'address', 'wednesday', 'thursday', 'friday', 'saturday',
       'sunday', 'monday', 'tuesday', 'info', 'imgurl', 'latitude',
       'longitude', 'query', 'reviewer', 'rating', 'review'],
      dtype='object')


In [118]:
print(reviews.shape)
print(missinglat.shape)
# same number of columns

(517, 26)
(775, 26)


In [119]:
reviews.dtypes

placeurl          object
title             object
rating           float64
reviewcount      float64
category          object
attributes        object
address           object
pluscode          object
website           object
phonenumber       object
wednesday         object
thursday          object
friday            object
saturday          object
sunday            object
monday            object
tuesday           object
currentstatus     object
info              object
imgurl            object
isclaimed           bool
latitude         float64
longitude        float64
query             object
timestamp         object
reviews           object
dtype: object

In [120]:
missinglat.dtypes
# same dtypes of columns

placeurl          object
title             object
rating           float64
reviewcount      float64
category          object
attributes        object
address           object
pluscode          object
website           object
phonenumber       object
wednesday         object
thursday          object
friday            object
saturday          object
sunday            object
monday            object
tuesday           object
currentstatus     object
info              object
imgurl            object
isclaimed           bool
latitude         float64
longitude        float64
query             object
timestamp         object
reviews           object
dtype: object

In [121]:
# append the two datasets
reviews = reviews.append(missinglat)

In [122]:
reviews

Unnamed: 0,placeurl,title,rating,reviewcount,category,attributes,address,pluscode,website,phonenumber,...,tuesday,currentstatus,info,imgurl,isclaimed,latitude,longitude,query,timestamp,reviews
0,https://www.google.com/maps/place/S.E.+Superma...,S.E. Supermarkets,4.7,68.0,Supermarket,· In-store shopping,"52 Roodebloem Rd, Woodstock, Cape Town, 7915, ...","3F82+MP Cape Town, South Africa",,+27 21 447 4626,...,7:30AM–8PM,Open now 7:30AM–8PM,Hours or services may differ,https://lh5.googleusercontent.com/p/AF1QipOJZx...,False,-33.933313,18.451812,Lomé,2021-10-20T10:15:46.195Z,[{'title': 'Hair by Michelle and Beauty Parlou...
1,https://www.google.com/maps/place/R+%26+K+Supe...,R & K Supermarket,,,Grocery store,· In-store shopping,"Salt River, Cape Town, 7925, South Africa","3FC7+C2 Cape Town, South Africa",,,...,,,Hours or services may differ,https://www.gstatic.com/images/icons/material/...,False,-33.928938,18.462562,Lomé,2021-10-20T10:15:50.950Z,[]
2,https://www.google.com/maps/place/Family+Super...,Family Supermarket,,,Grocery store,· In-store shopping,"Cape Town City Centre, Cape Town, 8000, South ...","3CJF+6W Cape Town, South Africa",,,...,,,Hours or services may differ,https://lh5.googleusercontent.com/p/AF1QipNre6...,False,-33.919438,18.424813,Lomé,2021-10-20T10:15:55.919Z,[]
3,https://www.google.com/maps/place/City+Cafe+%2...,City Cafe & Supermarkets,5.0,1.0,Grocery store,· In-store shopping,"Shop 6A, 31A Long St, Cape Town City Centre, C...","3CHC+VP Cape Town, South Africa",,+27 84 867 6872,...,8AM–7PM,Open now 8AM–7PM,Hours or services may differ,https://lh6.googleusercontent.com/proxy/yEvgHH...,False,-33.920313,18.421813,Lomé,2021-10-20T10:16:00.191Z,"[{'title': 'محمود Mhmood', 'rating': '5', 'bod..."
4,https://www.google.com/maps/place/Best+Price+S...,Best Price Supermarket,,,Grocery store,,"Salt River, Cape Town, 7925, South Africa","3FC6+J5 Cape Town, South Africa",,,...,,,,https://maps.gstatic.com/tactile/pane/default_...,False,-33.928438,18.460437,Lomé,2021-10-20T10:16:05.964Z,[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
770,https://www.google.com/maps/place/Besh+Gebeya+...,Besh Gebeya - Ayat Branch,4.6,7.0,Supermarket,· In-store shopping,"Ayat Zone, 2, Addis Ababa, Ethiopia","2V8C+7H Addis Ababa, Ethiopia",,+251 11 551 4236,...,8:30a.m.–7:30p.m.,Open now 8:30a.m.–7:30p.m.,,https://www.gstatic.com/images/icons/material/...,False,9.015688,38.871437,AddisAbaba,2021-10-21T07:39:23.555Z,"[{'title': 'Mel Bek', 'rating': '5', 'body': ""..."
771,https://www.google.com/maps/place/Mekdi+superm...,Mekdi supermarket,,,Supermarket,· In-store shopping,"micheal, Bole, Ethiopia","XQMF+4G Addis Ababa, Ethiopia",,+251 92 366 6706,...,Open 24 hours,Open now Open 24 hours,,https://maps.gstatic.com/tactile/pane/default_...,False,8.982812,38.773813,AddisAbaba,2021-10-21T07:39:28.866Z,[]
772,https://www.google.com/maps/place/Bashir+Super...,Bashir Supermarket,,,Store,· In-store shopping,"Unnamed Road, Addis Ababa, Ethiopia","WPWQ+V3 Addis Ababa, Ethiopia",,,...,,,,https://maps.gstatic.com/tactile/pane/default_...,False,8.947188,38.737687,AddisAbaba,2021-10-21T07:39:39.853Z,[]
773,https://www.google.com/maps/place/Lege+Tafo+le...,Lege Tafo lege Dadi Supermarket,,,Market,,"Gwala Dale Denibel, Ethiopia","3V7P+82 Addis Ababa, Ethiopia",,,...,,,,https://maps.gstatic.com/tactile/pane/default_...,False,9.063312,38.885062,AddisAbaba,2021-10-21T07:39:53.268Z,[]


In [123]:
# reset index
reviews = reviews.reset_index(drop=True)
reviews

Unnamed: 0,placeurl,title,rating,reviewcount,category,attributes,address,pluscode,website,phonenumber,...,tuesday,currentstatus,info,imgurl,isclaimed,latitude,longitude,query,timestamp,reviews
0,https://www.google.com/maps/place/S.E.+Superma...,S.E. Supermarkets,4.7,68.0,Supermarket,· In-store shopping,"52 Roodebloem Rd, Woodstock, Cape Town, 7915, ...","3F82+MP Cape Town, South Africa",,+27 21 447 4626,...,7:30AM–8PM,Open now 7:30AM–8PM,Hours or services may differ,https://lh5.googleusercontent.com/p/AF1QipOJZx...,False,-33.933313,18.451812,Lomé,2021-10-20T10:15:46.195Z,[{'title': 'Hair by Michelle and Beauty Parlou...
1,https://www.google.com/maps/place/R+%26+K+Supe...,R & K Supermarket,,,Grocery store,· In-store shopping,"Salt River, Cape Town, 7925, South Africa","3FC7+C2 Cape Town, South Africa",,,...,,,Hours or services may differ,https://www.gstatic.com/images/icons/material/...,False,-33.928938,18.462562,Lomé,2021-10-20T10:15:50.950Z,[]
2,https://www.google.com/maps/place/Family+Super...,Family Supermarket,,,Grocery store,· In-store shopping,"Cape Town City Centre, Cape Town, 8000, South ...","3CJF+6W Cape Town, South Africa",,,...,,,Hours or services may differ,https://lh5.googleusercontent.com/p/AF1QipNre6...,False,-33.919438,18.424813,Lomé,2021-10-20T10:15:55.919Z,[]
3,https://www.google.com/maps/place/City+Cafe+%2...,City Cafe & Supermarkets,5.0,1.0,Grocery store,· In-store shopping,"Shop 6A, 31A Long St, Cape Town City Centre, C...","3CHC+VP Cape Town, South Africa",,+27 84 867 6872,...,8AM–7PM,Open now 8AM–7PM,Hours or services may differ,https://lh6.googleusercontent.com/proxy/yEvgHH...,False,-33.920313,18.421813,Lomé,2021-10-20T10:16:00.191Z,"[{'title': 'محمود Mhmood', 'rating': '5', 'bod..."
4,https://www.google.com/maps/place/Best+Price+S...,Best Price Supermarket,,,Grocery store,,"Salt River, Cape Town, 7925, South Africa","3FC6+J5 Cape Town, South Africa",,,...,,,,https://maps.gstatic.com/tactile/pane/default_...,False,-33.928438,18.460437,Lomé,2021-10-20T10:16:05.964Z,[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1287,https://www.google.com/maps/place/Besh+Gebeya+...,Besh Gebeya - Ayat Branch,4.6,7.0,Supermarket,· In-store shopping,"Ayat Zone, 2, Addis Ababa, Ethiopia","2V8C+7H Addis Ababa, Ethiopia",,+251 11 551 4236,...,8:30a.m.–7:30p.m.,Open now 8:30a.m.–7:30p.m.,,https://www.gstatic.com/images/icons/material/...,False,9.015688,38.871437,AddisAbaba,2021-10-21T07:39:23.555Z,"[{'title': 'Mel Bek', 'rating': '5', 'body': ""..."
1288,https://www.google.com/maps/place/Mekdi+superm...,Mekdi supermarket,,,Supermarket,· In-store shopping,"micheal, Bole, Ethiopia","XQMF+4G Addis Ababa, Ethiopia",,+251 92 366 6706,...,Open 24 hours,Open now Open 24 hours,,https://maps.gstatic.com/tactile/pane/default_...,False,8.982812,38.773813,AddisAbaba,2021-10-21T07:39:28.866Z,[]
1289,https://www.google.com/maps/place/Bashir+Super...,Bashir Supermarket,,,Store,· In-store shopping,"Unnamed Road, Addis Ababa, Ethiopia","WPWQ+V3 Addis Ababa, Ethiopia",,,...,,,,https://maps.gstatic.com/tactile/pane/default_...,False,8.947188,38.737687,AddisAbaba,2021-10-21T07:39:39.853Z,[]
1290,https://www.google.com/maps/place/Lege+Tafo+le...,Lege Tafo lege Dadi Supermarket,,,Market,,"Gwala Dale Denibel, Ethiopia","3V7P+82 Addis Ababa, Ethiopia",,,...,,,,https://maps.gstatic.com/tactile/pane/default_...,False,9.063312,38.885062,AddisAbaba,2021-10-21T07:39:53.268Z,[]


## Clean the reviews column

In [124]:
reviews['reviews'][1291]

'[]'

In [125]:
# How many reviews are empty in the new dataframe ?
(reviews['reviews']=='[]').sum()

142

In [129]:
# drop empty reviews rows
reviews = reviews[reviews['reviews'] != '[]']

In [136]:
# other kind of null values ?
reviews['reviews'].isna().sum()

517

In [141]:
# drop null values from the review column
reviews = reviews[reviews['reviews'].notna()]

In [151]:
# distribution of reviews over cities
reviews['query'].value_counts()

Nairobi       221
Lomé          148
CapeTown      119
Yaoundé        90
AddisAbaba     55
Name: query, dtype: int64

In [155]:
# are there duplicate rows ?
print(reviews['placeurl'].size)
print(reviews['placeurl'].drop_duplicates().size)

633
582


In [156]:
print(reviews['reviews'].size)
print(reviews['reviews'].drop_duplicates().size)
# yes

633
581


In [157]:
reviews = reviews.drop_duplicates(subset = 'reviews', keep = 'first')

In [163]:
reviews.shape

(581, 26)

In [None]:
reviews.columns

In [None]:
# drop useless columns
reviews=reviews.drop([#'Unnamed: 0',
                                'website',
                                'pluscode',
                                'phonenumber',
                                'currentstatus',
                                'isclaimed',
                                'timestamp'
                               ], axis=1)

In [None]:
reviews['reviews'][0]

In [None]:
# split reviews column in individual row for each review
new_df = reviews['reviews'].str.split('}, ').apply(pd.Series, 1).stack()
# match index with medium df index
new_df.index = new_df.index.droplevel(-1)
# name to join
new_df.name = 'reviews'

In [None]:
#delete original reviews column
del reviews['reviews']

In [None]:
# import new expanded column in dataframe
reviews = reviews.join(new_df)
reviews = reviews.reset_index(drop=True)
reviews.head()

In [None]:
# reset index
reviews = reviews.drop(['index'], axis=1)
reviews.head()

In [None]:
# rename 'title' column in 'placename' and 'rating' in 'overall_rating'
reviews.rename(columns={'rating': 'overall_rating',
                       'title': 'placename'
                      }, inplace=True, errors='raise')

In [None]:
reviews['reviews'][0]

In [None]:
# split the reviews column in title, rating , body
review_split = pd.concat([reviews['placeurl'], reviews['reviews'].str.split('\', ', expand=True)], axis=1)
review_split.head()

In [None]:
#check nulls
review_split.isna().sum()

In [None]:
# where are the errors ?
review_split['rating'].value_counts()

#### the 13 missing reviews are in the rating column. probably due to an error at the split

In [None]:
# get rows containing 'body' in the rating column
missing_split = review_split[review_split['rating'].str.contains("body")]
missing_split

In [None]:
# check syntax
missing_split['reviewer'][267]

In [None]:
# drop empty review column
del missing_split['review']

In [None]:
# split the missing splits on specific syntax
missing_split = pd.concat([missing_split['placeurl'], missing_split['reviewer'].str.split('", \'', expand=True), missing_split['2']], axis=1)
missing_split

In [None]:
# replace missing review_split rows with the ones corrected
indices = [267,392,544,589,610,909,980,1162,1344,1347,1355,1371,1548]

for i in indices:
    review_split.loc[i,'reviewer'] = missing_split.loc[i,0]
    review_split.loc[i,'rating'] = missing_split.loc[i,1]
    review_split.loc[i,'review'] = missing_split.loc[i,'rating']

In [None]:
review_split.loc[267]

In [None]:
review_split.isna().sum()

In [None]:
# rename newly created columns
review_split.rename(columns={0: 'reviewer',
                        1: 'rating',
                        2: 'review'
                      }, inplace=True, errors='raise')
review_split.head()

In [None]:
print(review_split['reviewer'][0])
print(review_split['rating'][0])
print(review_split['review'][0])

In [None]:
# clean the columns
review_split['reviewer'] = review_split['reviewer'].str.replace("{'title': '",'')
review_split['reviewer'] = review_split['reviewer'].str.replace("[",'')
review_split['reviewer'] = review_split['reviewer'].str.replace("{'title': ",'')
review_split['reviewer'] = review_split['reviewer'].str.replace('"','')
review_split['rating'] = review_split['rating'].str.replace("'rating': '",'')
review_split['rating'] = review_split['rating'].str.replace("rating': '",'')
review_split['review'] = review_split['review'].str.replace("'body': ",'')
review_split['review'] = review_split['review'].str.replace("]",'')
review_split['review'] = review_split['review'].str.replace("}",'')
review_split['review'] = review_split['review'].str.replace("'",'')
review_split['review'] = review_split['review'].str.replace('"','')

In [None]:
# check clean result
print(review_split['reviewer'][0])
print(review_split['rating'][0])
print(review_split['review'][0])

In [None]:
# check clean result on the corrected ones
print(review_split['reviewer'][909])
print(review_split['rating'][909])
print(review_split['review'][909])

In [None]:
# merge with main reviews file
reviews = pd.merge(
    left=reviews,
    right=review_split,
    on='placeurl',
    how='left'
)
reviews.head()

In [50]:
#drop duplicate
reviews = reviews.drop_duplicates(keep='first',inplace=False)
# reset index
reviews = reviews.reset_index(drop=True)
reviews

Unnamed: 0,placeurl,placename,overall_rating,reviewcount,category,attributes,address,wednesday,thursday,friday,...,monday,tuesday,info,imgurl,latitude,longitude,query,reviewer,rating,review
0,https://www.google.com/maps/place/S.E.+Superma...,S.E. Supermarkets,4.7,68.0,Supermarket,· In-store shopping,"52 Roodebloem Rd, Woodstock, Cape Town, 7915, ...",7:30AM–8PM,7:30AM–8PM,7:30AM–8PM,...,7:30AM–8PM,7:30AM–8PM,Hours or services may differ,https://lh5.googleusercontent.com/p/AF1QipOJZx...,-33.933313,18.451812,Lomé,Hair by Michelle and Beauty Parlour phala,5.0,"This is a go to supermarket, they go out of th..."
1,https://www.google.com/maps/place/S.E.+Superma...,S.E. Supermarkets,4.7,68.0,Supermarket,· In-store shopping,"52 Roodebloem Rd, Woodstock, Cape Town, 7915, ...",7:30AM–8PM,7:30AM–8PM,7:30AM–8PM,...,7:30AM–8PM,7:30AM–8PM,Hours or services may differ,https://lh5.googleusercontent.com/p/AF1QipOJZx...,-33.933313,18.451812,Lomé,Jayan Smart,5.0,Simply the best corner store. Just make sure y...
2,https://www.google.com/maps/place/S.E.+Superma...,S.E. Supermarkets,4.7,68.0,Supermarket,· In-store shopping,"52 Roodebloem Rd, Woodstock, Cape Town, 7915, ...",7:30AM–8PM,7:30AM–8PM,7:30AM–8PM,...,7:30AM–8PM,7:30AM–8PM,Hours or services may differ,https://lh5.googleusercontent.com/p/AF1QipOJZx...,-33.933313,18.451812,Lomé,Loretta Chan-Sam,5.0,If youre looking for a consumer-friendly par e...
3,https://www.google.com/maps/place/City+Cafe+%2...,City Cafe & Supermarkets,5.0,1.0,Grocery store,· In-store shopping,"Shop 6A, 31A Long St, Cape Town City Centre, C...",8AM–7PM,8AM–7PM,8AM–7PM,...,8AM–7PM,8AM–7PM,Hours or services may differ,https://lh6.googleusercontent.com/proxy/yEvgHH...,-33.920313,18.421813,Lomé,محمود Mhmood,5.0,😉👍🏻
4,https://www.google.com/maps/place/Brothers+Sup...,Brothers Supermarket,4.0,9.0,Supermarket,· In-store shopping,"Corner House, Sea Point, Cape Town, 8005, Sout...",6:30AM–11:30PM,6:30AM–11:30PM,6:30AM–11:30PM,...,6:30AM–11:30PM,6:30AM–11:30PM,Hours or services may differ,https://www.gstatic.com/images/icons/material/...,-33.912938,18.391938,Lomé,ANTHONY.capetown,5.0,Amazingly friendly staff that make the neighbo...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1927,https://www.google.com/maps/place/Wina+Superma...,Wina Supermarket And Juice Shola Branch,4.5,4.0,Restaurant,· Takeout,"2PWW+WVF, Addis Ababa, Ethiopia",8a.m.–9p.m.,8a.m.–9p.m.,8a.m.–9p.m.,...,8a.m.–9p.m.,8a.m.–9p.m.,,https://lh5.googleusercontent.com/p/AF1QipPSlz...,9.047319,38.747162,AddisAbaba,anteneh alemu,5,"Amazing fresh vegetable, my favorite one from ..."
1928,https://www.google.com/maps/place/Wina+Superma...,Wina Supermarket And Juice Shola Branch,4.5,4.0,Restaurant,· Takeout,"2PWW+WVF, Addis Ababa, Ethiopia",8a.m.–9p.m.,8a.m.–9p.m.,8a.m.–9p.m.,...,8a.m.–9p.m.,8a.m.–9p.m.,,https://lh5.googleusercontent.com/p/AF1QipPSlz...,9.047319,38.747162,AddisAbaba,Philipous Girma,4,Good service and Good food
1929,https://www.google.com/maps/place/Ready+superm...,Ready supermarket | ረዲ ሱፐር ማርኬት,4.5,11.0,Shopping mall,,"XQJF+2X8, Addis Ababa, Ethiopia",6:30a.m.–1:30p.m.,6:30a.m.–1:30p.m.,6:30a.m.–1:30p.m.,...,6:30a.m.–1:30p.m.,6:30a.m.–1:30p.m.,,https://lh5.googleusercontent.com/p/AF1QipN5aq...,8.980050,38.774962,AddisAbaba,Josy Mulatu,5,They have good communication with their consum...
1930,https://www.google.com/maps/place/Ready+superm...,Ready supermarket | ረዲ ሱፐር ማርኬት,4.5,11.0,Shopping mall,,"XQJF+2X8, Addis Ababa, Ethiopia",6:30a.m.–1:30p.m.,6:30a.m.–1:30p.m.,6:30a.m.–1:30p.m.,...,6:30a.m.–1:30p.m.,6:30a.m.–1:30p.m.,,https://lh5.googleusercontent.com/p/AF1QipN5aq...,8.980050,38.774962,AddisAbaba,Yared Fekede,5,Best Super Market in Bole Michael


In [74]:
# check nulls
reviews.isna().sum()

placeurl             0
placename            0
overall_rating       1
reviewcount          1
category             0
attributes         144
address              0
wednesday          261
thursday           261
friday             261
saturday           261
sunday             261
monday             261
tuesday            261
info              1676
imgurl               0
latitude             0
longitude            0
query                0
reviewer             0
rating             173
review             233
dtype: int64

In [72]:
reviews['rating'].value_counts()

5.0    853
4.0    476
3.0    326
2.0    104
Name: rating, dtype: int64

In [73]:
reviews['rating'].dtype

dtype('float64')

In [111]:
# save reviews file
#reviews.to_csv('GM_reviews.csv', index=False)

# EDA on reviews

In [105]:
reviews = pd.read_csv('/Users/juliencarbonnell/Desktop/African Urban Research/datasets/GM_reviews.csv')

In [108]:
reviews.columns

Index(['placeurl', 'placename', 'overall_rating', 'reviewcount', 'category',
       'attributes', 'address', 'wednesday', 'thursday', 'friday', 'saturday',
       'sunday', 'monday', 'tuesday', 'info', 'imgurl', 'latitude',
       'longitude', 'query', 'reviewer', 'rating', 'review'],
      dtype='object')

In [118]:
# how many supermarkets do we have in each city
reviews.drop_duplicates(subset='placeurl',keep='first').groupby(['city'])['city'].count().sort_values(ascending=False)

city
Nairobi        185
Cape Town      148
Yaoundé        141
Addis Ababa    101
Lomé            90
Name: city, dtype: int64

In [119]:
# statistical overview of the supermarkets of our dataset
reviews.drop_duplicates(subset='placeurl',keep='first').describe()

Unnamed: 0,overall_rating,reviewcount,latitude,longitude,rating
count,664.0,664.0,665.0,665.0,616.0
mean,3.894578,267.347892,-4.882276,22.849013,4.207792
std,0.687423,578.387359,15.935734,13.724335,0.902933
min,1.0,1.0,-34.031813,0.742687,2.0
25%,3.6,3.0,-1.326426,11.518937,4.0
50%,3.95,18.5,-0.776813,18.469812,4.0
75%,4.3,240.0,6.134563,36.891438,5.0
max,5.0,5092.0,9.070813,38.969437,5.0


In [120]:
# mean of overall_ratings in each city
reviews.drop_duplicates(subset='placeurl',keep='first').groupby(['city'])['overall_rating'].mean().sort_values(ascending=False)

city
Cape Town      4.029730
Nairobi        3.936757
Addis Ababa    3.929703
Lomé           3.852222
Yaoundé        3.697857
Name: overall_rating, dtype: float64

In [135]:
# mean number of reviews for each city
reviews.drop_duplicates(subset='placeurl',keep='first').groupby(['city'])['reviewcount'].mean().sort_values(ascending=False)

city
Nairobi        499.545946
Cape Town      325.587838
Yaoundé        174.907143
Lomé           127.355556
Addis Ababa      9.574257
Name: reviewcount, dtype: float64

In [138]:
# counts the categories of supermarkets in total
reviews.drop_duplicates(subset='placeurl',keep='first')['category'].value_counts()

Supermarket                         431
Grocery store                       179
Shopping mall                         8
Chinese supermarket                   6
Shop supermarket furniture store      5
Convenience store                     4
Market                                4
Discount supermarket                  3
General store                         3
Home goods store                      2
Clothing store                        2
Travel agency                         1
Hardware store                        1
Hotel                                 1
Greengrocer                           1
Make-up artist                        1
E-commerce service                    1
Hypermarket                           1
Money transfer service                1
Natural goods store                   1
Variety store                         1
Butcher shop                          1
Fresh food market                     1
Fruit and vegetable processing        1
Coffee shop                           1


In [146]:
# counts the categories of supermarkets for each city
reviews.drop_duplicates(subset='placeurl',keep='first').groupby(['city'])['category'].value_counts()

city         category                        
Addis Ababa  Supermarket                          64
             Grocery store                        31
             Shopping mall                         4
             Discount supermarket                  1
             Restaurant                            1
Cape Town    Grocery store                        66
             Supermarket                          65
             Convenience store                     4
             Chinese supermarket                   2
             Butcher shop                          1
             Clothing store                        1
             Coffee shop                           1
             Fresh food market                     1
             Fruit and vegetable processing        1
             Fruit and vegetable store             1
             General store                         1
             Home goods store                      1
             Natural goods store                   1


#### how many of them propose a Delivery service ?

In [155]:
# delete null attributes
delivery = reviews[reviews['attributes'].notna()]

In [156]:
# extract attributes mentionning delivery
delivery = delivery[delivery['attributes'].str.contains("Delivery")]

In [161]:
# dropduplicate and groupby city
delivery.drop_duplicates(subset='placeurl',keep='first').groupby(['city'])['city'].value_counts()

city         city       
Addis Ababa  Addis Ababa      1
Cape Town    Cape Town       30
Lomé         Lomé            19
Nairobi      Nairobi        123
Yaoundé      Yaoundé         29
Name: city, dtype: int64

In [128]:
# how many supermarkets are closed on each day?
print('mondays closed :', reviews.drop_duplicates(subset='placeurl',keep='first')[reviews['monday']=="Closed"].shape[0])
print('tuesdays closed :', reviews.drop_duplicates(subset='placeurl',keep='first')[reviews['tuesday']=="Closed"].shape[0])
print('wednesdays closed :', reviews.drop_duplicates(subset='placeurl',keep='first')[reviews['wednesday']=="Closed"].shape[0])
print('thursdays closed :', reviews.drop_duplicates(subset='placeurl',keep='first')[reviews['thursday']=="Closed"].shape[0])
print('fridays closed :', reviews.drop_duplicates(subset='placeurl',keep='first')[reviews['friday']=="Closed"].shape[0])
print('saturdays closed :', reviews.drop_duplicates(subset='placeurl',keep='first')[reviews['saturday']=="Closed"].shape[0])
print('sundays closed :', reviews.drop_duplicates(subset='placeurl',keep='first')[reviews['sunday']=="Closed"].shape[0])

mondays closed : 7
tuesdays closed : 7
wednesdays closed : 9
thursdays closed : 9
fridays closed : 8
saturdays closed : 9
sundays closed : 102


In [129]:
# check number of closed supermarkets on sunday by city
reviews.drop_duplicates(subset='placeurl',keep='first')[reviews['sunday']=="Closed"].groupby('city')['city'].count().sort_values(ascending=False)

city
Nairobi        41
Yaoundé        22
Lomé           16
Cape Town      14
Addis Ababa     9
Name: city, dtype: int64

In [134]:
# in terms of percentage ?
print(round(14*100/148,2),'% of the supermarkets in Cape Town are closed on sundays')
print(round(16*100/90,2),'% of the supermarkets in Lomé are closed on sundays')
print(round(22*100/141,2),'% of the supermarkets in Yaoundé are closed on sundays')
print(round(41*100/185,2),'% of the supermarkets in Nairobi are closed on sundays')
print(round(9*100/101,2),'% of the supermarkets in Addis Ababa are closed on sundays')

9.46 % of the supermarkets in Cape Town are closed on sundays
17.78 % of the supermarkets in Lomé are closed on sundays
15.6 % of the supermarkets in Yaoundé are closed on sundays
22.16 % of the supermarkets in Nairobi are closed on sundays
8.91 % of the supermarkets in Addis Ababa are closed on sundays


In [None]:
#cluster lat/long to check if there are spatial correlation of ratings


# Map supermarkets on cities

In [166]:
# enter city coordinates
CapeTown=[-33.9249,18.4241]
Lomé=[6.1256,1.2254]
Yaoundé=[3.8480,11.5021]
Nairobi=[-1.2921,36.8219]
AddisAbaba=[8.9806,38.7578]

# generate maps
map_capetown = folium.Map(location=CapeTown,zoom_start=12)
map_lome = folium.Map(location=Lomé,zoom_start=12)
map_yaounde = folium.Map(location=Yaoundé,zoom_start=12)
map_nairobi = folium.Map(location=Nairobi,zoom_start=12)
map_addisababa = folium.Map(location=AddisAbaba,zoom_start=12)

In [164]:
# map supermarkets
for lat, long in zip(reviews['latitude'],
                    reviews['longitude']):
    folium.CircleMarker([lat, long],
                        radius=2, 
                        stroke=False, 
                        fill_color='blue',
                        fill_opacity=0.5).add_to(map_capetown)

map_capetown

In [169]:
# map supermarkets ratings
for lat, long, rating in zip(reviews['latitude'],
                             reviews['longitude'],
                             reviews['overall_rating']):
    folium.Marker(location=[lat,long],
                  icon=folium.DivIcon(
                      html=rating,
                  )).add_to(map_capetown)

map_capetown

In [178]:
# select non null dataset
ratings = reviews[reviews['overall_rating'].notna()]

# heatmap the ratings ()
heatmap = folium.Map(location=CapeTown,zoom_start=12)

locs = zip(ratings['latitude'],
           ratings['longitude'],
           ratings['overall_rating'])

HeatMap(locs, radius=15, blur=5).add_to(heatmap)

heatmap

### technically working but hardly readable. seems like having errors