In [25]:
# List the contents of the data directory (IPython shell escape).
!ls ../Data

atm.csv		 mosque.csv		      shopping_mall.csv
bank.csv	 pharmacy.csv		      SubCity-Coordinates.json
bus_station.csv  Population_per_subcity.json  SubCity-Populations.json
church.csv	 restaurants.csv	      supermarket.csv
gas_station.csv  school.csv		      train_station.csv
hospital.csv	 scripts


In [26]:
import pandas as pd

In [27]:
# Category label -> CSV path for every kind of place counted around a store.
# The labels double as column names later on, and their order fixes the
# column order.
nearby_places_csvs = dict(
    atms='../Data/atm.csv',
    banks='../Data/bank.csv',
    bus_stations='../Data/bus_station.csv',
    churches='../Data/church.csv',
    gas_stations='../Data/gas_station.csv',
    hospitals='../Data/hospital.csv',
    mosques='../Data/mosque.csv',
    pharmacies='../Data/pharmacy.csv',
    restaurants='../Data/restaurants.csv',
    schools='../Data/school.csv',
    train_stations='../Data/train_station.csv',
)

# The two sources that are merged into the `supermarkets` table.
supermarkets_csv = '../Data/supermarket.csv'
shopping_malls_csv = '../Data/shopping_mall.csv'

In [28]:
# Build one long table of nearby places: a frame per category, tagged with its
# category label, concatenated in a single pass. `DataFrame.append` no longer
# exists in pandas >= 2.0, and appending inside the loop re-copied the
# accumulated frame on every iteration.
place_frames = []
for key, csv_path in nearby_places_csvs.items():
    df = pd.read_csv(csv_path)
    if 'permanently_closed' in df.columns:
        # `!= True` (rather than `~`) keeps rows where the flag is missing (NaN).
        df = df[df['permanently_closed'] != True]
    # `.assign` returns a new frame, so no SettingWithCopyWarning on the slice.
    df = df[['business_status', 'name', 'latitude', 'longitude']].assign(type=key)
    place_frames.append(df)
# pd.concat raises on an empty list, so fall back to an empty frame.
nearby_places = pd.concat(place_frames) if place_frames else pd.DataFrame()

# Supermarkets and shopping malls are treated as one population of stores.
# Original row labels are kept (no index reset), so labels may repeat.
supermarkets = pd.concat([pd.read_csv(supermarkets_csv), pd.read_csv(shopping_malls_csv)])
supermarkets = supermarkets[['business_status', 'name', 'latitude', 'longitude', 'rating']]

In [29]:
# Mark stores without a rating with '-'. Assign the result back to the column:
# an in-place fillna on `supermarkets['rating']` acts on a temporary object and
# is not guaranteed to reach the frame (no-op under pandas Copy-on-Write).
supermarkets['rating'] = supermarkets['rating'].fillna('-')

In [30]:
# Pre-create one counter column per place category, initialised to zero.
for category in nearby_places_csvs.keys():
    supermarkets[category] = 0

In [31]:
# Preview the table; the per-category counters are still all zero here.
supermarkets.head()

Unnamed: 0,business_status,name,latitude,longitude,rating,atms,banks,bus_stations,churches,gas_stations,hospitals,mosques,pharmacies,restaurants,schools,train_stations
0,OPERATIONAL,"AllMart Plus, Ayat Neighborhood, Addis Ababa",9.020171,38.875108,-,0,0,0,0,0,0,0,0,0,0,0
1,OPERATIONAL,Nahuta SuperMarket,9.022502,38.785853,5,0,0,0,0,0,0,0,0,0,0,0
2,OPERATIONAL,Afran Mini Market,9.057363,38.724341,-,0,0,0,0,0,0,0,0,0,0,0
3,OPERATIONAL,251 Store ET Gerji store,9.001777,38.810458,-,0,0,0,0,0,0,0,0,0,0,0
4,OPERATIONAL,Selam Super Market,9.011199,38.796551,-,0,0,0,0,0,0,0,0,0,0,0


In [32]:
from math import sin, cos, sqrt, atan2, radians

def calculateDistance(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Uses the haversine formula on a sphere of radius 6373 km.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the first point, in degrees.
    lat2, lon2 : float
        Latitude and longitude of the second point, in degrees.

    Returns
    -------
    float
        Distance along the sphere's surface, in kilometres.
    """
    earth_radius_km = 6373.0  # approximate radius of earth in km

    phi1, lam1 = radians(lat1), radians(lon1)
    phi2, lam2 = radians(lat2), radians(lon2)

    half_dlat = (phi2 - phi1) / 2
    half_dlon = (lam2 - lam1) / 2

    # Haversine of the central angle between the two points.
    hav = sin(half_dlat)**2 + cos(phi1) * cos(phi2) * sin(half_dlon)**2
    central_angle = 2 * atan2(sqrt(hav), sqrt(1 - hav))

    return earth_radius_km * central_angle

In [33]:
def collectAndFindNearbyPlaces(row, radius):
    """Count, per category, the places lying within `radius` km of a store.

    Parameters
    ----------
    row : record exposing ``latitude`` and ``longitude`` attributes
        The store used as the centre of the search.
    radius : number
        Search radius in kilometres (same unit as `calculateDistance`).

    Returns
    -------
    pd.Series
        One count per key of `nearby_places_csvs`, indexed by those keys in
        their original order.
    """
    origin_lat = row.latitude
    origin_lon = row.longitude

    # Start every category at zero so categories with no hits still appear.
    tally = dict.fromkeys(nearby_places_csvs, 0)

    for place in nearby_places.itertuples(index=True, name='Pandas'):
        separation = calculateDistance(origin_lat, origin_lon, place.latitude, place.longitude)
        if separation <= radius:
            tally[place.type] += 1

    return pd.Series(list(tally.values()), index=list(tally.keys()))

In [34]:
# Count the neighbours of every store within a 1 km radius; one column per
# place category is filled from the Series returned for each row.
supermarkets[list(nearby_places_csvs.keys())] = supermarkets.apply(
    lambda store: collectAndFindNearbyPlaces(store, radius=1),
    axis=1,
)

In [35]:
# Summary statistics of the numeric columns (coordinates and category counts).
supermarkets.describe()

Unnamed: 0,latitude,longitude,atms,banks,bus_stations,churches,gas_stations,hospitals,mosques,pharmacies,restaurants,schools,train_stations
count,1392.0,1392.0,1392.0,1392.0,1392.0,1392.0,1392.0,1392.0,1392.0,1392.0,1392.0,1392.0,1392.0
mean,9.003146,38.765421,3.720546,26.387931,0.456178,11.777299,2.802443,9.524425,4.133621,8.293822,86.214799,21.18319,0.033046
std,0.034234,0.046179,5.246634,20.084912,1.205465,7.548595,2.543747,9.043674,3.967259,5.750306,61.134592,13.380197,0.178821
min,8.83577,38.662204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,8.988069,38.736376,0.0,9.0,0.0,7.0,1.0,3.0,1.0,4.0,32.0,13.0,0.0
50%,9.005729,38.759672,1.0,21.5,0.0,10.0,2.0,7.0,3.0,8.0,73.0,20.0,0.0
75%,9.024217,38.788254,5.0,42.0,0.0,15.0,5.0,14.0,5.0,11.0,133.0,26.0,0.0
max,9.102839,38.920221,21.0,82.0,8.0,38.0,11.0,45.0,20.0,26.0,257.0,79.0,1.0


In [36]:
# Keep only the first row for each (name, latitude, longitude) triple.
# Reassigning instead of `inplace=True` keeps the step an explicit new binding.
# NOTE(review): de-duplicating before the neighbour count would avoid
# computing counts for rows that are discarded here.
supermarkets = supermarkets.drop_duplicates(subset=['name', 'latitude', 'longitude'])

In [37]:
# (rows, columns) of the table after duplicate stores were removed.
supermarkets.shape

(1388, 16)

In [38]:
import json
import os

# NOTE: "__file__" is a string literal here, so dirname() returns '' and the
# path below resolves relative to the current working directory.
script_dir = os.path.dirname("__file__")
subcity_population_json = os.path.join(script_dir, '../Data/Population_per_subcity.json')
# Read inside a context manager so the file handle is closed as soon as the
# JSON has been parsed.
with open(subcity_population_json) as subcity_population:
    subcity_population_data = json.load(subcity_population)

In [39]:
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
import numpy as np

def determineSubcityAndAddPopulation(row):
    """Find the subcity whose polygon contains a store and total its population.

    Parameters
    ----------
    row : record exposing ``latitude`` and ``longitude`` attributes
        The store to locate.

    Returns
    -------
    pd.Series
        Indexed ``['Males', 'Females']``: the male and female totals summed
        over every population entry of the first subcity whose polygon
        contains the store, or ``[0, 0]`` when no polygon contains it.
    """
    # The point is the same for every subcity, so build it once.
    # NOTE(review): built as (latitude, longitude); assumes the polygon
    # vertices in the JSON use the same order — confirm against the data file.
    point = Point(row.latitude, row.longitude)

    for subcity_record in subcity_population_data.values():
        polygon = Polygon([(i, j) for i, j in subcity_record['coordinates']])
        # `contains` is strict: a point exactly on the boundary does not match.
        if polygon.contains(point):
            age_groups = subcity_record['population'].values()
            total_males = sum(group['Males'] for group in age_groups)
            total_females = sum(group['Females'] for group in age_groups)
            return pd.Series([total_males, total_females], index=['Males', 'Females'])

    # No subcity polygon contains the store.
    return pd.Series([0, 0], index=['Males', 'Females'])




In [40]:
# Pre-create the two population columns (Males first, then Females) as zeros.
supermarkets['Males'] = supermarkets['Females'] = 0

In [41]:
# Fill the population columns row by row from each store's containing subcity.
supermarkets[['Males', 'Females']] = supermarkets.apply(
    determineSubcityAndAddPopulation,
    axis='columns',
)

In [42]:
# Preview the table with the neighbour counts and population columns filled in.
supermarkets.head()

Unnamed: 0,business_status,name,latitude,longitude,rating,atms,banks,bus_stations,churches,gas_stations,hospitals,mosques,pharmacies,restaurants,schools,train_stations,Males,Females
0,OPERATIONAL,"AllMart Plus, Ayat Neighborhood, Addis Ababa",9.020171,38.875108,-,1,9,0,7,3,3,0,2,28,4,0,145225,163770
1,OPERATIONAL,Nahuta SuperMarket,9.022502,38.785853,5,8,36,0,15,1,18,4,9,105,28,0,161592,185072
2,OPERATIONAL,Afran Mini Market,9.057363,38.724341,-,0,15,0,9,2,2,9,6,54,20,0,129396,138228
3,OPERATIONAL,251 Store ET Gerji store,9.001777,38.810458,-,1,27,0,11,1,6,0,13,153,27,0,145225,163770
4,OPERATIONAL,Selam Super Market,9.011199,38.796551,-,1,34,0,22,6,18,1,9,110,21,0,145225,163770


In [43]:
# Write the enriched table to the current working directory. `index` is left at
# its default, so the row labels are written as the first (unnamed) column.
supermarkets.to_csv('supermarkets_cleaned_with_popn.csv')