### Gathering Nearby Locations

In [81]:
# Collect Required Features
# Scrap google maps for each feature
# Preprocess and Convert to a dataframe
# Extract and return counts of each feature 

In [82]:
pip install googlemaps


Note: you may need to restart the kernel to use updated packages.


In [83]:
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
import pandas as pd
import googlemaps
import numpy as np
import json
import os


In [84]:
def initializeGoogleMaps(API_KEY):
    return googlemaps.Client(API_KEY)

In [85]:
def cleanData(data_list):
    # Inner Functions for dataframe cleaning
    def lat(row):
        if 'location' in row:
            return row['location']['lat']
        return None
    def lng(row):
        if 'location' in row:
            return row['location']['lng']
        return None
    def check(row, col):
        if col in row:
            return 1
        return 0

    df = pd.DataFrame(data_list)    
    if len(data_list)>0:
        cleaned_df = df.drop(['photos', 'icon', 'scope', 'permanently_closed', 'opening_hours', 'reference', 'plus_code', 'user_ratings_total', 'vicinity'], axis=1, errors='ignore')    
        cleaned_df['latitude'] = cleaned_df['geometry'].apply(lat)
        cleaned_df['longitude'] = cleaned_df['geometry'].apply(lng)
        cleaned_df = cleaned_df.drop(['geometry', 'types'],axis=1, errors='ignore')
        print('Before dropping duplicates, the size was,',len(cleaned_df))
        cleaned_df.sort_values("place_id", inplace=True)
        cleaned_df.drop_duplicates(inplace=True)
        print('After dropping duplicates, the size was,',len(cleaned_df))
        return cleaned_df
    else:
        return df

In [86]:
def extractFeatures(options, latitude, longitude):
    dataframes = []
    for opt in options:
        data_list = []
        params = {
            'location':[latitude, longitude],
            'radius':1000,
            'type': opt
        }

        first_page = gmaps.places_nearby(**params)
        
        second_page = {'results':[]}
        third_page = {'results':[]}
        
        #Fetching the second page if there is any
        if 'next_page_token' in first_page:
            params['page_token'] = first_page['next_page_token']
            time.sleep(2)
            second_page = gmaps.places_nearby(**params)
        
        #Fetching the third or last page if there is any
        if 'next_page_token' in second_page:
            params['page_token'] = second_page['next_page_token']
            time.sleep(2)
            third_page = gmaps.places_nearby(**params)
        
        data_list.extend(first_page['results'])
        data_list.extend(second_page['results'])
        data_list.extend(third_page['results'])
        cleaned_data = cleanData(data_list)
        if cleaned_data.shape[0]>0:
            dataframes.append((opt, cleaned_data))
    return dataframes

In [87]:
def collectNearbyPlaces(current_features, latitude, longitude):
    dataframes = extractFeatures(current_features, latitude, longitude)
    nearby_places = {}
    for key in current_features:
        nearby_places[key] = 0
    for key, df in dataframes:
        if 'permanently_closed' in df.columns:
            df = df[df['permanently_closed']!=True]
        nearby_places[key]0 = df.shape[0]
    return nearby_places

### Extract Population

In [None]:
# Read Population JSON file
# Iterate over each subcity to find one that contains the coordinate within it
# Return the population of males and females when found

In [91]:
# Import and read Population file
def importPopulationFile(location = '../Data/Population_per_subcity.json'):
    script_dir = os.path.dirname("__file__")
    subcity_population_json = os.path.join(script_dir, location )
    subcity_population = open(subcity_population_json,)
    subcity_population_data = json.load(subcity_population)
    return subcity_population_data

In [89]:
#Given a coordinate, using the population json return the subcity population for the coordinate
def determineSubcityAndAddPopulation(latitude, longitude):
    subcity_population_data = importPopulationFile()

    for subcity in subcity_population_data:
        point = Point(latitude,longitude)
        polygon = Polygon([(i,j) for i, j in subcity_population_data[subcity]['coordinates']])
        if polygon.contains(point):
            total_males = total_females = 0
            for age in subcity_population_data[subcity]['population']:
              total_males += subcity_population_data[subcity]['population'][age]['Males']  
              total_females += subcity_population_data[subcity]['population'][age]['Females']  
            return [total_males, total_females]
    return [0,0]

### Merging Both of them Together

In [92]:
def extractData(current_features, latitude, longitude):
    nearby_places = collectNearbyPlaces(current_features, latitude, longitude)
    males, females = determineSubcityAndAddPopulation(latitude, longitude)
    nearby_places['Males'] = males
    nearby_places['Females'] = females
    return nearby_places

gmaps = initializeGoogleMaps('')

current_features = ['atm','bank','bus_station','church','gas_station','hospital','mosque','pharmacy','restaurant','school','train_station']
latitude, longitude = 8.9806, 38.7578

extracted_data = extractData(current_features, latitude, longitude)
print(extracted_data)

Before dropping duplicates, the size was, 2
After dropping duplicates, the size was, 2
Before dropping duplicates, the size was, 49
After dropping duplicates, the size was, 49
Before dropping duplicates, the size was, 1
After dropping duplicates, the size was, 1
Before dropping duplicates, the size was, 39
After dropping duplicates, the size was, 39
Before dropping duplicates, the size was, 5
After dropping duplicates, the size was, 5
Before dropping duplicates, the size was, 41
After dropping duplicates, the size was, 41
Before dropping duplicates, the size was, 5
After dropping duplicates, the size was, 5
Before dropping duplicates, the size was, 26
After dropping duplicates, the size was, 26
Before dropping duplicates, the size was, 60
After dropping duplicates, the size was, 60
Before dropping duplicates, the size was, 57
After dropping duplicates, the size was, 57
{'atm': [2], 'bank': [49], 'bus_station': [1], 'church': [39], 'gas_station': [5], 'hospital': [41], 'mosque': [5], 'p