In [None]:
import json
import math
import multiprocessing
import os
from datetime import datetime
from functools import partial

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from shapely.geometry import shape
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

In [None]:
pip install openpyxl

In [None]:
!pip install gdown

In [None]:
!gdown https://drive.google.com/uc?id=16xU5EnQ2mNrgPykMCeOOv-5xTNFSeBIm

In [None]:
!gdown https://drive.google.com/uc?id=1Zc-8_W8g4b1R7cfO8EgBowyjQh05Zf66

In [None]:
!unzip -qq ./sao_paulo_area_consolidated.zip -d ./ 

## Config

In [None]:
# Matplotlib Config
%matplotlib inline
plt.rcParams['figure.figsize'] = (16, 10)
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

# Pandas and numpy config
pd.set_option('display.float_format', lambda x: '%.3f' % x)
np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

## Load

In [None]:
# Load the boundary files for both cities.
# Context managers close the file handles deterministically; JSON is read as UTF-8
# so the result does not depend on the platform's default encoding.
with open('../input/mlprojectgroupdata/london_lsoa.json', 'r', encoding='utf-8') as london_file:
    london_area_polygons = json.load(london_file)
with open('../input/mlprojectgroupdata/sao_paulo_od_zones_2017.json', 'r', encoding='utf-8') as sao_paulo_file:
    sao_palo_polygons = json.load(sao_paulo_file)

In [None]:
# Read the pre-processed São Paulo school table from the working directory.
sao_paulo_school = pd.read_csv(filepath_or_buffer='./sao_paulo_school_processed.csv')

In [None]:
# Read the pre-processed São Paulo population table; `dfdata` is the frame used by the cells below.
dfdata = pd.read_csv(filepath_or_buffer='./sao_paulo_population_processed.csv')

In [None]:
# Inspect the area_code column of the population table.
dfdata['area_code']

In [None]:
# NOTE(review): `sao_paulo_pop` is not defined in any cell visible in this notebook,
# so this raises NameError on a fresh kernel — confirm the intended frame or drop the cell.
sao_paulo_pop.population_density_end

## Prepare boundary dicts

In [None]:
# Map each London feature's DISPLAY_NAME to the shapely geometry built from its 'geometry' entry.
london_poly = {
    feature['properties']['DISPLAY_NAME']: shape(feature['geometry'])
    for feature in london_area_polygons['features']
}

In [None]:
# Map each São Paulo feature's DISPLAY_NAME to the shapely geometry built from its 'geometry' entry.
sao_paulo_poly = {
    feature['properties']['DISPLAY_NAME']: shape(feature['geometry'])
    for feature in sao_palo_polygons['features']
}

In [None]:
# Inspect the Nome_distrito column of the population table.
dfdata['Nome_distrito']

In [None]:
# Polygon names that have no matching `area_code` in dfdata.
# Despite the variable name, this is a set difference, not an intersection.
common = set(sao_paulo_poly).difference(dfdata['area_code'])
common

In [None]:
# Preview the first five rows of the population table.
dfdata.head(n=5)

In [None]:
def is_inside_area_pool(df, polygon_dict):
    """
    Tag every row of ``df`` with the name of the polygon that contains it.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide 'Lat' and 'Lon' columns. Modified in place: an
        'area_code' column is added (or overwritten).
    polygon_dict : dict
        Maps an area name to an object exposing ``contains(point)``
        (shapely geometries in this notebook).

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object. Rows that fall inside no polygon keep the
        value 'Not Found'. If several polygons contain a point, the last
        one in dict iteration order wins.
    """
    df['area_code'] = 'Not Found'
    for index, row in df.iterrows():
        # Build the point once per row instead of once per polygon.
        # Coordinates are passed as (x, y) = (Lon, Lat).
        point = Point(row['Lon'], row['Lat'])
        for area_name, polygon in polygon_dict.items():
            if polygon.contains(point):
                df.loc[index, 'area_code'] = area_name
    return df

In [None]:
# Geocode every place name in dfdata["LSOAName"] with the OpenCage API and store
# the first result's coordinates in the 'Lat' / 'Lon' columns.
# The API key is read from the OPENCAGE_API_KEY environment variable so that it is
# never stored in the notebook or its outputs.
opencage_url = 'https://api.opencagedata.com/geocode/v1/json'
opencage_key = os.environ['OPENCAGE_API_KEY']

# Rows that cannot be geocoded stay NaN.
dfdata['Lat'] = np.nan
dfdata['Lon'] = np.nan
for index, loc_name in dfdata["LSOAName"].items():
    # `params` URL-encodes the query; the timeout keeps a stalled request from hanging the cell.
    response = requests.get(
        opencage_url,
        params={'q': loc_name, 'key': opencage_key},
        timeout=30,
    )
    response.raise_for_status()
    results = response.json()['results']
    if not results:
        print('No geocoding result for', loc_name)
        continue
    geometry = results[0]['geometry']
    # .loc writes into the frame itself (chained `df[col][idx] = ...` may write to a copy).
    dfdata.loc[index, 'Lat'] = geometry['lat']
    dfdata.loc[index, 'Lon'] = geometry['lng']

In [None]:
# Preview the first five rows, now including the geocoded Lat / Lon columns.
dfdata.head(n=5)

## Process lat long columns

In [None]:
# Cast the geocoded coordinates to float32.
# NOTE(review): this cell previously referenced `london_2019_population_data`, which is not
# defined in the visible notebook. `dfdata` is the frame whose Lat/Lon columns are filled by
# the geocoding cell above and consumed by the area lookup below — confirm this is the intended frame.
dfdata['Lat'] = dfdata['Lat'].astype('float32')
dfdata['Lon'] = dfdata['Lon'].astype('float32')

In [None]:
# NOTE(review): `sao_paulo_2019_pop_data` is not defined in any cell visible in this notebook,
# so this raises NameError on a fresh kernel — confirm the intended frame or drop the cell.
sao_paulo_2019_pop_data.head()

## Set area in parallel

In [None]:
# Run the point-in-polygon lookup over dfdata and report the wall-clock duration.
# NOTE(review): dfdata is read from the São Paulo population CSV but is matched against
# `london_poly` here — confirm which city this run is meant for.
start = datetime.now()
print("Started Now")
final_df = is_inside_area_pool(dfdata, london_poly)
elapsed = datetime.now() - start
print("Ended in {} seconds".format(elapsed))

In [None]:
# Preview the first five rows after area assignment.
final_df.head(n=5)

In [None]:
# Per-column non-null counts for the rows that were matched to a polygon.
final_df.loc[final_df['area_code'] != 'Not Found'].count()

In [None]:
final_df_copy.head()

In [None]:
final_df_copy = final_df.loc[final_df.area_code != 'Not Found']

In [None]:
# Write the matched rows to CSV in the working directory.
final_df_copy.to_csv(path_or_buf="./london_pop_final.csv")

In [None]:
# Per-column non-null counts for the full frame, matched or not.
final_df.count(axis=0)

# São Paulo population resolution based on Euclidean distance


In [None]:
# Names of all São Paulo polygons, in dict order.
UberAreas = list(sao_paulo_poly)

In [None]:
# One-column frame holding the area names.
UberAreasdf = pd.DataFrame({'area_names': UberAreas})

In [None]:
# Geocode every area name (qualified with ",São Paulo") with the OpenCage API and store
# the first result's coordinates in the 'Lat' / 'Lon' columns.
# The API key is read from the OPENCAGE_API_KEY environment variable; the request URL is
# deliberately not printed because it would write the key into the cell output.
opencage_url = 'https://api.opencagedata.com/geocode/v1/json'
opencage_key = os.environ['OPENCAGE_API_KEY']

# Rows that cannot be geocoded stay NaN.
UberAreasdf['Lat'] = np.nan
UberAreasdf['Lon'] = np.nan
for index, area in UberAreasdf['area_names'].items():
    # `params` URL-encodes the query, including the non-ASCII characters and the comma.
    response = requests.get(
        opencage_url,
        params={'q': area + ',São Paulo', 'key': opencage_key},
        timeout=30,
    )
    response.raise_for_status()
    results = response.json()['results']
    if not results:
        print('No geocoding result for', area)
        continue
    geometry = results[0]['geometry']
    print(geometry['lat'], ':', geometry['lng'])
    # .loc writes into the frame itself (chained `df[col][idx] = ...` may write to a copy).
    UberAreasdf.loc[index, 'Lat'] = geometry['lat']
    UberAreasdf.loc[index, 'Lon'] = geometry['lng']

In [None]:
def get_distance(dim1,dim2):
    """
    Straight-line distance between two points given as "lat,lon" strings.

    Each argument is split on ',' and its first two fields are parsed as floats.
    The result is sqrt(dlat^2 + dlon^2), in the same units as the inputs.
    """
    first_fields = dim1.split(',')
    lat_a, lon_a = first_fields[0], first_fields[1]
    second_fields = dim2.split(',')
    lat_b, lon_b = second_fields[0], second_fields[1]

    delta_lat = float(lat_b) - float(lat_a)
    delta_lon = float(lon_b) - float(lon_a)
    return math.sqrt((delta_lat * delta_lat) + (delta_lon * delta_lon))
    

In [None]:
# For every Uber area, find the row of `dfdata` whose (Lat, Lon) is closest in plain
# Euclidean distance and store that row's value in UberAreasdf['Population'].
# The stored value is Pop_2020 / ((Pop_2010 / Pop/ha_2010) * 0.01), i.e. the 2020 population
# divided by an area derived from the 2010 figures. The column keeps the name 'Population'.
#
# The result is assigned to the frame in one step: writing to the `row` yielded by
# iterrows() only changes a temporary copy and never reaches UberAreasdf.
zone_lat = pd.to_numeric(dfdata['Lat'], errors='coerce').to_numpy(dtype=float)
zone_lon = pd.to_numeric(dfdata['Lon'], errors='coerce').to_numpy(dtype=float)
zone_area = (dfdata['Pop_2010'] / dfdata['Pop/ha_2010']) * 0.01
zone_value = (dfdata['Pop_2020'] / zone_area).to_numpy(dtype=float)

area_lat = pd.to_numeric(UberAreasdf['Lat'], errors='coerce').to_numpy(dtype=float)
area_lon = pd.to_numeric(UberAreasdf['Lon'], errors='coerce').to_numpy(dtype=float)

nearest_value = []
for lat, lon in zip(area_lat, area_lon):
    distances = np.hypot(zone_lat - lat, zone_lon - lon)
    if np.isfinite(distances).any():
        # nanargmin skips zones with missing coordinates; ties resolve to the first zone.
        nearest_value.append(zone_value[np.nanargmin(distances)])
    else:
        # No comparable zone (missing coordinates or empty dfdata): keep the original default of 0.
        nearest_value.append(0)

UberAreasdf['Population'] = nearest_value
        

In [None]:
# Show the areas whose Population value is no longer the 'NA' placeholder.
UberAreasdf.loc[UberAreasdf['Population'] != 'NA']