In [45]:
import os

import numpy as np
import pandas as pd

# Load the raw georeference table. The cells below rename its `*_reside`
# code/name columns (province, canton, parish) and de-duplicate the rows.
df = pd.read_csv('../data/tt_georeference.csv')

In [46]:
def get_shape(df):
    """Print the shape of ``df`` and return ``df`` itself.

    Returning the same object lets the function be dropped into a
    ``.pipe(...)`` chain as a pass-through checkpoint.
    """
    dimensions = df.shape
    print(dimensions)
    return df

from IPython.display import display

# Preview the cleaned lookup table without row truncation.
# The option key is written out in full: the abbreviated pattern 'max_rows' also
# matches 'styler.render.max_rows' on pandas >= 1.4, which makes pandas raise
# OptionError ("Pattern matched multiple keys").
with pd.option_context('display.max_rows', 2500):
    display(
        (df
         .pipe(get_shape)          # shape of the raw table
         .drop_duplicates()
         .pipe(get_shape)          # shape after removing exact duplicate rows
         .rename(columns={'cod_provincia_reside':'prov_code', 'provincia_reside':'prov_name', 
                  'cod_canton_reside':'cant_code', 'canton_reside':'cant_name', 
                  'cod_parroquia_reside':'parr_code', 'parroquia_reside':'parr_name'})
         # Title-case and strip the three name columns so spelling variants collapse.
         .assign(prov_name=lambda df_: df_.prov_name.str.title().str.strip(), 
                 cant_name=lambda df_: df_.cant_name.str.title().str.strip(), 
                 # Series.replace swaps whole values: only the exact name containing
                 # the stray \x96 character is rewritten.
                 parr_name=lambda df_: df_.parr_name.str.title().str.strip() \
                                          .replace('Atocha \x96 Ficoa', 'Atocha Ficoa'),
                 # True when the last two digits of the parish code are below 51.
                 urbana=lambda df_: (df_.parr_code % 100 < 51))
         # Normalising the names can create new duplicates, so de-duplicate again.
         .drop_duplicates()
         .sort_values(by=['prov_code', 'cant_code', 'parr_code'])
        )
    )

(539535, 6)
(1289, 6)


Unnamed: 0,prov_code,prov_name,cant_code,cant_name,parr_code,parr_name,urbana
58,1.0,Azuay,101.0,Cuenca,10101.0,Bellavista,True
1921,1.0,Azuay,101.0,Cuenca,10102.0,Cañaribamba,True
389,1.0,Azuay,101.0,Cuenca,10103.0,El Batán,True
2814,1.0,Azuay,101.0,Cuenca,10104.0,El Sagrario,True
302,1.0,Azuay,101.0,Cuenca,10105.0,El Vecino,True
5801,1.0,Azuay,101.0,Cuenca,10106.0,Gil Ramírez Dávalos,True
974,1.0,Azuay,101.0,Cuenca,10107.0,Huaynacápac,True
586,1.0,Azuay,101.0,Cuenca,10108.0,Machángara,True
330,1.0,Azuay,101.0,Cuenca,10109.0,Monay,True
2350,1.0,Azuay,101.0,Cuenca,10110.0,San Blas,True


### Save Dictionary/DataFrame at current stage

In [47]:
# Build the cleaned parish lookup table and persist it.
# Steps: drop exact duplicates, shorten the column names, normalise the three
# name columns, derive the `urbana` flag, de-duplicate again and sort by code.
gdf = (
    df
    .pipe(get_shape)
    .drop_duplicates()
    .pipe(get_shape)
    .rename(columns={
        'cod_provincia_reside': 'prov_code',
        'provincia_reside': 'prov_name',
        'cod_canton_reside': 'cant_code',
        'canton_reside': 'cant_name',
        'cod_parroquia_reside': 'parr_code',
        'parroquia_reside': 'parr_name',
    })
    .assign(
        prov_name=lambda df_: df_['prov_name'].str.title().str.strip(),
        cant_name=lambda df_: df_['cant_name'].str.title().str.strip(),
        # Whole-value replacement of the one name carrying a stray \x96 character;
        # a similar fix might be required for other names as well.
        parr_name=lambda df_: (
            df_['parr_name'].str.title().str.strip()
            .replace('Atocha \x96 Ficoa', 'Atocha Ficoa')
        ),
        # True when the last two digits of the parish code are below 51.
        urbana=lambda df_: df_['parr_code'].mod(100).lt(51),
    )
    .drop_duplicates()
    .sort_values(by=['prov_code', 'cant_code', 'parr_code'])
)

gdf.to_csv('../data/tt_georeference_fin.csv', index=False, encoding='utf-8')

(539535, 6)
(1289, 6)


### Leveraging Google Maps API

In [48]:
import googlemaps

# The API key is read from the environment so it is never stored in the notebook
# or its version history. Export GOOGLE_MAPS_API_KEY before starting the kernel;
# a missing variable fails here with a KeyError instead of on the first request.
# NOTE(review): the key that used to be hard-coded on this line has been exposed
# and should be revoked and replaced.
gmaps = googlemaps.Client(key=os.environ['GOOGLE_MAPS_API_KEY'])

In [49]:
def coords_append(df, client=None):
    """Append ``latitude`` and ``longitude`` columns obtained by geocoding each row.

    Every row is geocoded by its parish name, restricted to Ecuador and to the
    row's province and canton. Columns are addressed by position: 1 is used as
    the administrative area, 3 as the locality and 5 as the parish name.

    Geocoding is best-effort: when a request fails or returns no result, the
    row is reported on stdout together with the error and receives ``NaN``
    coordinates, so one bad row does not abort the run.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns at positions 1, 3 and 5 hold the names described above.
    client : object, optional
        Object exposing ``geocode(address, components=...)``. Defaults to the
        module-level ``gmaps`` client.

    Returns
    -------
    pandas.DataFrame
        ``df`` with ``latitude`` and ``longitude`` concatenated on the right,
        aligned on the index of ``df``.
    """
    if client is None:
        client = gmaps

    coords = []
    for _, row in df.iterrows():
        # Explicit positional access: plain ``row[1]`` on a label-indexed Series
        # is deprecated since pandas 2.1 and will be treated as a label lookup.
        administrative_area = row.iloc[1]
        locality = row.iloc[3]
        parish = row.iloc[5]

        try:
            maps_out = client.geocode(parish,
                                      components={'country': 'EC',
                                                  'administrative_area': administrative_area,
                                                  'locality': locality})

            # An empty result list raises IndexError here and is handled below.
            location = maps_out[0]['geometry']['location']
            coords.append((location['lat'], location['lng']))

        # ``Exception`` rather than a bare ``except`` so that KeyboardInterrupt
        # still stops the loop when the kernel is interrupted.
        except Exception as exc:
            print('*'*100)
            print(f'@ parish :{parish}\nlocality :{locality}\nadministrative area :{administrative_area}')
            print(f'error :{exc!r}')
            print('*'*100)

            coords.append((np.nan, np.nan))

    coords_df = pd.DataFrame(data={'latitude': [lat for lat, _ in coords],
                                   'longitude': [lng for _, lng in coords]},
                             index=df.index)

    return pd.concat([df, coords_df], axis=1)
            
        

# Geocode every row of the lookup table (one request per row).
# Coordinates left over from a previous run of this cell are dropped first, so
# re-executing it does not append a second latitude/longitude pair.
gdf = (
    gdf
    .drop(columns=['latitude', 'longitude'], errors='ignore')
    .pipe(coords_append)
)

****************************************************************************************************
@ parish :Arquitecto Sixto Duran Ballen
locality :24 De Mayo
administrative area :Manabi
****************************************************************************************************
[(-2.8812927, -79.0051685), (-2.905653, -78.9882083), (-2.8962842, -79.0334288), (-2.8927134, -79.000042), (-2.8811271, -78.9882083), (-2.8924251, -79.01506049999999), (-2.9155786, -78.99951539999999), (-2.8869941, -78.9613486), (-2.8993951, -78.97548619999999), (-2.9007928, -78.9999998), (-2.8816779, -79.0447306), (-2.9011305, -79.01550510000001), (-2.890825, -78.97689989999999), (-2.9158573, -79.02777739999999), (-2.8713204, -78.9882083), (-2.9001285, -79.00589649999999), (-2.9841089, -79.2225576), (-3.0902132, -79.0079949), (-2.9465255, -79.4028639), (-2.7175265, -79.0306032), (-2.813526, -79.00347099999999), (-2.8663861, -78.885453), (-2.767106, -79.397369), (-2.8530354, -78.8722341), (-2.7707342

In [58]:
# manually introduce coordinates for Arquitecto Sixto Duran Ballen
# The geocoding run above reported this parish as not found, so its coordinates
# are filled in by hand; every other row keeps its geocoded values.
gdf = gdf.assign(
    latitude=lambda d: d['latitude'].where(
        d['parr_name'] != 'Arquitecto Sixto Duran Ballen', -1.4768859),
    longitude=lambda d: d['longitude'].where(
        d['parr_name'] != 'Arquitecto Sixto Duran Ballen', -80.3480802),
)

# Overwrite the saved lookup table, now including the coordinates.
gdf.to_csv('../data/tt_georeference_fin.csv', index=False, encoding='utf-8')

In [57]:
# Spot-check ten randomly drawn rows; transposed so each sampled row is shown as a column.
gdf.sample(10).T

Unnamed: 0,49115,935,19124,2568,1,443,921,1062,5,1499
prov_code,22.0,18.0,14.0,10.0,17.0,18.0,1.0,18.0,17.0,6.0
prov_name,Orellana,Tungurahua,Morona Santiago,Imbabura,Pichincha,Tungurahua,Azuay,Tungurahua,Pichincha,Chimborazo
cant_code,2201.0,1805.0,1402.0,1005.0,1703.0,1806.0,101.0,1801.0,1701.0,601.0
cant_name,Orellana,Patate,Gualaquiza,Pimampiro,Mejia,Quero,Cuenca,Ambato,Distrito Metropolitano De Quito,Riobamba
parr_code,220155.0,180553.0,140256.0,100550.0,170353.0,180652.0,10167.0,180162.0,170155.0,60152.0
parr_name,El Eden,Sucre (Cab. En Sucre-Patate Urcu),Nueva Tarqui,Pimampiro,Cutuglahua,Yanayacu- Mochapata (Cab. En Yanayacu),Sinincay,Quisapincha (Quizapincha),Calderon (Carapungo),Calpi
urbana,False,False,False,True,False,False,False,False,False,False
latitude,-0.547399,-1.24395,-3.457032,0.390744,-0.376612,-1.472302,-2.849508,-1.198126,-0.094017,-1.646247
longitude,-76.224737,-78.406973,-78.665917,-77.941025,-78.554728,-78.679584,-79.013647,-78.713608,-78.451007,-78.742401
