In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import psycopg2, os, geopandas

from shapely import wkt
from shapely.geometry import MultiPolygon, Polygon

# Establish connection and create its cursor
try: 
    conn = psycopg2.connect(f"host={os.environ['AURORA_POSTGRES_HOST']} dbname={os.environ['AURORA_POSTGRES_DATABASE']} user={os.environ['AURORA_POSTGRES_USERNAME']} password={os.environ['AURORA_POSTGRES_PWD']}")
    cur = conn.cursor()
except psycopg2.Error as e: 
    print("Error: Could not make connection to the Postgres database")
    print(e)

In [2]:
dt = pd.read_csv("data/cep_coordinates/census_tracks_to_ceps.csv")
dt['id_munic'] = dt.apply(lambda row: ('0'*(2-len(str(int(row['cod_uf']))))+str(int(row['cod_uf'])))+('0'*(5-len(str(int(row['cod_municipio']))))+str(int(row['cod_municipio']))), axis=1)
dt.head()


Unnamed: 0.1,Unnamed: 0,cod_uf,cod_municipio,cod_distrito,cod_subdistrito,cod_setor,CEP,address_count,id_munic
0,0,11,15,5,0,1,76954000.0,392,1100015
1,1,11,15,5,0,2,76954000.0,334,1100015
2,2,11,15,5,0,3,76954000.0,255,1100015
3,3,11,15,5,0,4,76954000.0,111,1100015
4,4,11,15,5,0,5,76954000.0,424,1100015


In [3]:
# Collect municipality names and append them
cur.execute("""SELECT uf, cod_uf, cod_munic, nome_do_municipio FROM ibge.municipios""")
municipios = pd.DataFrame(cur.fetchall(), columns=[desc[0] for desc in cur.description])
municipios['id_munic'] = municipios.apply(lambda row: row['cod_uf']+row['cod_munic'], axis=1)
municipios['nome_do_municipio'] = municipios.apply(lambda row: row['uf']+', '+row['nome_do_municipio'], axis=1)
names_dict = {k:v for k,v in zip(municipios.id_munic, municipios.nome_do_municipio)}

dt['municipio'] = dt['id_munic'].apply(lambda x: names_dict[x])
dt.head()


Unnamed: 0.1,Unnamed: 0,cod_uf,cod_municipio,cod_distrito,cod_subdistrito,cod_setor,CEP,address_count,id_munic,municipio
0,0,11,15,5,0,1,76954000.0,392,1100015,"RO, Alta Floresta D'Oeste"
1,1,11,15,5,0,2,76954000.0,334,1100015,"RO, Alta Floresta D'Oeste"
2,2,11,15,5,0,3,76954000.0,255,1100015,"RO, Alta Floresta D'Oeste"
3,3,11,15,5,0,4,76954000.0,111,1100015,"RO, Alta Floresta D'Oeste"
4,4,11,15,5,0,5,76954000.0,424,1100015,"RO, Alta Floresta D'Oeste"


In [6]:
dt[dt['CEP']==5615000].groupby('municipio').address_count.sum().sort_index()

municipio
PE, Moreilândia     87
SP, São Paulo      211
Name: address_count, dtype: int64

In [7]:
dt[dt['CEP']==6240000].groupby('municipio').address_count.sum().sort_index()

municipio
CE, Amontada        1
CE, Beberibe       99
CE, Camocim        63
CE, Granja         10
CE, Moraújo         4
PA, Almeirim       20
PI, Piracuruca      1
SP, Osasco        558
Name: address_count, dtype: int64

In [8]:
dt[dt['CEP']==4464000].groupby('municipio').address_count.sum().sort_index()

municipio
BA, Riachão do Jacuípe    379
SP, São Paulo             341
Name: address_count, dtype: int64

In [9]:
dt[dt['CEP']==4464000]

Unnamed: 0.1,Unnamed: 0,cod_uf,cod_municipio,cod_distrito,cod_subdistrito,cod_setor,CEP,address_count,id_munic,municipio
276079,276079,29,26301,5,0,23,4464000.0,105,2926301,"BA, Riachão do Jacuípe"
276081,276081,29,26301,5,0,24,4464000.0,19,2926301,"BA, Riachão do Jacuípe"
276084,276084,29,26301,5,0,25,4464000.0,18,2926301,"BA, Riachão do Jacuípe"
276096,276096,29,26301,5,0,35,4464000.0,4,2926301,"BA, Riachão do Jacuípe"
276100,276100,29,26301,5,0,37,4464000.0,85,2926301,"BA, Riachão do Jacuípe"
276105,276105,29,26301,5,0,40,4464000.0,34,2926301,"BA, Riachão do Jacuípe"
276111,276111,29,26301,5,0,44,4464000.0,20,2926301,"BA, Riachão do Jacuípe"
276114,276114,29,26301,5,0,46,4464000.0,4,2926301,"BA, Riachão do Jacuípe"
276124,276124,29,26301,5,0,54,4464000.0,5,2926301,"BA, Riachão do Jacuípe"
276129,276129,29,26301,7,0,3,4464000.0,85,2926301,"BA, Riachão do Jacuípe"


In [18]:
dt2 = pd.read_csv("data/cep_coordinates/all.csv")

  exec(code_obj, self.user_global_ns, self.user_ns)


In [11]:
dt[dt['CEP']==9635000].groupby('municipio').address_count.sum().sort_index()

municipio
RS, Amaral Ferrador          117
SP, São Bernardo do Campo    395
Name: address_count, dtype: int64

In [15]:
dt.groupby('CEP').cod_uf.nunique().value_counts(normalize=True)

1     0.995269
2     0.004287
3     0.000322
4     0.000068
5     0.000021
9     0.000005
6     0.000005
7     0.000005
8     0.000004
11    0.000004
27    0.000002
12    0.000002
15    0.000002
10    0.000002
19    0.000002
24    0.000002
Name: cod_uf, dtype: float64

In [17]:
dt.groupby('CEP').id_munic.nunique().value_counts(normalize=True)

1       0.988529
2       0.008662
3       0.001663
4       0.000555
5       0.000277
6       0.000123
7       0.000060
8       0.000046
9       0.000025
10      0.000014
11      0.000011
25      0.000004
14      0.000004
16      0.000004
65      0.000002
13      0.000002
23      0.000002
19      0.000002
15      0.000002
18      0.000002
3146    0.000002
29      0.000002
60      0.000002
63      0.000002
12      0.000002
26      0.000002
272     0.000002
Name: id_munic, dtype: float64