## 1.0 Introduction

The main purpose of the `geo_location` module is to automatically fill in Brazilian regions and states from city names and postal codes (CEPs).

## 1.1 Import modules

In [2]:
# From the module
from gumly import geo_location

#Others
import pandas as pd

## 1.2 Creating a DataFrame

In [2]:
# Four spellings of the same city; the last one ('São Pauol') contains a typo.
d = dict(
    Customer=[1, 2, 3, 4],
    City=['São Paulo', 'Sao Paulo', 'sao paulo', 'São Pauol'],
)
df = pd.DataFrame(d)

df

Unnamed: 0,Customer,City
0,1,São Paulo
1,2,Sao Paulo
2,3,sao paulo
3,4,São Pauol


## 1.3 Filling the region from the city

In [3]:
# Look up the region for every value in the 'City' column and store it as a new column.
df['Region'] = geo_location.city_to_region(
    df,
    'City',
)
df.loc[:, ['Customer', 'City', 'Region']]



Unnamed: 0,Customer,City,Region
0,1,São Paulo,Sudeste
1,2,Sao Paulo,Sudeste
2,3,sao paulo,Sudeste
3,4,São Pauol,


In [4]:
# Same city-based lookup, this time returning the microregion.
df['Microregion'] = geo_location.city_to_microregion(
    df,
    'City',
)
df.loc[:, ['Customer', 'City', 'Microregion']]



Unnamed: 0,Customer,City,Microregion
0,1,São Paulo,São Paulo
1,2,Sao Paulo,São Paulo
2,3,sao paulo,São Paulo
3,4,São Pauol,


In [5]:
# Same city-based lookup, this time returning the mesoregion.
df['Mesoregion'] = geo_location.city_to_mesoregion(
    df,
    'City',
)
df.loc[:, ['Customer', 'City', 'Mesoregion']]



Unnamed: 0,Customer,City,Mesoregion
0,1,São Paulo,Metropolitana de São Paulo
1,2,Sao Paulo,Metropolitana de São Paulo
2,3,sao paulo,Metropolitana de São Paulo
3,4,São Pauol,


In [6]:
# Same city-based lookup, this time returning the immediate region.
df['Immediate_region'] = geo_location.city_to_immediate_region(
    df,
    'City',
)
df.loc[:, ['Customer', 'City', 'Immediate_region']]



Unnamed: 0,Customer,City,Immediate_region
0,1,São Paulo,São Paulo
1,2,Sao Paulo,São Paulo
2,3,sao paulo,São Paulo
3,4,São Pauol,


In [8]:
# Same city-based lookup, this time returning the intermediary region.
df['Intermediary_region'] = geo_location.city_to_intermediary_region(
    df,
    'City',
)
df.loc[:, ['Customer', 'City', 'Intermediary_region']]

Unnamed: 0,Customer,City,Intermediary_region
0,1,São Paulo,São Paulo
1,2,Sao Paulo,São Paulo
2,3,sao paulo,São Paulo
3,4,São Pauol,


## 1.4 Filling the region from the city and UF to avoid duplication

In [9]:
# Three rows with the same city name ("Bom Jesus"), each paired with a different UF.
d = dict(
    Customer=[1, 2, 3],
    City=['Bom jesus', 'Bom Jesus', 'Bom Jesus'],
    UF=['RS', 'PI', 'RN'],
)
dfuf = pd.DataFrame(d)
dfuf

Unnamed: 0,Customer,City,UF
0,1,Bom jesus,RS
1,2,Bom Jesus,PI
2,3,Bom Jesus,RN


In [10]:
# Pass the 'UF' column together with 'City' so that identically named cities
# in different states are resolved separately.
dfuf['Region'] = geo_location.city_to_region(
    dfuf,
    'City',
    'UF',
)
dfuf

Unnamed: 0,Customer,City,UF,Region
0,1,Bom jesus,RS,Sul
1,2,Bom Jesus,PI,Nordeste
2,3,Bom Jesus,RN,Nordeste


In [11]:
# Microregion lookup keyed on both the city name and its UF.
dfuf['Microregion'] = geo_location.city_to_microregion(
    dfuf,
    'City',
    'UF',
)
dfuf.loc[:, ['Customer', 'City', 'UF', 'Microregion']]

Unnamed: 0,Customer,City,UF,Microregion
0,1,Bom jesus,RS,Vacaria
1,2,Bom Jesus,PI,Alto Médio Gurguéia
2,3,Bom Jesus,RN,Agreste Potiguar


In [12]:
# Mesoregion lookup keyed on both the city name and its UF.
dfuf['Mesoregion'] = geo_location.city_to_mesoregion(
    dfuf,
    'City',
    'UF',
)
dfuf.loc[:, ['Customer', 'City', 'UF', 'Mesoregion']]

Unnamed: 0,Customer,City,UF,Mesoregion
0,1,Bom jesus,RS,Nordeste Rio-grandense
1,2,Bom Jesus,PI,Sudoeste Piauiense
2,3,Bom Jesus,RN,Agreste Potiguar


In [14]:
# Immediate-region lookup keyed on both the city name and its UF.
# NOTE(review): this column is spelled 'Imediate_region', whereas section 1.3 uses
# 'Immediate_region' — consider unifying the spelling and re-running the cell.
dfuf['Imediate_region'] = geo_location.city_to_immediate_region(
    dfuf,
    'City',
    'UF',
)
dfuf.loc[:, ['Customer', 'City', 'UF', 'Imediate_region']]

Unnamed: 0,Customer,City,UF,Imediate_region
0,1,Bom jesus,RS,Vacaria
1,2,Bom Jesus,PI,Bom Jesus
2,3,Bom Jesus,RN,Natal


In [15]:
# Intermediary-region lookup keyed on both the city name and its UF.
dfuf['Intermediary_region'] = geo_location.city_to_intermediary_region(
    dfuf,
    'City',
    'UF',
)
dfuf.loc[:, ['Customer', 'City', 'UF', 'Intermediary_region']]

Unnamed: 0,Customer,City,UF,Intermediary_region
0,1,Bom jesus,RS,Caxias do Sul
1,2,Bom Jesus,PI,Corrente - Bom Jesus
2,3,Bom Jesus,RN,Natal


## 1.5 Filling the region from the state

In [16]:
# Sample customers whose state name is written with different accents and
# capitalisation; the last entry ('ceaara') is a misspelling.
# The frame is built directly from the literal so that `dfstate` is only ever
# bound to a DataFrame (it used to hold a plain dict first).
dfstate = pd.DataFrame(
    data={
        'Customer': [1, 2, 3, 4],
        'State': ['Ceará', 'Ceara', 'ceara', 'ceaara'],
    }
)

dfstate

Unnamed: 0,Customer,State
0,1,Ceará
1,2,Ceara
2,3,ceara
3,4,ceaara


In [17]:
# Look up the region for every value in the 'State' column.
dfstate['state_to_region'] = geo_location.state_to_region(
    dfstate,
    'State',
)
dfstate

Unnamed: 0,Customer,State,state_to_region
0,1,Ceará,Nordeste
1,2,Ceara,Nordeste
2,3,ceara,Nordeste
3,4,ceaara,


## 1.6 Creating a DataFrame with postal codes (CEPs)

In [19]:
# The same postal code written three ways: hyphenated, digits only, and
# truncated to its first five digits.
# The frame is built directly from the literal so that `dfcep` is only ever
# bound to a DataFrame (it used to hold a plain dict first).
dfcep = pd.DataFrame(
    data={
        'Customer': [1, 2, 3],
        'CEP': ['03033-070', '03033070', '03033'],
    }
)
dfcep

Unnamed: 0,Customer,CEP
0,1,03033-070
1,2,03033070
2,3,03033


## 1.9 Filling the state from the CEP

In [20]:
# Look up the state for every value in the 'CEP' column.
dfcep['State'] = geo_location.cep_to_state(
    dfcep,
    'CEP',
)
dfcep

Unnamed: 0,Customer,CEP,State
0,1,03033-070,São Paulo
1,2,03033070,São Paulo
2,3,03033,São Paulo


## 1.10 Filling the region from the CEP

In [21]:
# Look up the region for every value in the 'CEP' column.
dfcep['Region'] = geo_location.cep_to_region(
    dfcep,
    'CEP',
)
dfcep.loc[:, ['Customer', 'CEP', 'Region']]

Unnamed: 0,Customer,CEP,Region
0,1,03033-070,Sudeste
1,2,03033070,Sudeste
2,3,03033,Sudeste


## 1.11  Filling the region from the UF

In [22]:
# The UF 'RS' in three capitalisations, plus 'rgs', which is not a two-letter code.
# The frame is built directly from the literal so that `dfuf` is only ever
# bound to a DataFrame (it used to hold a plain dict first).
dfuf = pd.DataFrame(
    data={
        'Customer': [1, 2, 3, 4],
        'UF': ['RS', 'rs', 'rS', 'rgs'],
    }
)

dfuf

Unnamed: 0,Customer,UF
0,1,RS
1,2,rs
2,3,rS
3,4,rgs


In [23]:
# Look up the region for every value in the 'UF' column.
dfuf['Region'] = geo_location.uf_to_region(
    dfuf,
    'UF',
)
dfuf

Unnamed: 0,Customer,UF,Region
0,1,RS,Sul
1,2,rs,Sul
2,3,rS,Sul
3,4,rgs,


## 1.12  Filling City from IBGE ID

In [24]:
# Two IDs stored as strings: a seven-digit value and a two-digit value.
# The frame is built directly from the literal so that `dfid` is only ever
# bound to a DataFrame (it used to hold a plain dict first).
dfid = pd.DataFrame(
    data={
        'Customer': [1, 2],
        'ID': ['5300108', '30'],
    }
)

dfid

Unnamed: 0,Customer,ID
0,1,5300108
1,2,30


In [25]:
# Look up the city name for every value in the 'ID' column.
dfid['city'] = geo_location.ibge_city(
    dfid,
    'ID',
)
dfid

Unnamed: 0,Customer,ID,city
0,1,5300108,Brasília
1,2,30,


## 1.13  Filling IBGE ID from City

In [26]:
# Rebuild the city + UF frame: the same city name ("Bom Jesus") in three different UFs.
d = dict(
    Customer=[1, 2, 3],
    City=['Bom jesus', 'Bom Jesus', 'Bom Jesus'],
    UF=['RS', 'PI', 'RN'],
)
dfuf = pd.DataFrame(d)
dfuf

Unnamed: 0,Customer,City,UF
0,1,Bom jesus,RS
1,2,Bom Jesus,PI
2,3,Bom Jesus,RN


In [27]:
# Look up the IBGE ID for every city, using the 'UF' column to tell apart
# cities that share a name.
dfuf['IBGE_ID'] = geo_location.city_ibge(
    dfuf,
    'City',
    'UF',
)
dfuf

Unnamed: 0,Customer,City,UF,IBGE_ID
0,1,Bom jesus,RS,4302303
1,2,Bom Jesus,PI,2201903
2,3,Bom Jesus,RN,2401701


## 1.14 Filling UF from state and state from UF

In [8]:
# State names written with mixed capitalisation and with or without accents.
d = dict(
    Customer=[1, 2, 3],
    state=['Rio Grande do Sul', 'sao paulo', 'paraná'],
)
dfs = pd.DataFrame(d)
dfs

Unnamed: 0,Customer,state
0,1,Rio Grande do Sul
1,2,sao paulo
2,3,paraná


In [9]:
# Look up the two-letter UF for every value in the 'state' column.
dfs['UF'] = geo_location.state_to_uf(
    dfs,
    'state',
)
dfs

Unnamed: 0,Customer,state,UF
0,1,Rio Grande do Sul,RS
1,2,sao paulo,SP
2,3,paraná,PR


In [10]:
# Reverse lookup: overwrite the hand-typed 'state' column with the state name
# returned for each UF.
dfs['state'] = geo_location.uf_to_state(
    dfs,
    'UF',
)
dfs

Unnamed: 0,Customer,state,UF
0,1,Rio Grande do Sul,RS
1,2,São Paulo,SP
2,3,Paraná,PR


## 2.0 Conclusion and library advantages

This implementation is a simple approach to quickly fill in Brazilian states and regions in DataFrames where this information is not present. It accepts complete and incomplete postal codes (CEPs), as well as city names without the correct accents or capitalization, but it cannot handle typos.

## References

[ibge library](https://pypi.org/project/ibge/)

[ibge website](https://www.ibge.gov.br/)

[Everything about CEP](https://www.correios.com.br/enviar/precisa-de-ajuda/tudo-sobre-cep)
