# CEP COVERED BY CURRENTLY ACCREDITED BODYSHOPS

In this notebook we create the GeoJSON files needed to plot the currently accredited bodyshops and the associated open CEP codes.


In [1]:
# Imports
import pandas as pd
import numpy as np
import psycopg2, os
import geopandas
from shapely import wkt

# Establish connection and create its cursor.
# Credentials are read from environment variables and passed as keyword
# arguments, so values containing spaces or quotes cannot corrupt the DSN.
try:
    conn = psycopg2.connect(
        host=os.environ['AURORA_POSTGRES_HOST'],
        dbname=os.environ['AURORA_POSTGRES_DATABASE'],
        user=os.environ['AURORA_POSTGRES_USERNAME'],
        password=os.environ['AURORA_POSTGRES_PWD'],
    )
    cur = conn.cursor()
except psycopg2.Error as e:
    print("Error: Could not make connection to the Postgres database")
    print(e)
    # Re-raise: every later cell needs `cur`, so carrying on would only turn
    # this failure into a confusing NameError further down.
    raise
    

---

## 1. Tables

### 1.1. Census tracts info

In [2]:
# Covered CEPs joined with their geographic attributes.
# `cep_code` is read from the covered_ceps side (alias a): the LEFT JOIN keeps
# every covered CEP, whereas the geo_info side (alias b) is NULL for CEPs with
# no geo_info row, which would collapse them under drop_duplicates() and turn
# their code into 0 after fillna(0).
statement = """
    SELECT
        a.cep_code,
        b.code_tract,
        a.is_open,
        b.cep_lon,
        b.cep_lat,
        b.reg_susep
    FROM (
        SELECT
            cep as cep_code,
            is_open
        FROM dbt_albertoscf.covered_ceps
        ) as a
    LEFT JOIN (
        SELECT 
            "CEP" as cep_code,
            lon as cep_lon,
            lat as cep_lat,
            code_tract,
            reg_susep
        FROM susep.geo_info
        ) as b
    ON a.cep_code=b.cep_code
"""

cur.execute(statement)
column_names = [desc[0] for desc in cur.description]

# Missing values are replaced with 0; later cells treat reg_susep == 0 as
# "region missing".
df1 = pd.DataFrame(cur.fetchall(), columns=column_names).drop_duplicates().fillna(0)
df1['is_open'] = df1['is_open'].astype(bool)
df1.head()


Unnamed: 0,cep_code,code_tract,is_open,cep_lon,cep_lat,reg_susep
0,7950000,351630905000018,False,-49.437193,-21.244713,11.0
1,7952000,351630905000067,False,-48.520578,-21.974562,11.0
2,9950000,351380105000162,True,-46.689417,-23.625896,11.0
3,6900000,351510305000026,False,-47.992651,-22.452919,11.0
4,9280000,354780905000456,True,-46.53645,-23.474929,11.0


Characterize the share of open CEPs per census tract. The original coordinates in `susep.geo_info` are defined per CEP code, so a census tract may be only partially open.

In [3]:
# Fraction of open CEPs within each census tract, sorted ascending.
df2 = (
    df1.groupby('code_tract')['is_open']
    .apply(lambda flags: flags.sum() / len(flags))
    .sort_values()
    .to_frame()
    .reset_index()
)
df2.head()


Unnamed: 0,code_tract,is_open
0,354850005000081,0.0
1,354100005000085,0.0
2,354100005000084,0.0
3,354100005000082,0.0
4,354100005000081,0.0


Now query the geometry and add it

In [4]:
# Fetch the geometry text stored for every census tract.
tract_geometry_query = """
    SELECT
        cd_geocodi,
        census_tract_geom_text
    FROM susep.census_tract_detail"""
cur.execute(tract_geometry_query)

tract_columns = [col[0] for col in cur.description]
df3 = pd.DataFrame(cur.fetchall(), columns=tract_columns)
df3.head()


Unnamed: 0,cd_geocodi,census_tract_geom_text
0,350635905000052,"POLYGON((-46.119915 -23.801814999999998, -46.1..."
1,350690405000020,POLYGON((-48.309927450470795 -23.2110145643571...
2,350745605000008,"POLYGON((-48.99183378666601 -22.5689597156505,..."
3,350750610000012,"POLYGON((-48.80256796118729 -22.8453336950107,..."
4,350750615000008,"POLYGON((-48.3388833020193 -22.6240319292293, ..."


In [5]:
# Attach each tract's geometry text to its open share. The left join keeps
# tracts that have no row in the geometry table.
tract_geometries = df3.rename(columns={'cd_geocodi': 'code_tract'})
census_tracts_info = pd.merge(df2, tract_geometries, on='code_tract', how='left')
census_tracts_info.head()


Unnamed: 0,code_tract,is_open,census_tract_geom_text
0,354850005000081,0.0,"POLYGON((-46.316348 -23.960863, -46.3162529999..."
1,354100005000085,0.0,"POLYGON((-46.426632999999995 -24.009279, -46.4..."
2,354100005000084,0.0,"POLYGON((-46.423955 -24.010140999999997, -46.4..."
3,354100005000082,0.0,"POLYGON((-46.424248999999996 -24.007309, -46.4..."
4,354100005000081,0.0,"POLYGON((-46.42306 -24.01125, -46.423179999999..."


Geometry column and export

In [None]:
# Convert to polygon shapes
def _wkt_to_geometry(value):
    """Return a geometry for `value`.

    WKT strings are parsed, missing values (None / NaN left by the left merge
    for tracts without a polygon) become None, and anything else is assumed to
    be an already-parsed geometry and is returned unchanged, so re-running
    this cell does not fail.
    """
    if isinstance(value, str):
        return wkt.loads(value)
    if value is None or isinstance(value, float):
        return None
    return value


census_tracts_info['census_tract_geom_text'] = census_tracts_info['census_tract_geom_text'].apply(_wkt_to_geometry)
census_tracts_info = geopandas.GeoDataFrame(census_tracts_info, geometry='census_tract_geom_text', crs="EPSG:4326")
census_tracts_info.head()

In [None]:
# Write the census tract polygons and their open share to a GeoJSON file.
census_tracts_info.to_file(filename="census_tracts_info.json", driver="GeoJSON")

### 1.2. CEP codes info

In [3]:
# Add coordinates
# points_from_xy builds the points in one vectorised call, so no WKT string is
# built and parsed per row.
# NOTE(review): coordinates that were NULL in the query are 0 after fillna(0)
# and therefore become POINT (0 0) - confirm this is intended.
cep_points = geopandas.points_from_xy(
    df1['cep_lon'].astype(float),
    df1['cep_lat'].astype(float),
)

# Filter columns and create geodf
# `assign` returns a new frame, so df1 itself is left unmodified.
cep_codes_info = geopandas.GeoDataFrame(
    df1.assign(cep_code_coords=cep_points),
    geometry='cep_code_coords',
    crs="EPSG:4326",
).drop(columns=['cep_lon', 'cep_lat'])
cep_codes_info.head()


Unnamed: 0,cep_code,code_tract,is_open,reg_susep,cep_code_coords
0,7950000,351630905000018,False,11.0,POINT (-49.43719 -21.24471)
1,7952000,351630905000067,False,11.0,POINT (-48.52058 -21.97456)
2,9950000,351380105000162,True,11.0,POINT (-46.68942 -23.62590)
3,6900000,351510305000026,False,11.0,POINT (-47.99265 -22.45292)
4,9280000,354780905000456,True,11.0,POINT (-46.53645 -23.47493)


In [4]:
# Flag CEPs whose reg_susep equals 0, and the subset of those that are open.
cep_codes_info['reg_susep_missing'] = cep_codes_info['reg_susep'].eq(0)
cep_codes_info['open_and_reg_susep_missing'] = (
    cep_codes_info['reg_susep_missing'] & cep_codes_info['is_open']
)

# Export the CEP points together with the flags.
cep_codes_info.to_file("cep_codes_info.json", driver="GeoJSON")


Some minor statistics

In [5]:
# Number of CEPs with and without a reg_susep value.
cep_codes_info['reg_susep_missing'].value_counts()

False    179274
True       2284
Name: reg_susep_missing, dtype: int64

In [6]:
# Same split expressed as a percentage of all CEPs.
cep_codes_info['reg_susep_missing'].value_counts(normalize=True).mul(100)

False    98.742
True      1.258
Name: reg_susep_missing, dtype: float64

In [7]:
# Missing / present reg_susep counts, split by whether the CEP is open.
(
    cep_codes_info.groupby('is_open')['reg_susep_missing']
    .apply(lambda flags: flags.value_counts())
    .to_frame()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,reg_susep_missing
is_open,Unnamed: 1_level_1,Unnamed: 2_level_1
False,False,106247
False,True,1751
True,False,73027
True,True,533


### 1.3. Bodyshops data

In [8]:
# Pull every row of the accredited bodyshops staging table.
cur.execute("SELECT * FROM staging_tables.stg_accredited_bodyshops")
bodyshop_columns = [col[0] for col in cur.description]
bodyshops_info = pd.DataFrame(cur.fetchall(), columns=bodyshop_columns)

# Build a WKT point per bodyshop from its longitude/latitude and parse it.
bodyshops_info['bodyshops_coords'] = [
    wkt.loads('POINT(' + str(lon) + ' ' + str(lat) + ')')
    for lon, lat in zip(bodyshops_info['longitude'], bodyshops_info['latitude'])
]

# Turn the frame into a GeoDataFrame and keep only the columns to export.
export_columns = ['nome_da_oficina', 'full_address', 'cep', 'bodyshops_coords']
bodyshops_info = geopandas.GeoDataFrame(
    bodyshops_info, geometry='bodyshops_coords', crs="EPSG:4326"
)[export_columns]
bodyshops_info.head()


Unnamed: 0,nome_da_oficina,full_address,cep,bodyshops_coords
0,Scattini Funilaria,"Al,. dos Nhambiquaras 1542 Moema São Paulo SP ...",04090-003,POINT (-46.66366 -23.61122)
1,Oficina Marques (Vila Mascote),Av. Santa Catarina 2580 Vila Mascote São Paulo...,04378-200,POINT (-46.65548 -23.65626)
2,Oficina Marques (Santo Amaro),Av. Santo Amaro 4408 Brooklin São Paulo SP 045...,04556-500,POINT (-46.68457 -23.62164)
3,Ponce e Filho,R. Butiá 42 Vila Reg. Feijó São Paulo SP 03346...,03346-010,POINT (-46.57038 -23.55887)
4,Dimas Norte,Av. Gen. Ataliba Leonel 1727 Carandiru São Pau...,02033-010,POINT (-46.61268 -23.49995)


Export to geojson

In [9]:
# Write the bodyshop points to a GeoJSON file.
bodyshops_info.to_file(filename="bodyshops_info.json", driver="GeoJSON")