In [8]:
import pandas as pd
import geopandas as gpd
import geobr

In [9]:
# Read the census data from CSV and the geographic sectors from GPKG.
# CD_SETOR is an identifier, not a quantity: read it as text so the
# string-keyed merge further down does not depend on dtype inference.
# low_memory=False makes pandas infer each column's dtype from the whole
# file rather than chunk by chunk, which avoids mixed-type columns.
df = pd.read_csv(
    '../data/censo2022/raw/censo.csv',
    sep=";",
    dtype={'CD_SETOR': str},
    low_memory=False,
)
setores = gpd.read_file('../data/censo2022/raw/setores.gpkg')

# GeoDataFrame.simplify() returns a GeoSeries holding only the geometries, so
# rebinding `setores` to its result would drop every attribute column
# (CD_SETOR, CD_UF, CD_MUN, NM_MUN) that the following steps rely on.
# Replace just the geometry column and keep the rest of the frame intact.
setores['geometry'] = setores.geometry.simplify(tolerance=0.001, preserve_topology=True)

# Census column codes -> race category names.
# V01317-V01321 hold the per-sector counts of people in each race category.
race_dict = {
    'CD_SETOR': 'CD_SETOR',  # Census sector ID (name kept as is)
    'V01317': 'branca',      # White
    'V01318': 'preta',       # Black
    'V01319': 'amarela',     # Asian
    'V01320': 'parda',       # Brown/Mixed
    'V01321': 'indigena'     # Indigenous
}

# Keep only the mapped columns and give them their category names
df = df[list(race_dict)].rename(columns=race_dict)

# Every mapped column except the sector ID is a race count
race_columns = [name for code, name in race_dict.items() if code != 'CD_SETOR']

# Counts recorded as 'X' are treated as 0 so each column can be cast to float
df = df.assign(**{
    col: df[col].replace('X', '0').astype(float) for col in race_columns
})

# Total population of each sector = sum over the race categories
df['populacao'] = df[race_columns].sum(axis=1)

# Cast the sector ID to string on both sides so the join keys share one dtype
for frame in (setores, df):
    frame['CD_SETOR'] = frame['CD_SETOR'].astype(str)

# Left-join the demographic counts onto the sector geometries
merged = setores.merge(df, on='CD_SETOR', how='left')

# Keep only the columns that are exported, then rename the identifier
# columns to match the docs specification
output_columns = ['geometry', 'CD_UF', 'CD_SETOR', 'CD_MUN', 'NM_MUN', 'populacao'] + race_columns
output_names = {
    'CD_UF': 'sigla_uf',
    'CD_SETOR': 'id_setor_censitario',
    'CD_MUN': 'id_municipio',
}
gdf = merged[output_columns].rename(columns=output_names)

# Numeric state codes -> two-letter state abbreviations
state_codes = {
    11: 'RO', 12: 'AC', 13: 'AM', 14: 'RR',
    15: 'PA', 16: 'AP', 17: 'TO',
    21: 'MA', 22: 'PI', 23: 'CE',
    24: 'RN', 25: 'PB', 26: 'PE',
    27: 'AL', 28: 'SE', 29: 'BA',
    31: 'MG', 32: 'ES', 33: 'RJ', 35: 'SP',
    41: 'PR', 42: 'SC', 43: 'RS',
    50: 'MS', 51: 'MT', 52: 'GO', 53: 'DF',
}

# sigla_uf still holds the numeric code at this point: cast it to int so it
# matches the dictionary keys, then swap in the abbreviation
uf_code = gdf['sigla_uf'].astype(int)
gdf['sigla_uf'] = uf_code.map(state_codes)



  df = pd.read_csv('../data/censo2022/raw/censo.csv', sep=";")


DataSourceError: sqlite3_open(../data/censo2022/raw/setores.gpkg) failed: unable to open database file

In [3]:
# Municipality geometries from geobr, renamed to this notebook's column names
# and reduced to ID, name and geometry. The ID is cast to float so that it has
# the same dtype as the aggregated key it is merged with below.
municipalities_raw = geobr.read_municipality().rename(columns={
    'code_muni': 'id_municipio',
    'name_muni': 'municipio',
})
municipalities = municipalities_raw[['id_municipio', 'municipio', 'geometry']].assign(
    id_municipio=lambda frame: frame['id_municipio'].astype(float)
)

# Sum the sector-level counts up to one row per (state, municipality)
count_columns = ['branca', 'preta', 'amarela', 'parda', 'indigena', 'populacao']
mun_agg = (
    gdf.groupby(['sigla_uf', 'id_municipio'])[count_columns]
    .sum()
    .reset_index()
)
mun_agg['id_municipio'] = mun_agg['id_municipio'].astype(float)

# Attach the aggregated counts; the left join keeps every municipality geometry
municipalities = municipalities.merge(mun_agg, on='id_municipio', how='left')

In [6]:
# Save individual state files.
# Only the states listed here are exported (currently just RJ); add more
# abbreviations to this list to write more states.
states_to_export = ['RJ']

# `municipalities` comes from a left merge, so sigla_uf can be missing for
# rows without aggregated data; ignore those when checking what is available.
available_states = set(municipalities['sigla_uf'].dropna())

for state in states_to_export:
    # Skip states that are absent from the data instead of writing empty files
    if state not in available_states:
        continue

    # Filter and save municipality data by state
    state_mun = municipalities[municipalities['sigla_uf'] == state]
    state_mun.to_file(f'../data/censo2022/output/tiles/race/municipality_{state}.geojson', driver='GeoJSON')

    # Filter and save census tract data by state
    state_tract = gdf[gdf['sigla_uf'] == state]
    state_tract.to_file(f'../data/censo2022/output/tiles/race/census_tract_{state}.geojson', driver='GeoJSON')

In [5]:
# Write the nationwide layers: all municipalities and all census tracts
municipalities.to_file(
    '../data/censo2022/output/tiles/race/municipality_BR.geojson',
    driver='GeoJSON',
)
gdf.to_file(
    '../data/censo2022/output/tiles/race/census_tract_BR.geojson',
    driver='GeoJSON',
)

In [20]:
# Save the full datasets (this cell writes no per-state files)
full_exports = (
    (municipalities, '../data/censo2022/output/tiles/race/municipality.geojson'),
    (gdf, '../data/censo2022/output/tiles/race/census_tract.geojson'),
)
for layer, path in full_exports:
    layer.to_file(path, driver='GeoJSON')




: 

: 