### Importações

In [1]:
import os

import numpy as np
import pandas as pd

### Constantes

In [2]:
# Worksheet of the capitals workbook that is read for the analysis.
ABA_ANALISE             = "MEXER_NESSA"

# Directory that holds the input workbooks. It defaults to the original local
# checkout; set the EDITAI_RESULTADOS_DIR environment variable to point the
# notebook at another location without editing this cell.
RESULTADOS_DIR          = os.environ.get(
    "EDITAI_RESULTADOS_DIR",
    "/Users/gabrielribeirobizerril/Documents/GitHub/llm/editai_extractor_llm_based/resultados",
)
DATA_PATH_CAPITAIS      = os.path.join(RESULTADOS_DIR, "output_capitais_pt2.xlsx")
DATA_PATH_ESTADOS       = os.path.join(RESULTADOS_DIR, "estados_output.xlsx")
DATA_PATH_PAINEL_PNAB   = os.path.join(RESULTADOS_DIR, "Adesão - Politica Nacional Aldir Blanc.xlsx")

# Cell contents (compared after strip + lower-case) that count as "no value"
# in the quota columns.
NAME_NAN                = ["não previu", "indisponível", "none", "ok", "nan", "na"]

### Carregamento

In [3]:
def load_data(file_path: str, sheet_name: str):
    """Read a single worksheet of an Excel workbook.

    Args:
        file_path: Location of the workbook; forwarded to ``pd.read_excel`` as ``io``.
        sheet_name: Worksheet to read.

    Returns:
        The object produced by ``pd.read_excel`` for that worksheet.
    """
    planilha = pd.read_excel(sheet_name=sheet_name, io=file_path)
    return planilha

In [None]:
# Load the PNAB adhesion panel.
df_pnab = load_data(file_path=DATA_PATH_PAINEL_PNAB, sheet_name="Adesao a Política")
# Skip the first three rows read from the sheet; the real header row follows them.
df_pnab = df_pnab.iloc[3:].reset_index(drop=True)
# Promote the first remaining row to column header
df_pnab.columns = df_pnab.iloc[0]
# Remove the row that has just become the header
df_pnab = df_pnab.iloc[1:].reset_index(drop=True)
# NOTE(review): one more leading row is discarded here — presumably a blank or
# totals row right below the header; confirm against the workbook.
df_pnab = df_pnab.drop(index=0).reset_index(drop=True)
# Keep only cycle 1. The explicit .copy() gives an independent frame, so the
# columns added to df_pnab further down do not trigger SettingWithCopyWarning.
df_pnab = df_pnab.loc[df_pnab["Ano de adesão"] == "Ciclo 1"].copy()


In [115]:
# # Carrega Estados
# df_estados = load_data(file_path=DATA_PATH_ESTADOS, sheet_name="Lista de Desobedientes")
# df_estados = df_estados.iloc[2:].reset_index(drop=True)
# # Define a primeira linha restante como novo cabeçalho
# df_estados.columns = df_estados.iloc[0]
# # Remove a linha agora redundante (que virou cabeçalho)
# df_estados = df_estados.iloc[1:].reset_index(drop=True)
# # Remove coluna NaN
# df_estados = df_estados.drop(columns=[np.nan])

In [116]:
# df_estados["cotas_negras"].value_counts()

In [6]:
# Load the capitals workbook, reading only the worksheet under analysis.
df_capitais = load_data(DATA_PATH_CAPITAIS, ABA_ANALISE)


### Validação de Valores

In [7]:
# Drop the last row (in the Excel sheet it was a row of grand totals).
# .copy() detaches the result from the original frame, so the column
# assignments in the following cells do not raise SettingWithCopyWarning.
df_capitais = df_capitais.iloc[:-1].copy()

In [8]:
# Clean the three quota columns:
#   1. values whose stripped, lower-cased text is listed in NAME_NAN become NaN;
#   2. everything is then cast to str and decimal commas are replaced by dots.
for coluna_cota in ("cotas_negras", "cotas_indigenas", "cotas_pcd"):
    sem_marcadores = df_capitais[coluna_cota].apply(
        lambda valor: np.nan if str(valor).strip().lower() in NAME_NAN else valor
    )
    df_capitais[coluna_cota] = sem_marcadores.astype(str).str.replace(",", ".", regex=False)

In [9]:
# Convert the quota percentages into fractions (e.g. 25 -> 0.25).
for coluna_cota in ("cotas_negras", "cotas_indigenas", "cotas_pcd"):
    df_capitais[coluna_cota] = df_capitais[coluna_cota].astype(float).div(100)

### Filtros

In [10]:
# Columns kept for the rest of the analysis, in display order.
colunas_analise = [
    "uf",
    "pdf",
    "cotas_negras",
    "cotas_indigenas",
    "cotas_pcd",
    "vagas_totais",
    "valor_total",
]
df_capitais = df_capitais[colunas_analise]

In [12]:
import unicodedata
# The "uf" column is renamed to "ente"; its values are then trimmed and upper-cased.
df_capitais = df_capitais.rename(columns={"uf": "ente"})
ente_sem_espacos = df_capitais["ente"].str.strip()
df_capitais["ente"] = ente_sem_espacos.str.upper()
def normalizar(texto):
    """Return ``texto`` upper-cased, without accents and without surrounding whitespace.

    Anything that ``pd.isna`` flags as missing is handed back untouched.
    """
    if pd.isna(texto):
        return texto
    decomposto = unicodedata.normalize("NFKD", texto)
    # NFKD separates accented letters into base letter + combining mark;
    # keeping only the non-combining characters removes the accents.
    sem_acentos = "".join(filter(lambda ch: unicodedata.combining(ch) == 0, decomposto))
    return sem_acentos.upper().strip()

# Strip accents from the entity names, element by element.
df_capitais["ente"] = df_capitais["ente"].map(normalizar)

### Instrumento de Fomento

In [123]:
# PDF names look like "2024-01_RECIFE_FOMENTO.pdf": the instrument is the third
# "_"-separated piece, cut at its first dot.
# The vectorised .str accessors yield NaN for a missing name or a name with
# fewer than three pieces, where the previous Python lambda raised.
df_capitais["instrumento_fomento"] = (
    df_capitais["pdf"]
    .str.split("_").str[2]
    .str.split(".").str[0]
)

In [124]:
# Number of rows per entity.
df_capitais["ente"].value_counts()

ente
FORTALEZA         12
SALVADOR           9
MANAUS             9
JOAO PESSOA        9
SAO PAULO          9
MACEIO             8
RIO BRANCO         7
NATAL              7
VITORIA            6
RIO DE JANEIRO     6
GOIANIA            6
PORTO ALEGRE       5
FLORIANOPOLIS      5
PALMAS             5
PORTO VELHO        4
TERESINA           4
SAO LUIS           4
RECIFE             4
ARACAJU            4
CAMPO GRANDE       4
BOA VISTA          3
CURITIBA           3
MACAPA             3
CUIABA             3
BELO HORIZONTE     3
BELEM              1
Name: count, dtype: int64

### Cálculo de Valor e Vagas

In [13]:
# For every quota group, apply its share to the total amount and to the
# total number of openings of each row.
for grupo in ("negras", "indigenas", "pcd"):
    cota = df_capitais[f"cotas_{grupo}"]
    df_capitais[f"valor_{grupo}"] = cota * df_capitais["valor_total"]
    df_capitais[f"vagas_{grupo}"] = cota * df_capitais["vagas_totais"]

In [14]:
# Summary statistics for the numeric columns of df_capitais.
df_capitais.describe()

Unnamed: 0,cotas_negras,cotas_indigenas,cotas_pcd,vagas_totais,valor_total,valor_negras,vagas_negras,valor_indigenas,vagas_indigenas,valor_pcd,vagas_pcd
count,125.0,119.0,123.0,132.0,143.0,125.0,116.0,119.0,113.0,123.0,114.0
mean,0.26928,0.112381,0.058257,47.69697,1776856.0,493171.8,12.813534,207703.5,5.429104,99919.63,2.721768
std,0.068288,0.039696,0.021869,65.167423,3183502.0,809097.6,16.618149,352284.6,7.298662,168102.1,3.372575
min,0.2,0.0833,0.04,1.0,100000.0,25000.0,0.25,10000.0,0.1,5000.0,0.05
25%,0.25,0.1,0.05,13.75,476434.0,125000.0,3.75,50000.0,1.8,26412.5,1.0
50%,0.25,0.1,0.05,29.0,800000.0,225000.0,7.5,90000.0,3.0,48039.21,1.825
75%,0.25,0.1,0.05,52.0,2000000.0,600000.0,13.875,211036.7,6.0,120000.0,3.0
max,0.5,0.25,0.2,510.0,25000000.0,6075000.0,127.5,2500000.0,51.0,1250000.0,25.5


In [127]:
# Trim and upper-case the entity names.
# The "uf" -> "ente" rename already happened in an earlier cell and is
# deliberately not repeated here: a "uf" column is created again further down,
# so renaming it on a re-run would leave two columns called "ente".
df_capitais["ente"] = df_capitais["ente"].str.strip().str.upper()

In [128]:
# Trim surrounding whitespace, then upper-case the entity names.
ente_aparado = df_capitais["ente"].str.strip()
df_capitais["ente"] = ente_aparado.str.upper()

In [129]:
# Accent-free, upper-case capital name -> two-letter state abbreviation.
capital_to_uf = dict(
    (
        ("RIO BRANCO", "AC"),
        ("MACEIO", "AL"),
        ("MACAPA", "AP"),
        ("MANAUS", "AM"),
        ("SALVADOR", "BA"),
        ("FORTALEZA", "CE"),
        ("BRASILIA", "DF"),
        ("VITORIA", "ES"),
        ("GOIANIA", "GO"),
        ("SAO LUIS", "MA"),
        ("CUIABA", "MT"),
        ("CAMPO GRANDE", "MS"),
        ("BELO HORIZONTE", "MG"),
        ("BELEM", "PA"),
        ("JOAO PESSOA", "PB"),
        ("CURITIBA", "PR"),
        ("RECIFE", "PE"),
        ("TERESINA", "PI"),
        ("RIO DE JANEIRO", "RJ"),
        ("NATAL", "RN"),
        ("PORTO ALEGRE", "RS"),
        ("PORTO VELHO", "RO"),
        ("BOA VISTA", "RR"),
        ("FLORIANOPOLIS", "SC"),
        ("SAO PAULO", "SP"),
        ("ARACAJU", "SE"),
        ("PALMAS", "TO"),
    )
)


In [130]:
# Attach each capital's state abbreviation and build the "<ENTE>-<UF>" join key.
uf_da_capital = df_capitais["ente"].map(capital_to_uf)
df_capitais["uf"] = uf_da_capital
df_capitais["ente_uf"] = df_capitais["ente"] + "-" + uf_da_capital

In [None]:
# Rows for which no join key could be built.
sem_chave = df_capitais["ente_uf"].isnull()
df_nan = df_capitais.loc[sem_chave]

In [132]:
# Normalise entity name and state abbreviation with the same helper used for df_capitais.
df_pnab["ente"] = df_pnab["Ente Federativo"].map(normalizar)
df_pnab["uf"] = df_pnab["UF"].map(normalizar)


In [133]:
# Build the "<ENTE>-<UF>" join key on the PNAB side.
prefixo_chave = df_pnab["ente"] + "-"
df_pnab["ente_uf"] = prefixo_chave + df_pnab["uf"]

In [134]:
# Inspect the PNAB panel with the added ente / uf / ente_uf columns.
df_pnab

Unnamed: 0,Tipo de Ente,Código IBGE,UF,Ente Federativo,População,Ano de adesão,Aderiu a política?,Situação Plano de Ação,Valor do Plano de Ação,Situação do Termo de Adesão,Valor do Plano,ente,uf,ente_uf
0,Estado,11,RO,Rondônia,,Ciclo 1,Sim,Autorizado,20366508.78,Assinado,20366508.78,RONDONIA,RO,RONDONIA-RO
1,Estado,12,AC,Acre,,Ciclo 1,Sim,Autorizado,16798300.76,Assinado,16798300.76,ACRE,AC,ACRE-AC
2,Estado,13,AM,Amazonas,,Ciclo 1,Sim,Autorizado,38702531.53,Assinado,38702531.53,AMAZONAS,AM,AMAZONAS-AM
3,Estado,14,RR,Roraima,,Ciclo 1,Sim,Autorizado,14517383.57,Assinado,14517383.57,RORAIMA,RR,RORAIMA-RR
4,Estado,15,PA,Pará,,Ciclo 1,Sim,Autorizado,68292729.88,Assinado,68292729.88,PARA,PA,PARA-PA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5420,Município,5222005,GO,Vianópolis,14956,Ciclo 1,Sim,Autorizado,126198.87,Assinado,126198.87,VIANOPOLIS,GO,VIANOPOLIS-GO
5421,Município,5222054,GO,Vicentinópolis,8768,Ciclo 1,Sim,Autorizado,74506.33,Assinado,74506.33,VICENTINOPOLIS,GO,VICENTINOPOLIS-GO
5422,Município,5222203,GO,Vila Boa,4215,Ciclo 1,Sim,Autorizado,47644.85,Assinado,47644.85,VILA BOA,GO,VILA BOA-GO
5423,Município,5222302,GO,Vila Propício,5815,Ciclo 1,Sim,Autorizado,57084.42,Assinado,57084.42,VILA PROPICIO,GO,VILA PROPICIO-GO


In [135]:
# Join key plus the action-plan amount of each entity.
# NOTE(review): df_pnab holds states as well as municipalities, so a key such
# as RIO DE JANEIRO-RJ looks like it occurs twice here (state and city) and
# duplicates rows in any merge on "ente_uf" — confirm and filter if needed.
colunas_repasse = ["ente_uf", "Valor do Plano de Ação"]
plano_de_acao = df_pnab[colunas_repasse]

In [97]:
# Row-level left join: every df_capitais row receives the action-plan amount of its key.
df_merged = df_capitais.merge(plano_de_acao, on="ente_uf", how="left")

In [98]:
# Inspect the row-level merge of df_capitais with plano_de_acao.
df_merged

Unnamed: 0,ente,pdf,cotas_negras,cotas_indigenas,cotas_pcd,vagas_totais,valor_total,instrumento_fomento,valor_negras,vagas_negras,valor_indigenas,vagas_indigenas,valor_pcd,vagas_pcd,uf,ente_uf,Valor do Plano de Ação
0,RECIFE,2024-01_RECIFE_FOMENTO.pdf,0.30,0.1,0.05,10.0,500000.0,FOMENTO,150000.00,3.0,50000.00,1.0,25000.000,0.5,PE,RECIFE-PE,10495706.59
1,RECIFE,2024-02_RECIFE_FOMENTO.pdf.pdf,0.30,0.1,0.05,,2099141.3,FOMENTO,629742.39,,209914.13,,104957.065,,PE,RECIFE-PE,10495706.59
2,RECIFE,2025-01_RECIFE_CULTURAVIVA.pdf.pdf,0.30,0.1,0.05,,2800000.0,CULTURAVIVA,840000.00,,280000.00,,140000.000,,PE,RECIFE-PE,10495706.59
3,RECIFE,2024-03_RECIFE_FOMENTO.pdf.pdf,0.30,0.1,0.05,,4747853.3,FOMENTO,1424355.99,,474785.33,,237392.665,,PE,RECIFE-PE,10495706.59
4,ARACAJU,2024-11_ARACAJU_CULTURAVIVA.pdf.pdf,0.25,0.1,,10.0,800000.0,CULTURAVIVA,200000.00,2.5,80000.00,1.0,,,SE,ARACAJU-SE,4523414.56
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153,RIO DE JANEIRO,2024-12_RIODEJANEIRO_FOMENTO.pdf,,,,,700000.0,FOMENTO,,,,,,,RJ,RIO DE JANEIRO-RJ,104025808.55
154,RIO DE JANEIRO,2024-12_RIODEJANEIRO_FOMENTO.pdf,,,,,700000.0,FOMENTO,,,,,,,RJ,RIO DE JANEIRO-RJ,37862551.62
155,RIO DE JANEIRO,2024-13_RIODEJANEIRO_FOMENTO.pdf,,,,,1000000.0,FOMENTO,,,,,,,RJ,RIO DE JANEIRO-RJ,104025808.55
156,RIO DE JANEIRO,2024-13_RIODEJANEIRO_FOMENTO.pdf,,,,,1000000.0,FOMENTO,,,,,,,RJ,RIO DE JANEIRO-RJ,37862551.62


### Rank dos Editais

In [136]:
# One row per entity: number of distinct PDFs, summed total amount and the
# summed amount / openings of every quota group.
agregacoes = {
    "qtd_pdfs": ("pdf", "nunique"),
    "valor_total_edital": ("valor_total", "sum"),
}
for grupo in ("negras", "indigenas", "pcd"):
    for medida in ("valor", "vagas"):
        agregacoes[f"{medida}_{grupo}"] = (f"{medida}_{grupo}", "sum")

agrupado = (
    df_capitais
    .groupby(["ente", "ente_uf"])
    .agg(**agregacoes)
    .reset_index()
)

In [137]:
# Inspect the per-entity aggregation.
agrupado

Unnamed: 0,ente,ente_uf,qtd_pdfs,valor_total_edital,valor_negras,vagas_negras,valor_indigenas,vagas_indigenas,valor_pcd,vagas_pcd
0,ARACAJU,ARACAJU-SE,4,3800085.32,887521.3,20.5,355008.532,8.2,137504.3,3.6
1,BELEM,BELEM-PA,1,5224089.0,1306022.0,25.0,522408.9,10.0,261204.5,5.0
2,BELO HORIZONTE,BELO HORIZONTE-MG,3,15630000.0,1015000.0,31.0,406000.0,12.4,203000.0,6.2
3,BOA VISTA,BOA VISTA-RR,3,3565968.0,773275.0,66.0,309310.0,26.4,154655.0,13.2
4,CAMPO GRANDE,CAMPO GRANDE-MS,4,6638840.8,1679756.0,39.25,663884.08,15.5,583698.9,13.5
5,CUIABA,CUIABA-MT,3,3700000.0,925000.0,33.5,370000.0,13.4,185000.0,6.7
6,CURITIBA,CURITIBA-PR,3,12358000.0,3089500.0,93.5,1235800.0,37.4,617900.0,18.7
7,FLORIANOPOLIS,FLORIANOPOLIS-SC,5,3355000.0,826250.0,35.75,345470.0,14.2988,197240.0,7.5716
8,FORTALEZA,FORTALEZA-CE,12,9051094.08,1238872.0,41.0,295548.937,13.4,495548.9,16.4
9,GOIANIA,GOIANIA-GO,6,9127450.25,2281863.0,57.0,912745.025,22.8,456372.5,11.4


In [138]:
# Alphabetical order by entity name (ascending is the default).
agrupado = agrupado.sort_values("ente")

In [139]:
# Attach the action-plan amount to every aggregated entity (left join on the key).
df_merged = agrupado.merge(plano_de_acao, on="ente_uf", how="left")

In [140]:
# Give the action-plan amount a short snake_case name.
df_merged = df_merged.rename({"Valor do Plano de Ação": "valor_repasse"}, axis="columns")

In [141]:
# Ratio between the summed amount of the calls and the transferred amount.
total_editais = df_merged["valor_total_edital"]
df_merged["%"] = total_editais / df_merged["valor_repasse"]

In [142]:
# Inspect the aggregated table with valor_repasse and the "%" ratio.
df_merged

Unnamed: 0,ente,ente_uf,qtd_pdfs,valor_total_edital,valor_negras,vagas_negras,valor_indigenas,vagas_indigenas,valor_pcd,vagas_pcd,valor_repasse,%
0,ARACAJU,ARACAJU-SE,4,3800085.32,887521.3,20.5,355008.532,8.2,137504.3,3.6,4523414.56,0.840092
1,BELEM,BELEM-PA,1,5224089.0,1306022.0,25.0,522408.9,10.0,261204.5,5.0,8818174.89,0.592423
2,BELO HORIZONTE,BELO HORIZONTE-MG,3,15630000.0,1015000.0,31.0,406000.0,12.4,203000.0,6.2,15330851.48,1.019513
3,BOA VISTA,BOA VISTA-RR,3,3565968.0,773275.0,66.0,309310.0,26.4,154655.0,13.2,3782941.41,0.942644
4,CAMPO GRANDE,CAMPO GRANDE-MS,4,6638840.8,1679756.0,39.25,663884.08,15.5,583698.9,13.5,5749069.72,1.154768
5,CUIABA,CUIABA-MT,3,3700000.0,925000.0,33.5,370000.0,13.4,185000.0,6.7,4232253.11,0.874239
6,CURITIBA,CURITIBA-PR,3,12358000.0,3089500.0,93.5,1235800.0,37.4,617900.0,18.7,11367009.39,1.087181
7,FLORIANOPOLIS,FLORIANOPOLIS-SC,5,3355000.0,826250.0,35.75,345470.0,14.2988,197240.0,7.5716,3545589.2,0.946246
8,FORTALEZA,FORTALEZA-CE,12,9051094.08,1238872.0,41.0,295548.937,13.4,495548.9,16.4,17015522.75,0.531932
9,GOIANIA,GOIANIA-GO,6,9127450.25,2281863.0,57.0,912745.025,22.8,456372.5,11.4,9607842.49,0.95


In [143]:
# Column order of the final table: identification and totals first, then the
# amount / openings pair of each quota group.
ordem = ["ente", "qtd_pdfs", "valor_total_edital", "valor_repasse", "%"] + [
    f"{medida}_{grupo}"
    for grupo in ("negras", "indigenas", "pcd")
    for medida in ("valor", "vagas")
]


In [144]:
# A key such as RIO DE JANEIRO-RJ matches both a state and a municipality in
# the PNAB panel, so the left merge produces two rows for it. Keep one row per
# key instead of dropping hard-coded positions (previously 20 and 24), which
# removes the wrong rows as soon as the set of entities changes.
# NOTE(review): keep="last" relies on the panel listing states before
# municipalities, as in the displayed df_pnab, so the last match is the
# municipality — confirm.
df_merged = df_merged.drop_duplicates(subset="ente_uf", keep="last")

In [145]:
# Keep only the columns listed in `ordem`, in that order.
df_merged = df_merged.loc[:, ordem]

In [146]:
# Inspect the final table in its export column order.
df_merged

Unnamed: 0,ente,qtd_pdfs,valor_total_edital,valor_repasse,%,valor_negras,vagas_negras,valor_indigenas,vagas_indigenas,valor_pcd,vagas_pcd
0,ARACAJU,4,3800085.32,4523414.56,0.840092,887521.3,20.5,355008.532,8.2,137504.3,3.6
1,BELEM,1,5224089.0,8818174.89,0.592423,1306022.0,25.0,522408.9,10.0,261204.5,5.0
2,BELO HORIZONTE,3,15630000.0,15330851.48,1.019513,1015000.0,31.0,406000.0,12.4,203000.0,6.2
3,BOA VISTA,3,3565968.0,3782941.41,0.942644,773275.0,66.0,309310.0,26.4,154655.0,13.2
4,CAMPO GRANDE,4,6638840.8,5749069.72,1.154768,1679756.0,39.25,663884.08,15.5,583698.9,13.5
5,CUIABA,3,3700000.0,4232253.11,0.874239,925000.0,33.5,370000.0,13.4,185000.0,6.7
6,CURITIBA,3,12358000.0,11367009.39,1.087181,3089500.0,93.5,1235800.0,37.4,617900.0,18.7
7,FLORIANOPOLIS,5,3355000.0,3545589.2,0.946246,826250.0,35.75,345470.0,14.2988,197240.0,7.5716
8,FORTALEZA,12,9051094.08,17015522.75,0.531932,1238872.0,41.0,295548.937,13.4,495548.9,16.4
9,GOIANIA,6,9127450.25,9607842.49,0.95,2281863.0,57.0,912745.025,22.8,456372.5,11.4


In [147]:
# Write the final table to an Excel file in the working directory.
df_merged.to_excel(excel_writer="agrupamento_capitais_16_48_6_8_25.xlsx")

In [228]:
# Average amount per PDF. The aggregated total is stored in
# "valor_total_edital"; `agrupado` has no "valor_total" column, so the
# previous lookup raised KeyError on a clean run.
agrupado["valor_per_edital"] = agrupado["valor_total_edital"] / agrupado["qtd_pdfs"]

In [229]:
# Render the amounts as text with thousands separators and two decimals.
# After this cell both columns hold strings, so it is for display only.
agrupado["valor_per_edital"]   = agrupado["valor_per_edital"].apply(lambda x: f"{x:,.2f}")
# The aggregated total is named "valor_total_edital" in `agrupado`; the former
# "valor_total" lookup raised KeyError on a clean run.
agrupado["valor_total_edital"] = agrupado["valor_total_edital"].apply(lambda x: f"{x:,.2f}")


In [58]:
# Inspect `agrupado` after the per-PDF average and the text formatting.
agrupado

Unnamed: 0,ente,qtd_pdfs,valor_total_edital,valor_repasse,%,valor_negras,vagas_negras,valor_indigenas,vagas_indigenas,valor_pcd,vagas_pcd
0,ARACAJU,4,3800085.32,18093658.24,0.210023,887521.3,20.5,355008.532,8.2,137504.3,3.6
1,BELEM,1,5224089.0,8818174.89,0.592423,1306022.0,25.0,522408.9,10.0,261204.5,5.0
2,BELO HORIZONTE,3,15630000.0,45992554.44,0.339838,1015000.0,31.0,406000.0,12.4,203000.0,6.2
3,BOA VISTA,3,3565968.0,11348824.23,0.314215,773275.0,66.0,309310.0,26.4,154655.0,13.2
4,CAMPO GRANDE,4,6638840.8,22996278.88,0.288692,1679756.0,39.25,663884.08,15.5,583698.9,13.5
5,CUIABA,3,3700000.0,12696759.33,0.291413,925000.0,33.5,370000.0,13.4,185000.0,6.7
6,CURITIBA,3,12358000.0,34101028.17,0.362394,3089500.0,93.5,1235800.0,37.4,617900.0,18.7
7,FLORIANOPOLIS,5,3355000.0,17727946.0,0.189249,826250.0,35.75,345470.0,14.2988,197240.0,7.5716
8,FORTALEZA,12,9051094.08,204186273.0,0.044328,1238872.0,41.0,295548.937,13.4,495548.9,16.4
9,GOIANIA,6,9127450.25,57647054.94,0.158333,2281863.0,57.0,912745.025,22.8,456372.5,11.4


### Matriz das Cotas

### Categorias

df_capitais

In [11]:
# Inspect df_capitais.
df_capitais

Unnamed: 0,uf,pdf,cotas_negras,cotas_indigenas,cotas_pcd,vagas_totais,valor_total
0,RECIFE,2024-01_RECIFE_FOMENTO.pdf,0.30,0.10,0.05,10.0,500000.0
1,RECIFE,2024-02_RECIFE_FOMENTO.pdf.pdf,0.30,0.10,0.05,,2099141.3
2,RECIFE,2025-01_RECIFE_CULTURAVIVA.pdf.pdf,0.30,0.10,0.05,,2800000.0
3,RECIFE,2024-03_RECIFE_FOMENTO.pdf.pdf,0.30,0.10,0.05,,4747853.3
4,ARACAJU,2024-11_ARACAJU_CULTURAVIVA.pdf.pdf,0.25,0.10,,10.0,800000.0
...,...,...,...,...,...,...,...
138,JOÃO PESSOA,2024-10_JOAOPESSOA_FOMENTO.pdf.pdf,0.24,0.09,0.06,75.0,1320000.0
139,RIO DE JANEIRO,2024-11_RIODEJANEIRO_CULTURAVIVA.pdf,,,,70.0,600000.0
140,RIO DE JANEIRO,2024-12_RIODEJANEIRO_FOMENTO.pdf,,,,,700000.0
141,RIO DE JANEIRO,2024-13_RIODEJANEIRO_FOMENTO.pdf,,,,,1000000.0


In [15]:
# Load the per-category amounts table from the working directory.
df_categorias = pd.read_csv(filepath_or_buffer="categorias_valor.csv")

In [16]:
# Inspect the per-category table as read from the CSV.
df_categorias

Unnamed: 0.1,Unnamed: 0,nome_pdf,valor_total,valor_pessoa_negra,valor_pessoa_indígena,valor_pcd,prop_valor_pessoa_negra,prop_vagas_pessoa_indígena,prop_valor_pcd
0,0,2024-02_MANAUS_FOMENTO.pdf,4789483.73,950287.9,950287.95,460021.98,0.198411,0.198411,0.096048
1,1,"2024-05_JOAOPESSOA_CULTURAVIVA.pdf.pdf,",420000.0,140000.0,35000.0,35000.0,0.333333,0.083333,0.083333
2,2,"2024-05_PORTOALEGRE_PRÊMIO.pdf,",3980000.0,1080000.0,370000.0,210000.0,0.271357,0.092965,0.052764
3,3,"2024-11_JOAOPESSOA_CULTURAVIVA.pdf.pdf,",120000.0,45000.0,15000.0,15000.0,0.375,0.125,0.125
4,4,2024-12_ARACAJU_CULTURAVIVA.pdf.pdf,55014.22,18338.07,9169.036667,9169.036667,0.333333,0.166667,0.166667
5,5,2025-04_BOAVISTA_FOMENTO.pdf,3993100.0,1017494.0,388082.554945,138625.289988,0.254813,0.097188,0.034716
6,6,2025-06_ARACAJU_FOMENTO.pdf,2420000.0,667272.7,229090.909091,50000.0,0.275733,0.094666,0.020661
7,7,2025-06_PORTOVELHO_SUBSÍDIO.pdf,470000.0,85000.0,85000.0,10000.0,0.180851,0.180851,0.021277
8,8,"2025-06_SAOLUIS_FOMENTO.pdf,",4705000.0,1188985.0,501748.865724,261438.948991,0.252707,0.106642,0.055566
9,9,2025-08_PORTOVELHO_FOMENTO.pdf,2121592.39,505825.1,214996.094394,55614.532727,0.238418,0.101337,0.026214


In [20]:
# Inner join (pandas' default) between df_capitais and the selected
# per-category columns, matching "pdf" against "nome_pdf".
colunas_categorias = [
    "nome_pdf",
    "valor_total",
    "valor_pessoa_negra",
    "valor_pessoa_indígena",
    "valor_pcd",
    "prop_valor_pessoa_negra",
    "prop_vagas_pessoa_indígena",
    "prop_valor_pcd",
]
df_merge = df_capitais.merge(
    df_categorias[colunas_categorias],
    left_on="pdf",
    right_on="nome_pdf",
)

In [23]:
# Keep only the file name, the amounts and the proportions; the leftover
# "Unnamed" index columns from the CSV are discarded.
colunas_categorias = [
    "nome_pdf",
    "valor_total",
    "valor_pessoa_negra",
    "valor_pessoa_indígena",
    "valor_pcd",
    "prop_valor_pessoa_negra",
    "prop_vagas_pessoa_indígena",
    "prop_valor_pcd",
]
df_categorias = df_categorias[colunas_categorias]

In [25]:
# Use the same key name as df_capitais.
df_categorias = df_categorias.rename(columns={"nome_pdf": "pdf"})
# Several file names in the CSV end with a stray comma ("...pdf,"), which can
# never equal a name in df_capitais; remove it so a merge on "pdf" can match
# those rows.
df_categorias["pdf"] = df_categorias["pdf"].str.rstrip(",")

In [27]:
# Left join: every df_capitais row is kept; columns present on both sides
# get pandas' default _x / _y suffixes.
pd.merge(df_capitais, df_categorias, on="pdf", how="left")

Unnamed: 0,ente,pdf,cotas_negras,cotas_indigenas,cotas_pcd,vagas_totais,valor_total_x,valor_negras,vagas_negras,valor_indigenas,vagas_indigenas,valor_pcd_x,vagas_pcd,valor_total_y,valor_pessoa_negra,valor_pessoa_indígena,valor_pcd_y,prop_valor_pessoa_negra,prop_vagas_pessoa_indígena,prop_valor_pcd
0,RECIFE,2024-01_RECIFE_FOMENTO.pdf,0.30,0.10,0.05,10.0,500000.0,150000.00,3.0,50000.00,1.00,25000.000,0.5,,,,,,,
1,RECIFE,2024-02_RECIFE_FOMENTO.pdf.pdf,0.30,0.10,0.05,,2099141.3,629742.39,,209914.13,,104957.065,,,,,,,,
2,RECIFE,2025-01_RECIFE_CULTURAVIVA.pdf.pdf,0.30,0.10,0.05,,2800000.0,840000.00,,280000.00,,140000.000,,,,,,,,
3,RECIFE,2024-03_RECIFE_FOMENTO.pdf.pdf,0.30,0.10,0.05,,4747853.3,1424355.99,,474785.33,,237392.665,,,,,,,,
4,ARACAJU,2024-11_ARACAJU_CULTURAVIVA.pdf.pdf,0.25,0.10,,10.0,800000.0,200000.00,2.5,80000.00,1.00,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
138,JOAO PESSOA,2024-10_JOAOPESSOA_FOMENTO.pdf.pdf,0.24,0.09,0.06,75.0,1320000.0,316800.00,18.0,118800.00,6.75,79200.000,4.5,,,,,,,
139,RIO DE JANEIRO,2024-11_RIODEJANEIRO_CULTURAVIVA.pdf,,,,70.0,600000.0,,,,,,,,,,,,,
140,RIO DE JANEIRO,2024-12_RIODEJANEIRO_FOMENTO.pdf,,,,,700000.0,,,,,,,,,,,,,
141,RIO DE JANEIRO,2024-13_RIODEJANEIRO_FOMENTO.pdf,,,,,1000000.0,,,,,,,,,,,,,
