In [1]:
import pandas as pd

In [2]:
data_folder = "./data/"

df_legislaturas = pd.read_csv(data_folder + "df_legislaturas.csv")
df_parlamentares_por_legislatura = pd.read_csv(
    data_folder + "df_parlamentares_por_legislatura.csv"
)
df_parl_detalhes = pd.read_csv(data_folder + "df_parl_detalhes.csv")
df_legis_lideres = pd.read_csv(data_folder + "df_legis_lideres.csv")
df_legis_mesa = pd.read_csv(data_folder + "df_legis_mesa.csv")
df_cargos_externos = pd.read_csv(data_folder + "df_cargos_externos.csv")
df_historico = pd.read_csv(data_folder + "df_historico.csv")
df_pareceres = pd.read_csv(data_folder + "df_pareceres.csv")
df_pareceres_detalhes = pd.read_csv(data_folder + "df_pareceres_detalhes.csv")
df_orgaos_membros = pd.read_csv(data_folder + "df_orgaos_membros.csv")

  df_pareceres_detalhes = pd.read_csv(data_folder + "df_pareceres_detalhes.csv")


In [3]:
df_legislaturas["dataInicio"] = pd.to_datetime(df_legislaturas["dataInicio"])
df_legislaturas["dataFim"] = pd.to_datetime(df_legislaturas["dataFim"])

In [4]:
df_parlamentares_por_legislatura = df_parlamentares_por_legislatura[
    ["id", "nome", "idLegislatura"]
].drop_duplicates()

# dimensão posição (mesa)

In [5]:
columns = ["id", "idLegislatura", "titulo", "codTitulo"]
group_columns = ["id", "idLegislatura", "titulo"]
df_legis_g = df_legis_mesa[columns].groupby(group_columns).count().reset_index()

In [6]:
posicao_mapper = {
    "2º Secretário": "mesa_2",
    "Presidente": "mesa_1",
    "1º Vice-Presidente": "mesa_2",
    "1º Secretário": "mesa_2",
    "3º Secretário": "mesa_2",
    "4º Secretário": "mesa_2",
    "2º Suplente de Secretário": "mesa_2",
    "2º Vice-Presidente": "mesa_2",
    "3º Suplente de Secretário": "mesa_2",
    "4º Suplente de Secretário": "mesa_2",
    "1º Suplente de Secretário": "mesa_2",
}

df_legis_g["posicao"] = df_legis_g["titulo"].apply(lambda x: posicao_mapper.get(x))

In [7]:
df_legis_final = df_legis_g.join(pd.get_dummies(df_legis_g["posicao"]))
# df_legis_final = df_legis_final[['id', 'idLegislatura', 'mesa_1', 'mesa_2']].reset_index(drop=True)
df_legis_final = (
    df_legis_final[["id", "idLegislatura", "mesa_1", "mesa_2"]]
    .groupby(["id", "idLegislatura"])
    .sum()
    .reset_index()
)

# dimensão posição (colégio de lideres)

In [8]:
df_legis_lideres_final = (
    df_legis_lideres[df_legis_lideres["titulo"].isin(["Líder", "Presidente"])]
    .drop_duplicates(["parlamentar.id", "parlamentar.idLegislatura", "titulo"])
    .groupby(["parlamentar.id", "parlamentar.idLegislatura"])
    .count()
    .reset_index()
    .rename(
        {
            "parlamentar.id": "id",
            "parlamentar.idLegislatura": "idLegislatura",
            "titulo": "pos_lider",
        },
        axis=1,
    )[["id", "idLegislatura", "pos_lider"]]
)

# dimensão presidencia de comissão

In [9]:
pos_comissao_mapper = {
    "Coordenador": "pos_comiss_pr",
    "Coordenadora": "pos_comiss_pr",
    "Coordenador-Geral'": "pos_comiss_pr",
    "Presidente": "pos_comiss_pr",
}

In [10]:
df_orgaos_membros["titulo_trat"] = df_orgaos_membros["titulo"].apply(
    lambda x: pos_comissao_mapper.get(x)
)

In [11]:
df_orgaos_membros_final = (
    df_orgaos_membros[~df_orgaos_membros["titulo_trat"].isnull()]
    .drop_duplicates(["id_org", "id", "idLegislatura"])[
        ["id_org", "id", "idLegislatura"]
    ]
    .groupby(["id", "idLegislatura"])
    .count()
    .reset_index()
    .rename({"id_org": "pos_comiss_pr"}, axis=1)
)

# pareceres de relator

In [12]:
df_pareceres_merge = df_pareceres[["id", "id_parl"]].merge(
    df_pareceres_detalhes, on="id"
)

## encontrar legislatura por data de apresentação

In [13]:
df_pareceres_merge["dataApresentacao"] = pd.to_datetime(
    df_pareceres_merge["dataApresentacao"]
)

In [14]:
def find_legislatura(date):
    legislatura = df_legislaturas[
        (df_legislaturas["dataInicio"] <= date) & (df_legislaturas["dataFim"] >= date)
    ]
    if not legislatura.empty:
        return int(legislatura.iloc[0]["id"])
    return None

In [15]:
df_pareceres_merge["idLegislatura"] = df_pareceres_merge["dataApresentacao"].apply(
    find_legislatura
)
df_pareceres_merge["idLegislatura"] = (
    df_pareceres_merge["idLegislatura"].fillna(-1).astype("int64")
)

In [16]:
df_pareceres_final = (
    df_pareceres_merge[["id", "id_parl", "idLegislatura"]]
    .groupby(["id_parl", "idLegislatura"])
    .count()
    .reset_index()
    .rename({"id": "relatorias", "id_parl": "id"}, axis=1)
)

# mandatos externos

In [17]:
df_legislaturas["dataInicioYear"] = df_legislaturas["dataInicio"].dt.year

df_mandatos_externos = pd.merge(
    df_parlamentares_por_legislatura,
    df_legislaturas[["id", "dataInicioYear"]],
    left_on="idLegislatura",
    right_on="id",
    how="left",
)

# Drop the extra 'id' column from the merge
df_mandatos_externos.drop(columns=["id_y"], inplace=True)
df_mandatos_externos.rename(columns={"id_x": "id"}, inplace=True)

# Step 2: Initialize the new columns
df_mandatos_externos["mand_dep"] = 0
df_mandatos_externos["mand_ver"] = 0
df_mandatos_externos["mand_sen"] = 0


# Step 3: Define a function to update the columns based on conditions
def update_dim_columns(row, df_cargos, df_legislaturas):
    # Get the legislatura dataInicio year
    leg_data_inicio = df_legislaturas.loc[
        df_legislaturas["id"] == row["idLegislatura"], "dataInicio"
    ].dt.year.values[0]

    # Filter relevant cargos based on id and anoInicio < leg_data_inicio
    relevant_cargos = df_cargos[
        (df_cargos["id_parl"] == row["id"]) & (df_cargos["anoInicio"] < leg_data_inicio)
    ]

    # Increment the corresponding columns based on cargo_trat
    for _, cargo in relevant_cargos.iterrows():
        if cargo["cargo_trat"] == "mand_dep":
            row["mand_dep"] += (
                cargo["anos_mandato"] if cargo["anos_mandato"] > 0 else 1
            )
        elif cargo["cargo_trat"] == "mand_ver":
            row["mand_ver"] += (
                cargo["anos_mandato"] if cargo["anos_mandato"] > 0 else 1
            )
        elif cargo["cargo_trat"] == "mand_sen":
            row["mand_sen"] += (
                cargo["anos_mandato"] if cargo["anos_mandato"] > 0 else 1
            )

    return row

In [18]:
def find_mandato(x):
    texto = x.lower()
    if "suplente" in texto:
        return None
    elif "deput" in texto:
        return "dim_mand_dep"
    elif "veread" in texto:
        return "dim_mand_ver"
    elif "senad" in texto:
        return "dim_mand_sen"
    else:
        return None
    
df_cargos_externos['cargo_trat'] = df_cargos_externos['cargo'].apply(lambda x: find_mandato(x))
df_cargos_externos['anos_mandato'] = df_cargos_externos['anoFim'] - df_cargos_externos['anoInicio']

In [19]:
df_cargos_externos_clean = df_cargos_externos[
    ~df_cargos_externos["cargo_trat"].isnull()
]
# Apply the function to each row
df_mandatos_externos = df_mandatos_externos.apply(
    update_dim_columns, axis=1, args=(df_cargos_externos_clean, df_legislaturas)
)

# Drop the 'dataInicioYear' column as it's no longer needed
df_mandatos_externos.drop(columns=["dataInicioYear", "nome"], inplace=True)

# dim fidelidade

# merge dims

In [20]:
df_final = (
    df_parlamentares_por_legislatura
    .merge(df_legis_final, on=["id", "idLegislatura"], how="left")
    .merge(df_legis_lideres_final, on=["id", "idLegislatura"], how="left")
    .merge(df_orgaos_membros_final, on=["id", "idLegislatura"], how="left")
    .merge(df_pareceres_final, on=["id", "idLegislatura"], how="left")
    .merge(df_mandatos_externos, on=["id", "idLegislatura"], how="left")
    .fillna(0)
)

In [21]:
df_final

Unnamed: 0,id,nome,idLegislatura,mesa_1,mesa_2,pos_lider,pos_comiss_pr,dim_rel,dim_mand_dep,dim_mand_ver,dim_mand_sen
0,73909,0,52,0.0,0.0,0.0,0.0,22.0,0.0,0.0,0.0
1,73909,0,52,0.0,0.0,0.0,0.0,22.0,0.0,0.0,0.0
2,73984,0,52,0.0,0.0,0.0,0.0,1.0,4.0,0.0,0.0
3,73984,0,52,0.0,0.0,0.0,0.0,1.0,4.0,0.0,0.0
4,73764,ABELARDO LUPION,51,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
6647,160632,ZÉ SILVA,55,0.0,0.0,0.0,2.0,23.0,0.0,0.0,0.0
6648,178923,ZECA CAVALCANTI,55,0.0,0.0,0.0,1.0,9.0,0.0,0.0,0.0
6649,160592,ZECA DIRCEU,55,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0
6650,178902,ZECA DO PT,55,0.0,0.0,0.0,0.0,11.0,0.0,0.0,0.0
