In [23]:
import pandas as pd
import numpy as np
from os import path, makedirs

In [24]:
base_path = path.join('data_output', 'urbanismo')
if not path.exists(base_path):
    makedirs(base_path)
def _normalize_headers(cols):
    return [str(c).strip() for c in cols]
def _clean_df(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()
    df.columns = _normalize_headers(df.columns)
    for c in df.columns:
        if df[c].dtype == object:
            df[c] = df[c].astype(str).str.strip().replace({"": np.nan})
    return df

In [25]:
df_atd = pd.read_csv("dados/urbanismo/atendimentos_viagens.csv",
                     sep=";", dtype=str)

df_loa = pd.read_csv("dados/urbanismo/loa_atende.csv",
                     sep=";", dtype=str)

df_km = pd.read_csv("dados/urbanismo/quilometragemKM.csv",
                     sep=";", dtype=str)

In [26]:
# Ajustar para formato iso data ex: AAAA-MM-DD
df_atd['data'] = pd.to_datetime(df_atd['data']).dt.strftime('%Y-%m-%d')
df_atd = _clean_df(df_atd)
df_atd

Unnamed: 0,area,empresa,ano,mes,data_meses,beneficiario,qtd_atendimentos_realizados,data
0,I,NORTE BUS,2022,jan,1,Usuario,5224,2022-01-01
1,I,NORTE BUS,2022,jan,1,Acompanhante,3892,2022-01-01
2,I,NORTE BUS,2022,fev,2,Usuario,7225,2022-01-02
3,I,NORTE BUS,2022,fev,2,Acompanhante,5765,2022-01-02
4,I,NORTE BUS,2022,mar,3,Usuario,9114,2022-01-03
...,...,...,...,...,...,...,...,...
931,VIII,ALFARODOBUS,2024,out,10,Acompanhante,2627,2024-01-10
932,VIII,ALFARODOBUS,2024,nov,11,Usuario,2923,2024-01-11
933,VIII,ALFARODOBUS,2024,nov,11,Acompanhante,2312,2024-01-11
934,VIII,ALFARODOBUS,2024,dez,12,Usuario,1989,2024-01-12


In [27]:
df_loa

Unnamed: 0,ano,liquidado,pago
0,2022,141682050.88,141682050.88
1,2023,150735189.79,150735189.79
2,2024,180725312.8,180725312.8


In [28]:
# Aqui eu despivoto
df_km_melted = pd.melt(
    df_km,
    id_vars=['Empresa', 'Lote', 'Mês'],
    value_vars=['Valor_Quilometragem_2022', 'Valor_Quilometragem_2023', 'Valor_Quilometragem_2024'],
    var_name='Ano',
    value_name='Valor_Quilometragem'
)

# Ajsuto a col "Ano" para ano
df_km_melted['Ano'] = df_km_melted['Ano'].str.extract(r'(\d{4})')

# Criei a coluna data com a primeira data de cada mês
df_km_melted['Data'] = pd.to_datetime(df_km_melted['Ano'] + '-' + df_km_melted['Mês'] + '-01', format='%Y-%B-%d')

df_km_melted

Unnamed: 0,Empresa,Lote,Mês,Ano,Valor_Quilometragem,Data
0,NORTE BUSS S.A.,D1,Janeiro,2022,193296,2022-01-01
1,SPENCER,D1,Janeiro,2022,723,2022-01-01
2,NORTE BUSS S.A.,D2,Janeiro,2022,53028,2022-01-01
3,SPENCER,D2,Janeiro,2022,0,2022-01-01
4,TRANSUNIÃO,D3,Janeiro,2022,122498,2022-01-01
...,...,...,...,...,...,...
535,A2 TRANSPORTES,D9,Dezembro,2024,95064,2024-12-01
536,TRANSWOLFF,D10,Dezembro,2024,155699,2024-12-01
537,TRANSWOLFF,D11,Dezembro,2024,152266,2024-12-01
538,TRANSCAP,D12,Dezembro,2024,117624,2024-12-01


In [29]:
base_path = path.join('data_output', 'urbanismo')

if not path.exists(base_path):
    makedirs(base_path)

for name, df in [
                 ('atendimentos_viagens', df_atd),
                 ('loa_atende', df_loa),
                 ('quilometragemKM', df_km_melted)
                 ]:
    filepath = path.join(base_path, f'{name}.csv')

    df.to_csv(filepath,
              index=False,
              sep=';',
              decimal=',',
              encoding='latin1')