In [23]:
import os
import pandas as pd


# Source folders holding the per-year CSV exports, one folder per vaccine.
# The paths are built with os.path.join instead of hard-coded backslashes:
# sequences such as "\p" and "\O" are invalid string escapes (SyntaxWarning on
# recent Python versions) and a literal backslash is not a path separator
# outside Windows, which breaks os.path.basename / os.path.join downstream.
directories = [
    os.path.join("raw_data", "polio", "Oral Poliomielite (VOP)"),
    os.path.join("raw_data", "polio", "Poliomielite inativada (VIP)"),
]


def combine_vacina_csv(
    directory: str,
    output_path: "str | None" = os.path.join("raw_data", "polio", "merged_polio_data.csv"),
) -> pd.DataFrame:
    """Concatenate every CSV file of one vaccine folder into a single DataFrame.

    Each file is read as a ``;``-separated, latin-1 encoded CSV. Two columns
    are added to every frame before concatenation:

    * ``ano``    - characters ``[-8:-4]`` of the file name (the four characters
      right before the ``.csv`` extension), kept as a string.
    * ``vacina`` - characters ``[-4:-1]`` of the folder name, e.g. ``"VOP"``
      for ``"Oral Poliomielite (VOP)"``.

    Args:
        directory: Folder containing the CSV files of a single vaccine.
        output_path: Where the merged frame is written as CSV (without the
            index). Pass ``None`` to skip writing. The default is one fixed
            file, so calling the function for several folders without an
            explicit ``output_path`` overwrites the previous export.

    Returns:
        The concatenated DataFrame with a fresh ``0..n-1`` index.

    Raises:
        ValueError: If ``directory`` contains no ``.csv`` file.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the merged frame reproducible between runs and machines.
    csv_files = sorted(f for f in os.listdir(directory) if f.endswith(".csv"))
    if not csv_files:
        raise ValueError(f"No CSV files found in directory: {directory!r}")

    # normpath drops a trailing separator, which would otherwise make
    # basename return an empty string.
    prefix = os.path.basename(os.path.normpath(directory))[-4:-1]

    dataframes = []
    for file in csv_files:
        df = pd.read_csv(os.path.join(directory, file), encoding="latin_1", sep=";")
        df["ano"] = file[-8:-4]
        df["vacina"] = prefix
        dataframes.append(df)

    merged_df = pd.concat(dataframes, ignore_index=True)

    if output_path is not None:
        merged_df.to_csv(output_path, index=False)
    return merged_df

In [24]:
import math

def create_df_with_patient_age_vop(df: pd.DataFrame)-> pd.DataFrame:
    """Reshape the wide per-age-group table into one row per state, year and age group.

    For every input row and every age-group column that holds a value, one
    output row is produced. Missing cells (NaN, ``None`` or ``pd.NA``) are
    skipped.

    Args:
        df: Frame with the columns ``"Unidade da Federação"``, ``"vacina"``,
            ``"ano"`` and the seven age-group columns listed in
            ``age_columns`` below.

    Returns:
        A DataFrame with the columns ``uf``, ``fk_cod_uf`` (first two
        characters of ``uf``), ``vacina``, ``ano``, ``idade`` (position of the
        age-group column, 0-6) and ``quantidade``. Rows whose ``fk_cod_uf`` is
        ``"To"`` are removed; the remaining rows keep their original index
        labels.
    """
    # The position in this tuple is the value stored in "idade".
    age_columns = (
        "Menor de 1 ano",
        "1 ano",
        "2 anos",
        "3 anos",
        "4 anos",
        "5 a 6 anos",
        "7 anos e mais",
    )

    records = []
    for _, row in df.iterrows():
        uf = row["Unidade da Federação"]
        for age_index, age in enumerate(age_columns):
            quantity = row[age]
            # pd.isna also recognises None and pd.NA, on which math.isnan
            # raises TypeError.
            if pd.isna(quantity):
                continue
            records.append((uf, uf[:2], row["vacina"], row["ano"], age_index, quantity))

    result_df = pd.DataFrame(
        records,
        columns=["uf", "fk_cod_uf", "vacina", "ano", "idade", "quantidade"],
    )
    # Rows whose label starts with "To" are dropped (presumably the "Total"
    # summary line of the export - confirm against the raw files).
    return result_df[result_df["fk_cod_uf"] != "To"]


# Merge the per-year CSVs of each vaccine folder: index 0 is VOP, index 1 is VIP.
# NOTE: both calls write to the same merged_polio_data.csv, so that file ends
# up holding only the VIP data.
vop_df, vip_df = [combine_vacina_csv(directories[position]) for position in (0, 1)]

# Reshape each merged frame into one row per state / year / age group.
vop, vip = [create_df_with_patient_age_vop(frame) for frame in (vop_df, vip_df)]






In [25]:
import numpy as np

from psycopg2.extensions import register_adapter, AsIs
import os
from dotenv import load_dotenv
import psycopg2.extras

# Load variables from a .env file into the process environment; the DB_*
# connection settings are read from it with os.getenv further down.
load_dotenv()


def adapt_numpy_int64(numpy_int64):
    """Adapt a ``numpy.int64`` value for use as a psycopg2 query parameter.

    The value is wrapped in ``AsIs`` so that its string representation is
    placed into the SQL statement unquoted.
    """
    as_sql_literal = AsIs(numpy_int64)
    return as_sql_literal


# Make psycopg2 use the adapter above whenever a numpy.int64 is passed as a
# query parameter.
register_adapter(np.int64, adapt_numpy_int64)

def create_vacination_pertype_table(df: pd.DataFrame, drop=False) -> None:
    """Create the ``vacinacao_por_tipo`` table if needed and insert ``df`` into it.

    The connection settings are read from the ``DB_HOST``, ``DB_PORT``,
    ``DB_NAME``, ``DB_USER`` and ``DB_PASS`` environment variables.

    Everything runs in a single transaction: it is committed when all
    statements succeed and rolled back if any of them raises. The connection
    is closed in both cases, so a failed insert (for example a primary-key
    conflict when the same rows are loaded twice) no longer leaks the
    connection.

    Args:
        df: Rows to insert. Its columns must be, in order: ``uf``,
            ``fk_cod_uf``, ``vacina``, ``ano``, ``idade``, ``quantidade``.
        drop: When true, drop the table first so it is rebuilt from scratch.

    Raises:
        psycopg2.Error: Any database error is propagated after the rollback.
    """
    conn = psycopg2.connect(
        host=os.getenv("DB_HOST"),
        port=os.getenv("DB_PORT"),
        dbname=os.getenv("DB_NAME"),
        user=os.getenv("DB_USER"),
        password=os.getenv("DB_PASS"),
    )
    try:
        # `with conn` commits on success and rolls back on an exception, but
        # it does not close the connection - hence the explicit finally.
        with conn:
            with conn.cursor() as cur:
                if drop:
                    cur.execute("DROP TABLE IF EXISTS vacinacao_por_tipo")

                cur.execute(
                    """
                    CREATE TABLE IF NOT EXISTS vacinacao_por_tipo (
                        uf VARCHAR(255),
                        fk_cod_uf INT,
                        vacina VARCHAR(50),
                        ano INTEGER,
                        idade INTEGER,
                        quantidade INTEGER,
                        CONSTRAINT PK_vacinacao_por_tipo PRIMARY KEY (fk_cod_uf,ano,idade,vacina),
                        CONSTRAINT fk_cod_uf FOREIGN KEY (fk_cod_uf) REFERENCES estados(cod_uf)
                    )
                    """
                )

                # One parameter tuple per DataFrame row, sent in batches.
                psycopg2.extras.execute_batch(
                    cur,
                    """
                    INSERT INTO vacinacao_por_tipo (uf, fk_cod_uf, vacina, ano, idade, quantidade)
                    VALUES (%s, %s, %s, %s, %s, %s)
                    """,
                    df.values,
                )
    finally:
        conn.close()

# Rebuild the table from scratch with the VOP rows, then append the VIP rows
# to the table that now exists.
create_vacination_pertype_table(df=vop, drop=True)
print("VOP table created")
create_vacination_pertype_table(df=vip, drop=False)

VOP table created
