In [7]:
import sys
from pathlib import Path

import pandas as pd

# Put the notebook's parent directory on sys.path so the project packages
# imported in the next cell (extract, load, transform) can be resolved.
# The membership check keeps this cell idempotent: re-running it does not
# stack duplicate entries onto sys.path.
_project_root = str(Path().resolve().parent)
if _project_root not in sys.path:
    sys.path.append(_project_root)

In [8]:
from extract.get_raw import extract_parquet, insert_parquet_to_duck
from load.show_data import load_duckdb_data
from transform.transform_columns import rename_db_columns
from transform.transform_db import transform_db
# Location of the DuckDB file: <parent of the working directory>/data/db/db.db
db_path = Path().resolve().parent.joinpath("data", "db", "db.db")

In [9]:
# Display the resolved database path to confirm it points at the expected file.
db_path

PosixPath('/home/gutto/Repos/dq-sus/src/data/db/db.db')

In [10]:
def main_function(
    disease: str = "CHIK",
    db_path: Path = db_path,
    years: tuple = (2021, 2022, 2023),
) -> pd.DataFrame:
    """
    Run the extract -> insert -> rename -> transform -> load pipeline for one disease.

    Steps, in order:
        1. ``extract_parquet(disease, years)`` fetches the raw data.
        2. ``insert_parquet_to_duck`` receives the result of step 1.
        3. ``rename_db_columns(db_path, "sinan")`` renames the columns.
        4. ``transform_db(db_path)`` applies the database transformations.
        5. ``load_duckdb_data("alarms_severities")`` reads the result back.

    Args:
        disease (str): The disease to process data for. Default is "CHIK".
        db_path (Path): The path to the database. Default is the module-level
            ``db_path`` as it was when this function was defined.
        years (tuple): Years to extract. Default is (2021, 2022, 2023), the
            values that were previously hard-coded. Any iterable of ints is
            accepted; it is converted to a list before being passed on.

    Returns:
        pd.DataFrame: The processed data loaded into a DataFrame.

    Raises:
        ValueError: If ``years`` is empty.

    NOTE(review): ``insert_parquet_to_duck`` and ``load_duckdb_data`` are
    called without ``db_path``; presumably they resolve the database location
    on their own. Confirm they target the same file as ``db_path``, otherwise
    a non-default ``db_path`` would only affect the rename/transform steps.
    """
    # extract_parquet was always given a list, so keep handing it one.
    selected_years = list(years)
    if not selected_years:
        raise ValueError("years must contain at least one year")

    raw_data = extract_parquet(disease, selected_years)
    insert_parquet_to_duck(raw_data)
    rename_db_columns(db_path, "sinan")
    transform_db(db_path)
    return load_duckdb_data("alarms_severities")


# Run the whole pipeline with the default arguments and keep the result.
# Per the log output of this cell, an already existing database is
# overwritten by this run.
df = main_function()

2024-11-14 21:11:27,865 - INFO - Database already exists, it will be overwritten.
2024-11-14 21:11:27,876 - INFO - Inserting data from /home/gutto/Repos/dq-sus/src/data/parquet/CHIKBR21.parquet into the database.
2024-11-14 21:11:29,926 - INFO - Inserting data from /home/gutto/Repos/dq-sus/src/data/parquet/CHIKBR22.parquet into the database.


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

2024-11-14 21:11:37,985 - INFO - Inserting data from /home/gutto/Repos/dq-sus/src/data/parquet/CHIKBR23.parquet into the database.


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

2024-11-14 21:11:45,643 - INFO - All data inserted into the database successfully.
2024-11-14 21:11:45,644 - INFO - Column mapping for 'english' loaded successfully.
2024-11-14 21:11:45,698 - INFO - All columns renamed successfully.


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

2024-11-14 21:11:59,170 - INFO - Loading data from table 'alarms_severities' in DuckDB database.


Database transformed successfully!


2024-11-14 21:11:59,699 - INFO - Data loaded successfully.


In [11]:
# Sanity check: the pipeline result should be a pandas DataFrame.
type(df)

pandas.core.frame.DataFrame

In [12]:
# Preview the first rows of the loaded table.
# NOTE(review): in the captured output every column except notification_id
# is empty for these rows - confirm that is expected for this table.
df.head()

Unnamed: 0,notification_id,hypotension_alarm,platelet_alarm,vomiting_alarm,bleeding_alarm,lethargy_alarm,hematocrit_alarm,abdominal_pain_alarm,lethargy_alarm_1,hepatomegaly_alarm,...,other_organs_severity,severity_date,hemorrhaegic_manifestations,epistaxis,gengival_bleeding,metrorrhagia,petechiae,hematuria,bleeding,complications
0,1,,,,,,,,,,...,,,,,,,,,,
1,2,,,,,,,,,,...,,,,,,,,,,
2,3,,,,,,,,,,...,,,,,,,,,,
3,4,,,,,,,,,,...,,,,,,,,,,
4,5,,,,,,,,,,...,,,,,,,,,,
