In [2]:
import duckdb

def set_attr(con):
    """Apply the session-level DuckDB settings shared by every pipeline step.

    Args:
        con: An open DuckDB connection (any object exposing ``execute(sql)``).

    Returns:
        None. Each ``SET`` statement is sent on its own, in the order listed.
    """
    session_settings = (
        "SET max_memory='8GB';",                    # memory cap
        "SET threads=4;",                           # worker thread count
        "SET enable_progress_bar=1;",               # progress widget in the notebook
        "SET temp_directory = 'C:/Projet/tmp/';",   # temp/spill directory
        "SET disable_parquet_prefetching = TRUE;",
        "SET parquet_metadata_cache = TRUE;",
    )
    for statement in session_settings:
        con.execute(statement)

def transform_csv(con, data_dir="C:/DUCK/Projet_Data/Raw_data"):
    """Convert the twelve monthly ``A2024MM.csv.gz`` files to Parquet.

    For each month 01..12 a ``COPY (SELECT * FROM read_csv_auto(...)) TO ...``
    statement is executed, writing ``A2024MM.parquet`` next to the CSV.

    Args:
        con: An open DuckDB connection (any object exposing ``execute(sql)``).
        data_dir: Directory holding the ``.csv.gz`` inputs and receiving the
            ``.parquet`` outputs. Defaults to the location that used to be
            hard-coded, so existing ``transform_csv(con)`` calls behave the
            same. An empty string means "relative to the working directory".

    Raises:
        Whatever ``con.execute`` raises; the first failing month aborts the
        loop (no per-month error handling here).
    """
    # Build a directory prefix that is safe to embed in a SQL string literal:
    # forward slashes only, exactly one trailing slash, single quotes doubled.
    base = str(data_dir).replace("\\", "/")
    if base and not base.endswith("/"):
        base += "/"
    base = base.replace("'", "''")

    con.execute("CREATE SCHEMA IF NOT EXISTS raw;")

    for month in range(1, 13):
        csv_path = f"{base}A2024{month:02d}.csv.gz"
        parquet_path = f"{base}A2024{month:02d}.parquet"
        con.execute(f"""
            COPY (
                SELECT * FROM read_csv_auto('{csv_path}')
            ) TO '{parquet_path}' (FORMAT PARQUET);
        """)


def load_parquet(con, data_dir="C:/DUCK/Projet_Data/Raw_data"):
    """Load the twelve monthly Parquet files into ``raw.A2024MM`` tables.

    Each month is loaded with ``CREATE TABLE IF NOT EXISTS ... AS SELECT *
    FROM read_parquet(...)``, so a table that already exists is left as is.
    A failure on one month is reported on stdout and the loop continues with
    the next month.

    Args:
        con: An open DuckDB connection (any object exposing ``execute(sql)``).
        data_dir: Directory containing the ``A2024MM.parquet`` files. Defaults
            to the location that used to be hard-coded, so existing
            ``load_parquet(con)`` calls behave the same. An empty string means
            "relative to the working directory".

    Returns:
        None. Progress is reported through ``print``.
    """
    # Build a directory prefix that is safe to embed in a SQL string literal:
    # forward slashes only, exactly one trailing slash, single quotes doubled.
    base = str(data_dir).replace("\\", "/")
    if base and not base.endswith("/"):
        base += "/"
    base = base.replace("'", "''")

    con.execute("CREATE SCHEMA IF NOT EXISTS raw;")
    for month in range(1, 13):
        name = f"A2024{month:02d}"
        try:
            con.execute(f"""
                CREATE TABLE IF NOT EXISTS raw.{name} AS
                SELECT * FROM read_parquet('{base}{name}.parquet');
            """)
            print(f"[✔] Successfully loaded {name}.parquet")
        except Exception as e:
            # Deliberately best-effort: report and keep going with other months.
            print(f"[✘] Failed to load {name}.parquet: {e}")


def stage(con, create_table=False, drop_raw=False):
    """Build the UNION ALL staging query over ``raw.A202401`` .. ``raw.A202412``.

    The function always creates the ``stg`` schema, builds one ``SELECT`` per
    monthly raw table (projecting and renaming the columns listed in
    ``columns``), joins them with ``UNION ALL`` and prints the resulting SQL.

    The table creation and raw-table cleanup used to be disabled by wrapping
    them in bare string literals; they are now explicit opt-in flags so the
    default call ``stage(con)`` behaves exactly as before (print only).

    Args:
        con: An open DuckDB connection (any object exposing ``execute(sql)``).
        create_table: When true, run ``CREATE OR REPLACE TABLE
            stg.transactions AS <union query>`` and report the outcome.
        drop_raw: When true, drop the twelve ``raw.A2024MM`` tables, reporting
            each outcome. Skipped if ``create_table`` was requested and
            failed, so the raw data is not lost without a staged copy.

    Returns:
        None. The SQL and all progress messages go to stdout.
    """
    con.execute("CREATE SCHEMA IF NOT EXISTS stg;")
    # "<source column> <staging alias>" pairs, embedded verbatim in each SELECT.
    columns = """FLT_DEP_MNT mnt_depassement,
	FLT_PAI_MNT mnt_pay,
	FLT_REM_MNT mnt_rem,
	SOI_ANN ann_soin,
	SOI_MOI mois_soin,
	AGE_BEN_SNDS age,
	BEN_RES_REG region_ben,
	BEN_SEX_COD sexe,
	ASU_NAT nat_assurance,
	CPT_ENV_TYP type_envlp,
	DRG_AFF_NAT nat_destinataire,
    PRS_FJH_TYP forfait_journalier,
	PRS_PPU_SEC code_secteur,
	PRS_REM_TAU taux_remb,
	PRS_REM_TYP type_remb"""

    monthly_tables = [f"raw.A2024{month:02d}" for month in range(1, 13)]

    union_sql = "\nUNION ALL\n".join(
        f"""
            SELECT {columns}
            FROM {table}
        """
        for table in monthly_tables
    )
    print(union_sql)

    if create_table:
        try:
            con.execute(f"""
            CREATE OR REPLACE TABLE stg.transactions AS
            {union_sql};
        """)
            print("[✔] Successfully created stg.transactions")
        except Exception as e:
            print(f"[✘] Failed to create stg.transactions: {e}")
            # Never drop the raw tables when the staged copy was not built.
            return

    if drop_raw:
        for table in monthly_tables:
            try:
                con.execute(f"DROP TABLE IF EXISTS {table};")
                print(f"[✔] Successfully deleted {table}")
            except Exception as e:
                print(f"[✘] Failed to delete {table}: {e}")

def transform(con):
    """Execute the ``type_donnee.sql`` script on ``con`` and report the outcome.

    The script is read from the current working directory. The status messages
    refer to ``stg.transactions_libelles``, which the script is presumably
    responsible for creating (its content is not visible from here).

    Args:
        con: An open DuckDB connection (any object exposing ``execute(sql)``).

    Raises:
        OSError: If the script cannot be opened; only errors raised while
            executing the SQL are caught and printed.
    """
    with open('type_donnee.sql', 'r') as script:
        statements = script.read()

    try:
        con.execute(statements)
        print("[✔] Successfully created stg.transactions_libelles")
    except Exception as err:
        print(f"[✘] Failed to create stg.transactions_libelles: {err}")
        
def dwh(con):
    """Execute the ``3. dwh.sql`` script on ``con`` and report the outcome.

    The script is read from the current working directory and sent to the
    connection in a single ``execute`` call.

    Args:
        con: An open DuckDB connection (any object exposing ``execute(sql)``).

    Raises:
        OSError: If the script cannot be opened; only errors raised while
            executing the SQL are caught and printed.
    """
    with open('3. dwh.sql', 'r') as script:
        statements = script.read()

    try:
        con.execute(statements)
        print("[✔] Successfully created dwh")
    except Exception as err:
        print(f"[✘] Failed to create dwh: {err}")
    

In [None]:
# Load step: open the on-disk database, apply the session settings, then load
# the monthly Parquet files into the raw schema.
with duckdb.connect("projet.db") as con:
    set_attr(con)
    # CSV -> Parquet conversion is left disabled here
    # (presumably the Parquet files already exist — TODO confirm).
    # transform_csv(con)
    load_parquet(con)

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

[✔] Successfully loaded A202401.parquet


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

[✔] Successfully loaded A202402.parquet


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

[✔] Successfully loaded A202403.parquet


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

[✔] Successfully loaded A202404.parquet


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

[✔] Successfully loaded A202405.parquet


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

[✔] Successfully loaded A202406.parquet


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

[✔] Successfully loaded A202407.parquet


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

[✔] Successfully loaded A202408.parquet


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

[✔] Successfully loaded A202409.parquet


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

[✔] Successfully loaded A202410.parquet


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

[✔] Successfully loaded A202411.parquet


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

[✔] Successfully loaded A202412.parquet


In [5]:
# Staging step: reopen the database, apply the session settings, run stage().
# NOTE(review): the saved output of this cell lists a `CPL_COD compl_acte`
# column that the column list inside stage() does not select, so the output
# looks stale — re-run the cell to refresh it.
with duckdb.connect("projet.db") as con:
    set_attr(con)
    stage(con)


            SELECT FLT_DEP_MNT mnt_depassement,
	FLT_PAI_MNT mnt_pay,
	FLT_REM_MNT mnt_rem,
	SOI_ANN ann_soin,
	SOI_MOI mois_soin,
	AGE_BEN_SNDS age,
	BEN_RES_REG region_ben,
	BEN_SEX_COD sexe,
	ASU_NAT nat_assurance,
    CPL_COD compl_acte,
	CPT_ENV_TYP type_envlp,
	DRG_AFF_NAT nat_destinataire,
    PRS_FJH_TYP forfait_journalier,
	PRS_PPU_SEC code_secteur,
	PRS_REM_TAU taux_remb,
	PRS_REM_TYP type_remb
            FROM raw.A202401
        
UNION ALL

            SELECT FLT_DEP_MNT mnt_depassement,
	FLT_PAI_MNT mnt_pay,
	FLT_REM_MNT mnt_rem,
	SOI_ANN ann_soin,
	SOI_MOI mois_soin,
	AGE_BEN_SNDS age,
	BEN_RES_REG region_ben,
	BEN_SEX_COD sexe,
	ASU_NAT nat_assurance,
    CPL_COD compl_acte,
	CPT_ENV_TYP type_envlp,
	DRG_AFF_NAT nat_destinataire,
    PRS_FJH_TYP forfait_journalier,
	PRS_PPU_SEC code_secteur,
	PRS_REM_TAU taux_remb,
	PRS_REM_TYP type_remb
            FROM raw.A202402
        
UNION ALL

            SELECT FLT_DEP_MNT mnt_depassement,
	FLT_PAI_MNT mnt_pay,
	FLT_REM_MNT mn

In [10]:
# Transform step: reopen the database, apply the session settings, then run
# the type_donnee.sql script through transform().
with duckdb.connect("projet.db") as con:
    set_attr(con)
    transform(con)

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

[✔] Successfully created stg.transactions_libelles


In [None]:
# Warehouse step: reopen the database, apply the session settings, then run
# the "3. dwh.sql" script through dwh().
# NOTE(review): the saved output shows only a progress bar and no
# success/failure line, so this run apparently did not finish — confirm.
with duckdb.connect("projet.db") as con:
    set_attr(con)
    dwh(con)

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [3]:
# Open the database file and close it again immediately.
# NOTE(review): presumably a manual way to release a connection/lock left
# behind by an interrupted cell — confirm, and drop the cell if not needed.
con = duckdb.connect("projet.db")
con.close()