# Transformation des données des tables raw_data vers la table indicateur pour le schéma public

In [None]:
# import des librairies nécessaires pour l'exploration
import pandas as pd
from pandas_profiling import ProfileReport

from src.postgresql_connector import PostgreSQLConnector

# Raise the pandas display limits to 100 columns and 350 rows.
pd.options.display.max_columns = 100
pd.options.display.max_rows = 350

## Lire des données d'une table

In [None]:
def read_table(table: str, schema = 'public') -> pd.DataFrame:
    """Load every row and column of ``schema.table`` into a DataFrame.

    Args:
        table: Name of the table to read.
        schema: Name of the schema holding the table; defaults to ``'public'``.

    Returns:
        The result of ``SELECT *`` on the table, as a ``pandas.DataFrame``.

    Note:
        Both names are interpolated into the SQL text as-is, so they must come
        from trusted code, never from user input. A new ``PostgreSQLConnector``
        is created on every call.
    """
    connector = PostgreSQLConnector()
    query = f'''
                SELECT *
                FROM {schema}.{table}
                '''
    return pd.read_sql_query(sql=query, con=connector.connection)

In [None]:
def execute_sql(sql_request: str) -> pd.DataFrame:
    """Run an SQL query and return its result set as a DataFrame.

    Args:
        sql_request: Full text of the SQL query to execute.

    Returns:
        The rows returned by the query, as a ``pandas.DataFrame``.

    Note:
        A new ``PostgreSQLConnector`` is created on every call.
    """
    connector = PostgreSQLConnector()
    return pd.read_sql_query(sql=sql_request, con=connector.connection)

### Lecture des tables 

In [None]:
# Load the five source tables from the ``raw_data`` schema, one DataFrame each.
# The generator is consumed in order, so the tables are read in the order listed.
(
    metadata_indicateur,
    fact_progress_indicateur,
    dim_structures,
    dim_tree_nodes,
    type_indicateur,
) = (
    read_table(raw_table, 'raw_data')
    for raw_table in (
        'metadata_indicateur',
        'fact_progress_indicateur',
        'dim_structures',
        'dim_tree_nodes',
        'indicateur_type',
    )
)

In [None]:
# Build a profiling report of the indicator metadata and write it to an HTML file.
rapport_html = 'rapport_metadata_indicateur.html'
prof = ProfileReport(metadata_indicateur)
prof.to_file(output_file=rapport_html)

### Requête SQL

#### Dfakto indicateurs

In [None]:
# Every column of the progress facts joined to their tree node and structure,
# restricted to the 'Réforme' structure. Rows are sorted by effect_id, then by
# most recent date_valeur_cible, with NULL dates last.
# Plain string literal: the query has no placeholders, so no f-string is needed.
sql_request_dfakto = ''' 
    SELECT *
    FROM raw_data.fact_progress_indicateur fpi 
        JOIN raw_data.dim_tree_nodes dtn ON fpi.tree_node_id=dtn.tree_node_id
        JOIN raw_data.dim_structures ds ON dtn.structure_id=ds.structure_id
    WHERE ds.structure_name='Réforme'
    ORDER BY effect_id, date_valeur_cible DESC NULLS LAST
'''

In [None]:
# Run the Dfakto join query and load its result.
dfakto_indicateurs = execute_sql(sql_request_dfakto)
# ``SELECT *`` over the joins repeats the join-key columns. Keep only the first
# of each group of columns with identical content. Selecting with a boolean
# mask (instead of transposing, dropping and transposing back) keeps the same
# columns while preserving each column's original dtype.
duplicate_columns = dfakto_indicateurs.T.duplicated().to_numpy()
dfakto_indicateurs = dfakto_indicateurs.loc[:, ~duplicate_columns]
# Report the size of the result and the number of distinct effect_id values.
print(dfakto_indicateurs.shape)
print(dfakto_indicateurs.effect_id.nunique())

In [None]:
# Count how many rows each effect_id value has in the Dfakto result.
dfakto_indicateurs.effect_id.value_counts()

#### Public indicateurs

In [None]:
# One row per effect_id: DISTINCT ON combined with the ORDER BY keeps, for each
# effect_id, the row with the most recent date_valeur_cible (NULL dates last).
# Columns are renamed to the names selected for the target table.
#
# The year is formatted with 'FM9999': a bare '9999' template makes to_char
# reserve a leading blank for the sign, which would yield ' 2022' instead of
# '2022'.
# Plain string literal: the query has no placeholders, so no f-string is needed.
sql_request_public_indicateur_with_latest_date_valeur_cible = '''
    SELECT DISTINCT ON (effect_id) 
        indic_id as id,
        indic_nom as nom,
        indic_parent_ch as chantier_id,
        valeur_cible as objectif_valeur_cible,
        bounded_progress as objectif_taux_avancement, 
        to_char(extract(year from date_valeur_cible), 'FM9999') as objectif_date_valeur_cible,
        indic_type as type_id,
        indic_type_name as type_nom,
        indic_is_baro as est_barometre,
        indic_is_phare as est_phare
    FROM raw_data.fact_progress_indicateur fpi 
        JOIN raw_data.dim_tree_nodes dtn ON fpi.tree_node_id=dtn.tree_node_id
        JOIN raw_data.dim_structures ds ON dtn.structure_id=ds.structure_id
        INNER JOIN raw_data.metadata_indicateur mi on mi.indic_nom=fpi.effect_id
        LEFT JOIN raw_data.indicateur_type it on it.indic_type_id=mi.indic_type
    WHERE ds.structure_name='Réforme'
    ORDER BY effect_id, date_valeur_cible DESC NULLS LAST
'''

In [None]:
# One row per effect_id: DISTINCT ON combined with the ORDER BY keeps, for each
# effect_id, the row with the highest period_id. Besides the target columns it
# also selects the initial and current values with their dates.
#
# The year is formatted with 'FM9999': a bare '9999' template makes to_char
# reserve a leading blank for the sign, which would yield ' 2022' instead of
# '2022'.
# Plain string literal: the query has no placeholders, so no f-string is needed.
sql_request_public_indicateur = '''
    SELECT DISTINCT ON (effect_id) 
        indic_id as id,
        indic_nom as nom,
        indic_parent_ch as chantier_id,
        valeur_initiale,
        date_valeur_initiale, 
        valeur_actuelle,
        date_valeur_actuelle, 
        valeur_cible as objectif_valeur_cible,
        bounded_progress as objectif_taux_avancement, 
        to_char(extract(year from date_valeur_cible), 'FM9999') as objectif_date_valeur_cible,
        indic_type as type_id,
        indic_type_name as type_nom,
        indic_is_baro as est_barometre,
        indic_is_phare as est_phare
    FROM raw_data.fact_progress_indicateur fpi 
        JOIN raw_data.dim_tree_nodes dtn ON fpi.tree_node_id=dtn.tree_node_id
        JOIN raw_data.dim_structures ds ON dtn.structure_id=ds.structure_id
        INNER JOIN raw_data.metadata_indicateur mi on mi.indic_nom=fpi.effect_id
        LEFT JOIN raw_data.indicateur_type it on it.indic_type_id=mi.indic_type
    WHERE ds.structure_name='Réforme'
    ORDER BY effect_id, period_id DESC
'''

In [None]:
# Run the public-indicator query (one row per effect_id) and load the result.
indicateurs = execute_sql(sql_request_public_indicateur)
# (rows, columns) of the result; shown as the cell output when run in a notebook.
indicateurs.shape

In [None]:
# Summary statistics for every column; include='all' covers non-numeric columns too.
indicateurs.describe(include='all')

In [None]:
# Bare expression: shows the DataFrame as the cell output when run in a notebook.
indicateurs