# 0. Packages

!pip install pandas

In [17]:
import pandas as pd
import os

# 1. Load and clean EDF tables

In [18]:
# Read every raw EDF CSV export into a dict of DataFrames keyed by a short table name

raw_EDF_folder = "../data/raw/EDF/"

# Short table name -> CSV file name inside raw_EDF_folder
edf_csv_files = {
    "remuneration": "bilan-social-d-edf-sa-remuneration-et-promotions.csv",
    "formation": "bilan-social-d-edf-sa-formation.csv",
    "droit": "bilan-social-d-edf-sa-droit-du-travail.csv",
    "condition": "bilan-social-d-edf-sa-autres-conditions-de-travail.csv",
    "absenteisme": "bilan-social-d-edf-sa-absenteisme.csv",
    "effectif": "bilan-social-d-edf-sa-effectifs-et-repartition-par-age-statut-et-sexe.csv",
    "handicap": "bilan-social-d-edf-sa-salaries-en-situation-de-handicap.csv",
    "exterieur": "bilan-social-d-edf-sa-travailleurs-exterieurs.csv",
}

# Every file is parsed with ";" as the field separator
dfs = {
    table_name: pd.read_csv(raw_EDF_folder + file_name, sep=";")
    for table_name, file_name in edf_csv_files.items()
}

In [19]:
# For each table name used as a key of `dfs`, the columns to remove from that table

mapping_cols_to_drop = {
    "remuneration": [
        "Spatial perimeter", "Indicator", "Type of contract",
        "Employee category", "M3E classification", "Gender", "Unit",
    ],
    "formation": [
        "Spatial perimeter", "Indicator", "Employee category",
        "M3E classification", "Gender", "Unit",
    ],
    "droit": [
        "Spatial perimeter", "Indicator", "Type of legal action", "Unit",
    ],
    "condition": [
        "Spatial perimeter", "Indicator", "Type of contract",
        "Time range", "Employee category", "Gender", "Unit",
    ],
    "absenteisme": [
        "Spatial perimeter", "Indicator", "Type of contract",
        "Employee category", "Gender", "Unit",
    ],
    "effectif": [
        "Spatial perimeter", "Indicator", "Type of contract",
        "Employee category", "Employee subcategory", "Gender",
        "M3E classification", "Nationality", "Seniority",
        "Age bracket", "Unit",
    ],
    "handicap": [
        "Spatial perimeter", "Indicator", "Type of contract",
        "Employee category", "Gender", "Unit",
    ],
    "exterieur": [
        "Spatial perimeter", "Indicator",
        "Employee category", "Gender", "Unit",
    ],
}



In [20]:
def drop_columns(dfs: dict, mapping_cols_to_drop: dict) -> dict:
    """
    Drop, for each DataFrame, the columns listed for it in mapping_cols_to_drop.

    The input dictionary is not modified: a new dictionary is built and
    returned, so the caller keeps access to the original tables and the
    function can be called again on the same inputs without failing on
    columns that were already removed.

    args:
        dfs (dict): dictionary mapping a table name to its DataFrame
        mapping_cols_to_drop (dict): dictionary mapping each table name to the
            list of columns to drop from that table

    return:
        dict: new dictionary with the same keys as dfs, whose values are the
            DataFrames without the dropped columns

    raises:
        KeyError: if a table name has no entry in mapping_cols_to_drop, or if
            a listed column is missing from its DataFrame (raised by
            DataFrame.drop)
    """
    # DataFrame.drop returns a new frame, so neither the input frames nor the
    # input dict are altered.
    return {
        df_name: df.drop(columns=mapping_cols_to_drop[df_name])
        for df_name, df in dfs.items()
    }

In [21]:
# Remove from each loaded table the columns listed for it in mapping_cols_to_drop
dfs_cleaned = drop_columns(
    dfs=dfs,
    mapping_cols_to_drop=mapping_cols_to_drop,
)

In [22]:
def save_dfs(dfs: dict, output_folder) -> None:
    """
    Write each DataFrame of dfs to "<output_folder>/<name>.csv".

    Files are written with ";" as separator and without the index column.
    The output folder is created (including parents) when it does not exist.

    args:
        dfs (dict): dictionary mapping a table name to its DataFrame; the name
            is used as the file name (without extension)
        output_folder (str or path-like): folder receiving the CSV files

    return:
        None
    """
    # DataFrame.to_csv does not create missing directories; make sure the
    # target exists. An empty folder means "current directory", which
    # os.makedirs would reject, so it is skipped.
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    for df_name, df in dfs.items():
        output_path = os.path.join(output_folder, f"{df_name}.csv")
        df.to_csv(output_path, index=False, sep=";")

In [24]:
# Destination folder; one "<table name>.csv" file is written per entry of dfs_cleaned
cleaned_EDF_folder = "../data/cleaned/EDF/"
save_dfs(dfs=dfs_cleaned, output_folder=cleaned_EDF_folder)