In [1]:
import pandas as pd
import numpy as np
import os


In [2]:
# --- Step 1: Define Master Data & Load Sources ---
print("--- Step 1: Defining master lists and loading source data ---")

# Master list of indicators, in French, as worded in the interim report.
# Order matters: later cells zip this list with master_indicators_en, so the
# entry at position i here must be the French wording of the English entry at
# position i there.
# NOTE: the strings mix typographic (’) and straight (') apostrophes. Keep them
# exactly as written: the keys of baseline_data_fr are matched against these
# strings by exact equality.
master_indicators_fr = [
    "Pourcentage d'événements WASH organisés dans les communes avec la participation de Rotariens haïtiens.",
    "Proportion (%) de participants aux réunions virtuelles sur les projets WASH qui sont des Rotariens haïtiens.",
    "Nombre de visites sur le terrain effectuées chaque trimestre par les Rotariens dans les communautés concernées.",
    "Pourcentage d'interventions approuvées mises en œuvre conformément aux plans d'action des communes.",
    "Nombre de communes disposant de plans d'action qui incluent explicitement chaque aspect W, S, H, WRM (eau, assainissement, hygiène, gestion des ressources en eau).",
    "Nombre d'événements de planification et de coordination WASH avec la DINEPA et les responsables municipaux",
    "Pourcentage de prestataires de services relevant de l'initiative HANWASH contrôlés conformément aux directives de la DINEPA/OREPA acceptées par le bureau du maire",
    "Pourcentage de prestataires de services d'intervention qui sont entièrement responsables conformément aux exigences de la DINEPA/OREPA et du maire",
    "Pourcentage d'utilisateurs satisfaits de la qualité, du caractère abordable et de la fiabilité des services WASH fournis",
    "Nombre moyen de jours nécessaires pour résoudre les pannes des infrastructures hydrauliques",
    "Nombre moyen de jours de service d'eau potable fournis au cours du mois par les prestataires de services",
    "Nombre de visites d'inspection sur place effectuées chaque année par l'OREPA dans les communes d'intervention",
    "Taux de recouvrement des redevances d'eau",
    "Pourcentage de la population des communes d’intervention ayant accès au moins à un service élémentaire d’alimentation en eau potable",
    "Pourcentage de la population des communes d’intervention ayant accès à un service d’alimentation en eau potable (SAEP) géré en toute sécurité",
    "Pourcentage de ménages dans les communes d’intervention disposant d’une source d’eau améliorée disponible en cas de besoin",
    "Pourcentage de ménages qui paient régulièrement l'eau",
    "Pourcentage de la population des communes d’intervention ayant accès au moins à un service élémentaire d’assainissement",
    "Pourcentage de la population des communes d’intervention ayant accès à un service d'assainissement géré en toute sécurité",
    "Pourcentage de la population des communes d’intervention où les excréments sont éliminés en toute sécurité in situ ou transportés et traités hors site",
    "Pourcentage de la population des communes d’intervention ayant accès au moins à un service élémentaire d’hygiène",
    "Pourcentage d’écoles disposant au moins de services élémentaires d'eau, d'assainissement et d'hygiène",
    "Pourcentage d’établissements de santé disposant au moins de services élémentaires d'eau, d'assainissement et d'hygiène",
    "Nombre d'écoles bénéficiant désormais d'un service d'eau potable de base",
    "Nombre d'établissements de santé bénéficiant désormais de services d'eau potable de base",
    "Nombre d'écoles bénéficiant désormais de services d'assainissement de base",
    "Nombre d'établissements de santé bénéficiant désormais de services d'assainissement de base",
    "Nombre d'écoles bénéficiant désormais de services d'hygiène de base",
    "Nombre d'établissements de santé bénéficiant désormais de services d'hygiène de base",
    "Montant cumulé des fonds engagés conformément aux valeurs fondamentales de HANWASH, sur la base d'un protocole d'accord signé avec HANWASH",
    "Pourcentage des fonds engagés qui ont été dépensés (cumulés), sur la base d'un protocole d'accord signé avec HANWASH",
    "Montant dépensé par des acteurs externes dans les zones couvertes par le projet HANWASH, sur la base d'un protocole d'accord signé avec HANWASH",
    "Montant dépensé par des acteurs externes en dehors des zones de projet, conformément aux valeurs fondamentales de HANWASH, soutenu par un protocole d'accord signé avec HANWASH.",
    "Pourcentage de partenaires de mise en œuvre dans les zones du programme HANWASH ayant signé l'accord-cadre de la DINEPA",
    "Nombre de membres du personnel de la DINEPA et des OREPA formés en tant que leaders",
    "Nombre de formations techniques dispensées dans les domaines prioritaires de la DINEPA",
    "Pourcentage des comités de points d'eau (CPE) et des opérateurs professionnels créés qui sont fonctionnels",
    "Pourcentage de communautés d’intervention qui ont atteint le statut de fin de la défécation à l’air libre (FDAL) en utilisant l’approche ATPC",
    "Pourcentage de la population des communes d’intervention qui paie pour le service de l’eau",
    "Pourcentage des écoles avec des points d’eau améliorés",
    "Pourcentage d’établissements de santé avec des points d’eau améliorés",
    "Pourcentage des écoles avec des latrines améliorées et séparées par sexe",
    "Pourcentage d’établissements de santé avec des latrines améliorées et séparées par sexe",
    "Pourcentage des écoles avec des services d’hygiène de base",
    "Pourcentage d’établissements de santé avec des services d’hygiène de base",
    "Pourcentage d’écoles avec un service d’eau potable géré en toute sécurité",
    "Pourcentage d’établissements de santé avec un service d’eau potable géré en toute sécurité"
]

# English translations of the master indicators.
# Entries are listed in the same order as master_indicators_fr: the two lists
# are zipped together later in the notebook, so position i here must be the
# translation of position i in the French list.
# These strings are compared against the workbook's 'Indicator' column by exact
# equality, so wording and trailing punctuation are significant.
master_indicators_en = [
    "% of WASH events organized in the communes with the participation of Haitian Rotarians.",
    "Proportion (%) of participants in virtual meetings on WASH projects who are Haitian Rotarians.",
    "Number of field visits conducted each quarter by Rotarians in the relevant communities.",
    "% of approved interventions implemented in accordance with commune action plans.",
    "Number of communes with action plans that explicitly include each aspect W, S, H, WRM (water, sanitation, hygiene, water resources management).",
    "Number of WASH planning and coordination events with DINEPA and municipal officials.",
    "Percentage of service providers under the HANWASH initiative monitored in accordance with DINEPA/OREPA guidelines accepted by the mayor's office.",
    "Percentage of intervention service providers who are fully accountable in accordance with DINEPA/OREPA and mayoral requirements.",
    "% of users satisfied with the quality, affordability, and reliability of the WASH services provided.",
    "Average number of days required to resolve hydraulic infrastructure failures.",
    "Average number of days of drinking water service provided during the month by service providers.",
    "Number of on-site inspection visits conducted each year by OREPA in the intervention communes.",
    "Water fee collection rate.",
    "% of the population in intervention communes with access to at least a basic drinking water service.",
    "% of the population in intervention communes with access to a safely managed drinking water service (WSS).",
    "% of households in intervention communes with an improved water source available when needed.",
    "% of households that regularly pay for water.",
    "% of the population in intervention communes with access to at least a basic sanitation service.",
    "% of the population in intervention communes with access to a safely managed sanitation service.",
    "Percentage of the population in intervention communes where excreta is safely disposed of in-situ or transported and treated off-site.",
    "% of the population in intervention communes with access to at least a basic hygiene service.",
    "% of schools with at least a basic drinking water, sanitation, and hygiene service.",
    "% of healthcare facilities with at least a basic drinking water, sanitation, and hygiene service.",
    "Number of schools now benefiting from a basic drinking water service.",
    "Number of healthcare facilities now benefiting from basic drinking water services.",
    "Number of schools now benefiting from basic sanitation services.",
    "Number of healthcare facilities now benefiting from basic sanitation services.",
    "Number of schools now benefiting from basic hygiene services.",
    "Number of healthcare facilities now benefiting from basic hygiene services.",
    "Cumulative amount of funds committed in accordance with the fundamental values of HANWASH, based on a signed memorandum of understanding with HANWASH.",
    "% of committed funds that have been spent (cumulative), based on a signed memorandum of understanding with HANWASH.",
    "Amount spent by external actors in areas covered by the HANWASH project, based on a signed memorandum of understanding with HANWASH.",
    "Amount spent by external actors outside project areas, in accordance with the fundamental values of HANWASH, supported by a signed memorandum of understanding with HANWASH.",
    "Percentage of implementing partners in HANWASH program areas who have signed the DINEPA framework agreement.",
    "Number of DINEPA and OREPA staff members trained as leaders.",
    "Number of technical trainings provided in DINEPA priority areas.",
    "% of water point committees (CPEs) and professional operators created which are functional.",
    "% of intervention communities that have achieved Open Defecation Free (ODF) status using the CLTS approach.",
    "% of the population in intervention communes who pay for water service.",
    "% of schools with improved water points.",
    "% of healthcare facilities with improved water points.",
    "% of schools with improved and sex-separated latrines.",
    "% of healthcare facilities with improved and sex-separated latrines.",
    "% of schools with basic hygiene services.",
    "% of healthcare facilities with basic hygiene services.",
    "% of schools with a safely managed drinking water service.",
    "% of healthcare facilities with a safely managed drinking water service."
]

# Baseline values with French keys, manually extracted from the Word document's 'Tableau 11'.
# Structure: {French indicator text: {commune name: baseline value}}.
# Every key must match an entry of master_indicators_fr character for character
# (including ’ vs ' apostrophes): the keys are translated through a FR -> EN
# lookup later in the notebook, and a key that is not found there is dropped.
# NOTE(review): all keys are "Pourcentage ..." indicators, so the values are
# presumably percentages — confirm against Tableau 11.
baseline_data_fr = {
    "Pourcentage de la population des communes d’intervention ayant accès au moins à un service élémentaire d’alimentation en eau potable": {"Cavaillon": 44, "Ferrier": 56, "Léogâne": 68, "Pignon": 35, "Terre-Neuve": 37},
    "Pourcentage de la population des communes d’intervention ayant accès à un service d’alimentation en eau potable (SAEP) géré en toute sécurité": {"Cavaillon": 18, "Ferrier": 1, "Léogâne": 10, "Pignon": 2, "Terre-Neuve": 15},
    "Pourcentage de ménages dans les communes d’intervention disposant d’une source d’eau améliorée disponible en cas de besoin": {"Cavaillon": 89, "Ferrier": 98, "Léogâne": 96, "Pignon": 95, "Terre-Neuve": 96},
    "Pourcentage de ménages qui paient régulièrement l'eau": {"Cavaillon": 43, "Ferrier": 2, "Léogâne": 3, "Pignon": 2, "Terre-Neuve": 25},
    "Pourcentage de la population des communes d’intervention ayant accès au moins à un service élémentaire d’assainissement": {"Cavaillon": 26, "Ferrier": 31, "Léogâne": 49, "Pignon": 35, "Terre-Neuve": 24},
    "Pourcentage de la population des communes d’intervention ayant accès à un service d'assainissement géré en toute sécurité": {"Cavaillon": 11, "Ferrier": 14, "Léogâne": 15, "Pignon": 8, "Terre-Neuve": 7},
    "Pourcentage de la population des communes d’intervention où les excréments sont éliminés en toute sécurité in situ ou transportés et traités hors site": {"Cavaillon": 8, "Ferrier": 13, "Léogâne": 13, "Pignon": 7, "Terre-Neuve": 6},
    "Pourcentage de la population des communes d’intervention ayant accès au moins à un service élémentaire d’hygiène": {"Cavaillon": 29, "Ferrier": 56, "Léogâne": 60, "Pignon": 49, "Terre-Neuve": 42},
    "Pourcentage d’écoles disposant au moins de services élémentaires d'eau, d'assainissement et d'hygiène": {"Cavaillon": 20, "Ferrier": 2, "Léogâne": 16, "Pignon": 12, "Terre-Neuve": 13},
    "Pourcentage d’établissements de santé disposant au moins de services élémentaires d'eau, d'assainissement et d'hygiène": {"Cavaillon": 80, "Ferrier": 25, "Léogâne": 63, "Pignon": 33, "Terre-Neuve": 67}
}

# Known variations between the new master list and the existing source data.
# Structure: {master English wording: wording used in the source workbook}.
# Keys are entries of master_indicators_en; values are the alternative texts
# looked up in the workbook's 'Indicator' column. The map is only consulted
# when the master wording itself is not present in the workbook, and the
# comparison is by exact string equality.
known_variations_map = {
    "% of WASH events organized in the communes with the participation of Haitian Rotarians.": "% of commune WASH events with Rotarian participation",
    "% of approved interventions implemented in accordance with commune action plans.": "% of approved interventions implemented in alignment with Commune Action Plans",
    "% of the population in intervention communes with access to at least a basic drinking water service.": "% of population in intervention communes with at least basic drinking water service",
    "% of households that regularly pay for water.": "% of households that pay for water on a regular basis",
    "% of the population in intervention communes with access to at least a basic sanitation service.": "% of population in intervention communes with at least basic sanitation service",
    "% of the population in intervention communes with access to at least a basic hygiene service.": "% of population in intervention communes with at least basic hygiene service",
    "% of schools with at least a basic drinking water, sanitation, and hygiene service.": "% of schools with at least basic drinking water, sanitation, and hygiene services",
    "% of healthcare facilities with at least a basic drinking water, sanitation, and hygiene service.": "% of healthcare facilities with at least basic drinking water, sanitation, and hygiene services",
    "Cumulative amount of funds committed in accordance with the fundamental values of HANWASH, based on a signed memorandum of understanding with HANWASH.": "Cumulative amount of money committed in alignment with HANWASH Core Values",
    "% of water point committees (CPEs) and professional operators created which are functional.": "% of water point committees (CPEs) and Professional Operators created which are functional",
    "% of intervention communities that have achieved Open Defecation Free (ODF) status using the CLTS approach.": "% of intervention communities that achieved Open Defecation Free (ODF) status using the CLTS approach",
    "% of the population in intervention communes who pay for water service.": "% of population in intervention communities paying for water service"
}

--- Step 1: Defining master lists and loading source data ---


In [3]:
# Load the previously classified indicator workbook; every later cell reads
# or updates this frame through the name `df`.
source_path = './output/classified_indicators.xlsx'
try:
    df = pd.read_excel(source_path)
except FileNotFoundError as err:
    # Re-raise instead of calling exit(): in a notebook exit() asks the kernel
    # to shut down rather than reporting an ordinary error, and a printed
    # message is easy to miss. An exception stops "Run All" at this cell with
    # a traceback that names the path that was actually tried.
    raise FileNotFoundError(
        f"'{source_path}' not found. Please check the file path."
    ) from err
print("Successfully loaded 'classified_indicators.xlsx'.")


Successfully loaded 'classified_indicators.xlsx'.


In [4]:
# Indicator wordings present in the source workbook (rows without text are ignored).
source_indicators_en = set(df['Indicator'].dropna())

# The French and English master lists are paired purely by position, and zip()
# stops silently at the shorter list. Fail loudly on a length mismatch rather
# than produce a truncated or misaligned translation table.
if len(master_indicators_fr) != len(master_indicators_en):
    raise ValueError(
        "master_indicators_fr and master_indicators_en must have the same length "
        f"(got {len(master_indicators_fr)} and {len(master_indicators_en)})."
    )

# Create a FR -> EN mapping dictionary for translation
fr_to_en_map = dict(zip(master_indicators_fr, master_indicators_en))

# Create a mapping from English Indicator text to its ID for easy lookup.
# Rows with no indicator text are skipped so that NaN never becomes a key.
rows_with_indicator = df[df['Indicator'].notna()]
indicator_id_map = pd.Series(
    rows_with_indicator['ID'].values, index=rows_with_indicator['Indicator']
).to_dict()

# Create a new baseline dictionary with English keys. Baseline entries whose
# French key is not in the master list cannot be translated; they are still
# skipped (as before) but are now reported instead of vanishing silently.
baseline_data_en = {}
unmatched_baseline_keys = []
for fr_key, value in baseline_data_fr.items():
    en_key = fr_to_en_map.get(fr_key)
    if en_key:
        baseline_data_en[en_key] = value
    else:
        unmatched_baseline_keys.append(fr_key)

if unmatched_baseline_keys:
    print(
        f"Warning: {len(unmatched_baseline_keys)} baseline indicator(s) "
        "not found in master_indicators_fr and skipped:"
    )
    for fr_key in unmatched_baseline_keys:
        print(f"  - {fr_key}")
print("Baseline data re-keyed to English indicators.")


Baseline data re-keyed to English indicators.


In [5]:
# --- Step 2: Generate Reports ---
print("\n--- Step 2: Generating imbalance and baseline reports ---")
communes = ["Cavaillon", "Ferrier", "Léogâne", "Pignon", "Terre-Neuve"]
imbalance_report_data, baseline_report_data, evaluation_indicators_found = [], [], []

for indicator_fr, indicator_en in zip(master_indicators_fr, master_indicators_en):
    # Wordings to try, in priority order: the master English text first,
    # then its known variation (if one is registered).
    candidate_texts = [indicator_en]
    if indicator_en in known_variations_map:
        candidate_texts.append(known_variations_map[indicator_en])
    matched_text = next(
        (text for text in candidate_texts if text in source_indicators_en),
        None,
    )

    if matched_text is None:
        found_flag = "No"
        matched_id = None
    else:
        found_flag = "Yes"
        evaluation_indicators_found.append(matched_text)
        matched_id = indicator_id_map.get(matched_text)

    # One imbalance row per master indicator, matched or not.
    imbalance_report_data.append({
        'pdf_presentation_indicator': indicator_fr,
        'is_found': found_flag,
        'indicator_found': matched_text,
        'indicator_id': matched_id,
    })

    # Baseline row keyed by the English wording; communes without a
    # baseline value for this indicator are left as None.
    values_by_commune = baseline_data_en.get(indicator_en, {})
    baseline_report_data.append({
        'Indicator_ID': matched_id,
        'Indicator': indicator_en,
        **{name: values_by_commune.get(name) for name in communes},
    })

imbalance_df = pd.DataFrame(imbalance_report_data)
baseline_df = pd.DataFrame(baseline_report_data)
print("Report dataframes created successfully.")


--- Step 2: Generating imbalance and baseline reports ---
Report dataframes created successfully.


In [6]:
# --- Step 3: Classify Indicators ---
print("\n--- Step 3: Classifying indicators based on the interim report ---")
evaluation_indicators = sorted(set(evaluation_indicators_found))
print(f"Found {len(evaluation_indicators)} matching indicators to classify as 'Evaluation'.")

# Row masks over the source workbook.
has_indicator_text = df['Indicator'].notna()
is_evaluation = df['Indicator'].isin(evaluation_indicators)

# Reset every row that has indicator text to 'Monitoring Indicator' first, so
# an indicator that was 'Evaluation' before but is absent from the new list
# does not keep its old label ...
df.loc[has_indicator_text, 'Indicator Type'] = 'Monitoring Indicator'
# ... then promote only the indicators matched against the master list.
df.loc[is_evaluation, 'Indicator Type'] = 'Evaluation Indicator'

# Remove the 'Commune' column when present; errors='ignore' makes this a no-op otherwise.
df = df.drop(columns=['Commune'], errors='ignore')

print("Classification complete.")




--- Step 3: Classifying indicators based on the interim report ---
Found 6 matching indicators to classify as 'Evaluation'.
Classification complete.


In [7]:
# --- Step 4: Save All Output Files ---
print("\n--- Step 4: Saving all output files to the 'output' directory ---")
output_dir = 'output'
# exist_ok=True creates the directory only when needed and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(output_dir, exist_ok=True)

# Match status of every master indicator against the source workbook.
imbalance_df.to_excel(os.path.join(output_dir, 'matching_imbalances.xlsx'), index=False)
# Baseline values per commune, one row per master indicator.
baseline_df.to_excel(os.path.join(output_dir, 'baseline_values_by_commune.xlsx'), index=False)
# Source workbook with the updated 'Indicator Type' column.
df.to_excel(os.path.join(output_dir, 'classified_indicators_final.xlsx'), index=False)
print("All files saved successfully.")



--- Step 4: Saving all output files to the 'output' directory ---
All files saved successfully.


In [8]:

# --- Step 5: Final Verification ---
# Show how many rows ended up under each 'Indicator Type', including rows
# where the type is missing (dropna=False keeps the NaN bucket).
print("\n--- Step 5: Final Verification ---")
print("New value counts for 'Indicator Type':")
indicator_type_counts = df['Indicator Type'].value_counts(dropna=False)
print(indicator_type_counts)


--- Step 5: Final Verification ---
New value counts for 'Indicator Type':
Indicator Type
Monitoring Indicator    49
Evaluation Indicator     6
NaN                      4
Name: count, dtype: int64
