In [1]:
import pandas as pd
import os

In [2]:
# Paths of the three Excel workbooks read by the loading cell.
# No directory component is given, so they resolve against the working directory.
file0, file1, file2 = (
    'indicateur_section0.xlsx',
    'indicator_section1.xlsx',
    'indicator_section2.xlsx',
)


In [4]:
# Load the three Excel workbooks into pandas DataFrames (df0, df1, df2).
try:
    df0 = pd.read_excel(file0)
    df1 = pd.read_excel(file1)
    df2 = pd.read_excel(file2)
except FileNotFoundError as e:
    print(f"Error: {e}. Please ensure all Excel files are in the same directory as this script.")
    # Re-raise instead of calling exit(): in a notebook exit() asks the kernel
    # to shut down, while re-raising stops execution with a visible traceback
    # and keeps later cells from running against undefined DataFrames.
    raise
else:
    # Only reached when all three reads succeeded.
    print("All three files loaded successfully!")

All three files loaded successfully!


In [5]:
# Preview the first rows of each freshly loaded DataFrame, in load order.
print("\n--- Initial Data Head ---")
for file_no, frame in enumerate((df0, df1, df2)):
    print(f"\nFile {file_no}:")
    print(frame.head())



--- Initial Data Head ---

File 0:
                            Units     ID  \
0                               %  1100a   
1                               %  1100b   
2  volunteer hours / club / month  1110a   
3                          people  1110b   
4                          people  1120a   

                                           Indicator  \
0  % of commune WASH events with Rotarian partici...   
1  % of commune WASH events led by trained HANWAS...   
2  # of volunteer hours per month spent by Rotari...   
3  Number of people reached by the HANWASH Ambass...   
4       # of ambassadors who have completed training   

                                         Definitions  
0  Percentage of commune-level WASH events that h...  
1  Percentage of commune-level WASH events led by...  
2  Average number of volunteer hours per month sp...  
3  Total number of individuals reached through WA...  
4  Total number of HANWASH Ambassadors who have s...  

File 1:
   Code Indicator group

In [6]:
# --- Step 2: Pre-process and Standardize Columns ---
print("\n--- Step 2: Pre-processing Data ---")

# Rename columns for consistency.
# rename() skips labels that are not present, and reassigning (instead of
# inplace=True) means each statement builds its result from its input frame.
df1 = df1.rename(columns={
    'Code': 'Category_code',
    'Description': 'Category_code_description',
})

df2 = df2.rename(columns={
    'Baseline': 'Baseline Value',
    'Current': 'current_value',
    'Target': 'Planned Value',
})

# To avoid duplicate columns after merging (e.g., 'Units_x', 'Units_y'),
# we drop them from one of the dataframes before the merge.
# df0 has the primary definitions, so we'll keep its 'Indicator' and 'Units' columns.
# errors='ignore' keeps this cell re-runnable: on a second execution the
# columns are already gone and a plain drop() would raise KeyError.
df2 = df2.drop(columns=['Indicator', 'Units'], errors='ignore')


--- Step 2: Pre-processing Data ---


In [7]:
print("Columns renamed and prepared for merging.")
# List the column labels of both pre-processed frames.
for frame_name, frame in (("df1", df1), ("df2", df2)):
    print(f"\nColumns in {frame_name}:", frame.columns.tolist())


Columns renamed and prepared for merging.

Columns in df1: ['Category_code', 'Indicator group', 'Category', 'Category_code_description', 'ID']

Columns in df2: ['ID', 'Baseline Value', 'current_value', 'Planned Value']


In [8]:
# --- Step 3: Merge the DataFrames ---
print("\n--- Step 3: Merging DataFrames ---")

# Outer joins on 'ID' keep every ID that appears in any of the frames;
# columns missing for a given ID are filled with NaN.
merged_df = df0.merge(df1, on='ID', how='outer')

# Attach the third frame to the intermediate result with the same join rule.
final_df = merged_df.merge(df2, on='ID', how='outer')

print("DataFrames merged successfully.")
print("Shape of the final DataFrame:", final_df.shape)
print(final_df.head())



--- Step 3: Merging DataFrames ---
DataFrames merged successfully.
Shape of the final DataFrame: (59, 11)
                            Units     ID  \
0                             NaN      1   
1                             NaN   1000   
2                               %  1100a   
3                               %  1100b   
4  volunteer hours / club / month  1110a   

                                           Indicator  \
0                                                NaN   
1                                                NaN   
2  % of commune WASH events with Rotarian partici...   
3  % of commune WASH events led by trained HANWAS...   
4  # of volunteer hours per month spent by Rotari...   

                                         Definitions  Category_code  \
0                                                NaN              1   
1                                                NaN           1000   
2  Percentage of commune-level WASH events that h...           1100   
3  Perc

In [9]:
# --- Step 4: Add New Empty Columns ---
print("\n--- Step 4: Adding New Columns ---")

# Create the placeholder columns, filled with None in every row.
for new_col in ('Commune', 'Indicator Type'):
    final_df[new_col] = None

print("Added 'Commune' and 'Indicator Type' columns.")
print(final_df.head())



--- Step 4: Adding New Columns ---
Added 'Commune' and 'Indicator Type' columns.
                            Units     ID  \
0                             NaN      1   
1                             NaN   1000   
2                               %  1100a   
3                               %  1100b   
4  volunteer hours / club / month  1110a   

                                           Indicator  \
0                                                NaN   
1                                                NaN   
2  % of commune WASH events with Rotarian partici...   
3  % of commune WASH events led by trained HANWAS...   
4  # of volunteer hours per month spent by Rotari...   

                                         Definitions  Category_code  \
0                                                NaN              1   
1                                                NaN           1000   
2  Percentage of commune-level WASH events that h...           1100   
3  Percentage of commune-level W

In [10]:
# --- Step 5: Finalize and Export ---
print("\n--- Step 5: Finalizing and Exporting ---")

# Define the final column order
final_column_order = [
    'ID',
    'Indicator group',
    'Category',
    'Category_code',
    'Category_code_description',
    'Indicator',
    'Definitions',
    'Units',
    'Baseline Value',
    'Planned Value',
    'current_value',
    'Commune',
    'Indicator Type'
]

# Reorder the dataframe; selecting by label list raises KeyError if any
# expected column is missing from final_df.
final_df = final_df[final_column_order]

# Save to Excel.
# exist_ok=True creates the directory when needed and is a no-op when it
# already exists, replacing the separate exists() check (which could race
# with another process creating the directory in between).
output_dir = 'output'
os.makedirs(output_dir, exist_ok=True)

output_filename = os.path.join(output_dir, 'merged_indicators.xlsx')
# index=False keeps the DataFrame's row index out of the workbook.
final_df.to_excel(output_filename, index=False)

print(f"\nFinal table successfully created and saved to '{output_filename}'")
print("\n--- Final Table Preview ---")
print(final_df.head())



--- Step 5: Finalizing and Exporting ---

Final table successfully created and saved to 'output/merged_indicators.xlsx'

--- Final Table Preview ---
      ID Indicator group              Category  Category_code  \
0      1      Mattson LM      Ultimate outcome              1   
1   1000      Mattson LM  Intermediate outcome           1000   
2  1100a      Mattson LM     Immediate outcome           1100   
3  1100b      Mattson LM     Immediate outcome           1100   
4  1110a      Mattson LM               Outputs           1110   

                           Category_code_description  \
0  Increased access to safe, sustainable and affo...   
1  Strengthened demand for and management of WASH...   
2  Increased civil society engagement in decision...   
3  Increased civil society engagement in decision...   
4  Support civil society engagement in the WASH s...   

                                           Indicator  \
0                                                NaN   
1         