<a href="https://colab.research.google.com/github/InesLesire2/usaid-wssh-tool-3/blob/main/scripts/data-rank-collab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [29]:
# @title Prologue
# 1. Import Dependencies
import geopandas as gpd
import pandas as pd
import requests
import json
from io import StringIO
import os
from sklearn.preprocessing import MinMaxScaler
import ipywidgets as widgets
import matplotlib.pyplot as plt
from IPython.display import display, clear_output, HTML

# 2. Clone Repository
if os.path.exists("usaid-wssh-tool-3"):
  !rm -rf usaid-wssh-tool-3
!git clone https://github.com/akvo/usaid-wssh-tool-3.git

geojson_path = "usaid-wssh-tool-3/data/output"
file_list = [f for f in os.listdir(geojson_path) if os.path.isfile(os.path.join(geojson_path, f))]
file_list = list(filter(lambda x: x.endswith(".geojson"), file_list))
country_list = list(map(lambda x: x.replace(".geojson", ""), file_list))
country_list.sort()
country_list = [c.capitalize() for c in country_list]

# 3. Column Definitions
# Indicator column name -> human-readable label.
column_definitions = {
    'drr': 'Drought Risk',
    'rfr': 'Riverine Flood Risk',
    'bws': 'Base Water Stress',
    'Open_defecation_estimates_mean': 'Open Defecation',
    'No_Improved_water_premise_estimates_mean': 'No Improve Water Premises',
    'No_basic_water_estimates_mean': 'No Basic Water',
    'no_basic_sanitation_estimates_mean': 'No Basic Sanitation',
}
# Column names only, in declaration order.
column_list = list(column_definitions)


Cloning into 'usaid-wssh-tool-3'...
remote: Enumerating objects: 185, done.[K
remote: Counting objects: 100% (185/185), done.[K
remote: Compressing objects: 100% (165/165), done.[K
remote: Total 185 (delta 88), reused 50 (delta 12), pack-reused 0 (from 0)[K
Receiving objects: 100% (185/185), 26.38 MiB | 16.38 MiB/s, done.
Resolving deltas: 100% (88/88), done.


In [30]:
# @title Update weights and normalisation strategy
# Multiplier applied to an indicator according to its "weight" class.
weight_mapping = dict(core=0.7, secondary=0.3)
# Shared scaler instance; apply_directionality() re-fits it on every call.
scaler = MinMaxScaler()

In [31]:
# @title Program setup
# Program name -> ordered list of indicators.
# Each entry gives the data column ("indicator"), the display "label",
# the "weight" class (a key of weight_mapping) and the "direction"
# ("positive" keeps the scaled value, anything else inverts it).
# Order matters: calculate_index() adds indicators cumulatively in list order.
indicator_config = {
    "Climate-resilient WASH": [
        {"indicator": "rfr", "label": "Flood Risk", "weight": "core", "direction": "positive"},
        {"indicator": "No_Improved_water_premise_estimates_mean", "label": "No Improve Water Premises", "weight": "core", "direction": "positive"},
        {"indicator": "Open_defecation_estimates_mean", "label": "Open Defecation", "weight": "core", "direction": "positive"},
        {"indicator": "drr", "label": "Drought Risk", "weight": "core", "direction": "positive"},
    ],
    "Water Program": [
        {"indicator": "No_Improved_water_premise_estimates_mean", "label": "No Improve Water Premises", "weight": "core", "direction": "positive"},
        {"indicator": "drr", "label": "Drought Risk", "weight": "secondary", "direction": "positive"},
    ],
}

In [32]:
# @title Precalculate Data

def pre_calculate_data():
    """Load one GeoDataFrame per entry of ``country_list``.

    Each file is read from ``{geojson_path}/{country.lower()}.geojson``.
    Any column named in ``column_list`` that the file lacks is added with
    the value 0 (a message is printed for each), and an ``index`` column
    initialised to 0 is set on every frame.

    Returns:
        dict mapping the country name (as spelled in ``country_list``) to
        its prepared GeoDataFrame.
    """
    loaded = {}
    for name in country_list:
        frame = gpd.read_file(f"{geojson_path}/{name.lower()}.geojson")
        for column in column_list:
            if column in frame.columns:
                continue
            # Absent indicator: report it and fill with zeros
            print(f"Adding missing column: {column}")
            frame[column] = 0
        # Initialize the Index
        frame['index'] = 0
        loaded[name] = frame
    return loaded

# Read every country's GeoJSON once; calculate_index() looks frames up in this dict by country name.
countries_data = pre_calculate_data()

In [33]:
# @title Init Map Functions

# Apply Directionality
def apply_directionality(data, selected_index):
    """Scale the selected indicator columns and orient them by direction.

    The columns named by ``selected_index`` are transformed on a copy of
    ``data`` with the module-level ``scaler`` (re-fitted on every call),
    so the caller's frame is left untouched.

    Args:
        data: frame containing every column named in ``selected_index``.
        selected_index: list of dicts with at least the keys
            ``'indicator'`` (column name) and ``'direction'``.

    Returns:
        dict mapping each indicator name to its scaled column; columns whose
        direction is not ``'positive'`` are returned as ``1 - scaled``.
    """
    columns = [entry['indicator'] for entry in selected_index]
    scaled = data.copy()
    scaled[columns] = scaler.fit_transform(scaled[columns])
    return {
        entry['indicator']: (
            scaled[entry['indicator']]          # positive: keep the scaled value
            if entry['direction'] == 'positive'
            else 1 - scaled[entry['indicator']]  # otherwise: invert it
        )
        for entry in selected_index
    }

# Compute the cumulative weighted index per district and build the ranking tables
def calculate_index(countries, selected_index):
    """Build top/bottom-five ranking tables for the selected countries.

    For every country the indicators in ``selected_index`` are added one at a
    time (in list order) to a running weighted sum. After the k-th indicator:

    * ``index_k`` = running sum / k
    * ``Rank_k``  = rank of ``index_k`` in descending order (1 = highest).
      Ties share the best rank (``method='min'``) and NaN values rank last,
      so the result is always a whole number.

    Args:
        countries: iterable of keys of the module-level ``countries_data``.
        selected_index: list of indicator dicts with the keys
            ``'indicator'``, ``'label'``, ``'weight'`` (a key of
            ``weight_mapping``) and ``'direction'``.

    Returns:
        Tuple ``(top_five, bottom_five, final_top_five, final_bottom_five,
        rank_columns)``:

        * ``top_five`` / ``bottom_five``: per country, the five districts with
          the largest / smallest ``index_1`` (first indicator only).
        * ``final_top_five`` / ``final_bottom_five``: per country, the five
          districts with the largest / smallest final index (all indicators).
        * ``rank_columns``: the display names of the rank columns.

        All four tables share the columns ``State``, ``District`` and one
        ``[Rank]+...`` / ``[Index]+...`` pair per cumulative step.

    Raises:
        ValueError: from ``pd.concat`` when ``countries`` is empty.
    """
    step_numbers = range(1, len(selected_index) + 1)

    # Internal column names and their display names, one Rank/index pair per step.
    additional_index = []
    real_index_names = []
    added_labels = []
    for step, indicator in zip(step_numbers, selected_index):
        additional_index += [f"Rank_{step}", f"index_{step}"]
        added_labels.append(indicator['label'])
        index_name = "+".join(added_labels)
        real_index_names += [f"[Rank]+{index_name}", f"[Index]+{index_name}"]

    def to_table(frames, sort_column, ascending):
        # Concatenate the per-country slices, order them and apply display names.
        table = pd.concat(frames).sort_values(by=sort_column, ascending=ascending)
        table = table.reset_index(drop=True)
        table = table[['ADM1_EN', 'ADM2_EN'] + additional_index]
        table.columns = ['State', 'District'] + real_index_names
        return table

    first_top, first_bottom = [], []
    last_top, last_bottom = [], []

    for country in countries:
        geojson = countries_data[country].copy()
        # Start from zero explicitly instead of relying on the cached frame's state.
        geojson['index'] = 0.0
        directed_data = apply_directionality(geojson, selected_index)

        for step, indicator in zip(step_numbers, selected_index):
            weight = weight_mapping[indicator['weight']]
            geojson['index'] += directed_data[indicator['indicator']] * weight
            # Snapshot of the index after each additional indicator
            geojson[f'index_{step}'] = geojson['index'] / step

        if geojson['index'].sum() > 0:
            geojson['index'] = geojson['index'] / len(selected_index)
        geojson['quartile'] = pd.qcut(geojson['index'], 4, labels=False, duplicates='drop') + 1

        for step in step_numbers:
            # method='min' keeps tied ranks integral (the default average rank
            # would be truncated by astype(int)); na_option='bottom' avoids the
            # int-cast failure on NaN.
            geojson[f'Rank_{step}'] = (
                geojson[f'index_{step}']
                .rank(ascending=False, method='min', na_option='bottom')
                .astype(int)
            )

        first_top.append(geojson.nlargest(5, 'index_1'))
        first_bottom.append(geojson.nsmallest(5, 'index_1'))
        # The "final" tables must also be limited to five districts per country.
        last_top.append(geojson.nlargest(5, 'index'))
        last_bottom.append(geojson.nsmallest(5, 'index'))

    top_five = to_table(first_top, 'index_1', ascending=False)
    bottom_five = to_table(first_bottom, 'index_1', ascending=True)
    final_top_five = to_table(last_top, 'index', ascending=False)
    final_bottom_five = to_table(last_bottom, 'index', ascending=True)

    rank_columns = [name for name in real_index_names if name.startswith('[Rank]')]
    return top_five, bottom_five, final_top_five, final_bottom_five, rank_columns

In [34]:
# @title Helper Functions
def modify_html(title, table):
    """Render ``table`` as a styled HTML fragment under an ``<h3>`` heading.

    Args:
        title: heading text, inserted as-is (not HTML-escaped).
        table: pandas DataFrame; rendered with ``to_html(index=False)``.

    Returns:
        str: ``<h3>title</h3><hr>`` followed by the table markup, with CSS
        classes added to ``<table>``, fixed-width ``State``/``District``
        headers, top-aligned header cells, and a ``<br>`` inserted before
        every ``+`` that appears inside a header cell.
    """
    import re  # local import: `re` is not among the notebook's top-level imports

    html = "<h3>{}</h3><hr>".format(title)
    html += table.to_html(index=False)
    html = html.replace("<table", "<table class='table is-bordered is-narrow is-hoverable is-fullwidth'")
    # make table header for State and District column same width
    html = html.replace("<th>State</th>", "<th style='width:200px;vertical-align:top;'>State</th>")
    html = html.replace("<th>District</th>", "<th style='width:200px;vertical-align:top;'>District</th>")
    # make all th vertical align
    html = html.replace("<th>", "<th style='vertical-align:top;'>")

    # Break before "+" only inside <th> cells. A document-wide replace would
    # also rewrite data cells and the title that happen to contain "+".
    def _break_header(match):
        return match.group(1) + match.group(2).replace("+", "<br>+") + match.group(3)

    # \b keeps <thead> from matching; DOTALL tolerates multi-line header text.
    html = re.sub(r"(<th\b[^>]*>)(.*?)(</th>)", _break_header, html, flags=re.DOTALL)
    return html

def create_plot(df, value_columns, is_float=False, title=""):
    """Draw one line per district across ``value_columns`` and label each point.

    Values go on the x axis and the column names on the y axis. Only the
    first row found for each distinct ``District`` value is plotted.

    Args:
        df: frame with a ``District`` column plus every name in ``value_columns``.
        value_columns: ordered list of column names to plot.
        is_float: when True, point labels show the value rounded to two
            decimals; otherwise the value is shown as an int, offset 0.1 to
            the right of the point.
        title: figure title.
    """
    plt.figure(figsize=(20, 4))
    plt.title(title)

    # (district, first row of values) in order of first appearance
    rows = [
        (name, df[df['District'] == name][value_columns].values[0])
        for name in df['District'].unique()
    ]

    # Pass 1: draw the lines so matplotlib assigns each district its colour
    handles = [plt.plot(points, value_columns, label=name)[0] for name, points in rows]

    # Pass 2: markers in the line's colour, plus a text label per point
    for (_, points), handle in zip(rows, handles):
        colour = handle.get_color()
        for column, value in zip(value_columns, points):
            plt.scatter(value, column, marker='o', color=colour)
            if is_float:
                x_pos, text = value, str(float(round(value, 2)))
            else:
                x_pos, text = value + 0.1, str(int(value))
            plt.text(x_pos, column, text, color='black')

    plt.legend(loc='best')
    plt.show()

In [35]:
# @title Select Countries & Indicators
# 1. Output containers
output_area = widgets.Output()  # For buttons and widgets (not referenced again in this cell)
top_five_area, bottom_five_area = widgets.Output(), widgets.Output()
top_five_area.clear_output(wait=True)
bottom_five_area.clear_output(wait=True)
# Areas for the tables computed with all indicators included
final_top_five_area, final_bottom_five_area = widgets.Output(), widgets.Output()

# Bulma stylesheet for the table classes added by modify_html(), then local overrides
display(HTML('<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bulma@1.0.2/css/bulma.min.css">'))
display(HTML(
    "<style>"
    "html,body,img {width: 100% !important;} "
    "th, td {text-align: right !important;} "
    "th {vertical-align: top !important;} "
    "h1, h2, h3 {text-align: center; font-size: 20px; margin-top: 20px;}"
    "</style>"
))

# 2. Country selection widget (defaults to the first country)
country_selection = widgets.Dropdown(
    description='Country',
    options=[c.capitalize() for c in country_list],
    value=country_list[0],
    layout=widgets.Layout(width='250px'),
    disabled=False,
)

# 3. Indicator Selection (program names from indicator_config, defaults to the first)
indicator_selection = widgets.Dropdown(
    description='Indicator',
    options=list(indicator_config),
    value=list(indicator_config)[0],
    layout=widgets.Layout(width='300px'),
    disabled=False,
)

# 5. Function to handle the button click event for calculating the index
def on_calculate_button_click(event):
    """Recompute the rankings for the current dropdown selection and redraw.

    Clears the four output areas, calls ``calculate_index`` with the selected
    country and the indicator list of the selected program, then shows:

    * a table plus "Rank" and "Index" plots for the top and bottom five;
    * a table only for the final top and final bottom five (the plot calls
      for these were commented out in the original).

    Args:
        event: the clicked button, as passed by ipywidgets; unused.
    """
    for area in (top_five_area, bottom_five_area, final_top_five_area, final_bottom_five_area):
        with area:
            clear_output(wait=True)

    chosen_indicators = indicator_config[indicator_selection.value]
    chosen_countries = country_selection.value
    # calculate_index iterates over countries, so wrap a single value in a list
    if not isinstance(chosen_countries, list):
        chosen_countries = [chosen_countries]

    top_five, bottom_five, final_top_five, final_bottom_five, rank_columns = calculate_index(
        chosen_countries, chosen_indicators
    )
    index_columns = [name.replace('[Rank]', '[Index]') for name in rank_columns]

    # Tables with their rank and index plots
    plotted = (
        (top_five_area, "Top 5 Districts", top_five),
        (bottom_five_area, "Bottom 5 Districts", bottom_five),
    )
    for area, heading, table in plotted:
        with area:
            display(HTML(modify_html(heading, table)))
            create_plot(table, rank_columns, False, "Rank")
            create_plot(table, index_columns, True, "Index")

    # Tables after all parameters are included; no plots are drawn for these
    table_only = (
        (final_top_five_area, "Final Top 5 Districts", final_top_five),
        (final_bottom_five_area, "Final Bottom 5 Districts", final_bottom_five),
    )
    for area, heading, table in table_only:
        with area:
            display(HTML(modify_html(heading, table)))



# Calculate button: each click runs on_calculate_button_click
button = widgets.Button(description="Calculate")
button.on_click(on_calculate_button_click)

# Layout: spacer, controls row, spacer, then the four result areas in order
display(HTML("<br>"))
display(widgets.HBox([country_selection, indicator_selection, button]))
display(HTML("<br>"))
display(
    top_five_area,
    bottom_five_area,
    final_top_five_area,     # final top 5
    final_bottom_five_area,  # final bottom 5
)
# disable button
# button.disabled = True

HBox(children=(Dropdown(description='Country', layout=Layout(width='250px'), options=('Madagascar', 'Rwanda', …

Output()

Output()

Output()

Output()