In [3]:
import sys
import os

# Make the parent of the current working directory importable
# (presumably where the project-local helper modules live -- confirm).
_parent_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
# Guard against duplicates so re-running this cell does not keep growing sys.path.
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

# WhatsApp Donation Dashboard

This notebook visualizes real WhatsApp donation data. It allows you to explore how messages and words are distributed across donors and conversations using the **Gini Index** and **Lorenz Curves**.

We will use interactive widgets to select donors and metrics.

In [4]:
# Import necessary libraries
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, HTML
import matplotlib.pyplot as plt

# Import utility functions for data and plots
from real_data_utils import donations, messages, calculate_gini, plot_lorenz_curve, plot_burstiness_dashboard

## Donor Selection Functions

These functions allow us to:
- Get a list of unique donor IDs.
- Create a dropdown/combobox to select donors.
- Filter messages for a selected donor.

In [5]:
# Function to get sorted list of donor IDs
def get_donor_ids():
    """Return the sorted list of distinct donor IDs found in ``donations``.

    Missing values in the ``donor_id`` column are dropped before sorting:
    a NaN mixed in with string IDs would make ``sorted`` raise ``TypeError``,
    and a missing ID can never be matched by a selected donor anyway.

    Returns:
        list: The unique, non-null ``donor_id`` values in ascending order.
    """
    return sorted(donations['donor_id'].dropna().unique())

# Function to create a donor selector widget
def create_donor_selector():
    """Build the combobox widget used to pick a donor.

    The selectable options are the IDs returned by ``get_donor_ids()``.
    ``ensure_option=False`` is passed through to the widget unchanged.

    Returns:
        widgets.Combobox: A 300px-wide combobox labelled "Donor:".
    """
    donor_options = get_donor_ids()
    selector_layout = widgets.Layout(width='300px')
    selector = widgets.Combobox(
        description='Donor:',
        placeholder='Type or select donor_id',
        options=donor_options,
        ensure_option=False,
        layout=selector_layout
    )
    return selector

# Function to get messages for a given donor
def get_donor_messages(donor):
    """Return the rows of ``messages`` that belong to one donor's donations.

    Args:
        donor: Value compared against the ``donor_id`` column of ``donations``.

    Returns:
        The subset of ``messages`` whose ``donation_id`` appears among the
        ``donation_id`` values of the donor's rows in ``donations``.
    """
    # Step 1: collect the donation IDs registered for this donor.
    is_donor_row = donations['donor_id'] == donor
    donor_donation_ids = donations.loc[is_donor_row, 'donation_id']

    # Step 2: keep only the messages attached to one of those donations.
    in_donor_donations = messages['donation_id'].isin(donor_donation_ids)
    return messages[in_donor_donations]

## Gini & Lorenz Dashboard

This section creates an interactive dashboard to analyze message or word distributions for a donor.  

- **Metric options**: Messages or Words  
- **Outputs**: 
  - Bar chart showing counts per conversation
  - Lorenz curve
  - Summary with Gini Index interpretation

In [6]:
# Function to display the interactive Gini & Lorenz dashboard
def show_gini_dashboard():
    """Display the interactive Gini & Lorenz dashboard.

    Builds a donor combobox, a metric selector ("Messages" / "Words") and a
    "Load Donor" button, plus three output areas: a bar chart of counts per
    conversation, a Lorenz curve, and an HTML summary with the Gini index.
    Clicking the button or changing the metric recomputes all three areas
    for the donor currently entered in the combobox.

    Returns:
        None. The widget tree is rendered with ``display``.
    """
    # Local import: only needed to escape user-typed text placed into HTML.
    from html import escape

    donor_select = create_donor_selector()
    metric_select = widgets.RadioButtons(
        options=["Messages", "Words"],
        description="Metric:",
        layout=widgets.Layout(width='200px')
    )
    submit_btn = widgets.Button(description="Load Donor", button_style='success')
    bar_output, lorenz_output, summary_output = widgets.Output(), widgets.Output(), widgets.Output()

    def show_error(message):
        """Render a red, bold message in the summary area."""
        with summary_output:
            display(HTML(f"<b style='color:red;'> {message}</b>"))

    def update_dashboard(change=None):
        """Recompute and redraw every output area.

        ``change`` is the event payload passed by the button click or the
        traitlets observer; it is not used.
        """
        for area in (bar_output, lorenz_output, summary_output):
            area.clear_output()

        donor = donor_select.value
        metric = metric_select.value

        if donor not in get_donor_ids():
            # The donor text is typed by the user, so escape it before
            # embedding it in HTML.
            show_error(f"Donor '{escape(str(donor))}' not found in WhatsApp data.")
            return

        donor_msgs = get_donor_messages(donor)

        # Only messages sent by the donor are counted; the filter and the
        # grouping are shared by both metrics.
        sent_by_donor = donor_msgs[donor_msgs['sender_id'] == donor]
        per_conversation = sent_by_donor.groupby('conversation_id')
        if metric == "Messages":
            counts = per_conversation.size().to_dict()
        else:
            counts = per_conversation['word_count'].sum().to_dict()

        if not counts:
            # Nothing to plot: an empty Series cannot be drawn as a bar chart,
            # and an inequality measure over zero conversations is undefined.
            show_error(f"Donor '{escape(str(donor))}' has no sent messages to analyze.")
            return

        gini = calculate_gini(counts)

        with bar_output:
            counts_series = pd.Series(counts).sort_values(ascending=False)
            # Truncate long conversation IDs so the tick labels stay readable.
            short_labels = [str(x)[:6] + "..." if len(str(x)) > 6 else str(x) for x in counts_series.index]
            # Widen the figure with the number of bars, but never below 6 inches.
            ax = counts_series.plot(kind='bar', figsize=(max(6, len(counts_series) * 0.6), 5), color='blue')
            ax.set_title(f"{metric} Count per Contact")
            ax.set_xticks(range(len(short_labels)))
            ax.set_xticklabels(short_labels, rotation=45, ha='right')
            ax.grid(True)
            ax.figure.tight_layout()
            plt.show()

        with lorenz_output:
            plot_lorenz_curve(counts, title=f"{metric} Distribution")

        with summary_output:
            display(HTML(
                f"<div style='background:#f5f5f5; padding:15px; border-radius:10px; width:100%; height:100%;'>"
                f"<h4 style='margin-top:0;'>Lorenz Curve & Gini Index Summary</h4>"
                f"<p>This curve shows how unequally messages/words are distributed across contacts. If the curve is close to the diagonal, the distribution is equal. If it bows below the diagonal, it indicates inequality.</p>"
                f"<p>A Gini index of {gini:.3f} means: "
                f"{'High inequality (few contacts dominate)' if gini > 0.5 else 'Relatively balanced distribution'}.</p>"
                f"</div>"
            ))

    # Refresh on an explicit button click and whenever the metric changes.
    submit_btn.on_click(update_dashboard)
    metric_select.observe(update_dashboard, names='value')

    display(widgets.VBox([
        widgets.HTML("<h2>WhatsApp Donation Dashboard</h2><p>Visualize donor message distributions using real WhatsApp data.</p>"),
        widgets.HBox([donor_select, submit_btn, metric_select], layout=widgets.Layout(gap='20px')),
        widgets.HBox([bar_output, lorenz_output, summary_output], layout=widgets.Layout(gap='30px'))
    ]))

### Launch the Dashboard

Run the function below to display the interactive dashboard and start exploring donor data.

In [7]:
# Build the dashboard; show_gini_dashboard() renders the widgets itself via display().
show_gini_dashboard()

VBox(children=(HTML(value='<h2>WhatsApp Donation Dashboard</h2><p>Visualize donor message distributions using …