In [2]:
from lorenz_utils_clean import (
    extract_names,
    count_donor_msgs_words,
    calculate_gini,
    get_lorenz_points,
    plot_lorenz_curve
)
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from ipywidgets import interact, Dropdown


In [3]:
# Directory holding the exported WhatsApp chats, and the chat files to analyse.
CHAT_DATA_DIR = 'Example_chat_data'
CHAT_FILE_NAMES = (
    'WhatsApp_Chat_with_Donna_Patterson.txt',
    'WhatsApp_Chat_with_Dr._Heather_Hanson.txt',
    'WhatsApp_Chat_with_Jeffery_Hill_.txt',
    'WhatsApp_Chat_with_Michelle_Morris.txt',
    'WhatsApp_Chat_with_Sherry_Flores.txt',
)

# One relative path per chat export, in the order listed above.
file_paths = [f'{CHAT_DATA_DIR}/{file_name}' for file_name in CHAT_FILE_NAMES]

In [4]:
# Find the donor: the participant whose name appears in every chat export.
# Each result is wrapped in set() so the intersection also works if
# extract_names hands back a list or another iterable of names.
name_sets = [set(extract_names(path)) for path in file_paths]

# set.intersection() needs at least one set, so guard against an empty file list.
common_names = set.intersection(*name_sets) if name_sets else set()

if len(common_names) > 1:
    # Several names are shared by all chats, so the donor is ambiguous; surface
    # the candidates instead of silently picking one.
    print("Warning: multiple names appear in every chat:", sorted(common_names))

# min() gives a reproducible choice; set.pop() returns an arbitrary element,
# which can differ between kernel restarts.
donor_name = min(common_names) if common_names else "No donor found"
print("Donor Name:", donor_name)


Donor Name: Kyle Adkins


In [5]:
# Tally the donor's activity once per chat file; each result unpacks into
# (contact, number of messages, number of words).
per_chat_stats = [
    count_donor_msgs_words(chat_path, donor_name) for chat_path in file_paths
]

# Split the per-chat results into one lookup per metric, keyed by contact.
message_counts = {contact: n_msgs for contact, n_msgs, _ in per_chat_stats}
word_counts = {contact: n_words for contact, _, n_words in per_chat_stats}

print("Message counts:", message_counts)
print("Word counts:", word_counts)


Message counts: {'Donna Patterson': 2494, 'Dr. Heather Hanson': 924, 'Jeffery Hill': 2144, 'Michelle Morris': 12231, 'Sherry Flores': 554}
Word counts: {'Donna Patterson': 10100, 'Dr. Heather Hanson': 3178, 'Jeffery Hill': 13029, 'Michelle Morris': 43111, 'Sherry Flores': 2583}


In [6]:
# Gini index of the donor's per-contact totals, once for each metric.
gini_messages, gini_words = (
    calculate_gini(counts) for counts in (message_counts, word_counts)
)

print(f"Gini index for messages: {gini_messages:.3f}")
print(f"Gini index for words: {gini_words:.3f}")


Gini index for messages: 0.543
Gini index for words: 0.505


In [7]:
@interact(data_type=Dropdown(options=['Messages', 'Words'], description='Show:'))
def display_bar_chart(data_type):
    """Draw a bar chart of the donor's per-contact totals for the chosen metric.

    data_type: the dropdown selection. 'Messages' plots message_counts; any
    other value plots word_counts.
    """
    if data_type == 'Messages':
        counts = message_counts
    else:
        counts = word_counts

    # One bar per contact, drawn on an explicit figure/axes pair.
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.bar(counts.keys(), counts.values(), color='skyblue')
    ax.set_title(f"Donor's {data_type} Sent Per Contact")
    ax.set_ylabel(f"Number of {data_type}")
    # Contact names are long, so tilt the x tick labels.
    ax.tick_params(axis='x', labelrotation=45)
    fig.tight_layout()
    ax.grid(True)
    plt.show()


interactive(children=(Dropdown(description='Show:', options=('Messages', 'Words'), value='Messages'), Output()…

In [9]:
@interact(
    # Use the donor detected from the chat files rather than a hard-coded name,
    # so the dropdown and plot title stay correct when the input data changes.
    donor=Dropdown(options=[donor_name], description="Donor:"),
    data_type=Dropdown(options=["Messages", "Words"], description="Data:")
)
def plot_lorenz_for_donor(donor, data_type):
    """Plot the Lorenz curve of the donor's activity across contacts.

    donor: name shown in the plot title (the dropdown selection); it does not
        affect which data is plotted.
    data_type: "Messages" plots message_counts; any other value plots
        word_counts.
    """
    data = message_counts if data_type == "Messages" else word_counts
    x, y = get_lorenz_points(data)
    gini = calculate_gini(data)

    # Convert to float arrays before scaling: multiplying a plain list by 100
    # would repeat the list instead of scaling its values.
    x_pct = np.asarray(x, dtype=float) * 100
    y_pct = np.asarray(y, dtype=float) * 100

    fig, ax = plt.subplots(figsize=(6, 5))
    ax.plot(x_pct, y_pct, label="Lorenz Curve", color="blue")
    # Reference diagonal: every contact receives the same share.
    ax.plot([0, 100], [0, 100], linestyle="--", color="gray", label="Perfect Equality")
    # Shade the gap between the diagonal and the Lorenz curve.
    ax.fill_between(x_pct, x_pct, y_pct, color="lightblue", alpha=0.3)
    ax.set_title(f"{donor} — {data_type} Lorenz Curve (Gini = {gini:.3f})")
    ax.set_xlabel("Cumulative % of Contacts")
    ax.set_ylabel(f"Cumulative % of {data_type}")
    ax.grid(True)
    ax.legend()
    fig.tight_layout()
    plt.show()


interactive(children=(Dropdown(description='Donor:', options=('Kyle Adkins',), value='Kyle Adkins'), Dropdown(…

### 🧾 Explanation of Lorenz Curve and Gini Index:

The Lorenz Curve is a visual way to show how equally or unequally something is distributed — in this case, how a donor sends messages or words across their contacts. On the graph:

* The x-axis shows the cumulative percentage of contacts (from least messaged to most).
* The y-axis shows the cumulative percentage of messages or words sent.
* The diagonal line represents *perfect equality* — where every contact would receive exactly the same number of messages (or words).

The blue curve (the Lorenz curve) shows the actual distribution. The further this curve bends away from the diagonal, the more unequal the distribution is.

The Gini Index is a number between 0 and 1 that quantifies this inequality:

* A Gini Index of 0 means perfect equality (everyone gets the same amount).
* A Gini Index close to 1 means extreme inequality (most messages go to just a few people).

For example, a Gini Index of 0.54 (roughly the value computed above for this donor's messages) means the donor concentrates most of their communication on just a few contacts, while the others receive much less. A value closer to 0.2 or 0.3 would suggest a more balanced interaction.




**In short:** the Lorenz curve shows how evenly the donor talks to their contacts. If the curve bends far away from the diagonal, the donor messages a few people far more than the others. The Gini Index expresses this as a single number — the higher it is, the more unequal the messaging pattern.