In [8]:
from __future__ import division
import numpy as np
import scipy as sc
from itertools import product
import time
import matplotlib.pyplot as plt
import PIL
from numpy import log10
import random
from math import factorial
from scipy.stats import linregress, gaussian_kde, skew
from scipy import stats
from scipy.spatial import distance
import warnings
import pandas as pd
import re
import os
import math
from collections import Counter
from sklearn.preprocessing import PolynomialFeatures
from statsmodels.stats.outliers_influence import summary_table
import statsmodels.api as sm
from scipy.optimize import linear_sum_assignment

warnings.filterwarnings('ignore')

%config InlineBackend.figure_formats = ['svg']
%config InlineBackend.print_figure_kwargs={'facecolor' : "w"}

pd.set_option('display.max_columns', None)


In [9]:

# Poverty lines for D.C. in 2025, keyed by family size (1 through 8 members).
# The thresholds are listed in order of family size; enumerate(..., start=1)
# attaches the family size to each one as its dictionary key.
poverty_lines = dict(enumerate(
    (15650, 21150, 26650, 32150, 37650, 43150, 48650, 54150),
    start=1,
))


def simulate_family_size(n, lam, max_size=8, rng=None):
    """Simulate family sizes from a Poisson distribution, clamped to [1, max_size].

    Parameters
    ----------
    n : int
        Number of family sizes to draw.
    lam : float
        Rate (mean) of the Poisson distribution before clamping.
    max_size : int, optional
        Largest family size allowed; larger draws are set to this value.
        Defaults to 8.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Random source to draw from. When omitted, NumPy's global random state
        (``np.random``) is used, so existing two-argument calls behave as before.

    Returns
    -------
    numpy.ndarray
        Integer array of length ``n`` with every entry in ``[1, max_size]``.

    Raises
    ------
    ValueError
        If ``max_size`` is smaller than 1.

    Notes
    -----
    Draws of 0 are raised to 1 rather than redrawn, so the result is a clamped
    Poisson sample, not a zero-truncated one.
    """
    if max_size < 1:
        raise ValueError("max_size must be at least 1")
    source = np.random if rng is None else rng
    sizes = source.poisson(lam, n)
    # Clamp both tails in one pass: 0 -> 1 and anything above max_size -> max_size.
    return np.clip(sizes, 1, max_size)


def poverty_line(family_size, poverty_lines):
    """Return the poverty line for a family size from a size -> threshold table.

    Parameters
    ----------
    family_size : int
        Number of people in the family (plain or NumPy integer).
    poverty_lines : dict
        Mapping from family size to poverty threshold.

    Returns
    -------
    The threshold stored for ``family_size``. A size that is not listed in the
    table (in practice, a size above the largest listed one) gets the threshold
    of the largest listed family size.

    Raises
    ------
    ValueError
        If ``poverty_lines`` is empty.
    """
    try:
        return poverty_lines[family_size]
    except KeyError:
        # The fallback is looked up only on a miss and is derived from the table
        # itself, so tables that do not happen to contain the key 8 still work.
        return poverty_lines[max(poverty_lines)]


# To compute the Sen Index, we need two components:
#   - The poverty gap index (I)
#   - The Gini coefficient computed only for the poor (G_p)
def gini_coefficient(x):
    """Compute the Gini coefficient of the values in ``x``.

    Uses the sorted-rank identity

        sum_i sum_j |x_i - x_j| = 2 * sum_i (2*i - n - 1) * x_(i)

    (``x_(i)`` being the i-th smallest value, i = 1..n), which gives the same
    quantity as the mean-absolute-difference definition
    ``sum|x_i - x_j| / (2 * n**2 * mean(x))`` without building an n-by-n matrix.

    Parameters
    ----------
    x : array_like
        Values to measure inequality over; flattened before use.

    Returns
    -------
    float
        The Gini coefficient. Returns 0.0 for empty input and for input whose
        values are all zero (no inequality to measure) instead of NaN.
    """
    values = np.sort(np.asarray(x, dtype=float).ravel())
    n = values.size
    if n == 0 or not values.any():
        return 0.0
    ranks = np.arange(1, n + 1)
    # (2*rank - n - 1) weights each sorted value by how many values lie below
    # it minus how many lie above it.
    return np.sum((2 * ranks - n - 1) * values) / (n * values.sum())


In [10]:

# --- Simulation settings ---------------------------------------------------
n_households = 1000        # households per simulated population
n_simulations = 1000       # number of simulated populations
SEED = 42                  # fixed so Restart & Run All reproduces the results
wealth_mu = 11.3           # log-scale mean of the lognormal wealth distribution
family_size_lambda = 2.5   # Poisson rate of the family size distribution
epsilon = 1e-6             # keeps the Watts logarithm finite for wealth near zero

# simulate_family_size and the draws below all use NumPy's global random state.
np.random.seed(SEED)

# --- Per-population results (one entry appended per simulated population) ---
headcount_ratios = []
poverty_gap_indices = []
watts_indices = []
sen_indices = []
sst_indices = []
shift_measures = []
gini_vals = []
means_ls = []
medians_ls = []
mins_ls = []
maxs_ls = []


for j in range(n_simulations):
    # Only the log-scale spread differs between populations.
    wealth_sigma = np.random.uniform(1, 3)

    wealth = np.random.lognormal(mean=wealth_mu, sigma=wealth_sigma, size=n_households)
    family_sizes = simulate_family_size(n_households, family_size_lambda)

    # Each household is compared against the line for its own family size.
    poverty_thresholds = np.array([poverty_line(size, poverty_lines) for size in family_sizes])

    # Identify poor households
    is_poor = wealth < poverty_thresholds

    # Headcount ratio H: share of households below their poverty line.
    headcount_ratio = np.mean(is_poor)

    # Normalised shortfall below the line; zero for non-poor households.
    poverty_gaps = np.where(is_poor, (poverty_thresholds - wealth) / poverty_thresholds, 0)
    # Poverty gap index: mean normalised shortfall over ALL households.
    poverty_gap_index = np.mean(poverty_gaps)

    # Watts index: mean log-ratio of poverty line to wealth, zero for non-poor.
    watts_values = np.where(is_poor, np.log(poverty_thresholds / (wealth + epsilon)), 0)
    watts_index = np.mean(watts_values)

    if is_poor.any():
        # Income gap ratio I: mean normalised shortfall among the POOR only.
        # (The poverty gap index above equals H * I.)
        I = np.mean(poverty_gaps[is_poor])
        # Gini of wealth among the poor only.
        G_p = gini_coefficient(wealth[is_poor])
        # Gini of the poverty gaps over the whole population (zeros included).
        G_gaps = gini_coefficient(poverty_gaps)
    else:
        # Nobody is poor: every poverty measure is zero, and the Gini of an
        # all-zero gap vector is not evaluated.
        I = 0.0
        G_p = 0.0
        G_gaps = 0.0

    # Sen index: S = H * (I + (1 - I) * G_p), with I the income gap ratio among
    # the poor. Substituting the population-wide poverty gap index for I would
    # shrink the gap term by an extra factor of H.
    sen_index = headcount_ratio * (I + (1 - I) * G_p)

    # Sen-Shorrocks-Thon index: SST = H * I * (1 + G_gaps), where G_gaps is the
    # Gini coefficient of the poverty gaps of the whole population.
    sen_shorrocks_thon_index = headcount_ratio * I * (1 + G_gaps)

    # Shift measure on log-wealth: distance of the mean below the maximum,
    # relative to the span from 0 to the maximum.
    w = np.log(wealth)
    shift = (np.max(w) - np.mean(w)) / (np.max(w) - 0)

    # Store results
    headcount_ratios.append(headcount_ratio)
    poverty_gap_indices.append(poverty_gap_index)
    watts_indices.append(watts_index)
    sen_indices.append(sen_index)
    sst_indices.append(sen_shorrocks_thon_index)
    shift_measures.append(shift)

    # Gini of the full wealth distribution
    gini = gini_coefficient(wealth)
    gini_vals.append(gini)

    means_ls.append(np.mean(wealth))
    medians_ls.append(np.median(wealth))
    mins_ls.append(np.min(wealth))
    maxs_ls.append(np.max(wealth))


# Convert lists to arrays for plotting

means_ls = np.array(means_ls)
medians_ls = np.array(medians_ls)
mins_ls = np.array(mins_ls)
maxs_ls = np.array(maxs_ls)

headcount_ratios = np.array(headcount_ratios)
poverty_gap_indices = np.array(poverty_gap_indices)
watts_indices = np.array(watts_indices)
sen_indices = np.array(sen_indices)
sst_indices = np.array(sst_indices)
shift_measures = np.array(shift_measures)
gini_vals = np.array(gini_vals)



In [11]:
# Report, for each per-population summary statistic, its mean and standard
# deviation across the simulated populations.
summary_stats = (
    ('Min income:', mins_ls),
    ('Mean income:', means_ls),
    ('Median income:', medians_ls),
    ('Max income:', maxs_ls),  # label must name the max, not repeat 'Median'
)

for label, values in summary_stats:
    x_var = np.mean(values)
    std = np.std(values)
    print(label, x_var, ', ±', std)


Min income: 527.833402220507 , ± 823.6327918775858
Mean income: 1407012.2452370487 , ± 2597753.3316892614
Median income: 80920.58067480307 , ± 6723.98275268386
Median income: 420445017.22338915 , ± 1907671528.4205089


In [18]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy import stats

# Predictor series for the x-axis and their axis labels. The two lists are
# parallel: x_labs[i] is the label used when plotting X_vals[i].
x_labs = [r"$\mathcal{S}$"]
X_vals = [shift_measures]

def sigmoid(x, L, x0, k, b):
    """Four-parameter logistic curve, used as the model for curve fitting.

    Evaluates ``L / (1 + exp(-k * (x - x0))) + b``: ``b`` is the lower
    asymptote, ``L`` the height of the rise above it, ``x0`` the midpoint
    and ``k`` the steepness. ``x`` may be a scalar or a NumPy array.
    """
    decay = np.exp(-k * (x - x0))
    return L / (1 + decay) + b

def _plot_linear_panel(ax, x, y, x_label, y_label, panel_letter, fs):
    """Draw one scatter panel with its least-squares line, slope/r² note and panel letter.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes to draw on.
    x, y : numpy.ndarray
        Predictor and response values, one point per simulated population.
    x_label, y_label : str
        Axis labels.
    panel_letter : str
        Bold letter drawn in the lower-right corner of the panel.
    fs : int
        Base font size; the y label and the annotation use smaller offsets of it.
    """
    ax.scatter(x, y, s=10, facecolors='0.8', edgecolors='0.2', linewidths=0.4)
    ax.set_xlabel(x_label, fontsize=fs)
    ax.set_ylabel(y_label, fontsize=fs - 3)

    slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
    ax.plot(x, slope * x + intercept, color='k', linewidth=1.5)
    ax.text(0.05, 0.95, f"Slope = {slope:.2f}\nr² = {r_value**2:.2f}",
            transform=ax.transAxes, fontsize=fs - 6, color='0.2', va='top')
    ax.text(0.92, 0.08, panel_letter, transform=ax.transAxes, ha='center', va='center',
            fontsize=18, fontweight='bold')
    # Small margin below the lowest point and above the highest one.
    ax.set_ylim(0.92 * np.min(y), 1.05 * np.max(y))


# savefig does not create missing directories, so make sure the target exists.
out_dir = 'Final_Figs/manuscript'
os.makedirs(out_dir, exist_ok=True)

for i, x_vals in enumerate(X_vals):
    x_vals = np.asarray(x_vals)
    fig = plt.figure(figsize=(6, 6))
    fs = 16
    x_lab = x_labs[i]

    # Panels A-C: each poverty measure against the predictor, with a linear fit.
    linear_panels = [
        (headcount_ratios, 'Headcount Ratio', 'A'),
        (poverty_gap_indices, 'Poverty Gap Index', 'B'),
        (sen_indices, 'Sen Index', 'C'),
    ]
    for position, (y_vals, y_lab, letter) in enumerate(linear_panels, start=1):
        _plot_linear_panel(fig.add_subplot(2, 2, position), x_vals, y_vals, x_lab, y_lab, letter, fs)

    # Panel D: Gini against the predictor, with a logistic (sigmoid) fit.
    ax = fig.add_subplot(2, 2, 4)
    g_vals = gini_vals
    ax.scatter(x_vals, g_vals, s=10, facecolors='0.8', edgecolors='0.2', linewidths=0.4)
    ax.set_xlabel(x_lab, fontsize=fs)
    ax.set_ylabel('Gini', fontsize=fs - 3)

    # Starting guess: rise ~ largest Gini, midpoint ~ median x, unit steepness,
    # offset ~ smallest Gini.
    p0 = [np.max(g_vals), np.median(x_vals), 1, np.min(g_vals)]
    popt, _pcov = curve_fit(sigmoid, x_vals, g_vals, p0, method='dogbox')

    # Generate smooth curve for plotting
    x_fit = np.linspace(np.min(x_vals), np.max(x_vals), 500)
    y_fit = sigmoid(x_fit, *popt)

    # R² of the sigmoid fit: 1 - SS_res / SS_tot, comparing the observed Gini
    # values with the fitted curve evaluated at the same x.
    y_pred = sigmoid(x_vals, *popt)
    ss_res = np.sum((g_vals - y_pred) ** 2)
    ss_tot = np.sum((g_vals - np.mean(g_vals)) ** 2)
    r_squared = 1 - (ss_res / ss_tot)

    ax.plot(x_fit, y_fit, color='k', linewidth=1.5)

    # Display parameters and R² on the plot
    ax.text(0.05, 0.95, f"L = {popt[0]:.2f}\nk = {popt[2]:.2f}\n$R^2$ = {r_squared:.2f}",
            transform=ax.transAxes, fontsize=fs - 6, color='0.2', va='top')
    ax.text(0.92, 0.08, 'D', transform=ax.transAxes, ha='center', va='center',
            fontsize=18, fontweight='bold')
    ax.set_ylim(0.92 * np.min(g_vals), 1.05 * np.max(g_vals))

    # Final figure adjustments
    fig.patch.set_facecolor('white')
    fig.subplots_adjust(wspace=0.48, hspace=0.45)

    # The first predictor keeps the original file name; any further predictor
    # gets an index suffix so that it does not overwrite the first figure.
    suffix = '' if i == 0 else f'_{i}'
    fig.savefig(f'{out_dir}/Fig5_Poverty{suffix}.jpg', bbox_inches='tight', format='jpg', dpi=600)
    fig.savefig(f'{out_dir}/Fig5_Poverty{suffix}.pdf', bbox_inches='tight', format='pdf', dpi=600)
    # Close this specific figure to release its memory.
    plt.close(fig)
