# Global Lorenz Curve Fitting Example

This notebook demonstrates how to fit Lorenz curves at country level and aggregate to global distributions.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from global_lorenz import (
    fit_country_lorenz_curves,
    fit_global_lorenz,
    lorenz_1param,
    lorenz_2param,
    lorenz_3param,
)
from global_lorenz.country_fitting import read_country_data

## 1. Load Country Data

Load country-level income distribution data from an Excel file.

In [None]:
# Read the country-level income table from the Excel workbook
data_df = read_country_data('data/realistic_country_data.xlsx')

# Report how many rows were read, then preview the top of the table
print(f"Loaded {len(data_df)} countries")
data_df.head()

## 2. Fit Country-Level Lorenz Curves

Fit Lorenz curves to each country's income distribution.

In [None]:
# Decile income columns are named D1 through D10
income_cols = ['D' + str(decile) for decile in range(1, 11)]

# Fit the 2-parameter Lorenz form to every row of the country table
country_results = fit_country_lorenz_curves(data_df, income_cols, n_params=2)

# Headline statistics over the fitted Gini coefficients
print(f"Successfully fitted {len(country_results)} countries")
print(f"Mean Gini: {country_results['gini'].mean():.3f}")
print(f"Median Gini: {country_results['gini'].median():.3f}")

# Preview the per-country fit results
country_results.head()

## 3. Visualize Country Lorenz Curves

Plot some example country Lorenz curves.

In [None]:
# Select a few countries to plot (one panel each in a 2x2 grid)
countries_to_plot = ['United States', 'China', 'Brazil', 'Germany']

fig, axes = plt.subplots(2, 2, figsize=(12, 10))
axes = axes.flatten()

# The population grid is identical for every panel, so build it once
p = np.linspace(0, 1, 100)

for ax, country in zip(axes, countries_to_plot):
    # Look up the fitted row for this country
    matches = country_results[country_results['country'] == country]
    if matches.empty:
        # The country is not present in the fitted results; leave a labelled
        # placeholder instead of failing the whole cell with an IndexError.
        ax.set_title(f"{country}\n(no fitted curve available)")
        ax.axis('off')
        continue

    row = matches.iloc[0]
    params = (row['param_1'], row['param_2'])

    # Evaluate the fitted 2-parameter Lorenz curve on the grid
    L = lorenz_2param(p, *params)

    ax.plot(p, L, 'b-', label='Fitted Lorenz curve', linewidth=2)
    ax.plot([0, 1], [0, 1], 'k--', label='Perfect equality', alpha=0.5)

    ax.set_xlabel('Cumulative population fraction')
    ax.set_ylabel('Cumulative income fraction')
    # Single-backslash "\n" so the Gini value appears on a second title line
    ax.set_title(f"{country}\nGini = {row['gini']:.3f}")
    ax.legend()
    ax.grid(True, alpha=0.3)
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)

plt.tight_layout()
plt.show()

## 4. Fit Global Lorenz Curve

Aggregate country-level distributions to create a global Lorenz curve.

In [None]:
# Aggregate the country fits and fit a 2-parameter global Lorenz curve
global_fit = fit_global_lorenz(country_results, n_params_country=2, n_params_global=2)

# The result unpacks into parameters, a curve function, the Gini, and the aggregated data
global_params, global_lorenz_func, global_gini, global_data = global_fit

print(f"\nGlobal Gini coefficient: {global_gini:.4f}")
print(f"Global Lorenz curve parameters: {global_params}")

## 5. Visualize Global Lorenz Curve

In [None]:
from global_lorenz.global_aggregation import global_distribution_to_lorenz

# Turn the aggregated global distribution into Lorenz-curve points
p_data, L_data = global_distribution_to_lorenz(global_data)

fig, ax = plt.subplots(figsize=(10, 8))

# Scatter every 10th aggregated point so the markers stay readable
every_tenth = slice(None, None, 10)
ax.scatter(p_data[every_tenth], L_data[every_tenth], c='red', s=30, alpha=0.5, label='Aggregated data', zorder=3)

# Fitted global curve evaluated on a 200-point grid
p_smooth = np.linspace(0, 1, 200)
L_smooth = lorenz_2param(p_smooth, *global_params)
ax.plot(p_smooth, L_smooth, 'b-', label='Fitted global Lorenz curve', linewidth=2)

# 45-degree reference line
ax.plot([0, 1], [0, 1], 'k--', label='Perfect equality', alpha=0.5)

# Titles and labels
ax.set_title(f'Global Lorenz Curve (Gini = {global_gini:.3f})', fontsize=14, fontweight='bold')
ax.set_xlabel('Cumulative population fraction', fontsize=12)
ax.set_ylabel('Cumulative income fraction', fontsize=12)
ax.legend(fontsize=10)

# Both axes span the unit interval
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
ax.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

## 6. Compare Different Lorenz Curve Forms

Fit the 1-, 2-, and 3-parameter forms and compare the resulting mean country and global Gini coefficients.

In [None]:
# Repeat the country-level and global fits for each parameter count
results = {}

for n_params in (1, 2, 3):
    # Country-level fits with this functional form
    country_res = fit_country_lorenz_curves(data_df, income_cols, n_params=n_params)

    # Global fit using the same parameter count at both levels
    fit_out = fit_global_lorenz(country_res, n_params_country=n_params, n_params_global=n_params)
    global_p, global_func, global_g, global_d = fit_out

    # Keep the pieces needed for the comparison
    summary = {
        'country_results': country_res,
        'global_params': global_p,
        'global_gini': global_g,
        'mean_country_gini': country_res['gini'].mean(),
    }
    results[n_params] = summary

    print(f"\n{n_params}-parameter form:")
    print(f"  Mean country Gini: {results[n_params]['mean_country_gini']:.4f}")
    print(f"  Global Gini: {results[n_params]['global_gini']:.4f}")

## 7. Income Distribution Analysis

Plot the cumulative population fraction against income threshold for the aggregated global distribution.

In [None]:
# Pull the two series to plot out of the aggregated global data
thresholds = global_data['income_threshold']
pop_fraction = global_data['population_fraction']

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(thresholds, pop_fraction, 'b-', linewidth=2)

# Income thresholds go on a logarithmic x axis
ax.set_xscale('log')
ax.grid(True, alpha=0.3)

ax.set_title('Global Income Distribution', fontsize=14, fontweight='bold')
ax.set_xlabel('Income Threshold (PPP $)', fontsize=12)
ax.set_ylabel('Cumulative Population Fraction', fontsize=12)

fig.tight_layout()
plt.show()

## Summary

This notebook demonstrated:
1. Loading country-level income distribution data
2. Fitting Lorenz curves at the country level
3. Visualizing country-level inequality
4. Aggregating to global income distribution
5. Fitting and visualizing global Lorenz curves
6. Comparing different functional forms
7. Plotting the global cumulative income distribution

The package supports 1, 2, and 3-parameter Lorenz curve forms for flexible modeling of income inequality at both country and global levels.