# Minimum Universal Basic Dividend

In [69]:
# Imports

from IPython.display import display, HTML

import humanize as h
import math
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from swf.datapackage import load


In [70]:
def h_format(value, format='%.1f'):
    """Render a number as human-friendly text.

    Values whose integer part has fewer than seven digits are returned with
    thousands separators (``humanize.intcomma``); anything larger is spelled
    out in words (``humanize.intword``) using ``format`` for the numeric part.

    Parameters:
        value: the number to render; must be convertible with ``int()``.
        format: printf-style format forwarded to ``humanize.intword``.

    Returns:
        The formatted string.
    """
    digit_count = len(str(abs(int(value))))
    if digit_count >= 7:
        return h.intword(value, format)
    return h.intcomma(value)

def get_poverty_levels():
    """Return the poverty-level lookup table as a DataFrame.

    The frame has one row per household size (1 through 8) and two columns:
    ``household_size`` and ``income_poverty_level``. It carries the default
    integer index, so row position is household size minus one.
    """
    rows = [
        (1, 12060),
        (2, 16240),
        (3, 20420),
        (4, 24600),
        (5, 28780),
        (6, 32960),
        (7, 37140),
        (8, 41320),
    ]
    return pd.DataFrame(rows, columns=['household_size', 'income_poverty_level'])

def get_distribution(df, column, quantiles=100):
    """Share of the weighted total of ``column`` held by each population bin.

    Each row's value is multiplied by its ``wgt`` and expressed as a
    percentage of the weighted total. Rows are then ordered by ``column``,
    the cumulative weighted population percentage is cut into ``quantiles``
    equal-width bins, and the percentage shares are summed per bin.

    Parameters:
        df: DataFrame containing ``column`` and a ``wgt`` column.
        column: name of the value column to distribute.
        quantiles: number of bins passed to ``pd.cut``.

    Returns:
        DataFrame indexed by ``quantile`` (bin numbers starting at 1) with a
        single column ``real_<column>_pct``. The input frame is not modified.
    """
    weighted_col = 'real_{}'.format(column)
    share_col = 'real_{}_pct'.format(column)

    dist = df[[column, 'wgt']].copy()
    dist[weighted_col] = dist[column] * dist['wgt']
    dist[share_col] = dist[weighted_col] / dist[weighted_col].sum() * 100
    dist = dist.sort_values(column)

    # Cumulative share of the weighted population, in ascending order of `column`
    cumulative_pct = dist['wgt'].cumsum() / dist['wgt'].sum() * 100
    dist['population_pct'] = cumulative_pct
    # labels=False yields 0-based bin codes; shift so bins are numbered from 1
    dist['quantile'] = pd.cut(cumulative_pct.values, quantiles, labels=False) + 1

    return dist.groupby('quantile')[[share_col]].sum()
    

In [71]:
# Load the survey data from its data package descriptor and index it by Y1
descriptor = 'http://datahub.io/quidproquo/2016-survey-of-consumer-finances-summary-extract/datapackage.json'
df = load(descriptor)
df = df.set_index('Y1')
# Discard the first remaining column (presumably a leftover row counter
# from the export - TODO confirm against the raw data package)
df = df.drop(columns=[df.columns[0]])
print(len(df))
display(df.head())


31240


Unnamed: 0_level_0,YY1,wgt,hhsex,age,agecl,educ,edcl,married,kids,lf,...,LLOAN11,LLOAN12,nwcat,inccat,assetcat,ninccat,NINC2CAT,nwpctlecat,incpctlecat,nincpctlecat
Y1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
11,1,6427.136676,2,71,5,10,3,2,0,0,...,0,0,3,1,3,1,1,7,1,1
12,1,6428.350592,2,71,5,10,3,2,0,0,...,0,0,3,1,3,1,1,7,1,1
13,1,6414.477294,2,71,5,10,3,2,0,0,...,0,0,3,1,3,1,1,7,1,1
14,1,6428.487972,2,71,5,10,3,2,0,0,...,0,0,3,1,3,1,1,7,1,1
15,1,6425.256822,2,71,5,10,3,2,0,0,...,0,0,3,1,3,1,1,7,1,1


## Population Poverty Levels

Our first step is to figure out how much income needs to be added to households living below the poverty line in order to bring those households above the poverty line. The poverty line depends on household size and is based on the [Federal Poverty Levels](https://www.healthcare.gov/glossary/federal-poverty-level-FPL/) (FPLs), which are as follows:

{{pldf = get_poverty_levels()}}
{{display(HTML(pldf.to_html(index=False)))}}

Since the survey data doesn't contain a column for household size we need to calculate it from existing columns:


In [72]:
# Reference totals to compare the survey-based estimates against
us_pop = 323127513
us_hh = 125820000

# Build household size: one person per household to start with...
df['household_size'] = 1
# ...plus one more where the head is married (married == 1)...
df.loc[df['married'].eq(1), 'household_size'] += 1
# ...plus the children, but only when the head is younger than 55
df.loc[df['age'].lt(55), 'household_size'] += df['kids']

# Weighted totals implied by the survey
us_hh_est = int(df['wgt'].sum())
us_pop_est = int((df['household_size'] * df['wgt']).sum())

# Gap between the reference figures and the survey-based estimates
us_hh_est_delta = us_hh - us_hh_est
us_pop_est_delta = us_pop - us_pop_est

The survey weights extrapolate the sample to an estimated **{{h_format(us_hh_est)}}** U.S. households, while the census estimates the number to be **{{h_format(us_hh)}}**, a difference of **{{h_format(us_hh_est_delta)}}**. Using the newly calculated household sizes, we can also check whether we are in the neighborhood of the correct estimate for the total U.S. population of **{{h_format(us_pop)}}**. Our calculation returns **{{h_format(us_pop_est)}}**, which is off by **{{h_format(us_pop_est_delta)}}**.

Now that we have the poverty levels and the household sizes, we can find all the households that are below the poverty line and calculate the total amount of income we would need to add to those households to bring them above the poverty line.

In [73]:
# Poverty thresholds keyed by household size.
# DataFrame.join(other, on=...) matches the caller's column against the INDEX
# of `other`, so the lookup table has to be indexed by household_size. Joining
# against its default 0..7 index would hand every household the threshold for
# one person more and silently drop households of size 8.
# Built here from get_poverty_levels() so the cell runs on a fresh kernel
# without relying on a variable assigned inside a markdown template.
poverty_levels = get_poverty_levels().set_index('household_size')

pvdf = df[['wgt', 'income', 'age', 'household_size']].copy()
# The threshold table only covers sizes 1-8; larger households are treated as 8
pvdf['household_size'] = pvdf.household_size.clip(1, 8)
pvdf = pvdf.join(poverty_levels, on='household_size', how='inner')

# Keep only the households whose income falls below their threshold
pvdf = pvdf[pvdf.income < pvdf.income_poverty_level]

# Weighted number of households below the poverty line, and their share of all households
pov_hh_est = pvdf.wgt.sum()
pov_hh_pct = pov_hh_est/us_hh_est * 100
# Weighted total income shortfall, and the average shortfall per poor household
ubd_tot = ((pvdf.income_poverty_level - pvdf.income) * pvdf.wgt).sum()
ubd_hh = int(ubd_tot / pov_hh_est)


The estimate of the number of households below the poverty line is **{{h_format(pov_hh_est)}}**, or about **{{'{:.2f}'.format(pov_hh_pct)}}%** of all U.S. households. To bring those households out of poverty, the total amount of income that would have to be added is **\${{h_format(ubd_tot)}}**, or on average **\${{h_format(ubd_hh)}}** per household below the poverty line on an annual basis.


In [74]:
# Assumed rate of return for the fund, expressed as a fraction (0.05 = 5%)
r_rate = 0.05  # Rate of return for the fund
# Fund principal whose return at r_rate equals ubd_tot, the total income shortfall
fund_size = ubd_tot / r_rate

Now that we know the scope of the problem, we can calculate the optimal size of the Social Wealth Fund if our policy goal were to eliminate poverty in the U.S. Assuming a yield of **{{r_rate*100}}%**, the size of the fund would need to be **\${{h_format(fund_size)}}**.