# Sizing A U.S. Social Wealth Fund

In [76]:
# Imports

from IPython.display import display, Markdown

import humanize as h
import math
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import sys

module_path = os.path.abspath(os.path.join('..'))

if module_path not in sys.path:
    sys.path.append(module_path)

from swf.datapackage import load


In [91]:
POP_WGT_FACTOR = 1 + 1/6

def display_m(value):
    display(Markdown(value))

def h_format(value, format='%.1f'):
    if len(str(abs(int(value)))) < 7:
        return h.intcomma(value)
    else:
        return h.intword(value, format)

def get_poverty_levels():
    df = pd.DataFrame({
        'household_size': [1, 2, 3, 4, 5, 6, 7, 8],
        'income_poverty_level': [11880, 16020, 20160, 24300, 28440, 32580, 36730, 40890]
    })
    df.set_index('household_size', inplace=True)
    
    return df

def get_distribution(df, column, quantiles=100):
    real_column = 'real_{}'.format(column)
    real_column_pct = 'real_{}_pct'.format(column)
    
    pdf = df[[column, 'wgt']].copy()
    pdf[real_column] = pdf[column] * pdf['wgt']
    pdf[real_column_pct] = (pdf[real_column] / pdf[real_column].sum() * 100)
    pdf.sort_values(column, inplace=True)

    pdf['population_pct'] = (pdf.wgt.cumsum() / pdf.wgt.sum() * 100)
    pdf['quantile'] = pd.cut(pdf['population_pct'].values, quantiles, labels=False) + 1
    pdf = pdf.groupby('quantile')[[real_column_pct]].sum()
    
    return pdf
    

In [5]:
# Load the survey data
descriptor = 'http://datahub.io/quidproquo/2016-survey-of-consumer-finances-summary-extract/datapackage.json'
df = load(descriptor)
df.set_index('Y1', inplace=True)
df.drop([df.columns[0]], axis=1, inplace=True)
print(len(df))
display(df.head())


31240


Unnamed: 0_level_0,YY1,wgt,hhsex,age,agecl,educ,edcl,married,kids,lf,...,LLOAN11,LLOAN12,nwcat,inccat,assetcat,ninccat,NINC2CAT,nwpctlecat,incpctlecat,nincpctlecat
Y1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
11,1,6427.136676,2,71,5,10,3,2,0,0,...,0,0,3,1,3,1,1,7,1,1
12,1,6428.350592,2,71,5,10,3,2,0,0,...,0,0,3,1,3,1,1,7,1,1
13,1,6414.477294,2,71,5,10,3,2,0,0,...,0,0,3,1,3,1,1,7,1,1
14,1,6428.487972,2,71,5,10,3,2,0,0,...,0,0,3,1,3,1,1,7,1,1
15,1,6425.256822,2,71,5,10,3,2,0,0,...,0,0,3,1,3,1,1,7,1,1


## U.S. Poverty Statistics

Our first step is to determine the number of households living below the poverty line. To calculate this, we will use the income poverty thresholds based on the [2016 HHS Poverty Guidlines](https://aspe.hhs.gov/computations-2016-poverty-guidelines/) (full HHS history can be found by [clicking here](https://aspe.hhs.gov/prior-hhs-poverty-guidelines-and-federal-register-references)) and are as follows:

In [30]:
pldf = get_poverty_levels()
display(pldf)

Unnamed: 0_level_0,income_poverty_level
household_size,Unnamed: 1_level_1
1,11880
2,16020
3,20160
4,24300
5,28440
6,32580
7,36730
8,40890


Since the survey data doesn't contain a column for household size we need to calculate it from existing columns. In addition, the survey data contains a `wgt` column that is used to estimate the true size of the population of households.

In [101]:
df['household_size'] = 1  # Default each family to one individual
df.loc[df.married == 2, 'household_size'] += 1  # Add additional individual for married persons
df['household_adults'] = df.household_size
df.loc[df.age < 55, 'household_size'] += df.kids  # Only add childern where head of household is less than 55 yrs
df['household_size_full'] = df.household_size
df.loc[df.age >= 55, 'household_size_full'] += df.kids # Add back children for full household count

us_pop = 323127513  # 2016 U.S. population (U.S. Census)
us_hh = 125820000  # 2016 number of U.S. households (U.S. Census)

us_pop_est = int((df.household_size_full * df.wgt * POP_WGT_FACTOR).sum())
us_hh_est = int(df.wgt.sum())

display_m('U.S. households (survey): **%s**' % h_format(us_hh_est))
display_m('U.S. households (census): **%s**' % h_format(us_hh))
display_m('U.S. population (survey): **%s**' % h_format(us_pop_est))
display_m('U.S. population (census): **%s**' % h_format(us_pop))

U.S. households (survey): **126.0 million**

U.S. households (census): **125.8 million**

U.S. population (survey): **322.7 million**

U.S. population (census): **323.1 million**

As we can see from above, the survey and the census counts are different by a small margin of error, which shows we are on the right track.

Now that we have the poverty levels and the household sizes, we can join the dataframes and find all the households that are below the poverty line.

In [103]:
pvdf = df[['wgt', 'income', 'age', 'household_size', 'household_adults']].copy()
pvdf['household_size'] = pvdf.household_size.clip(1, 8)
pvdf.reset_index(inplace=True)
pvdf.set_index('household_size', inplace=True)
pvdf = pvdf.join(pldf, how='inner')
pvdf = pvdf[pvdf.income < pvdf.income_poverty_level]
pvdf.reset_index(inplace=True)
pvdf.set_index('Y1', inplace=True)
display(pvdf.head())

pov_hh_est = int(pvdf.wgt.sum())
pov_hh_pct = pov_hh_est/us_hh_est * 100

pov_pop_est = int((pvdf.household_size * pvdf.wgt * POP_WGT_FACTOR).sum())
pov_pop_pct = pov_pop_est/us_pop_est * 100

ubd_tot = ((pvdf.income_poverty_level - pvdf.income) * pvdf.wgt).sum()
ubd_hh = int(ubd_tot / pov_hh_est)

display_m('U.S. households below the poverty line: **{} ({:.2f}%)**'.format(h_format(pov_hh_est), pov_hh_pct))
display_m('U.S. population below the poverty line: **{} ({:.2f}%)**'.format(h_format(pov_pop_est), pov_pop_pct))


Unnamed: 0_level_0,household_size,wgt,income,age,household_adults,income_poverty_level
Y1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
923,1.0,453.705373,0.0,76,1,11880
924,1.0,452.131876,0.0,76,1,11880
925,1.0,459.123915,0.0,76,1,11880
1411,1.0,231.8069,0.0,55,1,11880
1412,1.0,237.383646,0.0,55,1,11880


U.S. households below the poverty line: **15.9 million (12.61%)**

U.S. population below the poverty line: **48.3 million (14.98%)**

## Eliminating Poverty in The U.S.

Now that we have obtained the set of households below the poverty line, we can calculate the total amount of supplemental income we would need to add to those households in order to bring them above the poverty line in a given year.

In [117]:
pvdf['supplemental_income'] = pvdf.income_poverty_level - pvdf.income
inc_tot = (pvdf.supplemental_income * pvdf.wgt).sum()
inc_phh = int(inc_tot / pov_hh_est)
inc_pha = int(((pvdf.supplemental_income / pvdf.household_adults) * pvdf.wgt).sum() / pvdf.wgt.sum())

display_m('Total annual supplemental income: **$%s**' % h_format(inc_tot))
display_m('Average annual supplemental income per household: **$%s**' % h_format(inc_phh))
display_m('Average annual supplemental income per adult: **$%s**' % h_format(inc_pha))

Total annual supplemental income: **$106.2 billion**

Average annual supplemental income per household: **$6,689**

Average annual supplemental income per adult: **$3,755**

Now that we know the scope of the problem, we can calculate the optimal size of the a U.S. social wealth fund if our policy goal were to eliminate poverty. Here we will assume that the fund has a rate of return of **5%**.

In [124]:
r_rate = 0.05  # Rate of return for the fund
fund_size = inc_tot / r_rate

display_m('Fund size: **$%s**' % h_format(fund_size))

Fund size: **$2.1 trillion**

## Implementing a Universal Basic Dividend

Up to this point we've just considered the optimal size of a U.S. social wealth fund with the primary goal of eliminating poverty. Next, we want to consider the size of such a fund if we were to give every working age adult in the U.S. a universal basic dividend (UBD) that is on par with the annual supplemental income we calculated earlier. According to the [2016 U.S. Census](https://factfinder.census.gov/faces/tableservices/jsf/pages/productview.xhtml?pid=PEP_2016_PEPAGESEX&prodType=table), there were **249,485,228** people aged 18 and older.

In [125]:
pop_adult = 249485228
tot_ubd = inc_pha * pop_adult
fund_size_ubd = tot_ubd / r_rate

display_m('Total annual UBD: **$%s**' % h_format(tot_ubd))
display_m('Fund size: **$%s**' % h_format(fund_size_ubd))

Total annual UBD: **$936.8 billion**

Fund size: **$18.7 trillion**

## International Comparisons

We see, from the above calculation, that in order to support our policy goal of paying out a UBD that has the potential to end poverty in the U.S., we would need a social wealth fund that is absolutely enormous. While the size that the fund would need to be may seem hopelessly out of reach, we should really take into account the massive scale of the U.S. economy. To do this, we will take Norway and its soverign wealth fund and attempt to make an apples-to-apples comparison with our hypothetical U.S. social wealth fund.

First let's get some stats on Norway and its [wealth fund](https://en.wikipedia.org/wiki/List_of_countries_by_sovereign_wealth_funds):

In [135]:
no_pop = 5.233e6
no_gdp = 370.6e9
no_fund_size = 1.063e12

no_pc_gdp = int(no_gdp / no_pop)
no_pc_fund_share = int(no_fund_size / no_pop)

display_m('Norway population: **%s**' % h_format(no_pop))
display_m('Norway GDP: **$%s**' % h_format(no_gdp))
display_m('Norway fund size: **$%s**' % h_format(no_fund_size))
display_m('Norway per-capita GDP: **$%s**' % h_format(no_pc_gdp))
display_m('Norway per-capita fund share: **$%s**' % h_format(no_pc_fund_share))

Norway population: **5.2 million**

Norway GDP: **$370.6 billion**

Norway fund size: **$1.1 trillion**

Norway per-capita GDP: **$70,819**

Norway per-capita fund share: **$203,133**

And some stats on the U.S.:

In [136]:
us_gdp = 18.57e12

us_pc_gdp = int(us_gdp / us_pop)

display_m('U.S. population: **%s**' % h_format(us_pop))
display_m('U.S. GDP: **$%s**' % h_format(us_gdp))
display_m('U.S. per-capita GDP: **$%s**' % h_format(us_pc_gdp))

U.S. population: **323.1 million**

U.S. GDP: **$18.6 trillion**

U.S. per-capita GDP: **$57,469**

### Fund Size vs. GDP

Let's assume for simplicity's sake we can say that the size of a country's social wealth fund is related to its GDP. Using Norway, we get:

In [138]:
fund_gdp_ratio = no_fund_size / no_gdp

display_m('Fund-to-GDP ratio: **%s**' % fund_gdp_ratio)

Fund-to-GDP ratio: **2.8683216405828387**

For the U.S., this would give us a fund size of:

In [146]:
us_fund_size = fund_gdp_ratio * us_gdp

us_pc_fund_share = int(us_fund_size / us_pop)
us_tot_ubd = r_rate * us_fund_size
us_pc_ubd = int(us_tot_ubd / pop_adult)

display_m('U.S. fund size: **$%s**' % h_format(us_fund_size))
display_m('U.S. per-capita fund share: **$%s**' % h_format(us_pc_fund_share))
display_m('U.S. total annual UBD: **$%s**' % h_format(us_tot_ubd))
display_m('U.S. annual per-adult UBD: **$%s**' % h_format(us_pc_ubd))

U.S. fund size: **$53.3 trillion**

U.S. per-capita fund share: **$164,841**

U.S. total annual UBD: **$2.7 trillion**

U.S. annual per-adult UBD: **$10,674**

### Fund Size vs. Net National Wealth

This number is quite large, but one has to take into account that the total household wealth in the U.S. is also quite large. Let's now compare the size of Norway's fund with the [amount of private wealth held by Norway's citizens](https://www.ssb.no/en/inntekt-og-forbruk/statistikker/ifhus/aar/2016-12-16).

In [161]:
no_hh_net_wealth = 694.37e9
no_tot_net_wealth = no_hh_net_wealth + no_fund_size
no_fund_wealth_percent = no_fund_size / no_tot_net_wealth * 100

display_m('Norway houshold net wealth: **$%s**' % h_format(no_hh_net_wealth))
display_m('Norway total net wealth: **$%s**' % h_format(no_tot_net_wealth))
display_m('Norway fund-to-wealth percent: **{:.2f}%**'.format(no_fund_wealth_percent))

Norway houshold net wealth: **$694.4 billion**

Norway total net wealth: **$1.8 trillion**

Norway fund-to-wealth percent: **60.49%**

As we can see from the above calculation, Norway holds the majority of its nations wealth in its wealth fund. Let's compare this with a hypothetical U.S. social wealth fund with the same fund-to-wealth ratio as Norway. Since the U.S. does not have a similar fund, we will just use total household net worth as a proxy for total net wealth.

In [163]:
us_tot_net_wealth = (df.wgt * df.networth).sum()
us_fund_size_wp = (no_fund_wealth_percent / 100) * us_tot_net_wealth

display_m('U.S. total net wealth: **$%s**' % h_format(us_tot_net_wealth))
display_m('U.S. fund size: **$%s**' % h_format(us_fund_size_wp))

U.S. total net wealth: **$86.9 trillion**

U.S. fund size: **$52.5 trillion**

It appears that using either method to determine fund size, whether using a multiple of GDP or a percentage of total net wealth, we come up with roughly the same values.