In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
sns.set()
import csv
import matplotlib.pyplot as plt
from IPython.display import Image
from IPython.core.display import HTML 
import plotly.graph_objects as go

This analysis seeks to recreate a couple of charts/graphs from this publication done by the [Brookings Institute](https://www.brookings.edu/research/u-s-concentrated-poverty-in-the-wake-of-the-great-recession/)

### Table 1: Total Population and Poor Population in Extreme Poverty Tracts 

In [2]:
Image(url = 'https://www.brookings.edu/wp-content/uploads/2016/03/table-1a.jpg?w=2664&crop=0%2C0px%2C100%2C762px',width=750, height=750)

### Figure 1: Concentrated Poverty by Race and Ethnicity 

In [3]:
Image(url="https://www.brookings.edu/wp-content/uploads/2016/03/figure-1-1.jpg?w=1944&crop=0%2C0px%2C100%2C1685px",width=500, height=750)

Read in CSVs from Census Data Downloader

In [4]:
cp_2014 = pd.read_csv('usa_poverty_2014.csv')
cp_2017 = pd.read_csv('usa_poverty_2017.csv')
pd.set_option('display.max_columns', 999)


Clean up axis from first load in

In [5]:
cp_2014 = cp_2014.drop('Unnamed: 0',axis=1)
cp_2017 = cp_2017.drop('Unnamed: 0',axis=1)

In [7]:
cp_2014.columns

Index(['NAME_2014', 'tot_pop_2014', 'pov_total_est_2014',
       'pov_total_est_moe_2014', 'pov_total_est_inc_12months_2014',
       'pov_total_est_inc_12months_moe_2014', 'pov_total_est_white_2014',
       'pov_total_est_white_moe_2014', 'pov_total_est_inc_12months_white_2014',
       'pov_total_est_inc_12months_white_moe_2014', 'pov_total_est_black_2014',
       'pov_total_est_black_moe_2014', 'pov_total_est_inc_12months_black_2014',
       'pov_total_est_inc_12months_black_moe_2014', 'pov_total_est_anai_2014',
       'pov_total_est_anai_moe_2014', 'pov_total_est_inc_12months_anai_2014',
       'pov_total_est_inc_12months_anai_moe_2014', 'pov_total_est_asian_2014',
       'pov_total_est_asian_moe_2014', 'pov_total_est_inc_12months_asian_2014',
       'pov_total_est_inc_12months_asian_moe_2014', 'pov_total_est_nhopi_2014',
       'pov_total_est_nhopi_moe_2014', 'pov_total_est_inc_12months_nhopi_2014',
       'pov_total_est_inc_12months_nhopi_moe_2014', 'pov_total_est_sora_2014',
     

In [8]:
len(cp_2014)

72877

### Merge the Dataframes for analysis 

In [11]:
df = pd.merge(cp_2014,cp_2017, how='outer', on=['11_digit_code','11_digit_code'])

In [12]:
df['merge_worked'] = (df['NAME_2014']==df['NAME_2017'])
print(df['merge_worked']) == False

0         True
1         True
2         True
3         True
4         True
5         True
6         True
7         True
8         True
9         True
10        True
11        True
12        True
13        True
14        True
15        True
16        True
17        True
18        True
19        True
20        True
21        True
22        True
23        True
24        True
25        True
26        True
27        True
28        True
29        True
         ...  
72851     True
72852     True
72853     True
72854     True
72855     True
72856     True
72857     True
72858     True
72859     True
72860     True
72861     True
72862     True
72863     True
72864     True
72865     True
72866     True
72867     True
72868     True
72869     True
72870     True
72871     True
72872     True
72873     True
72874     True
72875     True
72876     True
72877    False
72878    False
72879    False
72880    False
Name: merge_worked, Length: 72881, dtype: bool


False

### Let's build a table!!!

Use this as documentation:https://plot.ly/python/table/


In [11]:
#sum_dict = df.sum(axis=0, skipna=True)

In [12]:
#sum_dict.to_dict()

In [13]:
#sum_dict

In [15]:
con_pov_20thresh_table = go.Figure(data=[go.Table(
    header=dict(values=['High Poverty Tracts', '2010-2014', '2013-2017','Change from 2010-14 to 2013-17'],
                align='left'),
    cells=dict(values=[['Total Population', 'Poor Population', 'Tracts', ' ', 'Share Total Population','Share Poor Population'], #1st column
                       ["84,659,519", "26,408,172", "22,100",[] , "8.6%", "55.4%"], # 2nd column
                       ["77,132,252", "23,470,285", "20,147",[] , "7.5%", "51.5%"],# 3rd column
                       ["-8.9%", "-11.1%", "-8.8%", [], "-12.8%", "-7.0%"]], # 4th Column
               align='left'))
])

con_pov_20thresh_table.show()


con_pov_40thresh_table = go.Figure(data=[go.Table(
    header=dict(values=['Extreme Poverty Census Tracts', '2010-2014', '2013-2017','Change from 2010-14 to 2013-17'],
                align='left'),
    cells=dict(values=[['Total Population', 'Poor Population', 'Tracts', ' ', 'Share Total Population','Share Poor Population'], #1st column
                       ["14,411,746", "7,014,112", "4,558",[] , "4.7%", "14.7%"], # 2nd column
                       ["11,174,761", "5,422,855", "3,629",[] , "3.6%", "11.9%"],
                       ["-22.5%", "-22.7%", "-20.4%", [], "-24.1%", "-19.0%"]], # 3rd column
               align='left'))
])

con_pov_40thresh_table.show()

In [60]:
df.to_csv('analyze_this.csv')

In [None]:
tableheaders_dict = {'Extreme Poverty Tracts', '2010-2014', '2013-2017'}

In [None]:
fig = go.Figure(data=[go.Table(

Build out variables for analysis: 
* first is find the percentage of those in poverty per census tract 
* next is to find the relative margin of error so that we see the magnitude of error in our analysis 

Add leading zeros to 11-digit FIPS that were lost in the CSV read-in

In [24]:
#cp_2014['11_digit_code'].astype(str)

0         1043964500
1         1043964600
2         1043964700
3         1043964800
4         1043964900
5         1043965000
6         1043965100
7         1043965200
8         1043965300
9         1043965401
10        1043965402
11        1043965500
12        1043965600
13        1043965700
14        1045020000
15        1045020100
16        1045020200
17        1045020300
18        1045020400
19        1045020500
20        1045020700
21        1045020801
22        1045020802
23        1045021101
24        1045021102
25        1045021200
26        1045021300
27        1045021400
28        1047956100
29        1047956201
            ...     
72847    56033000300
72848    56033000400
72849    56033000500
72850    56033000600
72851    56035000101
72852    56035000102
72853    56037970500
72854    56037970601
72855    56037970602
72856    56037970700
72857    56037970800
72858    56037970901
72859    56037970902
72860    56037970903
72861    56037971000
72862    56037971100
72863    5603

In [44]:
# fips_code = cp_2014['11_digit_code'].astype(str)

# fips_code = fips_code.str.zfill(11)

# fips_code
# #cp_2014['11_digit_code'] = fips_code

0        0        0         1043964500\n1         10439...
1        0        0         1043964500\n1         10439...
2        0        0         1043964500\n1         10439...
3        0        0         1043964500\n1         10439...
4        0        0         1043964500\n1         10439...
5        0        0         1043964500\n1         10439...
6        0        0         1043964500\n1         10439...
7        0        0         1043964500\n1         10439...
8        0        0         1043964500\n1         10439...
9        0        0         1043964500\n1         10439...
10       0        0         1043964500\n1         10439...
11       0        0         1043964500\n1         10439...
12       0        0         1043964500\n1         10439...
13       0        0         1043964500\n1         10439...
14       0        0         1043964500\n1         10439...
15       0        0         1043964500\n1         10439...
16       0        0         1043964500\n1         10439.

# 2014

In [38]:
df['pct_in_poverty_2014'] = (df['pov_total_est_inc_12months_2014'] / df['pov_total_est_2014'])

df['pct_in_pov_white_alone_2014'] = (df['pov_total_est_inc_12months_white_nothisplat_2014'] / df['pov_total_est_2014'])

df['pct_in_pov_black_alone_2014'] = (df['pov_total_est_black_2014'] / df['pov_total_est_2014'])

df['pct_in_pov_anai_alone_2014'] = (df['pov_total_est_anai_2014'] / df['pov_total_est_2014'])

df['pct_in_pov_asian_alone_2014'] = (df['pov_total_est_asian_2014'] / df['pov_total_est_2014'])

df['pct_in_pov_nhopi_alone_2014'] = (df['pov_total_est_nhopi_2014'] / df['pov_total_est_2014'])

df['pct_in_pov_sora_alone_2014'] = (df['pov_total_est_sora_2014'] / df['pov_total_est_2014'])

df['pct_in_pov_twomore_alone_2014'] =(df['pov_total_est_twomore_2014'] / df['pov_total_est_2014'])

df['pct_in_pov_hisplat_alone_2014'] = (df['pov_total_est_hisplat_2014'] / df['pov_total_est_2014'])


In [40]:
df['pct_in_poverty_2017'] = (df['pov_total_est_inc_12months_2017'] / df['pov_total_est_2017'])

df['pct_in_pov_white_alone_2017'] = (df['pov_total_est_inc_12months_white_nothisplat_2017'] / df['pov_total_est_2017'])

df['pct_in_pov_black_alone_2017'] = (df['pov_total_est_black_2017'] / df['pov_total_est_2017'])

df['pct_in_pov_anai_alone_2017'] = (df['pov_total_est_anai_2017'] / df['pov_total_est_2017'])

df['pct_in_pov_asian_alone_2017'] = (df['pov_total_est_asian_2017'] / df['pov_total_est_2017'])

df['pct_in_pov_nhopi_alone_2017'] = (df['pov_total_est_nhopi_2017'] / df['pov_total_est_2017'])

df['pct_in_pov_sora_alone_2017'] = (df['pov_total_est_sora_2017'] / df['pov_total_est_2017'])

df['pct_in_pov_twomore_alone_2017'] =(df['pov_total_est_twomore_2017'] / df['pov_total_est_2017'])

df['pct_in_pov_hisplat_alone_2017'] = (df['pov_total_est_hisplat_2017'] / df['pov_total_est_2017'])


In [49]:
df

Unnamed: 0,NAME_2014,NAME_2017,county_2014,county_2017,pov_total_est_2014,pov_total_est_2017,pov_total_est_anai_2014,pov_total_est_anai_2017,pov_total_est_anai_moe_2014,pov_total_est_anai_moe_2017,...,pct_in_pov_hisplat_alone_2014,pct_in_poverty_2017,pct_in_pov_white_alone_2017,pct_in_pov_black_alone_2017,pct_in_pov_anai_alone_2017,pct_in_pov_asian_alone_2017,pct_in_pov_nhopi_alone_2017,pct_in_pov_sora_alone_2017,pct_in_pov_twomore_alone_2017,pct_in_pov_hisplat_alone_2017
0,"Census Tract 9645, Cullman County, Alabama",,43.0,,4450.0,,83.0,,79.0,,...,0.002472,,,,,,,,,
1,"Census Tract 9646, Cullman County, Alabama",,43.0,,4293.0,,8.0,,12.0,,...,0.039366,,,,,,,,,
2,"Census Tract 9647, Cullman County, Alabama",,43.0,,4755.0,,10.0,,16.0,,...,0.068559,,,,,,,,,
3,"Census Tract 9648, Cullman County, Alabama",,43.0,,4472.0,,34.0,,55.0,,...,0.107111,,,,,,,,,
4,"Census Tract 9649, Cullman County, Alabama",,43.0,,6094.0,,103.0,,151.0,,...,0.069741,,,,,,,,,
5,"Census Tract 9650, Cullman County, Alabama",,43.0,,6057.0,,26.0,,38.0,,...,0.070167,,,,,,,,,
6,"Census Tract 9651, Cullman County, Alabama",,43.0,,3786.0,,8.0,,12.0,,...,0.080824,,,,,,,,,
7,"Census Tract 9652, Cullman County, Alabama",,43.0,,2483.0,,0.0,,11.0,,...,0.008055,,,,,,,,,
8,"Census Tract 9653, Cullman County, Alabama",,43.0,,4999.0,,41.0,,40.0,,...,0.039808,,,,,,,,,
9,"Census Tract 9654.01, Cullman County, Alabama",,43.0,,2326.0,,0.0,,11.0,,...,0.010318,,,,,,,,,


In [43]:
df_analysis = df[['NAME_2014','NAME_2017','county_2014', 'county_2017','state_2014','state_2017','tot_pop_2014',
                 'tot_pop_2017','tract_2014','tract_2017','pct_in_poverty_2014','pct_in_poverty_rmoe_2014',
                 'pct_in_pov_white_alone_2014','pct_in_pov_black_alone_2014','pct_in_pov_anai_alone_2014',
                 'pct_in_pov_asian_alone_2014','pct_in_pov_nhopi_alone_2014','pct_in_pov_sora_alone_2014',
                 'pct_in_pov_twomore_alone_2014','pct_in_pov_hisplat_alone_2014','pct_in_poverty_2017',
                 'pct_in_pov_white_alone_2017','pct_in_pov_black_alone_2017','pct_in_pov_anai_alone_2017',
                 'pct_in_pov_asian_alone_2017','pct_in_pov_nhopi_alone_2017','pct_in_pov_sora_alone_2017',
                 'pct_in_pov_twomore_alone_2017','pct_in_pov_hisplat_alone_2017']]

In [44]:
df_analysis

Unnamed: 0,NAME_2014,NAME_2017,county_2014,county_2017,state_2014,state_2017,tot_pop_2014,tot_pop_2017,tract_2014,tract_2017,...,pct_in_pov_hisplat_alone_2014,pct_in_poverty_2017,pct_in_pov_white_alone_2017,pct_in_pov_black_alone_2017,pct_in_pov_anai_alone_2017,pct_in_pov_asian_alone_2017,pct_in_pov_nhopi_alone_2017,pct_in_pov_sora_alone_2017,pct_in_pov_twomore_alone_2017,pct_in_pov_hisplat_alone_2017
0,"Census Tract 9645, Cullman County, Alabama",,43.0,,1.0,,4450.0,,964500.0,,...,0.002472,,,,,,,,,
1,"Census Tract 9646, Cullman County, Alabama",,43.0,,1.0,,4293.0,,964600.0,,...,0.039366,,,,,,,,,
2,"Census Tract 9647, Cullman County, Alabama",,43.0,,1.0,,4771.0,,964700.0,,...,0.068559,,,,,,,,,
3,"Census Tract 9648, Cullman County, Alabama",,43.0,,1.0,,4472.0,,964800.0,,...,0.107111,,,,,,,,,
4,"Census Tract 9649, Cullman County, Alabama",,43.0,,1.0,,6401.0,,964900.0,,...,0.069741,,,,,,,,,
5,"Census Tract 9650, Cullman County, Alabama",,43.0,,1.0,,6314.0,,965000.0,,...,0.070167,,,,,,,,,
6,"Census Tract 9651, Cullman County, Alabama",,43.0,,1.0,,3808.0,,965100.0,,...,0.080824,,,,,,,,,
7,"Census Tract 9652, Cullman County, Alabama",,43.0,,1.0,,2515.0,,965200.0,,...,0.008055,,,,,,,,,
8,"Census Tract 9653, Cullman County, Alabama",,43.0,,1.0,,4999.0,,965300.0,,...,0.039808,,,,,,,,,
9,"Census Tract 9654.01, Cullman County, Alabama",,43.0,,1.0,,2727.0,,965401.0,,...,0.010318,,,,,,,,,


In [28]:
Z_CRIT = 1.645