In [2]:
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.set_option('display.max_columns', 999)
import numpy as np
import csv
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from chart_studio.plotly import plot, iplot
import plotly.figure_factory as ff
import sankey
import seaborn as sns
from scipy.stats import t
from scipy.stats import ttest_ind
from datascience import *

Read in CSVs from Census Data Downloader outputs

In [3]:
cp_2014 = pd.read_csv('usa_poverty_2014.csv')
cp_2017 = pd.read_csv('usa_poverty_2017.csv')

Clean up axis from first load in

In [4]:
cp_2014 = cp_2014.drop('Unnamed: 0',axis=1)
cp_2017 = cp_2017.drop('Unnamed: 0',axis=1)

Read in Crosswalk document for 2010 Census Tracts

In [5]:
x_walk = pd.read_excel('TractXwalk_2010.xlsx')

Read in State and County FIPS ID keys

In [6]:
state_key = pd.read_csv('state_key.csv')
county_key = pd.read_csv('county_fips.csv')

Making sure that the datasets have an equal shape

In [7]:
cp_2014.shape

(73056, 56)

In [8]:
cp_2017.shape

(73056, 56)

Giving both datasets an ID Key (11-Digit FIPS code) as a string to ensure that there are 11 digits. 

In [9]:
cp_2014['11_digit_code'] = cp_2014['11_digit_code'].astype(str)

In [10]:
cp_2014['11_digit_code'] = cp_2014['11_digit_code'].astype(str).str.zfill(11)

In [11]:
cp_2017['11_digit_code'] = cp_2017['11_digit_code'].astype(str)

In [12]:
cp_2017['11_digit_code'] = cp_2017['11_digit_code'].str.zfill(11)

### Merge the Dataframes for analysis 

In [13]:
df = pd.merge(cp_2014,cp_2017, how='outer', on=['11_digit_code','11_digit_code'])

In [14]:
len(df)

73056

In [15]:
x_walk = x_walk.rename(columns={"fips": "11_digit_code"})

In [16]:
x_walk['11_digit_code'] = x_walk['11_digit_code'].astype(str)

In [17]:
x_walk['11_digit_code'] = x_walk['11_digit_code'].str.zfill(11)

In [18]:
x_walk.head()

Unnamed: 0,11_digit_code,cbsa,metro,geotype
0,1001020100,33860,"Montgomery, AL",3
1,1001020200,33860,"Montgomery, AL",3
2,1001020300,33860,"Montgomery, AL",3
3,1001020400,33860,"Montgomery, AL",3
4,1001020500,33860,"Montgomery, AL",3


In [19]:
df = pd.merge(df,x_walk, how='outer', on=['11_digit_code','11_digit_code'])

In [20]:
df.head()

Unnamed: 0,NAME14,tot_pop14,tot_pov14,moe_pov14,poor_pov14,poor_moe_pov14,tot_wht_pov14,moe_wht_pov14,poor_wht_pov14,poor_moe_wht_pov14,tot_blk_pov14,moe_blk_pov14,poor_blk_pov14,poor_moe_blk_pov14,tot_anai_pov14,moe_anai_pov14,poor_anai_pov14,poor_moe_anai_pov14,tot_asn_pov14,moe_asn_pov14,poor_asn_pov14,poor_moe_asn_pov14,tot_nhopi_pov14,moe_nhopi_pov14,poor_nhopi_pov14,poor_moe_nhopi_pov14,tot_sora_pov14,moe_sora_pov14,poor_sora_pov14,poor_moe_sora_pov14,tot_twom_pov14,moe_twom_pov14,poor_twom_pov14,poor_moe_twom_pov14,tot_nhw_pov14,moe_nhw_pov14,poor_nhw_pov14,poor_moe_nhw_pov14,tot_hla_pov14,moe_hla_pov14,poor_hla_pov14,poor_moe_hla_pov14,state14,county14,tract14,pct_pov14,pct_nhw_pov14,pct_blk_pov14,pct_anai_pov14,pct_asn_pov14,pct_nhopi_pov14,pct_sora_pov14,pct_twom_pov14,pct_hla_pov14,conc_chk_x,11_digit_code,NAME17,tot_pop17,tot_pov17,moe_pov17,poor_pov17,poor_moe_pov17,tot_wht_pov17,moe_wht_pov17,poor_wht_pov17,poor_moe_wht_pov17,tot_blk_pov17,moe_blk_pov17,poor_blk_pov17,poor_moe_blk_pov17,tot_anai_pov17,moe_anai_pov17,poor_anai_pov17,poor_moe_anai_pov17,tot_asn_pov17,moe_asn_pov17,poor_asn_pov17,poor_moe_asn_pov17,tot_nhopi_pov17,moe_nhopi_pov17,poor_nhopi_pov17,poor_moe_nhopi_pov17,tot_sora_pov17,moe_sora_pov17,poor_sora_pov17,poor_moe_sora_pov17,tot_twom_pov17,moe_twom_pov17,poor_twom_pov17,poor_moe_twom_pov17,tot_nhw_pov17,moe_nhw_pov17,poor_nhw_pov17,poor_moe_nhw_pov17,tot_hla_pov17,moe_hla_pov17,poor_hla_pov17,poor_moe_hla_pov17,state17,county17,tract17,pct_pov17,pct_nhw_pov17,pct_blk_pov17,pct_anai_pov17,pct_asn_pov17,pct_nhopi_pov17,pct_sora_pov17,pct_twom_pov17,pct_hla_pov17,conc_chk_y,cbsa,metro,geotype
0,"Census Tract 9645, Cullman County, Alabama",4450,4450,372,771,303,4251,402,653,277,14,22,14,22,83,79,38,58,0,11,0,0,0,0,0,11,0,11,0,11,102,94,66,77,4240,417,642,284,11,60,11,60,1,43,964500,0.173258,0.14427,0.003146,0.008539,0.0,0.0,0.0,0.014831,0.002472,0,1043964500,"Census Tract 9645, Cullman County, Alabama",4613,4600,397,685,256,4380,397,516,202,9,17,9,17,51,82,0,11,0,11,0,0,0,0,0,11,0,11,0,11,160,149,160,149,4380,397,516,202,0,11,0,11,1,43,964500,0.148913,0.112174,0.001957,0.0,0.0,0.0,0.0,0.034783,0.0,0,01 non-metropolitan remainder,Alabama Rural Area,4
1,"Census Tract 9646, Cullman County, Alabama",4293,4293,400,617,333,4150,432,617,333,0,11,0,11,8,12,0,11,0,11,0,0,0,0,0,11,135,178,0,11,0,11,0,11,4116,435,583,329,169,187,34,52,1,43,964600,0.143722,0.135802,0.0,0.0,0.0,0.0,0.0,0.0,0.00792,0,1043964600,"Census Tract 9646, Cullman County, Alabama",3831,3831,428,443,206,3707,426,443,206,16,20,0,11,19,34,0,11,0,11,0,0,0,0,0,11,76,118,0,11,13,17,0,11,3605,421,443,206,178,182,0,11,1,43,964600,0.115636,0.115636,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,01 non-metropolitan remainder,Alabama Rural Area,4
2,"Census Tract 9647, Cullman County, Alabama",4771,4755,416,638,192,4569,434,510,174,77,119,77,119,10,16,0,11,10,17,0,0,0,0,0,11,56,88,51,88,33,57,0,11,4376,409,457,162,326,193,181,131,1,43,964700,0.134175,0.096109,0.016193,0.0,0.0,0.0,0.010726,0.0,0.038065,0,1043964700,"Census Tract 9647, Cullman County, Alabama",5272,5222,421,640,257,4944,505,502,198,0,16,0,16,6,9,0,16,73,67,10,10,0,0,0,16,152,154,128,154,47,76,0,16,4510,384,451,184,633,268,179,164,1,43,964700,0.122558,0.086365,0.0,0.0,0.001915,0.0,0.024512,0.0,0.034278,0,01 non-metropolitan remainder,Alabama Rural Area,4
3,"Census Tract 9648, Cullman County, Alabama",4472,4472,413,1263,444,4156,434,1137,424,23,28,16,26,34,55,0,11,9,18,9,9,0,0,0,11,139,161,8,14,111,115,93,111,3748,399,810,291,479,338,343,326,1,43,964800,0.282424,0.181127,0.003578,0.0,0.002013,0.0,0.001789,0.020796,0.076699,1,1043964800,"Census Tract 9648, Cullman County, Alabama",4481,4471,385,1191,390,4302,383,1157,389,0,11,0,11,0,11,0,11,0,11,0,0,0,0,0,11,63,93,0,11,106,87,34,44,3902,372,882,342,400,262,275,256,1,43,964800,0.266383,0.197271,0.0,0.0,0.0,0.0,0.0,0.007605,0.061507,1,01 non-metropolitan remainder,Alabama Rural Area,4
4,"Census Tract 9649, Cullman County, Alabama",6401,6094,488,1190,498,5907,488,1118,498,76,70,70,70,103,151,0,16,6,9,0,0,0,0,0,16,2,5,2,5,0,16,0,16,5484,471,937,486,425,287,183,235,1,43,964900,0.195274,0.153758,0.011487,0.0,0.0,0.0,0.000328,0.0,0.03003,0,1043964900,"Census Tract 9649, Cullman County, Alabama",6526,6275,458,803,297,5978,415,717,287,77,83,74,82,49,78,0,16,66,59,0,0,0,0,0,16,12,19,12,19,93,134,0,16,5731,431,704,283,259,210,25,22,1,43,964900,0.127968,0.112191,0.011793,0.0,0.0,0.0,0.001912,0.0,0.003984,0,01 non-metropolitan remainder,Alabama Rural Area,4


In [21]:
state_key.head()

Unnamed: 0,id,state_name
0,1,Alabama
1,2,Alaska
2,4,Arizona
3,5,Arkansas
4,6,California


In [22]:
state_key = state_key.rename(columns={'id':'state17'})

In [23]:
df = pd.merge(df,state_key, how='outer', on=['state17'])

In [24]:
df.head()

Unnamed: 0,NAME14,tot_pop14,tot_pov14,moe_pov14,poor_pov14,poor_moe_pov14,tot_wht_pov14,moe_wht_pov14,poor_wht_pov14,poor_moe_wht_pov14,tot_blk_pov14,moe_blk_pov14,poor_blk_pov14,poor_moe_blk_pov14,tot_anai_pov14,moe_anai_pov14,poor_anai_pov14,poor_moe_anai_pov14,tot_asn_pov14,moe_asn_pov14,poor_asn_pov14,poor_moe_asn_pov14,tot_nhopi_pov14,moe_nhopi_pov14,poor_nhopi_pov14,poor_moe_nhopi_pov14,tot_sora_pov14,moe_sora_pov14,poor_sora_pov14,poor_moe_sora_pov14,tot_twom_pov14,moe_twom_pov14,poor_twom_pov14,poor_moe_twom_pov14,tot_nhw_pov14,moe_nhw_pov14,poor_nhw_pov14,poor_moe_nhw_pov14,tot_hla_pov14,moe_hla_pov14,poor_hla_pov14,poor_moe_hla_pov14,state14,county14,tract14,pct_pov14,pct_nhw_pov14,pct_blk_pov14,pct_anai_pov14,pct_asn_pov14,pct_nhopi_pov14,pct_sora_pov14,pct_twom_pov14,pct_hla_pov14,conc_chk_x,11_digit_code,NAME17,tot_pop17,tot_pov17,moe_pov17,poor_pov17,poor_moe_pov17,tot_wht_pov17,moe_wht_pov17,poor_wht_pov17,poor_moe_wht_pov17,tot_blk_pov17,moe_blk_pov17,poor_blk_pov17,poor_moe_blk_pov17,tot_anai_pov17,moe_anai_pov17,poor_anai_pov17,poor_moe_anai_pov17,tot_asn_pov17,moe_asn_pov17,poor_asn_pov17,poor_moe_asn_pov17,tot_nhopi_pov17,moe_nhopi_pov17,poor_nhopi_pov17,poor_moe_nhopi_pov17,tot_sora_pov17,moe_sora_pov17,poor_sora_pov17,poor_moe_sora_pov17,tot_twom_pov17,moe_twom_pov17,poor_twom_pov17,poor_moe_twom_pov17,tot_nhw_pov17,moe_nhw_pov17,poor_nhw_pov17,poor_moe_nhw_pov17,tot_hla_pov17,moe_hla_pov17,poor_hla_pov17,poor_moe_hla_pov17,state17,county17,tract17,pct_pov17,pct_nhw_pov17,pct_blk_pov17,pct_anai_pov17,pct_asn_pov17,pct_nhopi_pov17,pct_sora_pov17,pct_twom_pov17,pct_hla_pov17,conc_chk_y,cbsa,metro,geotype,state_name
0,"Census Tract 9645, Cullman County, Alabama",4450,4450,372,771,303,4251,402,653,277,14,22,14,22,83,79,38,58,0,11,0,0,0,0,0,11,0,11,0,11,102,94,66,77,4240,417,642,284,11,60,11,60,1,43,964500,0.173258,0.14427,0.003146,0.008539,0.0,0.0,0.0,0.014831,0.002472,0,1043964500,"Census Tract 9645, Cullman County, Alabama",4613,4600,397,685,256,4380,397,516,202,9,17,9,17,51,82,0,11,0,11,0,0,0,0,0,11,0,11,0,11,160,149,160,149,4380,397,516,202,0,11,0,11,1,43,964500,0.148913,0.112174,0.001957,0.0,0.0,0.0,0.0,0.034783,0.0,0,01 non-metropolitan remainder,Alabama Rural Area,4,Alabama
1,"Census Tract 9646, Cullman County, Alabama",4293,4293,400,617,333,4150,432,617,333,0,11,0,11,8,12,0,11,0,11,0,0,0,0,0,11,135,178,0,11,0,11,0,11,4116,435,583,329,169,187,34,52,1,43,964600,0.143722,0.135802,0.0,0.0,0.0,0.0,0.0,0.0,0.00792,0,1043964600,"Census Tract 9646, Cullman County, Alabama",3831,3831,428,443,206,3707,426,443,206,16,20,0,11,19,34,0,11,0,11,0,0,0,0,0,11,76,118,0,11,13,17,0,11,3605,421,443,206,178,182,0,11,1,43,964600,0.115636,0.115636,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,01 non-metropolitan remainder,Alabama Rural Area,4,Alabama
2,"Census Tract 9647, Cullman County, Alabama",4771,4755,416,638,192,4569,434,510,174,77,119,77,119,10,16,0,11,10,17,0,0,0,0,0,11,56,88,51,88,33,57,0,11,4376,409,457,162,326,193,181,131,1,43,964700,0.134175,0.096109,0.016193,0.0,0.0,0.0,0.010726,0.0,0.038065,0,1043964700,"Census Tract 9647, Cullman County, Alabama",5272,5222,421,640,257,4944,505,502,198,0,16,0,16,6,9,0,16,73,67,10,10,0,0,0,16,152,154,128,154,47,76,0,16,4510,384,451,184,633,268,179,164,1,43,964700,0.122558,0.086365,0.0,0.0,0.001915,0.0,0.024512,0.0,0.034278,0,01 non-metropolitan remainder,Alabama Rural Area,4,Alabama
3,"Census Tract 9648, Cullman County, Alabama",4472,4472,413,1263,444,4156,434,1137,424,23,28,16,26,34,55,0,11,9,18,9,9,0,0,0,11,139,161,8,14,111,115,93,111,3748,399,810,291,479,338,343,326,1,43,964800,0.282424,0.181127,0.003578,0.0,0.002013,0.0,0.001789,0.020796,0.076699,1,1043964800,"Census Tract 9648, Cullman County, Alabama",4481,4471,385,1191,390,4302,383,1157,389,0,11,0,11,0,11,0,11,0,11,0,0,0,0,0,11,63,93,0,11,106,87,34,44,3902,372,882,342,400,262,275,256,1,43,964800,0.266383,0.197271,0.0,0.0,0.0,0.0,0.0,0.007605,0.061507,1,01 non-metropolitan remainder,Alabama Rural Area,4,Alabama
4,"Census Tract 9649, Cullman County, Alabama",6401,6094,488,1190,498,5907,488,1118,498,76,70,70,70,103,151,0,16,6,9,0,0,0,0,0,16,2,5,2,5,0,16,0,16,5484,471,937,486,425,287,183,235,1,43,964900,0.195274,0.153758,0.011487,0.0,0.0,0.0,0.000328,0.0,0.03003,0,1043964900,"Census Tract 9649, Cullman County, Alabama",6526,6275,458,803,297,5978,415,717,287,77,83,74,82,49,78,0,16,66,59,0,0,0,0,0,16,12,19,12,19,93,134,0,16,5731,431,704,283,259,210,25,22,1,43,964900,0.127968,0.112191,0.011793,0.0,0.0,0.0,0.001912,0.0,0.003984,0,01 non-metropolitan remainder,Alabama Rural Area,4,Alabama


In [25]:
county_key['county_fips']=county_key['county_fips'].astype(str)
county_key['county_fips']=county_key['county_fips'].str.zfill(5)

In [26]:
df['state17'] = df['state17'].astype(str)
df['state17'] = df['state17'].str.zfill(2)
df['county17'] = df['county17'].astype(str)
df['county17'] = df['county17'].str.zfill(3)

df['county_fips']= df['state17'] + df['county17']

In [27]:
df = pd.merge(df,county_key,how='outer',on='county_fips')

## Creating the movement columns 

Process: 

1. Check to see if the status of each of the datasets are 'Low', 'High', or 'Extreme'
2. Run for-loop down the entire dataset and create two new columns, one for source and the other for target 
3. Create another row and concatenate the two columns to show direction. 

In [28]:
df["conc_chk_x"].value_counts()

0        50120
1        17595
2         4576
check      765
Name: conc_chk_x, dtype: int64

In [29]:
source = []

for conc in df['conc_chk_x']:
    if conc == '0':
        source.append('Low')
    elif conc == '1':
        source.append('High')
    elif conc == '2':
        source.append('Extreme')
    else:
        source.append('Check')
        
df['source'] = source

In [30]:
target = []

for conc in df['conc_chk_y']:
    if conc == '0':
        target.append('Low')
    elif conc == '1':
        target.append('High')
    elif conc == '2':
        target.append('Extreme')
    else:
        target.append('Check')

df['target'] = target

In [31]:
df['movement'] = df['source'] + "-" + df['target']

## Adding meaning to the geotype column

Runs a for loop to build a dictionary with each of the given values in the "geotype" column and assigns it a value of city, suburb, small metro area, or rural.

In [32]:
geotype_name = []

for gt in df['geotype']:
    if gt == 1:
        geotype_name.append('City ')
    elif gt== 2:
        geotype_name.append('Suburb')
    elif gt == 3:
        geotype_name.append('Sm. Metro Area')
    elif gt == 4:
        geotype_name.append('Rural')
    else:
        geotype_name.append('Check')

In [33]:
len(geotype_name)

73074

In [34]:
df['geotype'].value_counts()

2.0    31036
1.0    16202
3.0    13652
4.0    12166
Name: geotype, dtype: int64

In [35]:
df['geotype_name'] = geotype_name

In [36]:
df['geotype_name'].value_counts()

Suburb            31036
City              16202
Sm. Metro Area    13652
Rural             12166
Check                18
Name: geotype_name, dtype: int64

## New Section - CBSA/Metro Area Analysis 

This next section will analyze the movement of the poor population to see if that changed alongside the general trend of the population migration. It also provides a statistical significance test at the Census tract level by performing a T-Test of Independent means. 

Creates a new dataframe "change population in poverty"

In [37]:
chg_pop_pov = df[['11_digit_code',
                  'tot_pov14', 'moe_pov14','poor_pov14','poor_moe_pov14','pct_pov14', #for total population 2014
                   'tot_nhw_pov14','moe_nhw_pov14','poor_nhw_pov14','poor_moe_nhw_pov14','pct_nhw_pov14',#for white population 2014
                  'tot_blk_pov14','moe_blk_pov14','poor_blk_pov14','poor_moe_blk_pov14','pct_blk_pov14', #for black population 2014
                  'tot_asn_pov14','moe_asn_pov14','poor_asn_pov14','poor_moe_asn_pov14','pct_asn_pov14',#for asian population 2014
                  'tot_hla_pov14','moe_hla_pov14','poor_hla_pov14','poor_moe_hla_pov14','pct_hla_pov14',#for hispanic/latino population 2014
                  'tot_pov17','moe_pov17','poor_pov17','poor_moe_pov17','pct_pov17',#for total population 2017
                  'tot_nhw_pov17','moe_nhw_pov17','poor_nhw_pov17','poor_moe_nhw_pov17','pct_nhw_pov17',#for white population 2017
                  'tot_blk_pov17','moe_blk_pov17','poor_blk_pov17','poor_moe_blk_pov17','pct_blk_pov17',#for black population 2017
                  'tot_asn_pov17','moe_asn_pov17','poor_asn_pov17','poor_moe_asn_pov17','pct_asn_pov17',#for asian population 2017
                  'tot_hla_pov17','moe_hla_pov17','poor_hla_pov17','poor_moe_hla_pov17','pct_hla_pov17',#for hispanic/latino population 2017
                  'source','target','movement',
                  'geotype','geotype_name','state_name','county_name','metro','cbsa']]

Calculate the standard error at a 90% confidence level based on the margin of error provided in dataframe 

In [38]:
chg_pop_pov['se_pov14'] = chg_pop_pov['moe_pov14'] / 1.645
chg_pop_pov['se_poor14'] = chg_pop_pov['poor_moe_pov14'] / 1.645

chg_pop_pov['se_pov17'] = chg_pop_pov['moe_pov17'] / 1.645
chg_pop_pov['se_poor17'] = chg_pop_pov['poor_moe_pov17'] / 1.645

Calculate the standard error for a proportion for each observation period

In [39]:
chg_pop_pov['pct_pov_se14'] = (1/chg_pop_pov['tot_pov14']) * np.sqrt(
    (chg_pop_pov['se_poor14']**2)-(chg_pop_pov['pct_pov14']**2)*chg_pop_pov['se_pov14']**2)


chg_pop_pov['pct_pov_se17'] = (1/chg_pop_pov['tot_pov17']) * np.sqrt(
    (chg_pop_pov['se_poor17']**2)- (chg_pop_pov['pct_pov17']**2)*chg_pop_pov['se_pov17']**2)


invalid value encountered in sqrt


invalid value encountered in sqrt



Calculate the new margins of error given the standard error, again, at a 90% confidence level.

In [40]:
chg_pop_pov['pct_pov_moe14'] = chg_pop_pov['pct_pov_se14']*1.645

chg_pop_pov['pct_pov_moe17'] = chg_pop_pov['pct_pov_se17']*1.645

Calculate if the difference was statistically significant using a T-Test of Independent means. 

In [41]:
chg_pop_pov['ssd_pct_poor'] = (chg_pop_pov['pct_pov17']-chg_pop_pov['pct_pov14'])/np.sqrt(
     (chg_pop_pov['pct_pov_se17']**2 + chg_pop_pov['pct_pov_se14']**2))

Create a dummy variable that marks which had z-scores greater than 1.645 or less than -1.645 

In [42]:
chg_pop_pov['stat_sig'] = np.where(((chg_pop_pov['ssd_pct_poor']< -1.645)  
                                   |(chg_pop_pov['ssd_pct_poor']> 1.645)), 1, 0)

Count the total number of census tracts that passed

In [43]:
chg_pop_pov['stat_sig'].value_counts()

0    67722
1     5352
Name: stat_sig, dtype: int64

Calculates the percentage change of the total population in a specific census tract

In [44]:
chg_pop_pov['pct_change_tot'] = round((chg_pop_pov['tot_pov17']-chg_pop_pov['tot_pov14'])/chg_pop_pov['tot_pov14']*100,2)

In [45]:
df_final = chg_pop_pov

If you want to see the output of the groupby/pivot table functions below, run the two commented out codes below to see what output is. It removes the broken census tracts for each observation year.

In [52]:
chg_pop_pov = chg_pop_pov[chg_pop_pov.movement!='Check-Check']
chg_pop_pov = chg_pop_pov[chg_pop_pov.movement!='Check-Extreme']
chg_pop_pov = chg_pop_pov[chg_pop_pov.movement!='Check-High']
chg_pop_pov = chg_pop_pov[chg_pop_pov.movement!='Check-Low']
chg_pop_pov = chg_pop_pov[chg_pop_pov.movement!='Extreme-Check']
chg_pop_pov = chg_pop_pov[chg_pop_pov.movement!='High-Check']
chg_pop_pov = chg_pop_pov[chg_pop_pov.movement!='Low-Check']

In [53]:
chg_pop_pov = chg_pop_pov.replace([np.inf, -np.inf], np.nan)

# Creating the Concentrated Poverty Shell

In [54]:
df_shell_metro =chg_pop_pov[['11_digit_code',
                  'tot_pov14', 'moe_pov14','poor_pov14','poor_moe_pov14','pct_pov14', #for total population 2014
                   'tot_nhw_pov14','moe_nhw_pov14','poor_nhw_pov14','poor_moe_nhw_pov14','pct_nhw_pov14',#for white population 2014
                  'tot_blk_pov14','moe_blk_pov14','poor_blk_pov14','poor_moe_blk_pov14','pct_blk_pov14', #for black population 2014
                  'tot_asn_pov14','moe_asn_pov14','poor_asn_pov14','poor_moe_asn_pov14','pct_asn_pov14',#for asian population 2014
                  'tot_hla_pov14','moe_hla_pov14','poor_hla_pov14','poor_moe_hla_pov14','pct_hla_pov14',#for hispanic/latino population 2014
                  'tot_pov17','moe_pov17','poor_pov17','poor_moe_pov17','pct_pov17',#for total population 2017
                  'tot_nhw_pov17','moe_nhw_pov17','poor_nhw_pov17','poor_moe_nhw_pov17','pct_nhw_pov17',#for white population 2017
                  'tot_blk_pov17','moe_blk_pov17','poor_blk_pov17','poor_moe_blk_pov17','pct_blk_pov17',#for black population 2017
                  'tot_asn_pov17','moe_asn_pov17','poor_asn_pov17','poor_moe_asn_pov17','pct_asn_pov17',#for asian population 2017
                  'tot_hla_pov17','moe_hla_pov17','poor_hla_pov17','poor_moe_hla_pov17','pct_hla_pov17',#for hispanic/latino population 2017
                  'source','target','movement',
                  'geotype','geotype_name','state_name','county_name','metro','cbsa']]


df_shell_rural =chg_pop_pov[['11_digit_code',
                  'tot_pov14', 'moe_pov14','poor_pov14','poor_moe_pov14','pct_pov14', #for total population 2014
                   'tot_nhw_pov14','moe_nhw_pov14','poor_nhw_pov14','poor_moe_nhw_pov14','pct_nhw_pov14',#for white population 2014
                  'tot_blk_pov14','moe_blk_pov14','poor_blk_pov14','poor_moe_blk_pov14','pct_blk_pov14', #for black population 2014
                  'tot_asn_pov14','moe_asn_pov14','poor_asn_pov14','poor_moe_asn_pov14','pct_asn_pov14',#for asian population 2014
                  'tot_hla_pov14','moe_hla_pov14','poor_hla_pov14','poor_moe_hla_pov14','pct_hla_pov14',#for hispanic/latino population 2014
                  'tot_pov17','moe_pov17','poor_pov17','poor_moe_pov17','pct_pov17',#for total population 2017
                  'tot_nhw_pov17','moe_nhw_pov17','poor_nhw_pov17','poor_moe_nhw_pov17','pct_nhw_pov17',#for white population 2017
                  'tot_blk_pov17','moe_blk_pov17','poor_blk_pov17','poor_moe_blk_pov17','pct_blk_pov17',#for black population 2017
                  'tot_asn_pov17','moe_asn_pov17','poor_asn_pov17','poor_moe_asn_pov17','pct_asn_pov17',#for asian population 2017
                  'tot_hla_pov17','moe_hla_pov17','poor_hla_pov17','poor_moe_hla_pov17','pct_hla_pov17',#for hispanic/latino population 2017
                  'source','target','movement',
                  'geotype','geotype_name','state_name','county_name','metro','cbsa']]

df_shell_smmetro =chg_pop_pov[['11_digit_code',
                  'tot_pov14', 'moe_pov14','poor_pov14','poor_moe_pov14','pct_pov14', #for total population 2014
                   'tot_nhw_pov14','moe_nhw_pov14','poor_nhw_pov14','poor_moe_nhw_pov14','pct_nhw_pov14',#for white population 2014
                  'tot_blk_pov14','moe_blk_pov14','poor_blk_pov14','poor_moe_blk_pov14','pct_blk_pov14', #for black population 2014
                  'tot_asn_pov14','moe_asn_pov14','poor_asn_pov14','poor_moe_asn_pov14','pct_asn_pov14',#for asian population 2014
                  'tot_hla_pov14','moe_hla_pov14','poor_hla_pov14','poor_moe_hla_pov14','pct_hla_pov14',#for hispanic/latino population 2014
                  'tot_pov17','moe_pov17','poor_pov17','poor_moe_pov17','pct_pov17',#for total population 2017
                  'tot_nhw_pov17','moe_nhw_pov17','poor_nhw_pov17','poor_moe_nhw_pov17','pct_nhw_pov17',#for white population 2017
                  'tot_blk_pov17','moe_blk_pov17','poor_blk_pov17','poor_moe_blk_pov17','pct_blk_pov17',#for black population 2017
                  'tot_asn_pov17','moe_asn_pov17','poor_asn_pov17','poor_moe_asn_pov17','pct_asn_pov17',#for asian population 2017
                  'tot_hla_pov17','moe_hla_pov17','poor_hla_pov17','poor_moe_hla_pov17','pct_hla_pov17',#for hispanic/latino population 2017
                  'source','target','movement',
                  'geotype','geotype_name','state_name','county_name','metro','cbsa']]

In [55]:
df_shell_metro = df_shell_metro[df_shell_metro.geotype_name!='Rural']
df_shell_metro = df_shell_metro[df_shell_metro.geotype_name!='Sm. Metro Area']
df_shell_smmetro = df_shell_metro[df_shell_metro.geotype_name=='Sm. Metro Area']
df_shell_rural = df_shell_metro[df_shell_metro.geotype_name=='Rural']

After creating dataframes for each geotype, we create pivot tables to have the datagroup appropriately. I couldn't figure out a way to do it more efficienctly, but if you've got any ideas based on the way that the Concentrated Poverty Shells file is laid out, please let me know! 

Elizabeth had told me that there is an easy way to do it in SAS, so if you find some sample code for that, maybe you can transfer that over into this!

Otherwise, copying and pasting the data into the spreadsheets will work just fine. 

In [62]:
all_people_2014= pd.pivot_table(df_shell_metro,index=['cbsa','metro','geotype_name'],
                            columns=['source'],
                            aggfunc={'source':len, 'tot_pov14':np.sum, 'poor_pov14':np.sum})
#all_people_2014.to_csv('all_2014.csv')

In [95]:
all_people_2017= pd.pivot_table(df_shell_metro,index=['cbsa','metro','geotype_name'],
                            columns=['target'],
                            aggfunc={'target':len,'tot_pov17':np.sum,'poor_pov17':np.sum})
#all_people_2017.to_csv('all_2017.csv')

In [96]:
blk_pop_2014= pd.pivot_table(df_shell_metro,index=['cbsa','metro','geotype_name'],
                            columns=['source'],
                            aggfunc={'source':len, 'tot_blk_pov14':np.sum, 'poor_blk_pov14':np.sum})
#blk_pop_2014.to_csv('blk_pop_2014.csv')

In [97]:
blk_pop_2017= pd.pivot_table(df_shell_metro,index=['cbsa','metro','geotype_name'],
                            columns=['target'],
                            aggfunc={'target':len,'tot_blk_pov17':np.sum,'poor_blk_pov17':np.sum})
#blk_pop_2017.to_csv('blk_pop_2017.csv')

In [98]:
hla_pop_2014= pd.pivot_table(df_shell_metro,index=['cbsa','metro','geotype_name'],
                            columns=['source'],
                            aggfunc={'source':len, 'tot_hla_pov14':np.sum, 'poor_hla_pov14':np.sum})
#hla_pop_2014.to_csv('hla_pop_2014.csv')

In [99]:
hla_pop_2017= pd.pivot_table(df_shell_metro,index=['cbsa','metro','geotype_name'],
                            columns=['target'],
                            aggfunc={'target':len,'tot_hla_pov17':np.sum,'poor_hla_pov17':np.sum})
#hla_pop_2017.to_csv('hla_pop_2017.csv')

In [100]:
nhw_pop_2014= pd.pivot_table(df_shell_metro,index=['cbsa','metro','geotype_name'],
                            columns=['source'],
                            aggfunc={'source':len, 'tot_nhw_pov14':np.sum, 'poor_nhw_pov14':np.sum})
#nhw_pop_2014.to_csv('nhw_pop_2014.csv')

In [101]:
nhw_pop_2017= pd.pivot_table(df_shell_metro,index=['cbsa','metro','geotype_name'],
                            columns=['target'],
                            aggfunc={'target':len,'tot_nhw_pov17':np.sum,'poor_nhw_pov17':np.sum})
#nhw_pop_2017.to_csv('nhw_pop_2017.csv')

In [102]:
asn_pop_2014= pd.pivot_table(df_shell_metro,index=['cbsa','metro','geotype_name'],
                            columns=['source'],
                            aggfunc={'source':len, 'tot_asn_pov14':np.sum, 'poor_asn_pov14':np.sum})
#asn_pop_2014.to_csv('asn_pop_2014.csv')

In [103]:
asn_pop_2017= pd.pivot_table(df_shell_metro,index=['cbsa','metro','geotype_name'],
                            columns=['target'],
                            aggfunc={'target':len,'tot_asn_pov17':np.sum,'poor_asn_pov17':np.sum})
#asn_pop_2017.to_csv('asn_pop_2017.csv')

In [104]:
small_metro_2014= pd.pivot_table(df_shell_metro,index=['cbsa','metro','geotype_name'],
                            columns=['source'],
                            aggfunc={'source':len, 'tot_pov14':np.sum, 'poor_pov14':np.sum,
                                    'tot_blk_pov14':np.sum, 'poor_blk_pov14':np.sum,
                                    'tot_hla_pov14':np.sum, 'poor_hla_pov14':np.sum,
                                    'tot_asn_pov14':np.sum, 'poor_asn_pov14':np.sum,
                                    'tot_nhw_pov14':np.sum, 'poor_nhw_pov14':np.sum})

#small_metro_2014.to_csv('small_metro_2014.csv')

In [105]:
small_metro_2017= pd.pivot_table(df_shell_smmetro,index=['cbsa','metro','geotype_name'],
                            columns=['target'],
                            aggfunc={'target':len, 'tot_pov17':np.sum, 'poor_pov17':np.sum,
                                    'tot_blk_pov17':np.sum, 'poor_blk_pov17':np.sum,
                                    'tot_hla_pov17':np.sum, 'poor_hla_pov17':np.sum,
                                    'tot_asn_pov17':np.sum, 'poor_asn_pov17':np.sum,
                                    'tot_nhw_pov17':np.sum, 'poor_nhw_pov17':np.sum})

#small_metro_2017.to_csv('small_metro_2017.csv')

In [106]:
rural_2014= pd.pivot_table(df_shell_rural,index=['cbsa','metro','geotype_name'],
                            columns=['source'],
                            aggfunc={'source':len, 'tot_pov14':np.sum, 'poor_pov14':np.sum,
                                    'tot_blk_pov14':np.sum, 'poor_blk_pov14':np.sum,
                                    'tot_hla_pov14':np.sum, 'poor_hla_pov14':np.sum,
                                    'tot_asn_pov14':np.sum, 'poor_asn_pov14':np.sum,
                                    'tot_nhw_pov14':np.sum, 'poor_nhw_pov14':np.sum})

#rural_2014.to_csv('rural_2014.csv')

In [107]:
rural_2017= pd.pivot_table(df_shell_rural,index=['cbsa','metro','geotype_name'],
                            columns=['target'],
                            aggfunc={'target':len, 'tot_pov17':np.sum, 'poor_pov17':np.sum,
                                    'tot_blk_pov17':np.sum, 'poor_blk_pov17':np.sum,
                                    'tot_hla_pov17':np.sum, 'poor_hla_pov17':np.sum,
                                    'tot_asn_pov17':np.sum, 'poor_asn_pov17':np.sum,
                                    'tot_nhw_pov17':np.sum, 'poor_nhw_pov17':np.sum})

#rural_2017.to_csv('rural_2017.csv')

In [108]:
#df_final.to_csv('DataOutputFile.csv')