In [1]:
import os
import sys
import pandas as pd, geopandas as gp, matplotlib.pyplot as plt, numpy as np
import getpass
import requests 
import json
from pandas import ExcelWriter
import censusdata

user = getpass.getuser()
sys.dont_write_bytecode = True
sys.path.insert(0, '/Users/{}/Box/DataViz Projects/Utility Code'.format(user))

from utils_io import *

### Read selected ACS variables from csv

In [3]:
# Load the table of ACS variables selected for the CoC factors.
# The path is relative, so the notebook must be run from the folder that contains Data/.
acs_coc_selected_vars = pd.read_csv('Data/ACS_Table_Variables_COC_Factors.csv')

In [4]:
# Plain list of the 'ACS_Table_Variable' values; reused below to build the API
# `get=` parameter and to pick the columns converted to numeric.
acs_vars_lst = acs_coc_selected_vars['ACS_Table_Variable'].tolist()

### Query ACS API

In [4]:
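# NOTE: disabled draft, superseded by the next cell. A dict literal cannot hold
# two 'in' keys (the later one silently wins), so the county filter would be lost.
# The API key is read from the environment rather than written into the notebook.
# url = 'https://api.census.gov/data/2018/acs/acs5?'
# var = ','.join(acs_vars_lst)
# param_dict = {'get': var,
#               'for':'tract:*',
#               'in':'county:001,013,041,055,075,081,085,095,097',
#               'in':'state:06',
#               'key': os.environ['CENSUS_API_KEY']}
# rq = requests.get(url, params=param_dict)
# data = rq.json()
# acs_df = pd.DataFrame(data[1:],columns=data[0])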

In [5]:
# Query the 2018 ACS 5-year API for every tract in the listed counties.
var = ','.join(acs_vars_lst)
counties = '001,013,041,055,075,081,085,095,097'
state = '06'
# Never commit the Census API key: read it from the environment and fall back
# to an interactive prompt. NOTE(review): the key that used to be hard-coded
# here has been exposed and should be revoked.
api_key = os.environ.get('CENSUS_API_KEY') or getpass.getpass('Census API key: ')
url = ('https://api.census.gov/data/2018/acs/acs5'
       '?get={var}&for=tract:*&in=county:{counties}&in=state:{state}&key={api_key}'
       ).format(var=var, counties=counties, state=state, api_key=api_key)
# A timeout keeps the cell from hanging forever; raise_for_status surfaces HTTP
# errors (bad key, bad variable name) instead of a confusing JSON decode failure.
rq = requests.get(url, timeout=60)
rq.raise_for_status()
data = rq.json()
if len(data) < 2:
    raise ValueError('Census API returned no tract rows')
# First row of the response is the header, the remaining rows are tracts.
acs_df = pd.DataFrame(data[1:], columns=data[0])

In [6]:
# Sanity check: list the distinct county codes present in the API response.
acs_df['county'].unique()

array(['055', '085', '095', '075', '013', '081', '001', '097', '041'],
      dtype=object)

### Convert selected columns to a numeric data type

In [7]:
# Convert every requested ACS variable column with pd.to_numeric (applied column
# by column) so the arithmetic below works on numbers; other columns are untouched.
acs_df[acs_vars_lst] = acs_df[acs_vars_lst].apply(pd.to_numeric)

In [8]:
# Build the tract identifier by joining state + county + tract codes.
# 'county' is dtype=object in the output above, so `+` is string concatenation
# (presumably the same holds for 'state' and 'tract' -- confirm).
acs_df['geoid'] = acs_df['state'] + acs_df['county'] + acs_df['tract']

### Rename columns for consistency with prior CoCs

In [9]:
# Mapping of raw API column names to the names used in the rest of the notebook.
# The *_001E columns become the tot_* denominators; B25070_010E and B08201_002E
# are used directly as pop_* numerators further down.
cols = {'county':'county_fips',
        'B03002_001E':'tot_pop_min',
        'B01001_001E':'tot_pop_sen',
        'C17002_001E':'tot_pop_pov',
       'C18108_001E':'tot_pop_civ_ni',
       'B08201_001E':'tot_hh',
       'B11004_001E':'tot_fam',
       'B16005_001E':'tot_pop_over5',
       'B25070_010E':'pop_hus_rent50',
       'B08201_002E':'pop_zvhhs'}
# Renames in place; keys that are no longer present are silently ignored by rename().
acs_df.rename(columns=cols, inplace=True)

### Calculate CoC and populations

In [10]:
# Derived population counts per tract. Row-wise sums use skipna=False so a
# missing input propagates as NaN exactly as a chain of `+` would.

# Minority = total minus the B03002_003E column.
acs_df['pop_minority'] = acs_df['tot_pop_min'].sub(acs_df['B03002_003E'])

# Over 75 = sum of six B01001 columns.
acs_df['pop_over75'] = acs_df[['B01001_023E', 'B01001_024E', 'B01001_025E',
                               'B01001_047E', 'B01001_048E', 'B01001_049E']
                              ].sum(axis=1, skipna=False)

# Single-parent families = sum of two B11004 columns.
acs_df['pop_spfam'] = acs_df[['B11004_010E', 'B11004_016E']
                             ].sum(axis=1, skipna=False)

# Limited English proficiency = sum of sixteen B16005 columns.
acs_df['pop_lep'] = acs_df[['B16005_007E', 'B16005_008E',
                            'B16005_012E', 'B16005_013E',
                            'B16005_017E', 'B16005_018E',
                            'B16005_022E', 'B16005_023E',
                            'B16005_029E', 'B16005_030E',
                            'B16005_034E', 'B16005_035E',
                            'B16005_039E', 'B16005_040E',
                            'B16005_044E', 'B16005_045E']
                           ].sum(axis=1, skipna=False)

# Below 200% of poverty = total minus the C17002_008E column.
acs_df['pop_below200'] = acs_df['tot_pop_pov'].sub(acs_df['C17002_008E'])

# Disability = total minus the sum of three C18108 columns.
acs_df['pop_disability'] = acs_df['tot_pop_civ_ni'].sub(
    acs_df[['C18108_005E', 'C18108_009E', 'C18108_013E']
           ].sum(axis=1, skipna=False))

### Calculate CoC shares

In [11]:
# (share column, numerator column, denominator column) for each CoC factor.
share_specs = [
    ('pct_minority',   'pop_minority',   'tot_pop_min'),
    ('pct_over75',     'pop_over75',     'tot_pop_sen'),
    ('pct_spfam',      'pop_spfam',      'tot_fam'),
    ('pct_lep',        'pop_lep',        'tot_pop_over5'),
    ('pct_below200',   'pop_below200',   'tot_pop_pov'),
    ('pct_disab',      'pop_disability', 'tot_pop_civ_ni'),
    ('pct_zvhhs',      'pop_zvhhs',      'tot_hh'),
    ('pct_hus_rent50', 'pop_hus_rent50', 'tot_hh'),
]
for share_col, pop_col, total_col in share_specs:
    # A tract with a zero denominator gets a share of 0 rather than inf/NaN.
    acs_df[share_col] = np.where(acs_df[total_col] == 0, 0,
                                 acs_df[pop_col] / acs_df[total_col])

In [12]:
# Preview the first five rows to eyeball the derived pop_* and pct_* columns.
acs_df.head(5)

Unnamed: 0,tot_pop_min,B03002_003E,tot_pop_sen,B01001_023E,B01001_024E,B01001_025E,B01001_047E,B01001_048E,B01001_049E,tot_pop_pov,...,pop_below200,pop_disability,pct_minority,pct_over75,pct_spfam,pct_lep,pct_below200,pct_disab,pct_zvhhs,pct_hus_rent50
0,2772,1388,2772,23,9,10,77,8,8,2580,...,873,392,0.499278,0.048701,0.297345,0.130384,0.338372,0.149675,0.135379,0.16065
1,4127,2874,4127,34,19,14,30,54,46,4125,...,591,382,0.30361,0.047734,0.054027,0.087235,0.143273,0.092561,0.005806,0.020645
2,722,396,722,23,0,0,0,0,0,119,...,103,80,0.451524,0.031856,0.363636,0.00831,0.865546,0.672269,0.891892,0.594595
3,6918,1563,6918,35,17,19,126,26,71,6845,...,460,263,0.774068,0.042498,0.033134,0.04316,0.067202,0.038155,0.021295,0.045198
4,4863,1951,4863,83,27,20,89,67,69,4863,...,776,515,0.598807,0.073,0.084932,0.087213,0.159572,0.105902,0.099839,0.067096


### Flag cocs and coc levels (high, higher, highest) 

In [17]:
def flag_condition_calc(row, df_share_column, standard_deviation):
    """Return 1 when ``row`` is strictly above the column's cutoff, else 0.

    The cutoff is ``mean + standard_deviation * std`` of ``df_share_column``,
    rounded to two decimals before the comparison.

    row: a single share value.
    df_share_column: the Series the mean and standard deviation are taken from.
    standard_deviation: multiplier applied to the standard deviation.
    """
    center = df_share_column.mean()
    spread = df_share_column.std()
    cutoff = (center + (standard_deviation * spread)).round(decimals=2)
    return 1 if row > cutoff else 0

In [14]:
def flag_mult_columns(dataframe, dictionary, standard_deviation):
    """Add one 0/1 flag column to ``dataframe`` per entry of ``dictionary``.

    For each ``share_column -> flag_column`` pair, the flag is 1 where the
    share is strictly greater than ``mean + standard_deviation * std`` of that
    share column (cutoff rounded to two decimals, the same rule as
    ``flag_condition_calc``) and 0 otherwise.

    dataframe: frame holding the share columns; modified in place.
    dictionary: maps an existing share column name to the flag column to create.
    standard_deviation: multiplier applied to the standard deviation.

    Returns None.
    """
    for key, value in dictionary.items():
        # Use the frame that was passed in, not a notebook-level global.
        share = dataframe[key]
        # The cutoff is constant per column, so compute it once rather than per row.
        cutoff = np.round(share.mean() + (standard_deviation * share.std()),
                          decimals=2)
        # NaN shares compare False and are flagged 0, as the row-wise version did.
        dataframe[value] = (share > cutoff).astype('int64')

### Flag halfsd columns and count factors

In [15]:
# Share column -> name of the 0/1 flag column for the 0.5-standard-deviation tier.
# Insertion order fixes the order in which the flag columns are added.
cols_dict_halfsd = {
    share_col: stem + '_halfsd'
    for share_col, stem in [('pct_over75', 'over75'),
                            ('pct_minority', 'minority'),
                            ('pct_spfam', 'spfam'),
                            ('pct_disab', 'disab'),
                            ('pct_lep', 'lep'),
                            ('pct_below200', 'below200'),
                            ('pct_zvhhs', 'zvhh'),
                            ('pct_hus_rent50', 'hus_rent50')]
}


In [18]:
# Add the *_halfsd flag columns: 1 where a share exceeds mean + 0.5 * std.
flag_mult_columns(acs_df,cols_dict_halfsd,.5)

In [19]:
# Flag columns of the 0.5-sd tier, in display order.
halfsd_cols_list = [stem + '_halfsd'
                    for stem in ('below200', 'minority', 'spfam', 'disab',
                                 'lep', 'over75', 'zvhh', 'hus_rent50')]
# Number of factors flagged per tract.
acs_df['count_disadfact_halfsd'] = acs_df.loc[:, halfsd_cols_list].sum(axis=1)

In [20]:
acs_df[halfsd_cols_list][acs_df['count_disadfact_halfsd'] >= 1]

Unnamed: 0,below200_halfsd,minority_halfsd,spfam_halfsd,disab_halfsd,lep_halfsd,over75_halfsd,zvhh_halfsd,hus_rent50_halfsd
0,1,0,1,1,1,0,0,1
2,1,0,1,1,0,0,1,1
3,0,1,0,0,0,0,0,0
9,1,1,1,0,1,0,0,0
10,0,1,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...
1582,0,0,0,1,0,0,0,0
1583,0,0,0,0,0,1,0,0
1585,0,0,0,0,0,1,0,0
1586,0,0,1,1,1,1,0,0


### Flag halfsd coc

In [21]:
# Factors other than low-income and minority.
halfsd_remain = [stem + '_halfsd'
                 for stem in ('spfam', 'disab', 'lep',
                              'over75', 'zvhh', 'hus_rent50')]
is_minority_halfsd = acs_df['minority_halfsd'] == 1
is_low_income_halfsd = acs_df['below200_halfsd'] == 1
has_3_other_halfsd = acs_df[halfsd_remain].sum(axis=1) >= 3
# CoC when low-income AND minority, or low-income AND three or more other factors.
halfsd_cond = ((is_minority_halfsd & is_low_income_halfsd) |
               (is_low_income_halfsd & has_3_other_halfsd))
acs_df['coc_pba2050_halfsd'] = np.where(halfsd_cond, 1, 0)

In [41]:
acs_df[halfsd_cols_list][acs_df['coc_pba2050_halfsd'] == 1]

Unnamed: 0,below200_halfsd,minority_halfsd,spfam_halfsd,disab_halfsd,lep_halfsd,over75_halfsd,zvhh_halfsd,hus_rent50_halfsd
0,1,0,1,1,1,0,0,1
2,1,0,1,1,0,0,1,1
9,1,1,1,0,1,0,0,0
11,1,1,1,0,1,0,0,0
18,1,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...
1542,1,1,1,0,0,0,0,1
1544,1,1,1,0,1,0,0,1
1549,1,1,1,1,1,0,0,1
1564,1,0,1,1,1,0,0,0


### Flag onesd columns

In [23]:
# Share column -> name of the 0/1 flag column for the 1-standard-deviation tier.
# Insertion order fixes the order in which the flag columns are added.
cols_dict_onesd = {
    share_col: stem + '_onesd'
    for share_col, stem in [('pct_over75', 'over75'),
                            ('pct_minority', 'minority'),
                            ('pct_spfam', 'spfam'),
                            ('pct_disab', 'disab'),
                            ('pct_lep', 'lep'),
                            ('pct_below200', 'below200'),
                            ('pct_zvhhs', 'zvhh'),
                            ('pct_hus_rent50', 'hus_rent50')]
}

In [24]:
# Add the *_onesd flag columns: 1 where a share exceeds mean + 1 * std.
flag_mult_columns(acs_df,cols_dict_onesd,1)

In [25]:
# Flag columns of the 1-sd tier, in display order.
onesd_cols_list = [stem + '_onesd'
                   for stem in ('below200', 'minority', 'spfam', 'disab',
                                'lep', 'over75', 'zvhh', 'hus_rent50')]
# Number of factors flagged per tract.
acs_df['count_disadfact_onesd'] = acs_df.loc[:, onesd_cols_list].sum(axis=1)

### Flag onesd coc

In [27]:
# Factors other than low-income and minority.
onesd_remain = [stem + '_onesd'
                for stem in ('spfam', 'disab', 'lep',
                             'over75', 'zvhh', 'hus_rent50')]
is_minority_onesd = acs_df['minority_onesd'] == 1
is_low_income_onesd = acs_df['below200_onesd'] == 1
has_3_other_onesd = acs_df[onesd_remain].sum(axis=1) >= 3
# CoC when low-income AND minority, or low-income AND three or more other factors.
onesd_cond = ((is_minority_onesd & is_low_income_onesd) |
              (is_low_income_onesd & has_3_other_onesd))
acs_df['coc_pba2050_onesd'] = np.where(onesd_cond, 1, 0)

In [40]:
# Factor flags of the tracts classified as CoC at the 1-sd tier.
acs_df.loc[acs_df['coc_pba2050_onesd'] == 1, onesd_cols_list]

Unnamed: 0,below200_onesd,minority_onesd,spfam_onesd,disab_onesd,lep_onesd,over75_onesd,zvhh_onesd,hus_rent50_onesd
2,1,0,1,1,0,0,1,1
19,1,1,1,1,0,0,1,1
34,1,1,1,0,1,0,0,0
35,1,1,1,0,1,0,0,1
38,1,1,1,1,0,0,0,0
...,...,...,...,...,...,...,...,...
1481,1,1,1,0,1,0,0,1
1506,1,1,1,0,1,0,0,1
1507,1,0,1,0,1,0,0,1
1527,1,0,1,1,1,0,0,1


### Flag onehalfsd columns

In [29]:
# Share column -> name of the 0/1 flag column for the 1.5-standard-deviation tier.
# Insertion order fixes the order in which the flag columns are added.
cols_dict_onehalfsd = {
    share_col: stem + '_onehalfsd'
    for share_col, stem in [('pct_over75', 'over75'),
                            ('pct_minority', 'minority'),
                            ('pct_spfam', 'spfam'),
                            ('pct_disab', 'disab'),
                            ('pct_lep', 'lep'),
                            ('pct_below200', 'below200'),
                            ('pct_zvhhs', 'zvhh'),
                            ('pct_hus_rent50', 'hus_rent50')]
}

In [30]:
# Add the *_onehalfsd flag columns: 1 where a share exceeds mean + 1.5 * std.
flag_mult_columns(acs_df,cols_dict_onehalfsd,1.5)

In [31]:
# Flag columns of the 1.5-sd tier, in display order.
onehalfsd_cols_list = [stem + '_onehalfsd'
                       for stem in ('below200', 'minority', 'spfam', 'disab',
                                    'lep', 'over75', 'zvhh', 'hus_rent50')]
# Number of factors flagged per tract.
acs_df['count_disadfact_onehalfsd'] = acs_df.loc[:, onehalfsd_cols_list].sum(axis=1)

In [32]:
# Tracts with at least one 1.5-sd factor flagged (single .loc instead of chained indexing).
acs_df.loc[acs_df['count_disadfact_onehalfsd'] >= 1, onehalfsd_cols_list]

Unnamed: 0,below200_onehalfsd,minority_onehalfsd,spfam_onehalfsd,disab_onehalfsd,lep_onehalfsd,over75_onehalfsd,zvhh_onehalfsd,hus_rent50_onehalfsd
0,0,0,1,0,0,0,0,0
2,1,0,1,1,0,0,1,1
9,0,0,0,0,1,0,0,0
10,0,0,0,0,1,0,0,0
11,0,1,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...
1553,0,0,0,0,1,0,0,0
1564,0,0,0,0,1,0,0,0
1570,0,0,0,1,0,0,0,0
1575,0,0,0,0,1,0,0,0


### Flag onehalfsd coc

In [33]:
# Factors other than low-income and minority.
onehalfsd_remain = [stem + '_onehalfsd'
                    for stem in ('spfam', 'disab', 'lep',
                                 'over75', 'zvhh', 'hus_rent50')]
is_minority_onehalfsd = acs_df['minority_onehalfsd'] == 1
is_low_income_onehalfsd = acs_df['below200_onehalfsd'] == 1
has_3_other_onehalfsd = acs_df[onehalfsd_remain].sum(axis=1) >= 3
# CoC when low-income AND minority, or low-income AND three or more other factors.
onehalfsd_cond = ((is_minority_onehalfsd & is_low_income_onehalfsd) |
                  (is_low_income_onehalfsd & has_3_other_onehalfsd))
acs_df['coc_pba2050_onehalfsd'] = np.where(onehalfsd_cond, 1, 0)

In [39]:
# Factor flags of the tracts classified as CoC at the 1.5-sd tier.
acs_df.loc[acs_df['coc_pba2050_onehalfsd'] == 1, onehalfsd_cols_list]

Unnamed: 0,below200_onehalfsd,minority_onehalfsd,spfam_onehalfsd,disab_onehalfsd,lep_onehalfsd,over75_onehalfsd,zvhh_onehalfsd,hus_rent50_onehalfsd
2,1,0,1,1,0,0,1,1
19,1,0,1,1,0,0,0,1
35,1,0,1,0,1,0,0,1
51,1,0,1,1,0,0,0,1
101,1,1,1,0,1,0,0,0
...,...,...,...,...,...,...,...,...
1401,1,0,0,1,1,1,1,0
1460,1,1,0,0,1,1,1,1
1466,1,1,1,1,1,1,1,0
1506,1,1,1,0,1,0,0,1


### Flag 2050 cocs

In [35]:
# A tract is a PBA 2050 CoC when it qualifies at any of the three tiers.
# Every operand is compared with 1 so all three are boolean Series; the third
# term previously relied on the truthiness of the raw 0/1 integers.
acs_df['coc_flag_pba2050'] = np.where((acs_df['coc_pba2050_halfsd'] == 1) |
                                      (acs_df['coc_pba2050_onesd'] == 1) |
                                      (acs_df['coc_pba2050_onehalfsd'] == 1), 1, 0)

In [38]:
# Overall flag next to the three tier flags, for the flagged tracts only.
acs_df.loc[acs_df['coc_flag_pba2050'] == 1,
           ['coc_flag_pba2050',
            'coc_pba2050_halfsd',
            'coc_pba2050_onesd',
            'coc_pba2050_onehalfsd']]

Unnamed: 0,coc_flag_pba2050,coc_pba2050_halfsd,coc_pba2050_onesd,coc_pba2050_onehalfsd
0,1,1,0,0
2,1,1,1,1
9,1,1,0,0
11,1,1,0,0
18,1,1,0,0
...,...,...,...,...
1542,1,1,0,0
1544,1,1,0,0
1549,1,1,1,0
1564,1,1,0,0


### Create coc classes

In [102]:
def some_func(df):
    """Return 'high' when the row's 'a' value is above .5, otherwise 'low'."""
    return 'high' if df['a'] > .5 else 'low'
# Scratch demonstration of a row-wise apply on a throwaway random frame; it does
# not touch acs_df. No seed is set in this cell, so the output changes on every run.
df=pd.DataFrame(data={'a':np.random.sample(size=10),'b':np.random.sample(size=10)})
df.apply(some_func,axis=1)

0    high
1     low
2    high
3    high
4    high
5    high
6    high
7     low
8     low
9    high
dtype: object

In [103]:
def set_coc_class(df):
    """Return the CoC class label for one tract row.

    The tier flags are checked from the strictest (1.5 sd) down to the loosest
    (0.5 sd); the first flag equal to 1 decides the label. A row with no flag
    set gets the string 'NA'.
    """
    tiers = (('coc_pba2050_onehalfsd', 'Highest'),
             ('coc_pba2050_onesd', 'Higher'),
             ('coc_pba2050_halfsd', 'High'))
    for flag_col, label in tiers:
        if df[flag_col] == 1:
            return label
    return 'NA'

In [105]:
# Label every tract with set_coc_class; axis=1 passes each row to the function.
acs_df['coc_class'] = acs_df.apply(set_coc_class,axis=1)

In [108]:
# Tracts that received a CoC class ('NA' is the literal string set_coc_class returns).
acs_df.loc[acs_df['coc_class'] != 'NA']

Unnamed: 0,tot_pop_min,B03002_003E,tot_pop_sen,B01001_023E,B01001_024E,B01001_025E,B01001_047E,B01001_048E,B01001_049E,tot_pop_pov,...,spfam_onehalfsd,disab_onehalfsd,lep_onehalfsd,below200_onehalfsd,zvhh_onehalfsd,hus_rent50_onehalfsd,count_disadfact_onehalfsd,coc_pba2050_onehalfsd,coc_flag_pba2050,coc_class
0,2772,1388,2772,23,9,10,77,8,8,2580,...,1,0,0,0,0,0,1,0,1,High
2,722,396,722,23,0,0,0,0,0,119,...,1,1,0,1,1,1,5,1,1,Highest
9,8414,524,8414,77,0,72,158,17,17,8368,...,0,0,1,0,0,0,1,0,1,High
11,4855,68,4855,27,19,40,41,23,21,4790,...,0,0,1,0,0,0,2,0,1,High
18,5571,1096,5571,29,30,9,75,0,22,5501,...,0,0,0,0,0,0,0,0,1,High
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1542,5794,1335,5794,5,0,0,0,7,9,1743,...,0,0,0,1,0,1,2,0,1,High
1544,7820,2155,7820,62,51,24,42,89,105,7674,...,0,0,0,0,0,0,0,0,1,High
1549,4278,157,4278,64,29,25,79,40,63,4258,...,0,0,1,0,0,0,2,0,1,Higher
1564,5481,2238,5481,98,22,19,5,62,93,5436,...,0,0,1,0,0,0,1,0,1,High


### Calculate regional statistics 

In [None]:
# Mean and standard deviation of every share column across all tracts,
# reshaped to one row per factor.
coc_region_stats = (
    acs_df
    .agg({share_col: ['mean', 'std']
          for share_col in ['pct_over75',
                            'pct_minority',
                            'pct_lep',
                            'pct_spfam',
                            'pct_below200',
                            'pct_disab',
                            'pct_zvhhs',
                            'pct_hus_rent50']})
    .transpose()
    .reset_index()
)

In [None]:
# reset_index() left the factor names in a column called 'index'; rename it in place.
coc_region_stats.rename(columns={'index':'factors'},inplace=True)

### Create fields for .5, 1, and 1.5 sd from mean

In [None]:
# Cutoffs at mean + 0.5, 1 and 1.5 standard deviations for every factor.
# NOTE(review): these values are unrounded, whereas flag_condition_calc rounds
# its cutoff to two decimals before comparing.
for cutoff_col, n_sd in (('plus_half_sd', .5),
                         ('plus_one_sd', 1),
                         ('plus_one_half_sd', 1.5)):
    coc_region_stats[cutoff_col] = (coc_region_stats['mean'] +
                                    (n_sd * coc_region_stats['std']))

In [None]:
# Display the regional mean/std table together with the three cutoff columns.
coc_region_stats