In [1]:
import os
import sys
import pandas as pd, geopandas as gp, matplotlib.pyplot as plt, numpy as np
import getpass
import requests 
import json
from pandas import ExcelWriter
import censusdata

# Ask the interpreter not to write bytecode cache files for modules imported from here on.
sys.dont_write_bytecode = True

# Put the current user's Box "Utility Code" folder at the front of the module search path.
user = getpass.getuser()
utility_code_dir = '/Users/{}/Box/DataViz Projects/Utility Code'.format(user)
sys.path.insert(0, utility_code_dir)

from utils_io import *

### Read Census API key from file

In [2]:
def get_file_contents(filename):
    """Return the text stored in ``filename`` with surrounding whitespace removed.

    The file is expected to hold a single line containing the API key.
    If the file does not exist, a message is printed and None is returned.
    """
    try:
        with open(filename, 'r') as handle:
            raw_text = handle.read()
    except FileNotFoundError:
        print("'%s' file not found" % filename)
        return None
    return raw_text.strip()

### Read selected ACS variables from csv

In [3]:
# Table of the ACS variables used for the CoC factors; the path is relative to the working directory.
acs_vars_csv = 'Data/ACS_Table_Variables_COC_Factors.csv'
acs_coc_selected_vars = pd.read_csv(acs_vars_csv)

In [4]:
# Variable codes from the 'ACS_Table_Variable' column, as a plain Python list.
acs_vars_lst = acs_coc_selected_vars.loc[:, 'ACS_Table_Variable'].tolist()

### Query ACS API

In [14]:
# The Census API key is read from this text file rather than written into the notebook.
api_file = 'census_api_key.txt'
api_key = get_file_contents(filename=api_file)

In [13]:
# Build a single ACS 5-year (2018) request: every tract in the listed counties of the
# given state, for all selected variables.
var = ','.join(acs_vars_lst)
counties = '001,013,041,055,075,081,085,095,097'
state = '06'

# NOTE: a commented-out literal API key used to sit here. Keys belong in the key file,
# never in the notebook; if that key was real it should be rotated.

# get_file_contents returns None when the key file is missing. Stop with a clear
# message instead of sending "key=None" to the API and failing later on the response.
if not api_key:
    raise ValueError('Census API key is missing or empty; check the API key file')

url = ('https://api.census.gov/data/2018/acs/acs5'
       '?get={var}&for=tract:*&in=county:{counties}&in=state:{state}&key={api_key}'
       ).format(var=var, counties=counties, state=state, api_key=api_key)

# A timeout keeps the cell from hanging forever; raise_for_status surfaces HTTP errors
# directly rather than as a confusing JSON decoding failure.
rq = requests.get(url, timeout=60)
rq.raise_for_status()
data = rq.json()

# The first row of the response holds the column names; the remaining rows are records.
acs_df = pd.DataFrame(data[1:], columns=data[0])

In [None]:
# Distinct county codes present in the response.
pd.unique(acs_df['county'])

### Convert selected columns to a numeric data type

In [None]:
# Convert every requested ACS variable column with pd.to_numeric, then write the
# converted columns back into the frame.
numeric_values = acs_df[acs_vars_lst].apply(pd.to_numeric)
acs_df[acs_vars_lst] = numeric_values

In [None]:
# geoid is the state, county and tract columns joined in that order.
geoid_parts = [acs_df[part] for part in ('state', 'county', 'tract')]
acs_df['geoid'] = geoid_parts[0] + geoid_parts[1] + geoid_parts[2]

### Rename columns for consistency with prior CoCs

In [None]:
# API column name -> name used in the rest of the notebook (the totals act as denominators).
cols = dict([
    ('county', 'county_fips'),
    ('B03002_001E', 'tot_pop_min'),
    ('B01001_001E', 'tot_pop_sen'),
    ('C17002_001E', 'tot_pop_pov'),
    ('C18108_001E', 'tot_pop_civ_ni'),
    ('B08201_001E', 'tot_hh'),
    ('B11004_001E', 'tot_fam'),
    ('B16005_001E', 'tot_pop_over5'),
    ('B25070_010E', 'pop_hus_rent50'),
    ('B08201_002E', 'pop_zvhhs'),
])
# Renames in place, so acs_df itself carries the new names afterwards.
acs_df.rename(columns=cols, inplace=True)

### Calculate CoC and populations

In [None]:
def _add_columns(frame, names):
    """Add the named columns of ``frame`` left to right, exactly like chaining ``+``."""
    running_total = frame[names[0]]
    for name in names[1:]:
        running_total = running_total + frame[name]
    return running_total


# ACS variables that are added together for each count.
over75_vars = ['B01001_023E', 'B01001_024E', 'B01001_025E',
               'B01001_047E', 'B01001_048E', 'B01001_049E']
spfam_vars = ['B11004_010E', 'B11004_016E']
lep_vars = ['B16005_007E', 'B16005_008E', 'B16005_012E', 'B16005_013E',
            'B16005_017E', 'B16005_018E', 'B16005_022E', 'B16005_023E',
            'B16005_029E', 'B16005_030E', 'B16005_034E', 'B16005_035E',
            'B16005_039E', 'B16005_040E', 'B16005_044E', 'B16005_045E']
# These three are summed and then subtracted from the civilian total below.
civ_ni_excluded_vars = ['C18108_005E', 'C18108_009E', 'C18108_013E']

# Counts obtained by subtraction start from the matching total column.
acs_df['pop_minority'] = acs_df['tot_pop_min'] - acs_df['B03002_003E']
acs_df['pop_over75'] = _add_columns(acs_df, over75_vars)
acs_df['pop_spfam'] = _add_columns(acs_df, spfam_vars)
acs_df['pop_lep'] = _add_columns(acs_df, lep_vars)
acs_df['pop_below200'] = acs_df['tot_pop_pov'] - acs_df['C17002_008E']
acs_df['pop_disability'] = (acs_df['tot_pop_civ_ni'] -
                            _add_columns(acs_df, civ_ni_excluded_vars))

### Calculate CoC shares

In [None]:
# (share column, numerator column, denominator column) for each CoC factor.
share_specs = [
    ('pct_minority', 'pop_minority', 'tot_pop_min'),
    ('pct_over75', 'pop_over75', 'tot_pop_sen'),
    ('pct_spfam', 'pop_spfam', 'tot_fam'),
    ('pct_lep', 'pop_lep', 'tot_pop_over5'),
    ('pct_below200', 'pop_below200', 'tot_pop_pov'),
    ('pct_disab', 'pop_disability', 'tot_pop_civ_ni'),
    ('pct_zvhhs', 'pop_zvhhs', 'tot_hh'),
    ('pct_hus_rent50', 'pop_hus_rent50', 'tot_hh'),
]
for share_col, numerator_col, denominator_col in share_specs:
    denominator = acs_df[denominator_col]
    # Rows whose denominator is zero get a share of 0 instead of the division result.
    acs_df[share_col] = np.where(denominator == 0, 0,
                                 acs_df[numerator_col] / denominator)

In [None]:
# Preview the first five rows, now including the count and share columns.
acs_df.head(n=5)

### Flag cocs and coc levels (high, higher, highest) 

In [None]:
def flag_condition_calc(row, df_share_column, standard_deviation):
    """Return 1 when ``row`` exceeds the column's threshold, otherwise 0.

    The threshold is the mean of ``df_share_column`` plus ``standard_deviation``
    times its standard deviation, rounded to two decimals. The comparison is
    strict, so a value equal to the threshold is not flagged.
    """
    spread = standard_deviation * df_share_column.std()
    threshold = (df_share_column.mean() + spread).round(decimals=2)
    return 1 if row > threshold else 0

In [None]:
def flag_mult_columns(dataframe, dictionary, standard_deviation):
    """Add a 0/1 flag column to ``dataframe`` for each share column in ``dictionary``.

    A row is flagged 1 when its share is strictly greater than
    ``mean + standard_deviation * std`` of that share column, rounded to two
    decimals; otherwise it is flagged 0. This is the same threshold rule as
    ``flag_condition_calc``, evaluated once per column instead of once per row.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame holding the share columns; the flag columns are written into it.
    dictionary : dict
        Maps each share column name to the flag column name to create.
    standard_deviation : float
        Number of standard deviations above the mean used as the cut-off.

    Returns
    -------
    None
        ``dataframe`` is modified in place.
    """
    for share_col, flag_col in dictionary.items():
        # Use the frame that was passed in, not a notebook-level global, so the
        # function also works on frames other than the global one.
        shares = dataframe[share_col]
        threshold = np.round(shares.mean() + standard_deviation * shares.std(), 2)
        # Missing shares compare as False and so are flagged 0.
        dataframe[flag_col] = (shares > threshold).astype('int64')

### Flag halfsd columns and count factors

In [None]:
# Share column -> name of the flag column for the half-standard-deviation threshold.
cols_dict_halfsd = dict([
    ('pct_over75', 'over75_halfsd'),
    ('pct_minority', 'minority_halfsd'),
    ('pct_spfam', 'spfam_halfsd'),
    ('pct_disab', 'disab_halfsd'),
    ('pct_lep', 'lep_halfsd'),
    ('pct_below200', 'below200_halfsd'),
    ('pct_zvhhs', 'zvhh_halfsd'),
    ('pct_hus_rent50', 'hus_rent50_halfsd'),
])


In [None]:
# Create the *_halfsd flag columns using a threshold of mean + 0.5 standard deviations.
flag_mult_columns(dataframe=acs_df,
                  dictionary=cols_dict_halfsd,
                  standard_deviation=.5)

In [None]:
# All eight half-sd flag columns; their row-wise total is the number of flagged factors.
halfsd_cols_list = [
    'below200_halfsd',
    'minority_halfsd',
    'spfam_halfsd',
    'disab_halfsd',
    'lep_halfsd',
    'over75_halfsd',
    'zvhh_halfsd',
    'hus_rent50_halfsd',
]
halfsd_flags = acs_df.loc[:, halfsd_cols_list]
acs_df['count_disadfact_halfsd'] = halfsd_flags.sum(axis='columns')

In [None]:
# Flag columns for the rows with at least one half-sd factor flagged.
acs_df.loc[acs_df['count_disadfact_halfsd'] >= 1, halfsd_cols_list]

### Flag halfsd coc

In [None]:
# The six factors other than minority and below200.
halfsd_remain = [
    'spfam_halfsd',
    'disab_halfsd',
    'lep_halfsd',
    'over75_halfsd',
    'zvhh_halfsd',
    'hus_rent50_halfsd',
]
# A row qualifies when below200 is flagged AND either minority is flagged
# or at least three of the remaining factors are flagged.
halfsd_low_income = acs_df['below200_halfsd'] == 1
halfsd_minority = acs_df['minority_halfsd'] == 1
halfsd_three_plus = acs_df[halfsd_remain].sum(axis=1) >= 3
halfsd_cond = halfsd_low_income & (halfsd_minority | halfsd_three_plus)
acs_df['coc_pba2050_halfsd'] = np.where(halfsd_cond, 1, 0)

In [None]:
# Flag columns for the rows that qualify as a CoC at the half-sd threshold.
acs_df.loc[acs_df['coc_pba2050_halfsd'] == 1, halfsd_cols_list]

### Flag onesd columns

In [None]:
# Share column -> name of the flag column for the one-standard-deviation threshold.
cols_dict_onesd = dict([
    ('pct_over75', 'over75_onesd'),
    ('pct_minority', 'minority_onesd'),
    ('pct_spfam', 'spfam_onesd'),
    ('pct_disab', 'disab_onesd'),
    ('pct_lep', 'lep_onesd'),
    ('pct_below200', 'below200_onesd'),
    ('pct_zvhhs', 'zvhh_onesd'),
    ('pct_hus_rent50', 'hus_rent50_onesd'),
])

In [None]:
# Create the *_onesd flag columns using a threshold of mean + 1 standard deviation.
flag_mult_columns(dataframe=acs_df,
                  dictionary=cols_dict_onesd,
                  standard_deviation=1)

In [None]:
# All eight one-sd flag columns; their row-wise total is the number of flagged factors.
onesd_cols_list = [
    'below200_onesd',
    'minority_onesd',
    'spfam_onesd',
    'disab_onesd',
    'lep_onesd',
    'over75_onesd',
    'zvhh_onesd',
    'hus_rent50_onesd',
]
onesd_flags = acs_df.loc[:, onesd_cols_list]
acs_df['count_disadfact_onesd'] = onesd_flags.sum(axis='columns')

### Flag onesd coc

In [None]:
# The six factors other than minority and below200.
onesd_remain = [
    'spfam_onesd',
    'disab_onesd',
    'lep_onesd',
    'over75_onesd',
    'zvhh_onesd',
    'hus_rent50_onesd',
]
# A row qualifies when below200 is flagged AND either minority is flagged
# or at least three of the remaining factors are flagged.
onesd_low_income = acs_df['below200_onesd'] == 1
onesd_minority = acs_df['minority_onesd'] == 1
onesd_three_plus = acs_df[onesd_remain].sum(axis=1) >= 3
onesd_cond = onesd_low_income & (onesd_minority | onesd_three_plus)
acs_df['coc_pba2050_onesd'] = np.where(onesd_cond, 1, 0)

In [None]:
# Flag columns for the rows that qualify as a CoC at the one-sd threshold.
acs_df.loc[acs_df['coc_pba2050_onesd'] == 1, onesd_cols_list]

### Flag onehalfsd columns

In [None]:
# Share column -> name of the flag column for the one-and-a-half-standard-deviation threshold.
cols_dict_onehalfsd = dict([
    ('pct_over75', 'over75_onehalfsd'),
    ('pct_minority', 'minority_onehalfsd'),
    ('pct_spfam', 'spfam_onehalfsd'),
    ('pct_disab', 'disab_onehalfsd'),
    ('pct_lep', 'lep_onehalfsd'),
    ('pct_below200', 'below200_onehalfsd'),
    ('pct_zvhhs', 'zvhh_onehalfsd'),
    ('pct_hus_rent50', 'hus_rent50_onehalfsd'),
])

In [None]:
# Create the *_onehalfsd flag columns using a threshold of mean + 1.5 standard deviations.
flag_mult_columns(dataframe=acs_df,
                  dictionary=cols_dict_onehalfsd,
                  standard_deviation=1.5)

In [None]:
# All eight one-and-a-half-sd flag columns; their row-wise total is the number of flagged factors.
onehalfsd_cols_list = [
    'below200_onehalfsd',
    'minority_onehalfsd',
    'spfam_onehalfsd',
    'disab_onehalfsd',
    'lep_onehalfsd',
    'over75_onehalfsd',
    'zvhh_onehalfsd',
    'hus_rent50_onehalfsd',
]
onehalfsd_flags = acs_df.loc[:, onehalfsd_cols_list]
acs_df['count_disadfact_onehalfsd'] = onehalfsd_flags.sum(axis='columns')

In [None]:
# Flag columns for the rows with at least one one-and-a-half-sd factor flagged.
acs_df.loc[acs_df['count_disadfact_onehalfsd'] >= 1, onehalfsd_cols_list]

### Flag onehalfsd coc

In [None]:
# The six factors other than minority and below200.
onehalfsd_remain = [
    'spfam_onehalfsd',
    'disab_onehalfsd',
    'lep_onehalfsd',
    'over75_onehalfsd',
    'zvhh_onehalfsd',
    'hus_rent50_onehalfsd',
]
# A row qualifies when below200 is flagged AND either minority is flagged
# or at least three of the remaining factors are flagged.
onehalfsd_low_income = acs_df['below200_onehalfsd'] == 1
onehalfsd_minority = acs_df['minority_onehalfsd'] == 1
onehalfsd_three_plus = acs_df[onehalfsd_remain].sum(axis=1) >= 3
onehalfsd_cond = onehalfsd_low_income & (onehalfsd_minority | onehalfsd_three_plus)
acs_df['coc_pba2050_onehalfsd'] = np.where(onehalfsd_cond, 1, 0)

In [None]:
# Flag columns for the rows that qualify as a CoC at the one-and-a-half-sd threshold.
acs_df.loc[acs_df['coc_pba2050_onehalfsd'] == 1, onehalfsd_cols_list]

### Flag 2050 cocs

In [None]:
# A row gets the overall PBA 2050 CoC flag if it qualified at any of the three thresholds.
# Each term is an explicit `== 1` comparison so the mask is built from booleans only;
# OR-ing a raw 0/1 integer column into it would rely on implicit int-to-bool coercion.
any_threshold_coc = ((acs_df['coc_pba2050_halfsd'] == 1) |
                     (acs_df['coc_pba2050_onesd'] == 1) |
                     (acs_df['coc_pba2050_onehalfsd'] == 1))
acs_df['coc_flag_pba2050'] = np.where(any_threshold_coc, 1, 0)

In [None]:
# The overall flag next to the three per-threshold flags, for flagged rows only.
coc_flag_columns = ['coc_flag_pba2050',
                    'coc_pba2050_halfsd',
                    'coc_pba2050_onesd',
                    'coc_pba2050_onehalfsd']
acs_df.loc[acs_df['coc_flag_pba2050'] == 1, coc_flag_columns]

### Create coc classes

In [None]:
def some_func(df):
    """Label a row 'high' when its 'a' value is above .5, otherwise 'low'."""
    return 'high' if df['a'] > .5 else 'low'
# Scratch demo of a row-wise apply: ten unseeded random values in each of columns 'a' and 'b'.
sample_a = np.random.sample(size=10)
sample_b = np.random.sample(size=10)
df = pd.DataFrame(data={'a': sample_a, 'b': sample_b})
df.apply(some_func, axis=1)

In [None]:
def set_coc_class(df):
    """Return the CoC class label for one row.

    The flags are checked from the strictest threshold down, and the first one
    equal to 1 decides the label; a row with none of them set gets 'NA'.
    """
    tiers = (
        ('coc_pba2050_onehalfsd', 'Highest'),
        ('coc_pba2050_onesd', 'Higher'),
        ('coc_pba2050_halfsd', 'High'),
    )
    for flag_column, label in tiers:
        if df[flag_column] == 1:
            return label
    return 'NA'

In [None]:
# Classify every row; axis='columns' passes one row at a time to set_coc_class.
acs_df['coc_class'] = acs_df.apply(set_coc_class, axis='columns')

In [None]:
# Rows that received a CoC class, i.e. whose class is anything other than the 'NA' placeholder.
acs_df.loc[acs_df['coc_class'] != 'NA']

### Calculate regional statistics 

In [None]:
# Mean and standard deviation of each share column, reshaped to one row per factor.
share_columns = ['pct_over75',
                 'pct_minority',
                 'pct_lep',
                 'pct_spfam',
                 'pct_below200',
                 'pct_disab',
                 'pct_zvhhs',
                 'pct_hus_rent50']
agg_spec = {column: ['mean', 'std'] for column in share_columns}
share_stats = acs_df.agg(agg_spec)
# After the transpose the factors become the index; reset_index turns them into a column.
coc_region_stats = share_stats.transpose().reset_index()

In [None]:
# reset_index named the factor column 'index'; rename it in place to 'factors'.
factor_column_names = {'index': 'factors'}
coc_region_stats.rename(columns=factor_column_names, inplace=True)

### Create fields for .5, 1, and 1.5 sd from mean

In [None]:
# Threshold columns: the mean plus 0.5, 1 and 1.5 standard deviations for every factor.
sd_offsets = [('plus_half_sd', .5),
              ('plus_one_sd', 1),
              ('plus_one_half_sd', 1.5)]
for threshold_column, multiplier in sd_offsets:
    coc_region_stats[threshold_column] = (coc_region_stats['mean'] +
                                          (multiplier * coc_region_stats['std']))

In [None]:
# Display the regional statistics table: one row per factor with its mean, std and the three threshold columns.
coc_region_stats