# ACS Download

## ACS TOOL STEP 1 -> SETUP : 

#### Uses: csa2tractcrosswalk.csv, VitalSignsCensus_ACS_Tables.xlsx
#### Creates: ./AcsDataRaw/   ./AcsDataClean/

### Import Modules & Construct Path Handlers

In [None]:
import os
import sys
import pandas as pd

# Display settings for every DataFrame shown in this notebook:
# keep wide frames on a single row block instead of wrapping them across lines,
pd.set_option('display.expand_frame_repr', False)
# and show floating point values with 2 decimal places.
pd.set_option('display.precision', 2)

### Get Vital Signs Reference Table

In [None]:
# Reference list of the ACS tables to download, read from the bniajfi GitHub repository.
# The columns used later in this notebook are 'id', 'shortname' and '<yy>_exists'.
acs_tables = pd.read_csv('https://raw.githubusercontent.com/bniajfi/bniajfi/main/vs_acs_table_ids.csv')

In [None]:
acs_tables.head()

Unnamed: 0,id,description,shortname,20_exists,19_exists,18_exists,17_exists,16_exists,15_exists,14_exists,13_exists,12_exists,11_exists,10_exists
0,B01001,SEX BY AGE,sex_by_age,True,True,True,True,True,True,True,True,True,True,True
1,B01002,MEDIAN AGE BY SEX,median_age,True,True,True,True,True,True,True,True,True,True,True
2,B02001,RACE,race_alone,True,True,True,True,True,True,True,True,True,True,True
3,B03002,HISPANIC OR LATINO ORIGIN BY RACE,race_NH,True,True,True,True,True,True,True,True,True,True,True
4,B06009,PLACE OF BIRTH BY EDUCATIONAL ATTAINMENT IN TH...,ed_attainment,True,True,True,True,True,True,True,True,True,True,True


### Get Tract/ CSA CrossWalk

In [None]:
# Build the lookup used to tag each census tract with its CSA:
# keys come from the TRACTCE10 column, values from the CSA2010 column.
file = 'https://raw.githubusercontent.com/bniajfi/bniajfi/main/CSA-to-Tract-2010.csv'
crosswalk_df = pd.read_csv(file)
tract_ids = crosswalk_df['TRACTCE10']
csa_names = crosswalk_df['CSA2010']
crosswalk = dict(zip(tract_ids, csa_names))

### Get retrieve_acs_data function

In [None]:
!pip install dataplay geopandas VitalSigns

In [None]:
import VitalSigns as vs

In [None]:
from VitalSigns import acsDownload

In [None]:
help(acsDownload)

In [None]:
help(vs.acsDownload.retrieve_acs_data)

Help on function retrieve_acs_data in module VitalSigns.acsDownload:

retrieve_acs_data(state, county, tract, tableId, year, save)



### Column Operations

In [None]:
import csv # 'quote all'
def fixColNamesForCSV(x):
    """Return the column label to use when writing the raw CSV.

    The labels NAME, state, county, tract and CSA are returned unchanged
    (as strings). Every other label has its first 12 characters removed.
    """
    label = str(x)
    untouched = ("NAME", "state", "county", "tract", "CSA")
    if label in untouched:
        return label
    return label[12:]

## ACS TOOL STEP 2 -> Execute :

In [None]:
acs_tables.head()

Unnamed: 0,id,description,shortname,20_exists,19_exists,18_exists,17_exists,16_exists,15_exists,14_exists,13_exists,12_exists,11_exists,10_exists
0,B01001,SEX BY AGE,sex_by_age,True,True,True,True,True,True,True,True,True,True,True
1,B01002,MEDIAN AGE BY SEX,median_age,True,True,True,True,True,True,True,True,True,True,True
2,B02001,RACE,race_alone,True,True,True,True,True,True,True,True,True,True,True
3,B03002,HISPANIC OR LATINO ORIGIN BY RACE,race_NH,True,True,True,True,True,True,True,True,True,True,True
4,B06009,PLACE OF BIRTH BY EDUCATIONAL ATTAINMENT IN TH...,ed_attainment,True,True,True,True,True,True,True,True,True,True,True


### Save the ACS Data

In [None]:
# Download every ACS table that exists for `year` and write it out twice:
#   Set Index      "NAME" becomes the index of the downloaded frame
#   Save raw to    './AcsDataRaw/'+tableId+'_'+description+'_5y'+year+'_est.csv'
#   Tract to CSA   crosswalk.get(int(tract), "empty")
#   Save 4 use     './AcsDataClean/'+tableId+'_5y'+year+'_est.csv'

year = '19'      # two-digit year; selects the '<yy>_exists' column of acs_tables
count = 0
startFrom = 0    # tables whose 1-based position is below this value are skipped

# presumably FIPS codes for Maryland / Baltimore City, '*' meaning every tract -- confirm
state = '24'
county = '510'
tract = '*'
tableId = 'B19001'   # placeholder; overwritten for every table inside the loop
saveAcs = True

rawDir = './AcsDataRaw/'
cleanDir = './AcsDataClean/'

# DataFrame.to_csv does not create missing folders, so make sure both exist
# before the first table is written.
os.makedirs(rawDir, exist_ok=True)
os.makedirs(cleanDir, exist_ok=True)

# For each ACS Table (count is its 1-based position in acs_tables)
for count, (x, row) in enumerate(acs_tables.iterrows(), start=1):

    # Grab its Meta Data
    description = str(row['shortname'])
    tableId = str(row['id'])
    yearExists = int(row[year+'_exists'])

    # Only handle tables that are valid for the year;
    # use startFrom to begin at a specific count
    if not (yearExists and count >= startFrom):
        continue

    print(str(count)+') '+tableId + ' ' + description)

    # retrieve the Python ACS indicator
    print('sending retrieve_acs_data', year, tableId)
    df = vs.acsDownload.retrieve_acs_data(state, county, tract, tableId, year, saveAcs)

    df = df.set_index("NAME")

    # Save the Data as Raw
    # We do not want the id in the column names
    saveThis = df.rename(columns=fixColNamesForCSV)
    saveThis.to_csv(rawDir+tableId+'_'+description+'_5y'+year+'_est.csv', quoting=csv.QUOTE_ALL)

    # Match Tract to CSA; tracts missing from the crosswalk are labelled "empty"
    df['CSA'] = df['tract'].map(lambda tractId: crosswalk.get(int(tractId), "empty"))

    # Save the data (again) as Cleaned for me to use in the next scripts
    df.to_csv(cleanDir+tableId+'_5y'+year+'_est.csv', quoting=csv.QUOTE_ALL)


# ACS Create Indicators

## ACS TOOL STEP 1 -> SETUP : 

#### Uses: ./AcsDataClean/, VitalSignsCensus_ACS_Tables.xlsx, VitalSignsCensus_ACS_compare_data.xlsm

#### Creates: ./VSData/

### Get Vital Signs Reference Table

In [None]:
ls

In [None]:
# Open the Vital Signs reference workbook once (`xls` is reused by a later cell)
# and load its 'indicators' sheet, using the first column as the index.
# NOTE(review): findFile is not defined or imported in the visible cells -- confirm
# where it comes from before running this on a fresh kernel.
file = 'VitalSignsCensus_ACS_Tables.xlsx'
xls = pd.ExcelFile(findFile('./', file))
indicators = pd.read_excel(xls, sheet_name='indicators', index_col=0 )

In [None]:
indicators.head(30)

## ACS TOOL STEP 2 -> Execute :

### Create ACS Indicators

#### Settings/ Get Data

In [None]:
# Run settings and the sheet that the new indicator columns are written into.
flag = True
year = '19'
vsTbl = pd.read_excel(xls, sheet_name='vs' + year, index_col=0)

# Prepare the Compare Historic Data.
# sheet_name=None loads every sheet into a dict keyed by sheet name.
file = 'VitalSignsCensus_ACS_compare_data.xlsm'
compare_table = pd.read_excel(findFile('./', file), sheet_name=None)

# A comparison is only possible when the workbook has a sheet for this year.
comparable = ('VS' + year) in compare_table
if comparable:
    compare_table = compare_table['VS' + year]
    # The first data row is kept aside as the column names, then removed.
    columnsNames = compare_table.iloc[0]
    compare_table = compare_table.drop(compare_table.index[0])
    compare_table = compare_table.set_index(['CSA2010'], drop=True)

#### Create Indicators

In [None]:
# Build every ACS/Census indicator for `year`, store it in vsTbl, write the
# per-indicator and combined CSVs, and (when historic data is available)
# write comparison tables between the historic and the new values.

# DataFrame.to_csv does not create missing folders, so make sure the output
# folder exists before anything is written.
os.makedirs('./VSData', exist_ok=True)

# For Each Indicator (position is its 1-based row number in the reference sheet)
for position, (x, row) in enumerate(indicators.iterrows(), start=1):
    # Grab its Meta Data
    shortSource = str(row['Short Source'])
    shortName = str(row['ShortName'])[:-2]   # the last two characters are dropped
    yearExists = int(float(row[year+'_exists']))
    indicator = str(row['Indicator'])
    indicator_number = str(position)
    fileLocation = str(findFile( './', shortName+'.py') )

    # The indicator is built only if its <shortName>.py file was found,
    # it is valid for the year, and it uses ACS or Census data.
    flag = bool(fileLocation != 'None' and yearExists and shortSource in ['ACS', 'Census'])
    if not flag:
        continue

    print(shortSource, shortName, yearExists, indicator, fileLocation, indicator_number )

    # retrieve the Python ACS indicator: module <shortName> exposes a
    # function with the same name that takes the year
    module = __import__( shortName )
    result = getattr( module, shortName )( year )

    # Put Baltimore City at the bottom of the list
    idx = result.index.tolist()
    idx.pop(idx.index('Baltimore City'))
    result = result.reindex(idx+['Baltimore City'])

    # Write the results back into the XL dataframe
    vsTbl[ str(indicator_number + '_' +shortName ) ] = result

    # Save the Data
    result.to_csv('./VSData/vs'+ str(year)+'_'+shortName+'.csv')

# drop columns with any empty values
vsTbl = vsTbl.dropna(axis=1, how='any')

# Location of the Excel workbook; only needed by the Excel writer, which is disabled.
file = 'VS'+str(year)+'_indicators.xlsx'
file = findFile( 'VSData', file)
# writer = pd.ExcelWriter(file)
# vsTbl.to_excel(writer, str(year+'New_VS_Values') )

# Save the Data
vsTbl.to_csv('./VSData/vs'+str(year+'_New_VS_Values')+'.csv')

# Include Historic Data if exist
if comparable:
    # add historic indicator to excel doc
    # compare_table.to_excel(writer,sheet_name = str(year+'Original_VS_Values') )

    # compare sets
    info = pd.DataFrame()   # differences, ratios, and both raw series side by side
    diff = pd.DataFrame()   # old - new
    simi = pd.DataFrame()   # old / new

    for column in vsTbl:
        label = str(column)

        # The indicator number is the leading run of digits (at most 3) in the column name.
        plchld = next((width for width in (3, 2, 1) if label[:width].isdigit()), None)
        if plchld is None:
            # Not an indicator column, so there is nothing to compare it against.
            print('skipping comparison, no indicator number in column:', column)
            continue
        number = int(label[:plchld])

        if number not in compare_table.columns:
            print('skipping comparison, no historic values for indicator:', number)
            continue

        new = pd.to_numeric(vsTbl[column], downcast='float')
        old = pd.to_numeric(compare_table[number], downcast='float', errors='coerce')

        info[str(number)+'_Error#_'] = old - new
        diff[str(number)+'_Error#_'] = old - new
        info[str(number)+'_Error%_'] = old / new
        simi[str(number)+'_Error%_'] = old / new
        info[str(number)+'_new_'+label[plchld:]] = vsTbl[column]
        info[str(number)+'_old_'+str(columnsNames[number])] = compare_table[number]

    # Excel export of the comparison sheets is disabled; CSVs are written instead.
    # info.to_excel(writer, str(year+'_ExpandedView') )
    # diff.to_excel(writer,sheet_name = str(year+'_Error') )
    # simi.to_excel(writer,sheet_name = str(year+'_Similarity_Ratio') )

    info.to_csv('./VSData/vs'+str(year+'_ExpandedView')+'.csv')
    diff.to_csv('./VSData/vs'+str(year+'_Error')+'.csv')
    simi.to_csv('./VSData/vs'+str(year+'_Similarity_Ratio')+'.csv')

# writer.save()


#### Compare Historic Indicators

In [None]:
ls

In [None]:
# Quick test
shortName = str('hh25inc')
year = 19
# retrieve the Python ACS indicator
module = __import__( shortName )
result = getattr( module, shortName )( year )
result

In [None]:
# Delete Unassigned--Jail
df = df[df.index != 'Unassigned--Jail']

# Move Baltimore to Bottom: remember the row, drop it by label (not by
# position, which would remove whatever row happens to be second), then
# add it back so it becomes the last row.
bc = df.loc[ 'Baltimore City' ]
df = df.drop( index='Baltimore City' )
df.loc[ 'Baltimore City' ] = bc

In [None]:
vsTbl['18_fam']