In [17]:
## Written and published by Nathan Young, Junior Data Analyst for NC Data Dashboard, December 2019 ##

In [18]:
# Imports
import pandas as pd
import requests
from io import BytesIO, StringIO
from zipfile import ZipFile

In [19]:
# Create Backups
# Snapshot the current staging file before the final cell of this notebook
# overwrites it with the freshly downloaded data.
df_backup = pd.read_csv('./Updates/STG_BEA_CAINC6N_NC.txt',
                        encoding = 'ISO-8859-1',
                        sep = "\t")
# Write the backup in the same layout it was read in (tab-delimited,
# ISO-8859-1, no extra positional index column) so the backup file can be
# copied straight back over the staging file if a restore is needed.
# ISO-8859-1 maps every byte to one character, so reading and writing with it
# does not alter any non-ASCII bytes in the file.
df_backup.to_csv('./Backups/STG_BEA_CAINC6N_NC_BACKUP.txt',
                 sep = "\t",
                 encoding = 'ISO-8859-1',
                 index = False)

In [20]:
# Load BEA CAINC6N_NC data
# Download the full CAINC6N archive. raise_for_status() stops the notebook on
# an HTTP error instead of handing an error page to ZipFile.
response = requests.get('https://apps.bea.gov/regional/zip/CAINC6N.zip')
response.raise_for_status()

# Pick the North Carolina CSV by name rather than by its position in the
# archive: a positional index silently loads a different state's file if BEA
# adds, removes or reorders members.
# NOTE(review): assumes the per-state members are named like
# 'CAINC6N_NC_<years>.csv' -- confirm against `files`.
with ZipFile(BytesIO(response.content)) as zip_file:
    files = zip_file.namelist()
    nc_files = [name for name in files
                if '_NC_' in name.upper() and name.lower().endswith('.csv')]
    if len(nc_files) != 1:
        raise ValueError(
            'Expected exactly one North Carolina CSV in the archive, found '
            + repr(nc_files) + '; archive members: ' + repr(files))
    with zip_file.open(nc_files[0]) as csvfile:
        df = pd.read_csv(csvfile, encoding='ISO-8859-1', sep=",")

In [21]:
# Inspect the last ten rows to spot trailing rows that are not data
df.iloc[-10:]

Unnamed: 0,GeoFIPS,GeoName,Region,TableName,LineCode,IndustryClassification,Description,Unit,2001,2002,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
12012,"""37199""","Yancey, NC",5.0,CAINC6N,1904.0,814,Private households,Thousands of dollars,664.0,639.0,...,752.0,654.0,684.0,(D),(D),(D),(D),(D),(D),(D)
12013,"""37199""","Yancey, NC",5.0,CAINC6N,2000.0,...,Government and government enterprises,Thousands of dollars,31811.0,32905.0,...,42039.0,43069.0,42119.0,41528,40990,40595,42823,50157,51335,51005
12014,"""37199""","Yancey, NC",5.0,CAINC6N,2001.0,...,Federal civilian,Thousands of dollars,2992.0,3157.0,...,3041.0,3715.0,3545.0,3446,2180,2227,3371,3270,3197,3318
12015,"""37199""","Yancey, NC",5.0,CAINC6N,2002.0,...,Military,Thousands of dollars,652.0,881.0,...,1768.0,1687.0,1633.0,1480,1389,1287,1220,1261,1221,1308
12016,"""37199""","Yancey, NC",5.0,CAINC6N,2010.0,...,State and local,Thousands of dollars,28167.0,28867.0,...,37230.0,37667.0,36941.0,36602,37421,37081,38232,45626,46917,46379
12017,"""37199""","Yancey, NC",5.0,CAINC6N,2011.0,...,State government,Thousands of dollars,5028.0,5552.0,...,6036.0,5825.0,5599.0,5728,5874,5297,5224,5508,5512,5843
12018,"""37199""","Yancey, NC",5.0,CAINC6N,2012.0,...,Local government,Thousands of dollars,23139.0,23315.0,...,31194.0,31842.0,31342.0,30874,31547,31784,33008,40118,41405,40536
12019,Note: See the included footnote file.,,,,,,,,,,...,,,,,,,,,,
12020,"Last updated: November 14, 2019-- new statisti...",,,,,,,,,,...,,,,,,,,,,
12021,Source: U.S. Department of Commerce / Bureau o...,,,,,,,,,,...,,,,,,,,,,


In [22]:
# Remove non-data fields
# In the preview above, the note / "Last updated" / source rows at the bottom
# of the file populate only the first column (GeoFIPS). Flag rows by that
# shape instead of assuming there are exactly three of them, so a release
# with a different number of footer lines is still cleaned correctly.
footer_mask = df.drop(columns='GeoFIPS').isna().all(axis=1)

# .copy() makes df_clean an independent frame rather than a slice of df, so
# later in-place edits on df_clean do not raise SettingWithCopyWarning.
df_clean = df.loc[~footer_mask].copy()
df_clean.tail(5)

Unnamed: 0,GeoFIPS,GeoName,Region,TableName,LineCode,IndustryClassification,Description,Unit,2001,2002,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
12014,"""37199""","Yancey, NC",5.0,CAINC6N,2001.0,...,Federal civilian,Thousands of dollars,2992,3157,...,3041,3715,3545,3446,2180,2227,3371,3270,3197,3318
12015,"""37199""","Yancey, NC",5.0,CAINC6N,2002.0,...,Military,Thousands of dollars,652,881,...,1768,1687,1633,1480,1389,1287,1220,1261,1221,1308
12016,"""37199""","Yancey, NC",5.0,CAINC6N,2010.0,...,State and local,Thousands of dollars,28167,28867,...,37230,37667,36941,36602,37421,37081,38232,45626,46917,46379
12017,"""37199""","Yancey, NC",5.0,CAINC6N,2011.0,...,State government,Thousands of dollars,5028,5552,...,6036,5825,5599,5728,5874,5297,5224,5508,5512,5843
12018,"""37199""","Yancey, NC",5.0,CAINC6N,2012.0,...,Local government,Thousands of dollars,23139,23315,...,31194,31842,31342,30874,31547,31784,33008,40118,41405,40536


In [23]:
# Use GeoFIPS as the row index; drop=False keeps the GeoFIPS column in place
# for now
df_clean.set_index('GeoFIPS', drop=False, inplace=True)
df_clean.iloc[:5]

Unnamed: 0_level_0,GeoFIPS,GeoName,Region,TableName,LineCode,IndustryClassification,Description,Unit,2001,2002,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
GeoFIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"""37000""","""37000""",North Carolina,5.0,CAINC6N,1.0,...,Compensation of employees (thousands of dollar...,Thousands of dollars,160371299,162957779,...,211454924,216515016,222527671,232370554,237874021,250729957,263501208,271845644,283403091,298666892
"""37000""","""37000""",North Carolina,5.0,CAINC6N,5.0,...,Wages and salaries,Thousands of dollars,132002644,133601815,...,169548458,173519379,179966208,187090787,192266678,201605023,212899864,221474327,232229403,245399095


In [24]:
# Drop GeoFIPS column
# GeoFIPS is already the index, so the column of the same name is redundant.
# Rebind the result instead of using inplace=True: an in-place drop on a frame
# that was produced by slicing another frame raises SettingWithCopyWarning.
df_clean = df_clean.drop(columns='GeoFIPS')

# Show only the first rows; the full frame is thousands of rows long.
df_clean.head()

Unnamed: 0_level_0,GeoName,Region,TableName,LineCode,IndustryClassification,Description,Unit,2001,2002,2003,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
GeoFIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"""37000""",North Carolina,5.0,CAINC6N,1.0,...,Compensation of employees (thousands of dollar...,Thousands of dollars,160371299,162957779,168340289,...,211454924,216515016,222527671,232370554,237874021,250729957,263501208,271845644,283403091,298666892
"""37000""",North Carolina,5.0,CAINC6N,5.0,...,Wages and salaries,Thousands of dollars,132002644,133601815,136786159,...,169548458,173519379,179966208,187090787,192266678,201605023,212899864,221474327,232229403,245399095


In [25]:
# Export the cleaned frame as a tab-delimited text file for loading into SSMS
staging_path = './Updates/STG_BEA_CAINC6N_NC.txt'
df_clean.to_csv(path_or_buf=staging_path, sep='\t')