In [18]:
## Written and published by Nathan Young, Junior Data Analyst for NC Data Dashboard, December 2019 ##

In [19]:
# Imports
import pandas as pd
import requests
from io import BytesIO, StringIO
from zipfile import ZipFile

In [20]:
# Create Backups
# Snapshot the previously staged file before this run overwrites it.
# The staged file is tab-delimited, so the backup is written the same way
# (tab separator, no extra pandas row-index column) and can be dropped back
# in place of the original if the update has to be rolled back.
df_backup = pd.read_csv('./Updates/STG_BEA_CAINC5N_NC.txt', encoding = 'ISO-8859-1', sep='\t')
# Write with the encoding used for reading so every character round-trips.
df_backup.to_csv('./Backups/STG_BEA_CAINC5N_NC_BACKUP.txt', sep='\t', index=False, encoding='ISO-8859-1')

In [21]:
# Load BEA CAINC5N_NC data
# Download the full CAINC5N archive from BEA. Fail loudly on an HTTP error
# (instead of passing an error page to ZipFile) and do not hang forever if
# the server stops responding.
response = requests.get('https://apps.bea.gov/regional/zip/CAINC5N.zip', timeout=60)
response.raise_for_status()
zip_file = ZipFile(BytesIO(response.content))
files = zip_file.namelist()

# Select the North Carolina CSV by name rather than by its position in the
# archive, so the load cannot silently switch to another state when BEA adds,
# removes or reorders members.
# NOTE(review): assumes BEA member names look like CAINC5N_NC_<years>.csv - confirm.
nc_files = [name for name in files if '_NC_' in name and name.lower().endswith('.csv')]
if len(nc_files) != 1:
    raise ValueError(
        'Expected exactly one North Carolina CSV in the CAINC5N archive, '
        'found {}: {}. Archive members: {}'.format(len(nc_files), nc_files, files)
    )

with zip_file.open(nc_files[0]) as csvfile:
    df = pd.read_csv(csvfile, encoding='ISO-8859-1', sep=",")

In [22]:
# Inspect the last ten rows to spot footer lines that are not data
df.tail(n=10)

Unnamed: 0,GeoFIPS,GeoName,Region,TableName,LineCode,IndustryClassification,Description,Unit,2001,2002,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
13225,"""37199""","Yancey, NC",5.0,CAINC5N,2000.0,...,Government and government enterprises,Thousands of dollars,31811.0,32905.0,...,42039.0,43069.0,42119.0,41528.0,40990.0,40595.0,42823.0,50157.0,51335.0,51005.0
13226,"""37199""","Yancey, NC",5.0,CAINC5N,2001.0,...,Federal civilian,Thousands of dollars,2992.0,3157.0,...,3041.0,3715.0,3545.0,3446.0,2180.0,2227.0,3371.0,3270.0,3197.0,3318.0
13227,"""37199""","Yancey, NC",5.0,CAINC5N,2002.0,...,Military,Thousands of dollars,652.0,881.0,...,1768.0,1687.0,1633.0,1480.0,1389.0,1287.0,1220.0,1261.0,1221.0,1308.0
13228,"""37199""","Yancey, NC",5.0,CAINC5N,2010.0,...,State and local,Thousands of dollars,28167.0,28867.0,...,37230.0,37667.0,36941.0,36602.0,37421.0,37081.0,38232.0,45626.0,46917.0,46379.0
13229,"""37199""","Yancey, NC",5.0,CAINC5N,2011.0,...,State government,Thousands of dollars,5028.0,5552.0,...,6036.0,5825.0,5599.0,5728.0,5874.0,5297.0,5224.0,5508.0,5512.0,5843.0
13230,"""37199""","Yancey, NC",5.0,CAINC5N,2012.0,...,Local government,Thousands of dollars,23139.0,23315.0,...,31194.0,31842.0,31342.0,30874.0,31547.0,31784.0,33008.0,40118.0,41405.0,40536.0
13231,Note: See the included footnote file.,,,,,,,,,,...,,,,,,,,,,
13232,CAINC5N: Personal Income by Major Component an...,,,,,,,,,,...,,,,,,,,,,
13233,"Last updated: November 14, 2019-- new statisti...",,,,,,,,,,...,,,,,,,,,,
13234,Source: U.S. Department of Commerce / Bureau o...,,,,,,,,,,...,,,,,,,,,,


In [23]:
# Remove non-data fields
# The BEA footer lines (note, table title, last-updated, source) populate only
# the first column, so keep the rows that carry a GeoName instead of assuming
# the footer is always exactly four lines long.
# .copy() makes df_clean an independent frame, so the edits in the following
# cells never act on a slice of df.
df_clean = df[df['GeoName'].notna()].copy()
df_clean.tail(5)

Unnamed: 0,GeoFIPS,GeoName,Region,TableName,LineCode,IndustryClassification,Description,Unit,2001,2002,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
13226,"""37199""","Yancey, NC",5.0,CAINC5N,2001.0,...,Federal civilian,Thousands of dollars,2992,3157,...,3041,3715,3545,3446,2180,2227,3371,3270,3197,3318
13227,"""37199""","Yancey, NC",5.0,CAINC5N,2002.0,...,Military,Thousands of dollars,652,881,...,1768,1687,1633,1480,1389,1287,1220,1261,1221,1308
13228,"""37199""","Yancey, NC",5.0,CAINC5N,2010.0,...,State and local,Thousands of dollars,28167,28867,...,37230,37667,36941,36602,37421,37081,38232,45626,46917,46379
13229,"""37199""","Yancey, NC",5.0,CAINC5N,2011.0,...,State government,Thousands of dollars,5028,5552,...,6036,5825,5599,5728,5874,5297,5224,5508,5512,5843
13230,"""37199""","Yancey, NC",5.0,CAINC5N,2012.0,...,Local government,Thousands of dollars,23139,23315,...,31194,31842,31342,30874,31547,31784,33008,40118,41405,40536


In [24]:
# Use the GeoFIPS values as the row index; the GeoFIPS column itself is
# kept in the frame at this step (drop=False).
df_clean = df_clean.set_index('GeoFIPS', drop=False)
df_clean.head(2)

Unnamed: 0_level_0,GeoFIPS,GeoName,Region,TableName,LineCode,IndustryClassification,Description,Unit,2001,2002,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
GeoFIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"""37000""","""37000""",North Carolina,5.0,CAINC5N,10.0,...,Personal income (thousands of dollars),Thousands of dollars,228879962,232267281,...,338315713,341627602,355052267,379925288,376023857,397995923,419891523,433195584,453769026,478861557
"""37000""","""37000""",North Carolina,5.0,CAINC5N,20.0,...,Population (persons) 2/,Number of persons,8210122,8326201,...,9449566,9574293,9656754,9749123,9843599,9933944,10033079,10156679,10270800,10383620


In [25]:
# Remove the GeoFIPS column from the frame
df_clean = df_clean.drop(columns='GeoFIPS')
df_clean.head(2)

Unnamed: 0_level_0,GeoName,Region,TableName,LineCode,IndustryClassification,Description,Unit,2001,2002,2003,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
GeoFIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"""37000""",North Carolina,5.0,CAINC5N,10.0,...,Personal income (thousands of dollars),Thousands of dollars,228879962,232267281,241625829,...,338315713,341627602,355052267,379925288,376023857,397995923,419891523,433195584,453769026,478861557
"""37000""",North Carolina,5.0,CAINC5N,20.0,...,Population (persons) 2/,Number of persons,8210122,8326201,8422501,...,9449566,9574293,9656754,9749123,9843599,9933944,10033079,10156679,10270800,10383620


In [26]:
# Write the cleaned frame as a tab-delimited txt file for export to SSMS
df_clean.to_csv(path_or_buf='./Updates/STG_BEA_CAINC5N_NC.txt', sep='\t')