In [1]:
import pandas as pd

In [2]:
# Reference link: BEA iTable page (presumably the source of the 2020 income CSV loaded below - confirm)
# https://apps.bea.gov/iTable/iTable.cfm?reqid=70&step=30&isuri=1&major_area=4&area=xx&year=2020&tableid=20&category=720&area_type=4&year_end=-1&classification=non-industry&state=xx&statistic=3&yearbegin=-1&unit_of_measure=levels

In [3]:
# Create DataFrame for county and per capita personal income.
# NOTE: skiprows=4 is needed to skip the header info that precedes the real
# column-header row; an error occurs otherwise.
# GeoFips is read explicitly as str so the leading zeros in the codes
# (e.g. "01001") never depend on pandas' type inference.
income_original_df = pd.read_csv(
    "Resources/bea_income_2020.csv",
    skiprows=4,
    dtype={"GeoFips": str},
)
income_original_df

Unnamed: 0,GeoFips,GeoName,2020
0,01001,"Autauga, AL",46814
1,01003,"Baldwin, AL",50953
2,01005,"Barbour, AL",37850
3,01007,"Bibb, AL",34300
4,01009,"Blount, AL",38808
...,...,...,...
3154,"* Shawano, WI and Menominee, WI are combined a...",,
3155,Metropolitan Areas are defined (geographically...,,
3156,Note. All dollar estimates are in thousands of...,,
3157,(NA) Not available.,,


In [4]:
# Inspect the last 20 rows to see where the county data ends and the footer text begins
income_original_df.iloc[-20:]

Unnamed: 0,GeoFips,GeoName,2020
3139,56045,"Weston, WY",47599.0
3140,Legend / Footnotes:,,
3141,2/ Per capita personal income was computed usi...,,
3142,* Estimates for 1979 forward reflect Alaska Ce...,,
3143,* Estimates from 1988 forward separate Aleutia...,,
3144,* Estimates for 1991 forward separate Denali B...,,
3145,* Estimates from 1993 forward separate Skagway...,,
3146,* Wade Hampton Census Area was renamed Kusilva...,,
3147,"* On January 2, 2019, two new county equivalen...",,
3148,"* La Paz County, AZ was separated from Yuma Co...",,


In [5]:
# Flag rows where GeoName and 2020 are both missing; these identify the footer rows to remove
geo_name_missing = income_original_df["GeoName"].isna()
income_missing = income_original_df["2020"].isna()
both_columns_null_idx = geo_name_missing & income_missing
both_columns_null_idx

0       False
1       False
2       False
3       False
4       False
        ...  
3154     True
3155     True
3156     True
3157     True
3158     True
Length: 3159, dtype: bool

In [6]:
# Display the rows selected by the null mask to confirm they are footer/footnote text
income_original_df[both_columns_null_idx]

Unnamed: 0,GeoFips,GeoName,2020
3140,Legend / Footnotes:,,
3141,2/ Per capita personal income was computed usi...,,
3142,* Estimates for 1979 forward reflect Alaska Ce...,,
3143,* Estimates from 1988 forward separate Aleutia...,,
3144,* Estimates for 1991 forward separate Denali B...,,
3145,* Estimates from 1993 forward separate Skagway...,,
3146,* Wade Hampton Census Area was renamed Kusilva...,,
3147,"* On January 2, 2019, two new county equivalen...",,
3148,"* La Paz County, AZ was separated from Yuma Co...",,
3149,"* Broomfield County, CO, was created from part...",,


In [7]:
# Drop footer rows
# Everything from the first row where both GeoName and 2020 are missing through
# the end of the frame is legend/footnote text, not county data. Slicing at that
# position (instead of a hard-coded tail(19) with inplace=True) keeps working if
# the number of footnotes changes, and re-running this cell does not remove any
# further rows.
if both_columns_null_idx.any():
    first_footer_pos = int(both_columns_null_idx.to_numpy().argmax())
    income_original_df = income_original_df.iloc[:first_footer_pos].copy()
income_original_df

Unnamed: 0,GeoFips,GeoName,2020
0,01001,"Autauga, AL",46814
1,01003,"Baldwin, AL",50953
2,01005,"Barbour, AL",37850
3,01007,"Bibb, AL",34300
4,01009,"Blount, AL",38808
...,...,...,...
3135,56037,"Sweetwater, WY",54000
3136,56039,"Teton, WY",220645
3137,56041,"Uinta, WY",42854
3138,56043,"Washakie, WY",54361


In [8]:
# Rename the year column to a descriptive name
column_renames = {"2020": "per_capita_income"}
income_original_df = income_original_df.rename(column_renames, axis="columns")
income_original_df

Unnamed: 0,GeoFips,GeoName,per_capita_income
0,01001,"Autauga, AL",46814
1,01003,"Baldwin, AL",50953
2,01005,"Barbour, AL",37850
3,01007,"Bibb, AL",34300
4,01009,"Blount, AL",38808
...,...,...,...
3135,56037,"Sweetwater, WY",54000
3136,56039,"Teton, WY",220645
3137,56041,"Uinta, WY",42854
3138,56043,"Washakie, WY",54361


In [11]:
# Drop GeoName column
# Rebind the name to the returned frame instead of mutating with inplace=True,
# so the object produced by the rename step is not altered after it was displayed.
income_original_df = income_original_df.drop(columns=["GeoName"])
income_original_df

Unnamed: 0,GeoFips,per_capita_income
0,01001,46814
1,01003,50953
2,01005,37850
3,01007,34300
4,01009,38808
...,...,...
3135,56037,54000
3136,56039,220645
3137,56041,42854
3138,56043,54361


In [12]:
# Inspect column dtypes (both columns report as object here, so per_capita_income is not yet numeric)
income_original_df.dtypes

GeoFips              object
per_capita_income    object
dtype: object

In [13]:
# Save the trimmed frame for reference; index=True (the default) writes the row index as the first column
income_original_df.to_csv("Resources/income_original_df.csv", index=True)