In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime

%matplotlib inline

#### Table F-12. Number of Earners--All Families by Median and Mean Income: 1947 to 2021					
Information on confidentiality protection, sampling error, nonsampling error, and definitions is available at <https://www2.census.gov/programs-surveys/cps/techdocs/cpsmar22.pdf>.					
Footnotes are available at <www.census.gov/topics/income-poverty/income/guidance/cps-historic-footnotes.html>.					
Source: U.S. Census Bureau, Current Population Survey, 1948 to 2022 Annual Social and Economic Supplements (CPS ASEC).					
(Families as of March of the following year. Income in current and 2021 R-CPI-U-RS adjusted dollars (28). From 1974 to 1988, income is for families with civilian members. Data are not available for 4 or more earners before 1966)					
#### NOTE: mean income only goes through 1967, so building the tables from there, also did not use 4 or more, due to top code medians and nans for several years

In [2]:
f12all = pd.read_excel('../data/single_parent/census/historical_income_families/f12ar_num_earners_families_mean_median_income.xlsx',
                    skiprows=8, nrows=57)
pd.set_option('display.max_rows', None)
f12all.head(3)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Current dollars,2021 dollars,Current dollars.1,2021 dollars.1
0,2021,84283,88590,88590,121840,121840
1,2020 (41),83723,84348,88286,115310,120694
2,2019,83698,86011,91151,116735,123711


In [3]:
#drop empty columns or columns not needed
f12all = f12all.drop(['Current dollars', 'Current dollars.1'], axis=1)

#rename columns
f12all = f12all.rename(columns = {'Unnamed: 0': 'year', 'Unnamed: 1': 'total_families', '2021 dollars': 'median', '2021 dollars.1': 'mean'})

#drop prior index rows, drop original rows and keep revised
f12all = f12all.drop(labels=[6, 11]).reset_index(drop=True)

#remove extra spaces in column headers
f12all = f12all.rename(columns=lambda x: x.strip())

# remove extra spaces over all strings
f12all = f12all.applymap(lambda x: x.strip() if isinstance(x, str) else x)

f12all

Unnamed: 0,year,total_families,median,mean
0,2021,84283,88590,121840
1,2020 (41),83723,88286,120694
2,2019,83698,91151,123711
3,2018,83508,84856,114418
4,2017 (40),83539,84149,114083
5,2017,83103,83931,110968
6,2015,82199,80849,105980
7,2014,81730,76331,101686
8,2013 (39),82316,76271,102134
9,2013 (38),81217,74342,98657


f12all.to_csv('../data/single_parent/census/historical_income_families/f12_all_families_med_mean_1987.csv', index = False)

In [4]:
f12_0 = pd.read_excel('../data/single_parent/census/historical_income_families/f12ar_num_earners_families_mean_median_income.xlsx',
                      skiprows=88, nrows=57)
pd.set_option('display.max_rows', None)
f12_0.head(3)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Current dollars,2021 dollars,Current dollars.1,2021 dollars.1
0,2021,14480,39419,39419,56775,56775
1,2020 (41),14092,38371,40163,53048,55525
2,2019,13527,41535,44017,59062,62592


In [5]:
#drop empty columns or columns not needed
f12_0 = f12_0.drop(['Current dollars', 'Current dollars.1'], axis=1)

#rename columns
f12_0 = f12_0.rename(columns = {'Unnamed: 0': 'year', 'Unnamed: 1': 'total_families', '2021 dollars': 'median', '2021 dollars.1': 'mean'})

#drop prior index rows, drop original rows and keep revised
f12_0 = f12_0.drop(labels=[6, 11]).reset_index(drop=True)

#remove extra spaces in column headers
f12_0 = f12_0.rename(columns=lambda x: x.strip())

# remove extra spaces over all strings
f12_0 = f12_0.applymap(lambda x: x.strip() if isinstance(x, str) else x)

f12_0.head(2)

Unnamed: 0,year,total_families,median,mean
0,2021,14480,39419,56775
1,2020 (41),14092,40163,55525


In [6]:
no_earners_0 = f12_0[['year', 'total_families', 'median', 'mean']]

#create a column 
no_earners_0['number_of_earners'] = '0'

#create a column 
no_earners_0['number_of_earners_txt'] = 'zero'
no_earners_0.head(2)

Unnamed: 0,year,total_families,median,mean,number_of_earners,number_of_earners_txt
0,2021,14480,39419,56775,0,zero
1,2020 (41),14092,40163,55525,0,zero


In [7]:
f12_1 = pd.read_excel('../data/single_parent/census/historical_income_families/f12ar_num_earners_families_mean_median_income.xlsx',
                      skiprows=168, nrows=57)
pd.set_option('display.max_rows', None)
f12_1.head(3)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Current dollars,2021 dollars,Current dollars.1,2021 dollars.1
0,2021,26890,61568,61568,91321,91321
1,2020 (41),26787,59904,62701,86442,90478
2,2019,26119,59649,63214,85916,91050


In [8]:
#drop empty columns or columns not needed
f12_1 = f12_1.drop(['Current dollars', 'Current dollars.1'], axis=1)

#rename columns
f12_1 = f12_1.rename(columns = {'Unnamed: 0': 'year', 'Unnamed: 1': 'total_families', '2021 dollars': 'median', '2021 dollars.1': 'mean'})

#drop prior index rows, drop original rows and keep revised
f12_1 = f12_1.drop(labels=[6, 11]).reset_index(drop=True)

#remove extra spaces in column headers
f12_1 = f12_1.rename(columns=lambda x: x.strip())

# remove extra spaces over all strings
f12_1 = f12_1.applymap(lambda x: x.strip() if isinstance(x, str) else x)

f12_1.head(2)

Unnamed: 0,year,total_families,median,mean
0,2021,26890,61568,91321
1,2020 (41),26787,62701,90478


In [9]:
no_earners_1 = f12_1[['year', 'total_families', 'median', 'mean']]

#create a column 
no_earners_1['number_of_earners'] = '1'

#create a column 
no_earners_1['number_of_earners_txt'] = 'one'
no_earners_1.head(2)

Unnamed: 0,year,total_families,median,mean,number_of_earners,number_of_earners_txt
0,2021,26890,61568,91321,1,one
1,2020 (41),26787,62701,90478,1,one


In [10]:
f12_2 = pd.read_excel('../data/single_parent/census/historical_income_families/f12ar_num_earners_families_mean_median_income.xlsx',
                      skiprows=248, nrows=57)
pd.set_option('display.max_rows', None)
f12_2.head(3)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Current dollars,2021 dollars,Current dollars.1,2021 dollars.1
0,2021,34264,121234,121234,158027,158027
1,2020 (41),34311,113181,118466,148604,155543
2,2019,34896,113527,120311,147767,156597


In [11]:
#drop empty columns or columns not needed
f12_2 = f12_2.drop(['Current dollars', 'Current dollars.1'], axis=1)

#rename columns
f12_2 = f12_2.rename(columns = {'Unnamed: 0': 'year', 'Unnamed: 1': 'total_families', '2021 dollars': 'median', '2021 dollars.1': 'mean'})

#drop prior index rows, drop original rows and keep revised
f12_2 = f12_2.drop(labels=[6, 11]).reset_index(drop=True)

#remove extra spaces in column headers
f12_2 = f12_2.rename(columns=lambda x: x.strip())

# remove extra spaces over all strings
f12_2 = f12_2.applymap(lambda x: x.strip() if isinstance(x, str) else x)

f12_2.head(2)

Unnamed: 0,year,total_families,median,mean
0,2021,34264,121234,158027
1,2020 (41),34311,118466,155543


In [12]:
no_earners_2 = f12_2[['year', 'total_families', 'median', 'mean']]

#create a column 
no_earners_2['number_of_earners'] = '2'

#create a column 
no_earners_2['number_of_earners_txt'] = 'two'
no_earners_2.head(2)

Unnamed: 0,year,total_families,median,mean,number_of_earners,number_of_earners_txt
0,2021,34264,121234,158027,2,two
1,2020 (41),34311,118466,155543,2,two


In [13]:
f12_3 = pd.read_excel('../data/single_parent/census/historical_income_families/f12ar_num_earners_families_mean_median_income.xlsx',
                      skiprows=328, nrows=57)
pd.set_option('display.max_rows', None)
f12_3.head(3)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Current dollars,2021 dollars,Current dollars.1,2021 dollars.1
0,2021,6523,138949,138949,173433,173433
1,2020 (41),6400,133195,139414,165730,173469
2,2019,6813,130407,138200,165935,175852


In [14]:
#drop empty columns or columns not needed
f12_3 = f12_3.drop(['Current dollars', 'Current dollars.1'], axis=1)

#rename columns
f12_3 = f12_3.rename(columns = {'Unnamed: 0': 'year', 'Unnamed: 1': 'total_families', '2021 dollars': 'median', '2021 dollars.1': 'mean'})

#drop prior index rows, drop original rows and keep revised
f12_3 = f12_3.drop(labels=[6, 11]).reset_index(drop=True)

#remove extra spaces in column headers
f12_3 = f12_3.rename(columns=lambda x: x.strip())

# remove extra spaces over all strings
f12_3 = f12_3.applymap(lambda x: x.strip() if isinstance(x, str) else x)

f12_3.head(2)

Unnamed: 0,year,total_families,median,mean
0,2021,6523,138949,173433
1,2020 (41),6400,139414,173469


In [15]:
no_earners_3 = f12_3[['year', 'total_families', 'median', 'mean']]

#create a column 
no_earners_3['number_of_earners'] = '3'

#create a column 
no_earners_3['number_of_earners_txt'] = 'three'
no_earners_3.head(2)

Unnamed: 0,year,total_families,median,mean,number_of_earners,number_of_earners_txt
0,2021,6523,138949,173433,3,three
1,2020 (41),6400,139414,173469,3,three


In [16]:
# List of dataframes
list = [no_earners_0, no_earners_1, no_earners_2, no_earners_3]  
# concat dataframes
num_earners_fam_inc_mean_med_1967 = pd.concat(list, ignore_index=True)

num_earners_fam_inc_mean_med_1967

Unnamed: 0,year,total_families,median,mean,number_of_earners,number_of_earners_txt
0,2021,14480,39419,56775,0,zero
1,2020 (41),14092,40163,55525,0,zero
2,2019,13527,44017,62592,0,zero
3,2018,13459,38871,53725,0,zero
4,2017 (40),13558,36700,53700,0,zero
5,2017,13558,36506,51429,0,zero
6,2015,13080,34443,46828,0,zero
7,2014,13305,32800,45709,0,zero
8,2013 (39),13206,32579,45222,0,zero
9,2013 (38),13403,33429,42790,0,zero


In [17]:
num_earners_fam_inc_mean_med_1967.to_csv('../data/single_parent/census/historical_income_families/f12_num_earners_fam_inc_mean_med_1967.csv', index = False)