In [33]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime

%matplotlib inline

### Table H-12. Number of Earners--All Households by Median and Mean Income: 1980 to 2021					
				
Source: U.S. Census Bureau, Current Population Survey, 1981 to 2022 Annual Social and Economic Supplements (CPS ASEC).					
(Households as of March of the following year. Income in current and 2021 R-CPI-U-RS adjusted dollars (28). Data cells are plugged with '100,000+' to indicate that the medians were topcoded)

#### NOTE: med income only goes through 1987, so building the tables from there, did not use 4 or more, due to top coded medians and nans for several years

#### Table F-12. Number of Earners--All Families by Median and Mean Income: 1947 to 2021					
Information on confidentiality protection, sampling error, nonsampling error, and definitions is available at <https://www2.census.gov/programs-surveys/cps/techdocs/cpsmar22.pdf>.					
Footnotes are available at <www.census.gov/topics/income-poverty/income/guidance/cps-historic-footnotes.html>.					
Source: U.S. Census Bureau, Current Population Survey, 1948 to 2022 Annual Social and Economic Supplements (CPS ASEC).					
(Families as of March of the following year. Income in current and 2021 R-CPI-U-RS adjusted dollars (28). From 1974 to 1988, income is for families with civilian members. Data are not available for 4 or more earners before 1966)					
#### NOTE: mean income only goes through 1967, so building the tables from there, also did not use 4 or more, due to top code medians and nans for several years

In [34]:
h12 = pd.read_excel('../data/single_parent/census/historical_income_households/h12ar.xlsx', skiprows=8, nrows=37)
pd.set_option('display.max_rows', None)
h12

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Current dollars,2021 dollars,Current dollars.1,2021 dollars.1
0,2021,131202,70784,70784,102316,102316
1,2020 (41),129244,68010,71186,97469,102020
2,2019,128451,68703,72808,98088,103949
3,2018,128579,63179,68168,90021,97129
4,2017 (40),127669,61136,67571,87643,96869
5,2017,127586,61372,67832,86220,95296
6,2016,126224,59039,66657,83143,93871
7,2015,125819,56516,64631,79263,90645
8,2014,124587,53657,61468,75738,86763
9,2013 (39),123931,53585,62425,75195,87599


In [35]:
#drop empty columns or columns not needed
h12 = h12.drop(['Current dollars', 'Current dollars.1'], axis=1)

#rename columns
h12 = h12.rename(columns = {'Unnamed: 0': 'year', 'Unnamed: 1': 'total_families', '2021 dollars': 'med_2021', '2021 dollars.1': 'mean_2021'})

#drop prior index rows, drop original rows and keep revised
h12 = h12.drop(labels=[6, 11]).reset_index(drop=True)

#remove extra spaces in column headers
h12 = h12.rename(columns=lambda x: x.strip())

# remove extra spaces over all strings
h12 = h12.applymap(lambda x: x.strip() if isinstance(x, str) else x)

h12.head(2)

Unnamed: 0,year,total_hh,med_2021,mean_2021
0,2021,131202,70784,102316
1,2020 (41),129244,71186,102020


In [36]:
h12.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35 entries, 0 to 34
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   year       35 non-null     object
 1   total_hh   35 non-null     int64 
 2   med_2021   35 non-null     int64 
 3   mean_2021  35 non-null     int64 
dtypes: int64(3), object(1)
memory usage: 1.2+ KB


In [37]:
h12_0 = pd.read_excel('../data/single_parent/census/historical_income_households/h12ar.xlsx', skiprows=55, nrows=37)
pd.set_option('display.max_rows', None)
h12_0.head(3)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Current dollars,2021 dollars,Current dollars.1,2021 dollars.1
0,2021,32817,26148,26148,41945,41945
1,2020 (41),31382,25367,26552,39883,41745
2,2019,30170,26809,28411,42491,45030


In [38]:
#drop empty columns or columns not needed
h12_0 = h12_0.drop(['Current dollars', 'Current dollars.1'], axis=1)

#rename columns
h12_0 = h12_0.rename(columns = {'Unnamed: 0': 'year', 'Unnamed: 1': 'total_families', '2021 dollars': 'med_2021_0', '2021 dollars.1': 'mean_2021_0'})

#drop prior index rows, drop original rows and keep revised
h12_0 = h12_0.drop(labels=[6, 11]).reset_index(drop=True)

#remove extra spaces in column headers
h12_0 = h12_0.rename(columns=lambda x: x.strip())

# remove extra spaces over all strings
h12_0 = h12_0.applymap(lambda x: x.strip() if isinstance(x, str) else x)

h12_0.head(3)

Unnamed: 0,year,total_hh_none,med_2021_0,mean_2021_0
0,2021,32817,26148,41945
1,2020 (41),31382,26552,41745
2,2019,30170,28411,45030


In [40]:
h12_1 = pd.read_excel('../data/single_parent/census/historical_income_households/h12ar.xlsx', skiprows=102, nrows=37)
pd.set_option('display.max_rows', None)
h12_1

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Current dollars,2021 dollars,Current dollars.1,2021 dollars.1
0,2021,46880,58983,58983,84821,84821
1,2020 (41),46527,56190,58814,79356,83062
2,2019,46222,55293,58597,78081,82748
3,2018,46467,51971,56075,73989,79831
4,2017 (40),46782,50762,56105,71857,79421
5,2017,46733,50607,55934,71336,78845
6,2016,46138,48550,54815,68310,77124
7,2015,46356,46763,53478,65209,74573
8,2014,45821,45228,51812,62611,71725
9,2013 (39),46954,44889,52294,61690,71867


In [41]:
#drop empty columns or columns not needed
h12_1 = h12_1.drop(['Current dollars', 'Current dollars.1'], axis=1)

#rename columns
h12_1 = h12_1.rename(columns = {'Unnamed: 0': 'year', 'Unnamed: 1': 'total_families', '2021 dollars': 'median', '2021 dollars.1': 'mean'})

#drop prior index rows, drop original rows and keep revised
h12_1 = h12_1.drop(labels=[6, 11]).reset_index(drop=True)

#remove extra spaces in column headers
h12_1 = h12_1.rename(columns=lambda x: x.strip())

# remove extra spaces over all strings
h12_1 = h12_1.applymap(lambda x: x.strip() if isinstance(x, str) else x)

h12_1.head(3)

Unnamed: 0,year,total_hh_1,med_2021_1,mean_2021_1
0,2021,46880,58983,84821
1,2020 (41),46527,58814,83062
2,2019,46222,58597,82748


In [67]:
h12_2 = pd.read_excel('../data/single_parent/census/historical_income_households/h12ar.xlsx', skiprows=149, nrows=37)
pd.set_option('display.max_rows', None)
h12_2.head(3)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Current dollars,2021 dollars,Current dollars.1,2021 dollars.1
0,2021,41149,115757,115757,151415,151415
1,2020 (41),41110,109905,115037,143326,150019
2,2019,41171,110529,117134,142488,151003


In [44]:
#drop empty columns or columns not needed
h12_2 = h12_2.drop(['Current dollars', 'Current dollars.1'], axis=1)

#rename columns
h12_2 = h12_2.rename(columns = {'Unnamed: 0': 'year', 'Unnamed: 1': 'total_hh_2', '2021 dollars': 'median', '2021 dollars.1': 'mean'})

#drop prior index rows, drop original rows and keep revised
h12_2 = h12_2.drop(labels=[6, 11]).reset_index(drop=True)

#remove extra spaces in column headers
h12_2 = h12_2.rename(columns=lambda x: x.strip())

# remove extra spaces over all strings
h12_2 = h12_2.applymap(lambda x: x.strip() if isinstance(x, str) else x)

h12_2.head(2)

Unnamed: 0,year,total_hh_2,med_2021_2,mean_2021_2
0,2021,41149,115757,151415
1,2020 (41),41110,115037,150019


In [45]:
h12merged = pd.merge(h12merged, h12_2, on = 'year', how = 'left')
h12merged.head(3)

Unnamed: 0,year,total_hh,med_2021,mean_2021,total_hh_none,med_2021_0,mean_2021_0,total_hh_1,med_2021_1,mean_2021_1,total_hh_2,med_2021_2,mean_2021_2
0,2021,131202,70784,102316,32817,26148,41945,46880,58983,84821,41149,115757,151415
1,2020 (41),129244,71186,102020,31382,26552,41745,46527,58814,83062,41110,115037,150019
2,2019,128451,72808,103949,30170,28411,45030,46222,58597,82748,41171,117134,151003


In [46]:
h12_3 = pd.read_excel('../data/single_parent/census/historical_income_households/h12ar.xlsx', skiprows=196, nrows=37)
pd.set_option('display.max_rows', None)
h12_3

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Current dollars,2021 dollars,Current dollars.1,2021 dollars.1
0,2021,7794,134884,134884,167969,167969
1,2020 (41),7618,131012,137129,162260,169837
2,2019,8050,127399,135012,163396,173161
3,2018,7734,124824,134680,156255,168593
4,2017 (40),7641,121551,134346,148282,163891
5,2017,7676,121697,134507,146971,162442
6,2016,7644,115357,130242,139687,157711
7,2015,7608,111416,127415,131956,150904
8,2014,7342,107314,122935,126664,145102
9,2013 (39),7043,101894,118703,127744,148817


In [47]:
#drop empty columns or columns not needed
h12_3 = h12_3.drop(['Current dollars', 'Current dollars.1'], axis=1)

#rename columns
h12_3 = h12_3.rename(columns = {'Unnamed: 0': 'year', 'Unnamed: 1': 'total_hh_3', '2021 dollars': 'med_2021_3', '2021 dollars.1': 'mean_2021_3'})

#drop prior index rows, drop original rows and keep revised
h12_3 = h12_3.drop(labels=[6, 11]).reset_index(drop=True)

#remove extra spaces in column headers
h12_3 = h12_3.rename(columns=lambda x: x.strip())

# remove extra spaces over all strings
h12_3 = h12_3.applymap(lambda x: x.strip() if isinstance(x, str) else x)

h12_3.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35 entries, 0 to 34
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   year         35 non-null     object
 1   total_hh_3   35 non-null     int64 
 2   med_2021_3   35 non-null     int64 
 3   mean_2021_3  35 non-null     int64 
dtypes: int64(3), object(1)
memory usage: 1.2+ KB


In [48]:
h12merged = pd.merge(h12merged, h12_3, on = 'year', how = 'left')
h12merged.head(2)

Unnamed: 0,year,total_hh,med_2021,mean_2021,total_hh_none,med_2021_0,mean_2021_0,total_hh_1,med_2021_1,mean_2021_1,total_hh_2,med_2021_2,mean_2021_2,total_hh_3,med_2021_3,mean_2021_3
0,2021,131202,70784,102316,32817,26148,41945,46880,58983,84821,41149,115757,151415,7794,134884,167969
1,2020 (41),129244,71186,102020,31382,26552,41745,46527,58814,83062,41110,115037,150019,7618,137129,169837


In [49]:
h12merged = h12merged.astype({'year': 'str'})

In [50]:
#locate additional letters in year column
h12merged = h12merged[h12merged['year'].str.match('\d{4}[a-zA-Z]?', na=False)]
#extract only the year
h12merged['year'] = h12merged['year'].str.extract('(\d{4})', expand=False).astype(int)
#sort by descending
h12merged = h12merged.sort_values('year', ascending=False)

h12merged.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 35 entries, 0 to 34
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype
---  ------         --------------  -----
 0   year           35 non-null     int32
 1   total_hh       35 non-null     int64
 2   med_2021       35 non-null     int64
 3   mean_2021      35 non-null     int64
 4   total_hh_none  35 non-null     int64
 5   med_2021_0     35 non-null     int64
 6   mean_2021_0    35 non-null     int64
 7   total_hh_1     35 non-null     int64
 8   med_2021_1     35 non-null     int64
 9   mean_2021_1    35 non-null     int64
 10  total_hh_2     35 non-null     int64
 11  med_2021_2     35 non-null     int64
 12  mean_2021_2    35 non-null     int64
 13  total_hh_3     35 non-null     int64
 14  med_2021_3     35 non-null     int64
 15  mean_2021_3    35 non-null     int64
dtypes: int32(1), int64(15)
memory usage: 4.5 KB


In [51]:
h12merged

Unnamed: 0,year,total_hh,med_2021,mean_2021,total_hh_none,med_2021_0,mean_2021_0,total_hh_1,med_2021_1,mean_2021_1,total_hh_2,med_2021_2,mean_2021_2,total_hh_3,med_2021_3,mean_2021_3
0,2021,131202,70784,102316,32817,26148,41945,46880,58983,84821,41149,115757,151415,7794,134884,167969
1,2020,129244,71186,102020,31382,26552,41745,46527,58814,83062,41110,115037,150019,7618,137129,169837
2,2019,128451,72808,103949,30170,28411,45030,46222,58597,82748,41171,117134,151003,8050,135012,173161
3,2018,128579,68168,97129,30736,25760,39732,46467,56075,79831,40981,109073,139798,7734,134680,168593
4,2017,127669,67571,96869,30345,25387,39974,46782,56105,79421,40210,108914,140982,7641,134346,163891
5,2017,127586,67832,95296,30382,25140,38798,46733,55934,78845,40099,108682,138100,7676,134507,162442
6,2015,125819,64631,90645,29694,24609,36175,46356,53478,74573,39648,105237,132965,7608,127415,150904
7,2014,124587,61468,86763,29883,23337,35307,45821,51812,71725,39162,101041,127170,7342,122935,145102
9,2013,122952,60507,84624,29097,23295,33714,45623,51161,70316,38934,99079,123448,7026,120831,142844
8,2013,123931,62425,87599,29073,23178,35176,46954,52294,71867,38670,100866,129149,7043,118703,148817


h12merged.to_csv('../data/single_parent/census/historical_income_households/h12_hh_earners_mean_med_1987.csv', index = False)

In [71]:
f12 = pd.read_excel('../data/single_parent/census/historical_income_families/f12ar_num_earners_families_mean_median_income.xlsx',
                    skiprows=8, nrows=57)
pd.set_option('display.max_rows', None)
f12.head(3)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Current dollars,2021 dollars,Current dollars.1,2021 dollars.1
0,2021,84283,88590,88590,121840,121840
1,2020 (41),83723,84348,88286,115310,120694
2,2019,83698,86011,91151,116735,123711


In [72]:
#drop empty columns or columns not needed
f12 = f12.drop(['Current dollars', 'Current dollars.1'], axis=1)

#rename columns
f12 = f12.rename(columns = {'Unnamed: 0': 'year', 'Unnamed: 1': 'total_families', '2021 dollars': 'median', '2021 dollars.1': 'mean'})

#drop prior index rows, drop original rows and keep revised
f12 = f12.drop(labels=[6, 11]).reset_index(drop=True)

#remove extra spaces in column headers
f12 = f12.rename(columns=lambda x: x.strip())

# remove extra spaces over all strings
f12 = f12.applymap(lambda x: x.strip() if isinstance(x, str) else x)

f12.head(2)

Unnamed: 0,year,total_families,median,mean
0,2021,84283,88590,121840
1,2020 (41),83723,88286,120694


In [62]:
f12_0 = pd.read_excel('../data/single_parent/census/historical_income_families/f12ar_num_earners_families_mean_median_income.xlsx',
                      skiprows=88, nrows=57)
pd.set_option('display.max_rows', None)
f12_0.head(3)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Current dollars,2021 dollars,Current dollars.1,2021 dollars.1
0,2021,14480,39419,39419,56775,56775
1,2020 (41),14092,38371,40163,53048,55525
2,2019,13527,41535,44017,59062,62592


In [63]:
#drop empty columns or columns not needed
f12_0 = f12_0.drop(['Current dollars', 'Current dollars.1'], axis=1)

#rename columns
f12_0 = f12_0.rename(columns = {'Unnamed: 0': 'year', 'Unnamed: 1': 'total_families', '2021 dollars': 'median', '2021 dollars.1': 'mean'})

#drop prior index rows, drop original rows and keep revised
f12_0 = f12_0.drop(labels=[6, 11]).reset_index(drop=True)

#remove extra spaces in column headers
f12_0 = f12_0.rename(columns=lambda x: x.strip())

# remove extra spaces over all strings
f12_0 = f12_0.applymap(lambda x: x.strip() if isinstance(x, str) else x)

f12_0.head(2)

Unnamed: 0,year,total_families,median,mean
0,2021,14480,39419,56775
1,2020 (41),14092,40163,55525


In [65]:
no_earners_0 = f12_0[['year', 'total_families', 'median', 'mean']]

#create a column 
no_earners_0['number_of_earners'] = '0'

#create a column 
no_earners_0['number_of_earners_txt'] = 'zero'
no_earners_0.head(2)

Unnamed: 0,year,total_families,median,mean,number_of_earners,number_of_earners_txt
0,2021,14480,39419,56775,0,zero
1,2020 (41),14092,40163,55525,0,zero
2,2019,13527,44017,62592,0,zero
3,2018,13459,38871,53725,0,zero
4,2017 (40),13558,36700,53700,0,zero
5,2017,13558,36506,51429,0,zero
6,2015,13080,34443,46828,0,zero
7,2014,13305,32800,45709,0,zero
8,2013 (39),13206,32579,45222,0,zero
9,2013 (38),13403,33429,42790,0,zero


In [56]:
f12_1 = pd.read_excel('../data/single_parent/census/historical_income_families/f12ar_num_earners_families_mean_median_income.xlsx',
                      skiprows=168, nrows=57)
pd.set_option('display.max_rows', None)
f12_1.head(3)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Current dollars,2021 dollars,Current dollars.1,2021 dollars.1
0,2021,26890,61568,61568,91321,91321
1,2020 (41),26787,59904,62701,86442,90478
2,2019,26119,59649,63214,85916,91050


In [57]:
#drop empty columns or columns not needed
f12_1 = f12_1.drop(['Current dollars', 'Current dollars.1'], axis=1)

#rename columns
f12_1 = f12_1.rename(columns = {'Unnamed: 0': 'year', 'Unnamed: 1': 'total_families', '2021 dollars': 'median', '2021 dollars.1': 'mean'})

#drop prior index rows, drop original rows and keep revised
f12_1 = f12_1.drop(labels=[6, 11]).reset_index(drop=True)

#remove extra spaces in column headers
f12_1 = f12_1.rename(columns=lambda x: x.strip())

# remove extra spaces over all strings
f12_1 = f12_1.applymap(lambda x: x.strip() if isinstance(x, str) else x)

f12_1.head(2)

Unnamed: 0,year,total_families,median,mean
0,2021,26890,61568,91321
1,2020 (41),26787,62701,90478


In [68]:
no_earners_1 = f12_1[['year', 'total_families', 'median', 'mean']]

#create a column 
no_earners_1['number_of_earners'] = '1'

#create a column 
no_earners_1['number_of_earners_txt'] = 'one'
no_earners_1.head(2)

Unnamed: 0,year,total_families,median,mean,number_of_earners,number_of_earners_txt
0,2021,26890,61568,91321,1,one
1,2020 (41),26787,62701,90478,1,one


In [69]:
f12_2 = pd.read_excel('../data/single_parent/census/historical_income_families/f12ar_num_earners_families_mean_median_income.xlsx',
                      skiprows=248, nrows=57)
pd.set_option('display.max_rows', None)
f12_2.head(3)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Current dollars,2021 dollars,Current dollars.1,2021 dollars.1
0,2021,34264,121234,121234,158027,158027
1,2020 (41),34311,113181,118466,148604,155543
2,2019,34896,113527,120311,147767,156597


In [70]:
#drop empty columns or columns not needed
f12_2 = f12_2.drop(['Current dollars', 'Current dollars.1'], axis=1)

#rename columns
f12_2 = f12_2.rename(columns = {'Unnamed: 0': 'year', 'Unnamed: 1': 'total_families', '2021 dollars': 'median', '2021 dollars.1': 'mean'})

#drop prior index rows, drop original rows and keep revised
f12_2 = f12_2.drop(labels=[6, 11]).reset_index(drop=True)

#remove extra spaces in column headers
f12_2 = f12_2.rename(columns=lambda x: x.strip())

# remove extra spaces over all strings
f12_2 = f12_2.applymap(lambda x: x.strip() if isinstance(x, str) else x)

f12_2.head(2)

Unnamed: 0,year,total_families,median,mean
0,2021,34264,121234,158027
1,2020 (41),34311,118466,155543


In [None]:
no_earners = f12_2[['year', 'total_families', 'median', 'mean']]

#create a column 
no_earners['number_of_earners'] = '2'

#create a column 
no_earners['number_of_earners_txt'] = 'two'
no_earners.head(2)

In [31]:
f12_3 = pd.read_excel('../data/single_parent/census/historical_income_families/f12ar_num_earners_families_mean_median_income.xlsx',
                      skiprows=328, nrows=57)
pd.set_option('display.max_rows', None)
f12_3.head(3)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Current dollars,2021 dollars,Current dollars.1,2021 dollars.1
0,2021,6523,138949,138949,173433,173433
1,2020 (41),6400,133195,139414,165730,173469
2,2019,6813,130407,138200,165935,175852


In [32]:
#drop empty columns or columns not needed
f12_3 = f12_3.drop(['Current dollars', 'Current dollars.1'], axis=1)

#rename columns
f12_3 = f12_3.rename(columns = {'Unnamed: 0': 'year', 'Unnamed: 1': 'total_fam_3', '2021 dollars': 'med_2021_3', '2021 dollars.1': 'mean_2021_3'})

#drop prior index rows, drop original rows and keep revised
f12_3 = f12_3.drop(labels=[6, 11]).reset_index(drop=True)

#remove extra spaces in column headers
f12_3 = f12_3.rename(columns=lambda x: x.strip())

# remove extra spaces over all strings
f12_3 = f12_3.applymap(lambda x: x.strip() if isinstance(x, str) else x)

f12_3.head(2)

Unnamed: 0,year,total_fam_3,med_2021_3,mean_2021_3
0,2021,6523,138949,173433
1,2020 (41),6400,139414,173469


In [33]:
f12merged = pd.merge(f12merged, f12_3, on = 'year', how = 'left')
f12merged

Unnamed: 0,year,total_fam,med_2021,mean_2021,total_fam_0,med_2021_0,mean_2021_0,total_fam_1,med_2021_1,mean_2021_1,total_fam_2,med_2021_2,mean_2021_2,total_fam_3,med_2021_3,mean_2021_3
0,2021,84283,88590,121840,14480,39419,56775,26890,61568,91321,34264,121234,158027,6523,138949,173433
1,2020 (41),83723,88286,120694,14092,40163,55525,26787,62701,90478,34311,118466,155543,6400,139414,173469
2,2019,83698,91151,123711,13527,44017,62592,26119,63214,91050,34896,120311,156597,6813,138200,175852
3,2018,83508,84856,114418,13459,38871,53725,26741,59233,85398,34583,113283,144464,6547,136359,171499
4,2017 (40),83539,84149,114083,13558,36700,53700,27281,57912,85371,33986,113173,146298,6516,136360,165103
5,2017,83103,83931,110968,13558,36506,51429,27257,57736,84020,33558,111997,141870,6523,134733,161223
6,2015,82199,80849,105980,13080,34443,46828,27174,55608,79196,33458,109599,137084,6424,130545,152611
7,2014,81730,76331,101686,13305,32800,45709,27078,53746,76241,33076,104661,131468,6304,126138,148402
8,2013 (39),82316,76271,102134,13206,32579,45222,28246,53884,76423,32923,104767,133115,6107,121401,151300
9,2013 (38),81217,74342,98657,13403,33429,42790,27242,53144,75437,32749,102513,127692,5925,123474,145801


In [34]:
f12merged = f12merged.astype({'year': 'str'})
f12merged

Unnamed: 0,year,total_fam,med_2021,mean_2021,total_fam_0,med_2021_0,mean_2021_0,total_fam_1,med_2021_1,mean_2021_1,total_fam_2,med_2021_2,mean_2021_2,total_fam_3,med_2021_3,mean_2021_3
0,2021,84283,88590,121840,14480,39419,56775,26890,61568,91321,34264,121234,158027,6523,138949,173433
1,2020 (41),83723,88286,120694,14092,40163,55525,26787,62701,90478,34311,118466,155543,6400,139414,173469
2,2019,83698,91151,123711,13527,44017,62592,26119,63214,91050,34896,120311,156597,6813,138200,175852
3,2018,83508,84856,114418,13459,38871,53725,26741,59233,85398,34583,113283,144464,6547,136359,171499
4,2017 (40),83539,84149,114083,13558,36700,53700,27281,57912,85371,33986,113173,146298,6516,136360,165103
5,2017,83103,83931,110968,13558,36506,51429,27257,57736,84020,33558,111997,141870,6523,134733,161223
6,2015,82199,80849,105980,13080,34443,46828,27174,55608,79196,33458,109599,137084,6424,130545,152611
7,2014,81730,76331,101686,13305,32800,45709,27078,53746,76241,33076,104661,131468,6304,126138,148402
8,2013 (39),82316,76271,102134,13206,32579,45222,28246,53884,76423,32923,104767,133115,6107,121401,151300
9,2013 (38),81217,74342,98657,13403,33429,42790,27242,53144,75437,32749,102513,127692,5925,123474,145801


In [35]:
#extract year from footnotes
f12merged['year'] = f12merged['year'].str.extract('(\d{4})', expand=False).astype(int)
#sort by descending
f12merged = f12merged.sort_values('year', ascending=False)

f12merged

Unnamed: 0,year,total_fam,med_2021,mean_2021,total_fam_0,med_2021_0,mean_2021_0,total_fam_1,med_2021_1,mean_2021_1,total_fam_2,med_2021_2,mean_2021_2,total_fam_3,med_2021_3,mean_2021_3
0,2021,84283,88590,121840,14480,39419,56775,26890,61568,91321,34264,121234,158027,6523,138949,173433
1,2020,83723,88286,120694,14092,40163,55525,26787,62701,90478,34311,118466,155543,6400,139414,173469
2,2019,83698,91151,123711,13527,44017,62592,26119,63214,91050,34896,120311,156597,6813,138200,175852
3,2018,83508,84856,114418,13459,38871,53725,26741,59233,85398,34583,113283,144464,6547,136359,171499
4,2017,83539,84149,114083,13558,36700,53700,27281,57912,85371,33986,113173,146298,6516,136360,165103
5,2017,83103,83931,110968,13558,36506,51429,27257,57736,84020,33558,111997,141870,6523,134733,161223
6,2015,82199,80849,105980,13080,34443,46828,27174,55608,79196,33458,109599,137084,6424,130545,152611
7,2014,81730,76331,101686,13305,32800,45709,27078,53746,76241,33076,104661,131468,6304,126138,148402
8,2013,82316,76271,102134,13206,32579,45222,28246,53884,76423,32923,104767,133115,6107,121401,151300
9,2013,81217,74342,98657,13403,33429,42790,27242,53144,75437,32749,102513,127692,5925,123474,145801


# List of dataframes
list = [aian_merged, api_merged, asian_merged, black_merged, hispanic_merged, nhopi_merged, white_merged]  
# concat dataframes
sp_percent_count_state = pd.concat(list, ignore_index=True)

sp_percent_count_state.head()

f12merged.to_csv('../data/single_parent/census/historical_households/f12_fam_earners_mean_med_1967.csv', index = False)