In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime

%matplotlib inline

#### f07 - Table F-7. Type of Family--All Families by Median and Mean Income: 1947 to 2021					
Information on confidentiality protection, sampling error, nonsampling error, and definitions is available at <https://www2.census.gov/programs-surveys/cps/techdocs/cpsmar22.pdf>.					
Footnotes are available at <https://www.census.gov/topics/income-poverty/income/guidance/cps-historic-footnotes.html>.
Source: U.S. Census Bureau, Current Population Survey, 1948 to 2022 Annual Social and Economic Supplements (CPS ASEC).					
(Families as of March of the following year. Income in current and 2021 R-CPI-U-RS adjusted dollars (28))

In [2]:
# Load the "all families" section of Table F-7 (per the original note, this
# window of the sheet runs back through 1970): skip the 7 lines above the
# header row and read the next 55 rows.
f07_xlsx = '../data/single_parent/census/historical_income_families/f07ar_mean_med_type_fam.xlsx'
f07a = pd.read_excel(f07_xlsx, skiprows=7, nrows=55)

# lift the display row limit so whole frames are rendered
pd.set_option('display.max_rows', None)
f07a

Unnamed: 0,Type of family and year,Number (thousands),Median income,Unnamed: 3,Mean income,Unnamed: 5
0,,,Current dollars,2021 dollars,Current dollars,2021 dollars
1,2021,84283.0,88590,88590,121840,121840
2,2020 (41),83723.0,84348,88286,115310,120694
3,2019,83698.0,86011,91151,116735,123711
4,2018,83508.0,78646,84856,106045,114418
5,2017 (40),83539.0,76135,84149,103218,114083
6,2017,83103.0,75938,83931,100400,110968
7,2016,82854.0,72707,82089,97357,109919
8,2015,82199.0,70697,80849,92673,105980
9,2014,81730.0,66632,76331,88765,101686


In [3]:
# Tidy the raw "all families" frame in one chained pass.
f07a = (
    f07a
    # usable names for the year, family-count and the two 2021-dollar columns
    .rename(columns={
        'Type of family and year': 'year',
        'Number (thousands)': 'all_families (thousands)',
        'Unnamed: 3': 'all_med_2021_dollars',
        'Unnamed: 5': 'all_mean_2021_dollars',
    })
    # the current-dollar columns are not needed
    .drop(columns=['Median income', 'Mean income'])
    # row 0 is the "Current dollars / 2021 dollars" sub-header; row 6 is the
    # unrevised 2017 entry (the revised "2017 (40)" row is kept); row 11 is
    # presumably the matching unrevised 2013 entry -- TODO confirm
    .drop(index=[0, 6, 11])
    .reset_index(drop=True)
    # trim stray whitespace from the column headers ...
    .rename(columns=str.strip)
    # ... and from every string cell
    .applymap(lambda cell: cell.strip() if isinstance(cell, str) else cell)
)

f07a.head(2)

Unnamed: 0,year,all_families (thousands),all_med_2021_dollars,all_mean_2021_dollars
0,2021,84283.0,88590,121840
1,2020 (41),83723.0,88286,120694


In [4]:
# year stays text for now (labels such as "2020 (41)" are still present);
# the family count becomes a whole number
f07a_dtypes = {'year': 'str', 'all_families (thousands)': 'int64'}
f07a = f07a.astype(f07a_dtypes)
f07a

Unnamed: 0,year,all_families (thousands),all_med_2021_dollars,all_mean_2021_dollars
0,2021,84283,88590,121840
1,2020 (41),83723,88286,120694
2,2019,83698,91151,123711
3,2018,83508,84856,114418
4,2017 (40),83539,84149,114083
5,2016,82854,82089,109919
6,2015,82199,80849,105980
7,2014,81730,76331,101686
8,2013 (39),82316,76271,102134
9,2012,80944,73583,97939


In [5]:
# summarize row count, non-null counts and dtypes of the all-families frame
f07a.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52 entries, 0 to 51
Data columns (total 4 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   year                      52 non-null     object
 1   all_families (thousands)  52 non-null     int64 
 2   all_med_2021_dollars      52 non-null     int64 
 3   all_mean_2021_dollars     52 non-null     int64 
dtypes: int64(3), object(1)
memory usage: 1.8+ KB


In [6]:
# Load the married-couple section of Table F-7 (per the original note, through
# 1970): skip the 87 lines above its header row and read the next 55 rows.
f07_xlsx = '../data/single_parent/census/historical_income_families/f07ar_mean_med_type_fam.xlsx'
f07mc = pd.read_excel(f07_xlsx, skiprows=87, nrows=55)

# lift the display row limit so whole frames are rendered
pd.set_option('display.max_rows', None)
f07mc

Unnamed: 0,Type of family and year,Number (thousands),Median income,Unnamed: 3,Mean income,Unnamed: 5
0,,,Current dollars,2021 dollars,Current dollars,2021 dollars
1,2021,61448.0,106696,106696,141531,141531
2,2020 (41),61297.0,101554,106296,134045,140304
3,2019,62355.0,102032,108130,134442,142476
4,2018,61971.0,93329,100698,121827,131447
5,2017 (40),61883.0,91105,100695,119590,132178
6,2017,61254.0,90148,99637,115655,127829
7,2016,60821.0,86811,98012,112007,126460
8,2015,60258.0,84324,96432,107229,122626
9,2014,60015.0,80814,92578,103313,118352


In [7]:
# Tidy the raw married-couple frame in one chained pass.
f07mc = (
    f07mc
    # usable names for the year, family-count and the two 2021-dollar columns
    .rename(columns={
        'Type of family and year': 'year',
        'Number (thousands)': 'married_couple_families (thousands)',
        'Unnamed: 3': 'mc_med_2021_dollars',
        'Unnamed: 5': 'mc_mean_2021_dollars',
    })
    # the current-dollar columns are not needed
    .drop(columns=['Median income', 'Mean income'])
    # row 0 is the "Current dollars / 2021 dollars" sub-header; row 6 is the
    # unrevised 2017 entry (the revised "2017 (40)" row is kept); row 11 is
    # presumably the matching unrevised 2013 entry -- TODO confirm
    .drop(index=[0, 6, 11])
    .reset_index(drop=True)
    # trim stray whitespace from the column headers ...
    .rename(columns=str.strip)
    # ... and from every string cell
    .applymap(lambda cell: cell.strip() if isinstance(cell, str) else cell)
)

f07mc.head(2)

Unnamed: 0,year,married_couple_families (thousands),mc_med_2021_dollars,mc_mean_2021_dollars
0,2021,61448.0,106696,141531
1,2020 (41),61297.0,106296,140304


In [8]:
# year stays text for now (labels such as "2020 (41)" are still present);
# the family count becomes a whole number
f07mc_dtypes = {'year': 'str', 'married_couple_families (thousands)': 'int64'}
f07mc = f07mc.astype(f07mc_dtypes)
f07mc

Unnamed: 0,year,married_couple_families (thousands),mc_med_2021_dollars,mc_mean_2021_dollars
0,2021,61448,106696,141531
1,2020 (41),61297,106296,140304
2,2019,62355,108130,142476
3,2018,61971,100698,131447
4,2017 (40),61883,100695,132178
5,2016,60821,98012,126460
6,2015,60258,96432,122626
7,2014,60015,92578,118352
8,2013 (39),59643,91582,118815
9,2012,59224,89299,113690


In [9]:
# start the combined table: all-families rows on the left, married-couple
# columns attached wherever the year label matches
f07 = f07a.merge(f07mc, how='left', on='year')
f07

Unnamed: 0,year,all_families (thousands),all_med_2021_dollars,all_mean_2021_dollars,married_couple_families (thousands),mc_med_2021_dollars,mc_mean_2021_dollars
0,2021,84283,88590,121840,61448,106696,141531
1,2020 (41),83723,88286,120694,61297,106296,140304
2,2019,83698,91151,123711,62355,108130,142476
3,2018,83508,84856,114418,61971,100698,131447
4,2017 (40),83539,84149,114083,61883,100695,132178
5,2016,82854,82089,109919,60821,98012,126460
6,2015,82199,80849,105980,60258,96432,122626
7,2014,81730,76331,101686,60015,92578,118352
8,2013 (39),82316,76271,102134,59643,91582,118815
9,2012,80944,73583,97939,59224,89299,113690


In [10]:
# Load the male-householder section of Table F-7 (per the original note, back
# to 1970): skip the 167 lines above its header row and read the next 55 rows.
f07_xlsx = '../data/single_parent/census/historical_income_families/f07ar_mean_med_type_fam.xlsx'
f07mhh = pd.read_excel(f07_xlsx, skiprows=167, nrows=55)

# lift the display row limit so whole frames are rendered
pd.set_option('display.max_rows', None)
f07mhh.head(3)

Unnamed: 0,Type of family and year,Number (thousands),Median income,Unnamed: 3,Mean income,Unnamed: 5
0,,,Current dollars,2021 dollars,Current dollars,2021 dollars
1,2021,7214.0,61980,61980,83424,83424
2,2020 (41),6964.0,60224,63036,77429,81044


In [11]:
# Tidy the raw male-householder frame in one chained pass.
f07mhh = (
    f07mhh
    # usable names for the year, family-count and the two 2021-dollar columns
    .rename(columns={
        'Type of family and year': 'year',
        'Number (thousands)': 'male_hh_families (thousands)',
        'Unnamed: 3': 'mhh_med_2021_dollars',
        'Unnamed: 5': 'mhh_mean_2021_dollars',
    })
    # the current-dollar columns are not needed
    .drop(columns=['Median income', 'Mean income'])
    # row 0 is the "Current dollars / 2021 dollars" sub-header; rows 6 and 11
    # are presumably the unrevised duplicate-year entries, as in the other
    # sections of the sheet -- TODO confirm
    .drop(index=[0, 6, 11])
    .reset_index(drop=True)
    # trim stray whitespace from the column headers ...
    .rename(columns=str.strip)
    # ... and from every string cell
    .applymap(lambda cell: cell.strip() if isinstance(cell, str) else cell)
)

f07mhh.head(2)

Unnamed: 0,year,male_hh_families (thousands),mhh_med_2021_dollars,mhh_mean_2021_dollars
0,2021,7214.0,61980,83424
1,2020 (41),6964.0,63036,81044


In [12]:
# year stays text for now (labels such as "2020 (41)" are still present);
# the family count becomes a whole number
f07mhh_dtypes = {'year': 'str', 'male_hh_families (thousands)': 'int64'}
f07mhh = f07mhh.astype(f07mhh_dtypes)
f07mhh

Unnamed: 0,year,male_hh_families (thousands),mhh_med_2021_dollars,mhh_mean_2021_dollars
0,2021,7214,61980,83424
1,2020 (41),6964,63036,81044
2,2019,6506,65393,84526
3,2018,6485,58626,79043
4,2017 (40),6351,57137,75991
5,2016,6452,58222,81727
6,2015,6311,56919,76688
7,2014,6162,54525,71390
8,2013 (39),6497,54976,74977
9,2012,6231,50077,67169


In [13]:
# attach the male-householder columns to the combined table by year label,
# keeping every row already in f07
f07 = f07.merge(f07mhh, how='left', on='year')
f07.head(3)

Unnamed: 0,year,all_families (thousands),all_med_2021_dollars,all_mean_2021_dollars,married_couple_families (thousands),mc_med_2021_dollars,mc_mean_2021_dollars,male_hh_families (thousands),mhh_med_2021_dollars,mhh_mean_2021_dollars
0,2021,84283,88590,121840,61448,106696,141531,7214,61980,83424
1,2020 (41),83723,88286,120694,61297,106296,140304,6964,63036,81044
2,2019,83698,91151,123711,62355,108130,142476,6506,65393,84526


In [14]:
# Load the female-householder section of Table F-7 (same 55-row window as the
# other sections): skip the 247 lines above its header row.
f07_xlsx = '../data/single_parent/census/historical_income_families/f07ar_mean_med_type_fam.xlsx'
f07fhh = pd.read_excel(f07_xlsx, skiprows=247, nrows=55)

# lift the display row limit so whole frames are rendered
pd.set_option('display.max_rows', None)
f07fhh.head(3)

Unnamed: 0,Type of family and year,Number (thousands),Median income,Unnamed: 3,Mean income,Unnamed: 5
0,,,Current dollars,2021 dollars,Current dollars,2021 dollars
1,2021,15621.0,45437,45437,62126,62126
2,2020 (41),15462.0,43884,45933,58099,60812


In [15]:
# Tidy the raw female-householder frame in one chained pass.
f07fhh = (
    f07fhh
    # usable names for the year, family-count and the two 2021-dollar columns
    .rename(columns={
        'Type of family and year': 'year',
        'Number (thousands)': 'female_hh_families (thousands)',
        'Unnamed: 3': 'fhh_med_2021_dollars',
        'Unnamed: 5': 'fhh_mean_2021_dollars',
    })
    # the current-dollar columns are not needed
    .drop(columns=['Median income', 'Mean income'])
    # row 0 is the "Current dollars / 2021 dollars" sub-header; rows 6 and 11
    # are presumably the unrevised duplicate-year entries, as in the other
    # sections of the sheet -- TODO confirm
    .drop(index=[0, 6, 11])
    .reset_index(drop=True)
    # trim stray whitespace from the column headers ...
    .rename(columns=str.strip)
    # ... and from every string cell
    .applymap(lambda cell: cell.strip() if isinstance(cell, str) else cell)
)

f07fhh.head(2)

Unnamed: 0,year,female_hh_families (thousands),fhh_med_2021_dollars,fhh_mean_2021_dollars
0,2021,15621.0,45437,62126
1,2020 (41),15462.0,45933,60812


In [16]:
# year stays text for now (labels such as "2020 (41)" are still present);
# the family count becomes a whole number
f07fhh_dtypes = {'year': 'str', 'female_hh_families (thousands)': 'int64'}
f07fhh = f07fhh.astype(f07fhh_dtypes)
f07fhh

Unnamed: 0,year,female_hh_families (thousands),fhh_med_2021_dollars,fhh_mean_2021_dollars
0,2021,15621,45437,62126
1,2020 (41),15462,45933,60812
2,2019,14838,45950,62033
3,2018,15052,43410,59548
4,2017 (40),15305,40691,56721
5,2016,15581,41388,57026
6,2015,15630,39026,53629
7,2014,15553,36395,49382
8,2013 (39),16176,36266,51530
9,2012,15489,36278,50092


In [17]:
# attach the female-householder columns to the combined table by year label,
# keeping every row already in f07
f07 = f07.merge(f07fhh, how='left', on='year')
f07.head(3)

Unnamed: 0,year,all_families (thousands),all_med_2021_dollars,all_mean_2021_dollars,married_couple_families (thousands),mc_med_2021_dollars,mc_mean_2021_dollars,male_hh_families (thousands),mhh_med_2021_dollars,mhh_mean_2021_dollars,female_hh_families (thousands),fhh_med_2021_dollars,fhh_mean_2021_dollars
0,2021,84283,88590,121840,61448,106696,141531,7214,61980,83424,15621,45437,62126
1,2020 (41),83723,88286,120694,61297,106296,140304,6964,63036,81044,15462,45933,60812
2,2019,83698,91151,123711,62355,108130,142476,6506,65393,84526,14838,45950,62033


In [18]:
# summarize row count, non-null counts and dtypes of the merged frame
f07.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 52 entries, 0 to 51
Data columns (total 13 columns):
 #   Column                               Non-Null Count  Dtype 
---  ------                               --------------  ----- 
 0   year                                 52 non-null     object
 1   all_families (thousands)             52 non-null     int64 
 2   all_med_2021_dollars                 52 non-null     int64 
 3   all_mean_2021_dollars                52 non-null     int64 
 4   married_couple_families (thousands)  52 non-null     int64 
 5   mc_med_2021_dollars                  52 non-null     int64 
 6   mc_mean_2021_dollars                 52 non-null     int64 
 7   male_hh_families (thousands)         52 non-null     int64 
 8   mhh_med_2021_dollars                 52 non-null     int64 
 9   mhh_mean_2021_dollars                52 non-null     int64 
 10  female_hh_families (thousands)       52 non-null     int64 
 11  fhh_med_2021_dollars                 52 non-nul

In [19]:
# Normalize the year column to a plain integer year.
# Work from a string view of the column so the cell also survives a re-run,
# when 'year' has already been converted to int.
year_text = f07['year'].astype(str)

# keep only rows whose label starts with a four-digit year (optionally followed
# by a letter); raw strings keep '\d' from being read as a string escape
has_year = year_text.str.match(r'\d{4}[a-zA-Z]?', na=False)

# pull out just the four digits (dropping suffixes such as " (41)") and build a
# new frame with assign() rather than writing into a filtered slice, which
# raised SettingWithCopyWarning
f07 = f07.loc[has_year].assign(
    year=year_text[has_year].str.extract(r'(\d{4})', expand=False).astype(int)
)

# most recent year first
f07 = f07.sort_values('year', ascending=False)

f07

Unnamed: 0,year,all_families (thousands),all_med_2021_dollars,all_mean_2021_dollars,married_couple_families (thousands),mc_med_2021_dollars,mc_mean_2021_dollars,male_hh_families (thousands),mhh_med_2021_dollars,mhh_mean_2021_dollars,female_hh_families (thousands),fhh_med_2021_dollars,fhh_mean_2021_dollars
0,2021,84283,88590,121840,61448,106696,141531,7214,61980,83424,15621,45437,62126
1,2020,83723,88286,120694,61297,106296,140304,6964,63036,81044,15462,45933,60812
2,2019,83698,91151,123711,62355,108130,142476,6506,65393,84526,14838,45950,62033
3,2018,83508,84856,114418,61971,100698,131447,6485,58626,79043,15052,43410,59548
4,2017,83539,84149,114083,61883,100695,132178,6351,57137,75991,15305,40691,56721
5,2016,82854,82089,109919,60821,98012,126460,6452,58222,81727,15581,41388,57026
6,2015,82199,80849,105980,60258,96432,122626,6311,56919,76688,15630,39026,53629
7,2014,81730,76331,101686,60015,92578,118352,6162,54525,71390,15553,36395,49382
8,2013,82316,76271,102134,59643,91582,118815,6497,54976,74977,16176,36266,51530
9,2012,80944,73583,97939,59224,89299,113690,6231,50077,67169,15489,36278,50092


In [20]:
# Load the FRED real median household income series (file name covers 1984-2021).
real_med_csv = '../data/single_parent/fred/real_median_hh_inc_1984-2021.csv'
real_med = pd.read_csv(real_med_csv)

# lift the display row limit so whole frames are rendered
pd.set_option('display.max_rows', None)
real_med.head(3)

Unnamed: 0,DATE,MEHOINUSA672N
0,2021,70784
1,2020,71186
2,2019,72808


In [21]:
# give the FRED columns names that line up with the census table ('year' is
# the join key used later)
fred_column_names = {'DATE': 'year', 'MEHOINUSA672N': 'real_med_inc_fred'}
real_med = real_med.rename(columns=fred_column_names)
real_med.head(3)

Unnamed: 0,year,real_med_inc_fred
0,2021,70784
1,2020,71186
2,2019,72808


In [22]:
# Keep 1984 onward so the table lines up with the FRED series (its file name
# covers 1984-2021). Filtering on the year value replaces the positional slice
# f07[:-14], which silently depended on the exact row count and sort order.
FIRST_FRED_YEAR = 1984
f07_1984 = f07[f07['year'] >= FIRST_FRED_YEAR]
f07_1984

Unnamed: 0,year,all_families (thousands),all_med_2021_dollars,all_mean_2021_dollars,married_couple_families (thousands),mc_med_2021_dollars,mc_mean_2021_dollars,male_hh_families (thousands),mhh_med_2021_dollars,mhh_mean_2021_dollars,female_hh_families (thousands),fhh_med_2021_dollars,fhh_mean_2021_dollars
0,2021,84283,88590,121840,61448,106696,141531,7214,61980,83424,15621,45437,62126
1,2020,83723,88286,120694,61297,106296,140304,6964,63036,81044,15462,45933,60812
2,2019,83698,91151,123711,62355,108130,142476,6506,65393,84526,14838,45950,62033
3,2018,83508,84856,114418,61971,100698,131447,6485,58626,79043,15052,43410,59548
4,2017,83539,84149,114083,61883,100695,132178,6351,57137,75991,15305,40691,56721
5,2016,82854,82089,109919,60821,98012,126460,6452,58222,81727,15581,41388,57026
6,2015,82199,80849,105980,60258,96432,122626,6311,56919,76688,15630,39026,53629
7,2014,81730,76331,101686,60015,92578,118352,6162,54525,71390,15553,36395,49382
8,2013,82316,76271,102134,59643,91582,118815,6497,54976,74977,16176,36266,51530
9,2012,80944,73583,97939,59224,89299,113690,6231,50077,67169,15489,36278,50092


In [23]:
# add the FRED real median income column, matching on year and keeping every
# census row
f07_1984 = f07_1984.merge(real_med, how='left', on='year')
f07_1984

Unnamed: 0,year,all_families (thousands),all_med_2021_dollars,all_mean_2021_dollars,married_couple_families (thousands),mc_med_2021_dollars,mc_mean_2021_dollars,male_hh_families (thousands),mhh_med_2021_dollars,mhh_mean_2021_dollars,female_hh_families (thousands),fhh_med_2021_dollars,fhh_mean_2021_dollars,real_med_inc_fred
0,2021,84283,88590,121840,61448,106696,141531,7214,61980,83424,15621,45437,62126,70784
1,2020,83723,88286,120694,61297,106296,140304,6964,63036,81044,15462,45933,60812,71186
2,2019,83698,91151,123711,62355,108130,142476,6506,65393,84526,14838,45950,62033,72808
3,2018,83508,84856,114418,61971,100698,131447,6485,58626,79043,15052,43410,59548,68168
4,2017,83539,84149,114083,61883,100695,132178,6351,57137,75991,15305,40691,56721,67571
5,2016,82854,82089,109919,60821,98012,126460,6452,58222,81727,15581,41388,57026,66657
6,2015,82199,80849,105980,60258,96432,122626,6311,56919,76688,15630,39026,53629,64631
7,2014,81730,76331,101686,60015,92578,118352,6162,54525,71390,15553,36395,49382,61468
8,2013,82316,76271,102134,59643,91582,118815,6497,54976,74977,16176,36266,51530,62425
9,2012,80944,73583,97939,59224,89299,113690,6231,50077,67169,15489,36278,50092,60313


In [25]:
# move the FRED column to position 1 (right after 'year'): pop() removes it
# from the frame and hands the Series straight to insert()
f07_1984.insert(1, 'real_med_inc_fred', f07_1984.pop('real_med_inc_fred'))

f07_1984

Unnamed: 0,year,real_med_inc_fred,all_families (thousands),all_med_2021_dollars,all_mean_2021_dollars,married_couple_families (thousands),mc_med_2021_dollars,mc_mean_2021_dollars,male_hh_families (thousands),mhh_med_2021_dollars,mhh_mean_2021_dollars,female_hh_families (thousands),fhh_med_2021_dollars,fhh_mean_2021_dollars
0,2021,70784,84283,88590,121840,61448,106696,141531,7214,61980,83424,15621,45437,62126
1,2020,71186,83723,88286,120694,61297,106296,140304,6964,63036,81044,15462,45933,60812
2,2019,72808,83698,91151,123711,62355,108130,142476,6506,65393,84526,14838,45950,62033
3,2018,68168,83508,84856,114418,61971,100698,131447,6485,58626,79043,15052,43410,59548
4,2017,67571,83539,84149,114083,61883,100695,132178,6351,57137,75991,15305,40691,56721
5,2016,66657,82854,82089,109919,60821,98012,126460,6452,58222,81727,15581,41388,57026
6,2015,64631,82199,80849,105980,60258,96432,122626,6311,56919,76688,15630,39026,53629
7,2014,61468,81730,76331,101686,60015,92578,118352,6162,54525,71390,15553,36395,49382
8,2013,62425,82316,76271,102134,59643,91582,118815,6497,54976,74977,16176,36266,51530
9,2012,60313,80944,73583,97939,59224,89299,113690,6231,50077,67169,15489,36278,50092


In [28]:
# write the combined family-type table (f07) to CSV, omitting the index column
f07.to_csv('../data/single_parent/census/historical_income_families/f07_med_mean_inc_by_family_type.csv', index = False)

In [30]:
# write the 1984-onward table with the FRED column to CSV, omitting the index column
f07_1984.to_csv('../data/single_parent/census/historical_income_families/f07_w_real_med.csv', index = False)