In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime

%matplotlib inline

#### Table F-10. Presence of Children Under 18 Years Old--All Families by Median and Mean Income: 1974 to 2021					
Information on confidentiality protection, sampling error, nonsampling error, and definitions is available at <https://www2.census.gov/programs-surveys/cps/techdocs/cpsmar22.pdf>.					
Footnotes are available at <www.census.gov/topics/income-poverty/income/guidance/cps-historic-footnotes.html>.					
Source: U.S. Census Bureau, Current Population Survey, 1975 to 2022 Annual Social and Economic Supplements (CPS ASEC).					
(Families as of March of the following year. Related' children beginning in 1987, 'Own' children for earlier years. Income in current and 2021 R-CPI-U-RS adjusted dollars (28))					

### real median income
https://fred.stlouisfed.org/series/MEHOINUSA672N

Suggested Citation:
U.S. Census Bureau, Real Median Household Income in the United States [MEHOINUSA672N], retrieved from FRED, Federal Reserve Bank of St. Louis; https://fred.stlouisfed.org/series/MEHOINUSA672N, April 19, 2023.

See additional source notes

#### Table F-11. Age of Householder--All Families by Median and Mean Income: 1947 to 2021					
Information on confidentiality protection, sampling error, nonsampling error, and definitions is available at <https://www2.census.gov/programs-surveys/cps/techdocs/cpsmar22.pdf>.					
Footnotes are available at <www.census.gov/topics/income-poverty/income/guidance/cps-historic-footnotes.html>.					
Source: U.S. Census Bureau, Current Population Survey, 1948 to 2022 Annual Social and Economic Supplements (CPS ASEC).					
(Families as of March of the following year. Householders 15 years old and over beginning with March 1980, and householders 14 years old and over as of March of the following year for previous years. Income in current and 2021 R-CPI-U-RS adjusted dollars (28))					

In [2]:
#read in all families only through 1984 (to avoid nulls for mhh)
f10a = pd.read_excel('../data/single_parent/census/historical_income_families/f10ar_mean_med_inc_type_fam_childund18.xlsx',
                     skiprows=8, nrows=38)
pd.set_option('display.max_rows', None)
f10a

Unnamed: 0,"Type of family, number of children, and year",Number (thousands),Median income,Unnamed: 3,Mean income,Unnamed: 5
0,,,Current dollars,2021 dollars,Current dollars,2021 dollars
1,2021,84283.0,88590,88590,121840,121840
2,2020 (41),83723.0,84348,88286,115310,120694
3,2019,83698.0,86011,91151,116735,123711
4,2018,83508.0,78646,84856,106045,114418
5,2017 (40),83539.0,76135,84149,103218,114083
6,2017,83103.0,75938,83931,100400,110968
7,2016,82854.0,72707,82089,97357,109919
8,2015,82199.0,70697,80849,92673,105980
9,2014,81730.0,66632,76331,88765,101686


In [3]:
#rename columns
f10a = f10a.rename(columns = {'Type of family, number of children, and year': 'year', 'Number (thousands)': 'all_families', 
                            'Unnamed: 3': 'all_med_2021', 'Unnamed: 5': 'all_mean_2021'})

#drop empty columns or columns not needed
f10a  = f10a .drop(['Median income', 'Mean income'], axis=1)

#drop prior index rows, drop original rows and keep revised
f10a  = f10a .drop(labels=[0, 6, 11]).reset_index(drop=True)

#remove extra spaces in column headers
f10a  = f10a .rename(columns=lambda x: x.strip())

# remove extra spaces over all strings
f10a  = f10a .applymap(lambda x: x.strip() if isinstance(x, str) else x)

f10a.head(2)

Unnamed: 0,year,all_families,all_med_2021,all_mean_2021
0,2021,84283.0,88590,121840
1,2020 (41),83723.0,88286,120694


In [4]:
f10a = f10a.astype({'year': 'str', 'all_families': 'int64'})
f10a.head(2)

Unnamed: 0,year,all_families,all_med_2021,all_mean_2021
0,2021,84283,88590,121840
1,2020 (41),83723,88286,120694


In [5]:
#read in all family types with one or more children under 18 yrs through 1974
f10wchild = pd.read_excel('../data/single_parent/census/historical_income_families/f10ar_mean_med_inc_type_fam_childund18.xlsx',
                    skiprows=114, nrows=38)
pd.set_option('display.max_rows', None)
f10wchild

Unnamed: 0,"Type of family, number of children, and year",Number (thousands),Median income,Unnamed: 3,Mean income,Unnamed: 5
0,,,Current\ndollars,2021\ndollars,Current\ndollars,2021\ndollars
1,2021,37088.0,86832,86832,121123,121123
2,2020 (41),37058.0,81884,85708,116427,121863
3,2019,36878.0,83694,88696,116474,123434
4,2018,37480.0,76696,82752,103815,112012
5,2017 (40),38059.0,73152,80852,100666,111262
6,2017,37980.0,72991,80674,99182,109622
7,2016,37982.0,69869,78884,95318,107617
8,2015,38321.0,66557,76114,91137,104224
9,2014,38736.0,63767,73049,87464,100196


In [6]:
#rename columns
f10wchild = f10wchild.rename(columns = {'Type of family, number of children, and year': 'year', 'Number (thousands)': 'fam_wchild', 
                                      'Unnamed: 3': 'wchild_med_2021', 'Unnamed: 5': 'wchild_mean_2021'})

#drop empty columns or columns not needed
f10wchild = f10wchild .drop(['Median income', 'Mean income'], axis=1)

#drop prior index rows, drop original rows and keep revised
f10wchild  = f10wchild.drop(labels=[0, 6, 11]).reset_index(drop=True)

#remove extra spaces in column headers
f10wchild  = f10wchild.rename(columns=lambda x: x.strip())

# remove extra spaces over all strings
f10wchild  = f10wchild.applymap(lambda x: x.strip() if isinstance(x, str) else x)

f10wchild.head(2)

Unnamed: 0,year,fam_wchild,wchild_med_2021,wchild_mean_2021
0,2021,37088.0,86832,121123
1,2020 (41),37058.0,85708,121863


In [7]:
# datatypes for merge
f10wchild = f10wchild.astype({'year': 'str', 'fam_wchild': 'int64'})
f10wchild.head(2)

Unnamed: 0,year,fam_wchild,wchild_med_2021,wchild_mean_2021
0,2021,37088,86832,121123
1,2020 (41),37058,85708,121863


In [8]:
f10 = pd.merge(f10a, f10wchild, on = 'year', how = 'left')
f10.head(2)

Unnamed: 0,year,all_families,all_med_2021,all_mean_2021,fam_wchild,wchild_med_2021,wchild_mean_2021
0,2021,84283,88590,121840,37088,86832,121123
1,2020 (41),83723,88286,120694,37058,85708,121863


In [9]:
#read in married couple families with at least 1 child under 18 through 1974
f10mc_wchild = pd.read_excel('../data/single_parent/census/historical_income_families/f10ar_mean_med_inc_type_fam_childund18.xlsx',
                    skiprows=274, nrows=38)
pd.set_option('display.max_rows', None)
f10mc_wchild

Unnamed: 0,"Type of family, number of children, and year",Number (thousands),Median income,Unnamed: 3,Mean income,Unnamed: 5
0,,,Current\ndollars,2021\ndollars,Current\ndollars,2021\ndollars
1,2021,24489.0,118077,118077,153380,153380
2,2020 (41),24451.0,111247,116441,147719,154617
3,2019,24961.0,111281,117932,145887,154606
4,2018,25128.0,101285,109283,130093,140365
5,2017 (40),25425.0,98045,108366,127072,140448
6,2017,25229.0,97622,107898,124897,138044
7,2016,25098.0,93754,105851,120017,135503
8,2015,25117.0,91097,104178,115349,131912
9,2014,25539.0,87420,100145,111278,127476


In [10]:
#rename columns
f10mc_wchild = f10mc_wchild.rename(columns = {'Type of family, number of children, and year': 'year', 'Number (thousands)': 'mc_wchild', 
                            'Unnamed: 3': 'mc_med_2021', 'Unnamed: 5': 'mc_mean_2021'})

#drop empty columns or columns not needed
f10mc_wchild = f10mc_wchild.drop(['Median income', 'Mean income'], axis=1)

#drop prior index rows, drop original rows and keep revised
f10mc_wchild = f10mc_wchild.drop(labels=[0, 6, 11]).reset_index(drop=True)

#remove extra spaces in column headers
f10mc_wchild = f10mc_wchild.rename(columns=lambda x: x.strip())

# remove extra spaces over all strings
f10mc_wchild = f10mc_wchild.applymap(lambda x: x.strip() if isinstance(x, str) else x)

f10mc_wchild.head(2)

Unnamed: 0,year,mc_wchild,mc_med_2021,mc_mean_2021
0,2021,24489.0,118077,153380
1,2020 (41),24451.0,116441,154617


In [11]:
f10mc_wchild = f10mc_wchild.astype({'year': 'str', 'mc_wchild': 'int64'})
f10mc_wchild.head(2)

Unnamed: 0,year,mc_wchild,mc_med_2021,mc_mean_2021
0,2021,24489,118077,153380
1,2020 (41),24451,116441,154617


In [12]:
f10 = pd.merge(f10, f10mc_wchild, on = 'year', how = 'left')
f10.head(2)

Unnamed: 0,year,all_families,all_med_2021,all_mean_2021,fam_wchild,wchild_med_2021,wchild_mean_2021,mc_wchild,mc_med_2021,mc_mean_2021
0,2021,84283,88590,121840,37088,86832,121123,24489,118077,153380
1,2020 (41),83723,88286,120694,37058,85708,121863,24451,116441,154617


In [13]:
#read in female hh, no spouse present, with at least 1 child under 18, through 1974
f10fhh = pd.read_excel('../data/single_parent/census/historical_income_families/f10ar_mean_med_inc_type_fam_childund18.xlsx',
                    skiprows=434, nrows=38)
pd.set_option('display.max_rows', None)
f10fhh

Unnamed: 0,"Type of family, number of children, and year",Number (thousands),Median income,Unnamed: 3,Mean income,Unnamed: 5
0,,,Current\ndollars,2021\ndollars,Current\ndollars,2021\ndollars
1,2021,9290.0,37228,37228,51837,51837
2,2020 (41),9291.0,36144,37832,50144,52486
3,2019,8791.0,36061,38216,49435,52389
4,2018,9167.0,32960,35563,46067,49705
5,2017 (40),9503.0,30783,34023,42647,47136
6,2017,9587.0,31148,34427,43099,47636
7,2016,9789.0,30235,34136,42222,47670
8,2015,10099.0,28323,32390,40495,46310
9,2014,10174.0,26374,30213,36780,42134


In [14]:
#rename columns
f10fhh = f10fhh.rename(columns = {'Type of family, number of children, and year': 'year', 'Number (thousands)': 'fhh_nospouse_wchild', 
                            'Unnamed: 3': 'fhh_med_2021', 'Unnamed: 5': 'fhh_mean_2021'})

#drop empty columns or columns not needed
f10fhh = f10fhh.drop(['Median income', 'Mean income'], axis=1)

#drop prior index rows, drop original rows and keep revised
f10fhh = f10fhh.drop(labels=[0, 6, 11]).reset_index(drop=True)

#remove extra spaces in column headers
f10fhh = f10fhh.rename(columns=lambda x: x.strip())

# remove extra spaces over all strings
f10fhh = f10fhh.applymap(lambda x: x.strip() if isinstance(x, str) else x)

f10fhh

Unnamed: 0,year,fhh_nospouse_wchild,fhh_med_2021,fhh_mean_2021
0,2021,9290.0,37228,51837
1,2020 (41),9291.0,37832,52486
2,2019,8791.0,38216,52389
3,2018,9167.0,35563,49705
4,2017 (40),9503.0,34023,47136
5,2016,9789.0,34136,47670
6,2015,10099.0,32390,46310
7,2014,10174.0,30213,42134
8,2013 (39),10576.0,29589,41793
9,2012,10033.0,30138,41954


In [15]:
f10fhh = f10fhh.astype({'year': 'str', 'fhh_nospouse_wchild': 'int64'})
f10fhh.head(2)

Unnamed: 0,year,fhh_nospouse_wchild,fhh_med_2021,fhh_mean_2021
0,2021,9290,37228,51837
1,2020 (41),9291,37832,52486


In [16]:
f10 = pd.merge(f10, f10fhh, on = 'year', how = 'left')
f10.head(3)

Unnamed: 0,year,all_families,all_med_2021,all_mean_2021,fam_wchild,wchild_med_2021,wchild_mean_2021,mc_wchild,mc_med_2021,mc_mean_2021,fhh_nospouse_wchild,fhh_med_2021,fhh_mean_2021
0,2021,84283,88590,121840,37088,86832,121123,24489,118077,153380,9290,37228,51837
1,2020 (41),83723,88286,120694,37058,85708,121863,24451,116441,154617,9291,37832,52486
2,2019,83698,91151,123711,36878,88696,123434,24961,117932,154606,8791,38216,52389


In [17]:
#read in male hh, no spouse present, with at least 1 child under 18, through 1974
f10mhh = pd.read_excel('../data/single_parent/census/historical_income_families/f10ar_mean_med_inc_type_fam_childund18.xlsx',
                    skiprows=594, nrows=38)
pd.set_option('display.max_rows', None)
f10mhh

Unnamed: 0,"Type of family, number of children, and year",Number (thousands),Median income,Unnamed: 3,Mean income,Unnamed: 5
0,,,Current\ndollars,2021\ndollars,Current\ndollars,2021\ndollars
1,2021,3308.0,56017,56017,76901,76901
2,2020 (41),3316.0,52482,54933,71411,74745
3,2019,3126.0,52676,55824,70135,74326
4,2018,3185.0,48259,52070,62698,67649
5,2017 (40),3131.0,46494,51388,62348,68911
6,2017,3165.0,47275,52251,64069,70813
7,2016,3095.0,45462,51328,62969,71094
8,2015,3105.0,41466,47420,59997,68612
9,2014,3024.0,41436,47468,56856,65132


In [18]:
#rename columns
f10mhh = f10mhh.rename(columns = {'Type of family, number of children, and year': 'year', 'Number (thousands)': 'mhh_nospouse_wchild', 
                            'Unnamed: 3': 'mhh_med_2021', 'Unnamed: 5': 'mhh_mean_2021'})

#drop empty columns or columns not needed
f10mhh = f10mhh.drop(['Median income', 'Mean income'], axis=1)

#drop prior index rows, drop original rows and keep revised
f10mhh = f10mhh.drop(labels=[0, 6, 11]).reset_index(drop=True)

#remove extra spaces in column headers
f10mhh = f10mhh.rename(columns=lambda x: x.strip())

# remove extra spaces over all strings
f10mhh = f10mhh.applymap(lambda x: x.strip() if isinstance(x, str) else x)

f10mhh

Unnamed: 0,year,mhh_nospouse_wchild,mhh_med_2021,mhh_mean_2021
0,2021,3308.0,56017,76901
1,2020 (41),3316.0,54933,74745
2,2019,3126.0,55824,74326
3,2018,3185.0,52070,67649
4,2017 (40),3131.0,51388,68911
5,2016,3095.0,51328,71094
6,2015,3105.0,47420,68612
7,2014,3024.0,47468,65132
8,2013 (39),3293.0,47978,66543
9,2012,3169.0,43117,59391


In [19]:
f10mhh = f10mhh.astype({'year': 'str', 'mhh_nospouse_wchild': 'int64'})

In [20]:
f10 = pd.merge(f10, f10mhh, on = 'year', how = 'left')
f10.head(3)

Unnamed: 0,year,all_families,all_med_2021,all_mean_2021,fam_wchild,wchild_med_2021,wchild_mean_2021,mc_wchild,mc_med_2021,mc_mean_2021,fhh_nospouse_wchild,fhh_med_2021,fhh_mean_2021,mhh_nospouse_wchild,mhh_med_2021,mhh_mean_2021
0,2021,84283,88590,121840,37088,86832,121123,24489,118077,153380,9290,37228,51837,3308,56017,76901
1,2020 (41),83723,88286,120694,37058,85708,121863,24451,116441,154617,9291,37832,52486,3316,54933,74745
2,2019,83698,91151,123711,36878,88696,123434,24961,117932,154606,8791,38216,52389,3126,55824,74326


In [21]:
#read in f11_15_24, age of householder mean & med income
f11_15_24 = pd.read_excel('../data/single_parent/census/historical_income_families/f11ar_mean_med_inc_age_hh.xlsx',
                    skiprows=87, nrows=38)
pd.set_option('display.max_rows', None)
f11_15_24

Unnamed: 0,Age and year,Number (thousands),Median income,Unnamed: 3,Mean income,Unnamed: 5
0,,,Current\ndollars,2021\ndollars,Current\ndollars,2021\ndollars
1,2021,2905.0,55009,55009,67872,67872
2,2020 (41),2547.0,48473,50736,61634,64512
3,2019,2541.0,52972,56137,65370,69276
4,2018,3006.0,48356,52174,67644,72985
5,2017 (40),3025.0,40622,44898,57139,63154
6,2017,3017.0,41855,46261,60481,66847
7,2016,3160.0,44501,50243,59881,67608
8,2015,3187.0,37759,43181,51979,59443
9,2014,3347.0,34082,39043,47673,54612


In [22]:
#rename columns
f11_15_24 = f11_15_24.rename(columns = {'Age and year': 'year', 'Number (thousands)': 'age_15-24', 
                            'Unnamed: 3': 'age_15-24_med2021', 'Unnamed: 5': 'age_15-24_mean2021'})

#drop empty columns or columns not needed
f11_15_24 = f11_15_24.drop(['Median income', 'Mean income'], axis=1)

#drop prior index rows, drop original rows and keep revised
f11_15_24 = f11_15_24.drop(labels=[0, 6, 11]).reset_index(drop=True)

#remove extra spaces in column headers
f11_15_24 = f11_15_24.rename(columns=lambda x: x.strip())

# remove extra spaces over all strings
f11_15_24 = f11_15_24.applymap(lambda x: x.strip() if isinstance(x, str) else x)

f11_15_24.head(3)

Unnamed: 0,year,age_15-24,age_15-24_med2021,age_15-24_mean2021
0,2021,2905.0,55009,67872
1,2020 (41),2547.0,50736,64512
2,2019,2541.0,56137,69276


In [23]:
f11_15_24 = f11_15_24.astype({'year': 'str', 'age_15-24': 'int64'})
f11_15_24.head(3)

Unnamed: 0,year,age_15-24,age_15-24_med2021,age_15-24_mean2021
0,2021,2905,55009,67872
1,2020 (41),2547,50736,64512
2,2019,2541,56137,69276


In [24]:
#read in f11_25_34, age of householder mean & med income
f11_25_34 = pd.read_excel('../data/single_parent/census/historical_income_families/f11ar_mean_med_inc_age_hh.xlsx',
                    skiprows=167, nrows=38)
pd.set_option('display.max_rows', None)
f11_25_34.head(3)

Unnamed: 0,Age and year,Number (thousands),Median income,Unnamed: 3,Mean income,Unnamed: 5
0,,,Current\ndollars,2021\ndollars,Current\ndollars,2021\ndollars
1,2021,12717.0,76059,76059,97998,97998
2,2020 (41),12748.0,71835,75189,91084,95337


In [25]:
#rename columns
f11_25_34 = f11_25_34.rename(columns = {'Age and year': 'year', 'Number (thousands)': 'age_25-34', 
                            'Unnamed: 3': 'age_25-34_med2021', 'Unnamed: 5': 'age_25-34_mean2021'})

#drop empty columns or columns not needed
f11_25_34 = f11_25_34.drop(['Median income', 'Mean income'], axis=1)

#drop prior index rows, drop original rows and keep revised
f11_25_34 = f11_25_34.drop(labels=[0, 6, 11]).reset_index(drop=True)

#remove extra spaces in column headers
f11_25_34 = f11_25_34.rename(columns=lambda x: x.strip())

# remove extra spaces over all strings
f11_25_34 = f11_25_34.applymap(lambda x: x.strip() if isinstance(x, str) else x)

f11_25_34.head(3)

Unnamed: 0,year,age_25-34,age_25-34_med2021,age_25-34_mean2021
0,2021,12717.0,76059,97998
1,2020 (41),12748.0,75189,95337
2,2019,12690.0,75182,95659


In [26]:
f11_25_34 = f11_25_34.astype({'year': 'str', 'age_25-34': 'int64'})
f11_25_34.head(3)

Unnamed: 0,year,age_25-34,age_25-34_med2021,age_25-34_mean2021
0,2021,12717,76059,97998
1,2020 (41),12748,75189,95337
2,2019,12690,75182,95659


In [27]:
age_groups = pd.merge(f11_15_24, f11_25_34, on = 'year', how = 'left')
age_groups.head(3)

Unnamed: 0,year,age_15-24,age_15-24_med2021,age_15-24_mean2021,age_25-34,age_25-34_med2021,age_25-34_mean2021
0,2021,2905,55009,67872,12717,76059,97998
1,2020 (41),2547,50736,64512,12748,75189,95337
2,2019,2541,56137,69276,12690,75182,95659


In [28]:
#read in f11_35_44, age of householder mean & med income
f11_35_44 = pd.read_excel('../data/single_parent/census/historical_income_families/f11ar_mean_med_inc_age_hh.xlsx',
                    skiprows=247, nrows=38)
pd.set_option('display.max_rows', None)
f11_35_44.head(3)

Unnamed: 0,Age and year,Number (thousands),Median income,Unnamed: 3,Mean income,Unnamed: 5
0,,,Current\ndollars,2021\ndollars,Current\ndollars,2021\ndollars
1,2021,17574.0,97675,97675,129107,129107
2,2020 (41),17452.0,92270,96579,123270,129026


In [29]:
#rename columns
f11_35_44 = f11_35_44.rename(columns = {'Age and year': 'year', 'Number (thousands)': 'age_35-44', 
                            'Unnamed: 3': 'age_35-44_med2021', 'Unnamed: 5': 'age_35-44_mean2021'})

#drop empty columns or columns not needed
f11_35_44 = f11_35_44.drop(['Median income', 'Mean income'], axis=1)

#drop prior index rows, drop original rows and keep revised
f11_35_44 = f11_35_44.drop(labels=[0, 6, 11]).reset_index(drop=True)

#remove extra spaces in column headers
f11_35_44 = f11_35_44.rename(columns=lambda x: x.strip())

# remove extra spaces over all strings
f11_35_44 = f11_35_44.applymap(lambda x: x.strip() if isinstance(x, str) else x)

f11_35_44.head(3)

Unnamed: 0,year,age_35-44,age_35-44_med2021,age_35-44_mean2021
0,2021,17574.0,97675,129107
1,2020 (41),17452.0,96579,129026
2,2019,17044.0,100417,129796


In [30]:
f11_35_44 = f11_35_44.astype({'year': 'str', 'age_35-44': 'int64'})
f11_35_44.head(3)

Unnamed: 0,year,age_35-44,age_35-44_med2021,age_35-44_mean2021
0,2021,17574,97675,129107
1,2020 (41),17452,96579,129026
2,2019,17044,100417,129796


In [31]:
age_groups = pd.merge(age_groups,  f11_35_44, on = 'year', how = 'left')
age_groups

Unnamed: 0,year,age_15-24,age_15-24_med2021,age_15-24_mean2021,age_25-34,age_25-34_med2021,age_25-34_mean2021,age_35-44,age_35-44_med2021,age_35-44_mean2021
0,2021,2905,55009,67872,12717,76059,97998,17574,97675,129107
1,2020 (41),2547,50736,64512,12748,75189,95337,17452,96579,129026
2,2019,2541,56137,69276,12690,75182,95659,17044,100417,129796
3,2018,3006,52174,72985,12994,71997,92373,16986,93222,120043
4,2017 (40),3025,44898,63154,12969,68970,89446,17132,92324,119462
5,2016,3160,50243,67608,12945,69595,87825,16986,90512,115740
6,2015,3187,43181,59443,12911,64928,84243,16915,87805,112493
7,2014,3347,39043,54612,13355,61261,79294,16641,82693,107171
8,2013 (39),3543,39366,53772,13572,59633,76790,17107,84060,106407
9,2012,3289,37099,50742,13355,60946,76870,16861,80003,103866


In [32]:
#read in f11_45_54, age of householder mean & med income
f11_45_54 = pd.read_excel('../data/single_parent/census/historical_income_families/f11ar_mean_med_inc_age_hh.xlsx',
                    skiprows=327, nrows=38)
pd.set_option('display.max_rows', None)
f11_45_54.head(3)

Unnamed: 0,Age and year,Number (thousands),Median income,Unnamed: 3,Mean income,Unnamed: 5
0,,,Current\ndollars,2021\ndollars,Current\ndollars,2021\ndollars
1,2021,16504.0,111223,111223,146433,146433
2,2020 (41),16392.0,105460,110384,140904,147483


In [33]:
#rename columns
f11_45_54 = f11_45_54.rename(columns = {'Age and year': 'year', 'Number (thousands)': 'age_45-54', 
                            'Unnamed: 3': 'age_45-54_med2021', 'Unnamed: 5': 'age_45-54_mean2021'})

#drop empty columns or columns not needed
f11_45_54 = f11_45_54.drop(['Median income', 'Mean income'], axis=1)

#drop prior index rows, drop original rows and keep revised
f11_45_54 = f11_45_54.drop(labels=[0, 6, 11]).reset_index(drop=True)

#remove extra spaces in column headers
f11_45_54 = f11_45_54.rename(columns=lambda x: x.strip())

# remove extra spaces over all strings
f11_45_54 = f11_45_54.applymap(lambda x: x.strip() if isinstance(x, str) else x)

f11_45_54.head(3)

Unnamed: 0,year,age_45-54,age_45-54_med2021,age_45-54_mean2021
0,2021,16504.0,111223,146433
1,2020 (41),16392.0,110384,147483
2,2019,16234.0,115100,151370


In [34]:
f11_45_54 = f11_45_54.astype({'year': 'str', 'age_45-54': 'int64'})
f11_45_54.head(3)

Unnamed: 0,year,age_45-54,age_45-54_med2021,age_45-54_mean2021
0,2021,16504,111223,146433
1,2020 (41),16392,110384,147483
2,2019,16234,115100,151370


In [35]:
age_groups = pd.merge(age_groups, f11_45_54, on = 'year', how = 'left')
age_groups

Unnamed: 0,year,age_15-24,age_15-24_med2021,age_15-24_mean2021,age_25-34,age_25-34_med2021,age_25-34_mean2021,age_35-44,age_35-44_med2021,age_35-44_mean2021,age_45-54,age_45-54_med2021,age_45-54_mean2021
0,2021,2905,55009,67872,12717,76059,97998,17574,97675,129107,16504,111223,146433
1,2020 (41),2547,50736,64512,12748,75189,95337,17452,96579,129026,16392,110384,147483
2,2019,2541,56137,69276,12690,75182,95659,17044,100417,129796,16234,115100,151370
3,2018,3006,52174,72985,12994,71997,92373,16986,93222,120043,16414,108085,141158
4,2017 (40),3025,44898,63154,12969,68970,89446,17132,92324,119462,16753,103459,132752
5,2016,3160,50243,67608,12945,69595,87825,16986,90512,115740,17005,101747,132168
6,2015,3187,43181,59443,12911,64928,84243,16915,87805,112493,17093,98061,126246
7,2014,3347,39043,54612,13355,61261,79294,16641,82693,107171,17190,96828,124312
8,2013 (39),3543,39366,53772,13572,59633,76790,17107,84060,106407,17321,97309,121903
9,2012,3289,37099,50742,13355,60946,76870,16861,80003,103866,17543,92492,116850


In [36]:
f10_f11 = pd.merge(f10, age_groups, on = 'year', how = 'left')
f10_f11.head(3)

Unnamed: 0,year,all_families,all_med_2021,all_mean_2021,fam_wchild,wchild_med_2021,wchild_mean_2021,mc_wchild,mc_med_2021,mc_mean_2021,...,age_15-24_mean2021,age_25-34,age_25-34_med2021,age_25-34_mean2021,age_35-44,age_35-44_med2021,age_35-44_mean2021,age_45-54,age_45-54_med2021,age_45-54_mean2021
0,2021,84283,88590,121840,37088,86832,121123,24489,118077,153380,...,67872,12717,76059,97998,17574,97675,129107,16504,111223,146433
1,2020 (41),83723,88286,120694,37058,85708,121863,24451,116441,154617,...,64512,12748,75189,95337,17452,96579,129026,16392,110384,147483
2,2019,83698,91151,123711,36878,88696,123434,24961,117932,154606,...,69276,12690,75182,95659,17044,100417,129796,16234,115100,151370


In [37]:
#locate additional letters in year column
f10_f11 = f10_f11[f10_f11['year'].str.match('\d{4}[a-zA-Z]?', na=False)]
#extract only the year
f10_f11['year'] = f10_f11['year'].str.extract('(\d{4})', expand=False).astype(int)
#sort by descending
f10_f11 = f10_f11.sort_values('year', ascending=False)

f10_f11.tail(3)

Unnamed: 0,year,all_families,all_med_2021,all_mean_2021,fam_wchild,wchild_med_2021,wchild_mean_2021,mc_wchild,mc_med_2021,mc_mean_2021,...,age_15-24_mean2021,age_25-34,age_25-34_med2021,age_25-34_mean2021,age_35-44,age_35-44_med2021,age_35-44_mean2021,age_45-54,age_45-54_med2021,age_45-54_mean2021
32,1989,66090,72381,87810,34279,70783,83830,25476,84613,98544,...,42779,14854,65315,72637,16694,85051,97738,11712,97531,114519
33,1988,65837,71080,85250,34255,69875,81851,25599,83536,96274,...,45486,15100,65207,72399,16300,85762,97144,11336,93163,109485
34,1987,65204,70855,84385,33996,70555,81825,25464,83339,95655,...,44316,15045,64668,71081,15863,84543,96156,11132,94746,108991


In [38]:
f10_f11.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 35 entries, 0 to 34
Data columns (total 28 columns):
 #   Column               Non-Null Count  Dtype
---  ------               --------------  -----
 0   year                 35 non-null     int32
 1   all_families         35 non-null     int64
 2   all_med_2021         35 non-null     int64
 3   all_mean_2021        35 non-null     int64
 4   fam_wchild           35 non-null     int64
 5   wchild_med_2021      35 non-null     int64
 6   wchild_mean_2021     35 non-null     int64
 7   mc_wchild            35 non-null     int64
 8   mc_med_2021          35 non-null     int64
 9   mc_mean_2021         35 non-null     int64
 10  fhh_nospouse_wchild  35 non-null     int64
 11  fhh_med_2021         35 non-null     int64
 12  fhh_mean_2021        35 non-null     int64
 13  mhh_nospouse_wchild  35 non-null     int64
 14  mhh_med_2021         35 non-null     int64
 15  mhh_mean_2021        35 non-null     int64
 16  age_15-24            35 non-

In [39]:
#read in median income
real_med = pd.read_csv('../data/single_parent/fred/real_median_hh_inc_1984-2021.csv')
pd.set_option('display.max_rows', None)
real_med.head(3)

Unnamed: 0,DATE,MEHOINUSA672N
0,2021,70784
1,2020,71186
2,2019,72808


In [40]:
#rename columns
real_med = real_med.rename(columns = {'DATE': 'year', 'MEHOINUSA672N': 'real_med_inc_fred'})
real_med.head(3)

Unnamed: 0,year,real_med_inc_fred
0,2021,70784
1,2020,71186
2,2019,72808


In [41]:
#drop rows prior to 1984
real_med_1987 = real_med[:-3]
real_med_1987.tail(3)

Unnamed: 0,year,real_med_inc_fred
32,1989,61153
33,1988,60115
34,1987,59624


In [42]:
f10_f11_1987 = pd.merge(f10_f11, real_med_1987, on = 'year', how = 'left')
f10_f11_1987.head(3)

Unnamed: 0,year,all_families,all_med_2021,all_mean_2021,fam_wchild,wchild_med_2021,wchild_mean_2021,mc_wchild,mc_med_2021,mc_mean_2021,...,age_25-34,age_25-34_med2021,age_25-34_mean2021,age_35-44,age_35-44_med2021,age_35-44_mean2021,age_45-54,age_45-54_med2021,age_45-54_mean2021,real_med_inc_fred
0,2021,84283,88590,121840,37088,86832,121123,24489,118077,153380,...,12717,76059,97998,17574,97675,129107,16504,111223,146433,70784
1,2020,83723,88286,120694,37058,85708,121863,24451,116441,154617,...,12748,75189,95337,17452,96579,129026,16392,110384,147483,71186
2,2019,83698,91151,123711,36878,88696,123434,24961,117932,154606,...,12690,75182,95659,17044,100417,129796,16234,115100,151370,72808


In [43]:
# remove real_med from dataframe
col = f10_f11_1987.pop('real_med_inc_fred')  
# insert real_med at index 1
f10_f11_1987.insert(1, 'real_med_inc_fred', col)

f10_f11_1987.head(3)

Unnamed: 0,year,real_med_inc_fred,all_families,all_med_2021,all_mean_2021,fam_wchild,wchild_med_2021,wchild_mean_2021,mc_wchild,mc_med_2021,...,age_15-24_mean2021,age_25-34,age_25-34_med2021,age_25-34_mean2021,age_35-44,age_35-44_med2021,age_35-44_mean2021,age_45-54,age_45-54_med2021,age_45-54_mean2021
0,2021,70784,84283,88590,121840,37088,86832,121123,24489,118077,...,67872,12717,76059,97998,17574,97675,129107,16504,111223,146433
1,2020,71186,83723,88286,120694,37058,85708,121863,24451,116441,...,64512,12748,75189,95337,17452,96579,129026,16392,110384,147483
2,2019,72808,83698,91151,123711,36878,88696,123434,24961,117932,...,69276,12690,75182,95659,17044,100417,129796,16234,115100,151370


In [44]:
pd.set_option('display.max_columns', None)
f10_f11_1987

Unnamed: 0,year,real_med_inc_fred,all_families,all_med_2021,all_mean_2021,fam_wchild,wchild_med_2021,wchild_mean_2021,mc_wchild,mc_med_2021,mc_mean_2021,fhh_nospouse_wchild,fhh_med_2021,fhh_mean_2021,mhh_nospouse_wchild,mhh_med_2021,mhh_mean_2021,age_15-24,age_15-24_med2021,age_15-24_mean2021,age_25-34,age_25-34_med2021,age_25-34_mean2021,age_35-44,age_35-44_med2021,age_35-44_mean2021,age_45-54,age_45-54_med2021,age_45-54_mean2021
0,2021,70784,84283,88590,121840,37088,86832,121123,24489,118077,153380,9290,37228,51837,3308,56017,76901,2905,55009,67872,12717,76059,97998,17574,97675,129107,16504,111223,146433
1,2020,71186,83723,88286,120694,37058,85708,121863,24451,116441,154617,9291,37832,52486,3316,54933,74745,2547,50736,64512,12748,75189,95337,17452,96579,129026,16392,110384,147483
2,2019,72808,83698,91151,123711,36878,88696,123434,24961,117932,154606,8791,38216,52389,3126,55824,74326,2541,56137,69276,12690,75182,95659,17044,100417,129796,16234,115100,151370
3,2018,68168,83508,84856,114418,37480,82752,112012,25128,109283,140365,9167,35563,49705,3185,52070,67649,3006,52174,72985,12994,71997,92373,16986,93222,120043,16414,108085,141158
4,2017,67571,83539,84149,114083,38059,80852,111262,25425,108366,140448,9503,34023,47136,3131,51388,68911,3025,44898,63154,12969,68970,89446,17132,92324,119462,16753,103459,132752
5,2016,66657,82854,82089,109919,37982,78884,107617,25098,105851,135503,9789,34136,47670,3095,51328,71094,3160,50243,67608,12945,69595,87825,16986,90512,115740,17005,101747,132168
6,2015,64631,82199,80849,105980,38321,76114,104224,25117,104178,131912,10099,32390,46310,3105,47420,68612,3187,43181,59443,12911,64928,84243,16915,87805,112493,17093,98061,126246
7,2014,61468,81730,76331,101686,38736,73049,100196,25539,100145,127476,10174,30213,42134,3024,47468,65132,3347,39043,54612,13355,61261,79294,16641,82693,107171,17190,96828,124312
8,2013,62425,82316,76271,102134,39678,71789,98239,25810,99015,125412,10576,29589,41793,3293,47978,66543,3543,39366,53772,13572,59633,76790,17107,84060,106407,17321,97309,121903
9,2012,60313,80944,73583,97939,38471,70914,94836,25269,96298,120277,10033,30138,41954,3169,43117,59391,3289,37099,50742,13355,60946,76870,16861,80003,103866,17543,92492,116850


f10_f11_1987.to_csv('../data/single_parent/census/historical_income_families/f10_med_meaninc_1987.csv', index = False)

In [45]:
#pull needed data
married_couple_med = f10_f11_1987[['year', 'mc_med_2021']]

#create a column for family type
married_couple_med['family_type'] = 'married_couple'

#rename columns
married_couple_med = married_couple_med.rename(columns = {'mc_med_2021': 'med_income'})
married_couple_med.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  married_couple_med['family_type'] = 'married_couple'


Unnamed: 0,year,med_income,family_type
0,2021,118077,married_couple
1,2020,116441,married_couple
2,2019,117932,married_couple
3,2018,109283,married_couple
4,2017,108366,married_couple


In [46]:
married_couple_med.describe()

Unnamed: 0,year,med_income
count,35.0,35.0
mean,2004.0,96816.514286
std,10.246951,9809.136286
min,1987.0,82706.0
25%,1995.5,88897.5
50%,2004.0,97897.0
75%,2012.5,100084.0
max,2021.0,118077.0


In [47]:
#locate needed data
single_father_med = f10_f11_1987[['year', 'mhh_med_2021']]
single_father_med.head()

#create a column for family type
single_father_med['family_type'] = 'single_father'

#rename columns
single_father_med = single_father_med.rename(columns = {'mhh_med_2021': 'med_income'})
single_father_med.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  single_father_med['family_type'] = 'single_father'


Unnamed: 0,year,med_income,family_type
0,2021,56017,single_father
1,2020,54933,single_father
2,2019,55824,single_father
3,2018,52070,single_father
4,2017,51388,single_father


In [48]:
single_father_med.describe()

Unnamed: 0,year,med_income
count,35.0,35.0
mean,2004.0,49126.457143
std,10.246951,3657.626531
min,1987.0,41378.0
25%,1995.5,47073.5
50%,2004.0,48984.0
75%,2012.5,51407.5
max,2021.0,56017.0


In [49]:
#locate needed data
single_mother_med = f10_f11_1987[['year', 'fhh_med_2021']]
single_mother_med.head()

#create a column for family type
single_mother_med['family_type'] = 'single_mother'

#rename columns
single_mother_med = single_mother_med.rename(columns = {'fhh_med_2021': 'med_income'})
single_mother_med.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  single_mother_med['family_type'] = 'single_mother'


Unnamed: 0,year,med_income,family_type
0,2021,37228,single_mother
1,2020,37832,single_mother
2,2019,38216,single_mother
3,2018,35563,single_mother
4,2017,34023,single_mother


In [50]:
single_mother_med.describe()

Unnamed: 0,year,med_income
count,35.0,35.0
mean,2004.0,31130.142857
std,10.246951,3655.913969
min,1987.0,24944.0
25%,1995.5,28512.0
50%,2004.0,31864.0
75%,2012.5,33700.5
max,2021.0,38216.0


In [51]:
# List of dataframes
med_inc = [married_couple_med, single_father_med, single_mother_med]  
# concat dataframes
med_inc_fam_type = pd.concat(med_inc, ignore_index=True)

med_inc_fam_type.head()

Unnamed: 0,year,med_income,family_type
0,2021,118077,married_couple
1,2020,116441,married_couple
2,2019,117932,married_couple
3,2018,109283,married_couple
4,2017,108366,married_couple


In [52]:
#locate needed data
med_inc_real = f10_f11_1987[['year', 'real_med_inc_fred']]
med_inc_real.head()

#create a column 
med_inc_real['med_inc_real'] = 'med_inc_real'

#rename columns
med_inc_real = med_inc_real.rename(columns = {'real_med_inc_fred': 'real_med_inc'})
med_inc_real.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  med_inc_real['med_inc_real'] = 'med_inc_real'


Unnamed: 0,year,real_med_inc,med_inc_real
0,2021,70784,med_inc_real
1,2020,71186,med_inc_real
2,2019,72808,med_inc_real
3,2018,68168,med_inc_real
4,2017,67571,med_inc_real


med_inc_real.to_csv('../data/single_parent/census/historical_income_families/f10_med_inc_real.csv', index = False)

med_inc_fam_type.to_csv('../data/single_parent/census/historical_income_families/f10_med_inc_fam_type.csv', index = False)

In [53]:
#locate needed data
age15_24 = f10_f11_1987[['year', 'age_15-24_med2021']]

#create a column
age15_24['age_group'] = '15-24'

#rename columns
age15_24 = age15_24.rename(columns = {'age_15-24_med2021': 'med_inc'})
age15_24.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  age15_24['age_group'] = '15-24'


Unnamed: 0,year,med_inc,age_group
0,2021,55009,15-24
1,2020,50736,15-24
2,2019,56137,15-24
3,2018,52174,15-24
4,2017,44898,15-24


In [54]:
age25_34 = f10_f11_1987[['year', 'age_25-34_med2021']]

#create a column 
age25_34['age_group'] = '25-34'

#rename columns
age25_34 = age25_34.rename(columns = {'age_25-34_med2021': 'med_inc'})
age25_34.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  age25_34['age_group'] = '25-34'


Unnamed: 0,year,med_inc,age_group
0,2021,76059,25-34
1,2020,75189,25-34
2,2019,75182,25-34
3,2018,71997,25-34
4,2017,68970,25-34


In [55]:
age35_44 = f10_f11_1987[['year', 'age_35-44_med2021']]

#create a column
age35_44['age_group'] = '35-44'

#rename columns
age35_44 = age35_44.rename(columns = {'age_35-44_med2021': 'med_inc'})
age35_44.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  age35_44['age_group'] = '35-44'


Unnamed: 0,year,med_inc,age_group
0,2021,97675,35-44
1,2020,96579,35-44
2,2019,100417,35-44
3,2018,93222,35-44
4,2017,92324,35-44


In [56]:
age45_54 = f10_f11_1987[['year', 'age_45-54_med2021']]

#create a column 
age45_54['age_group'] = '45-54'

#rename columns
age45_54 = age45_54.rename(columns = {'age_45-54_med2021': 'med_inc'})
age45_54.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  age45_54['age_group'] = '45-54'


Unnamed: 0,year,med_inc,age_group
0,2021,111223,45-54
1,2020,110384,45-54
2,2019,115100,45-54
3,2018,108085,45-54
4,2017,103459,45-54


In [57]:
# List of dataframes
age_inc = [age15_24, age25_34, age35_44, age45_54]  
# concat dataframes
med_inc_age = pd.concat(age_inc, ignore_index=True)

med_inc_age.head()

Unnamed: 0,year,med_inc,age_group
0,2021,55009,15-24
1,2020,50736,15-24
2,2019,56137,15-24
3,2018,52174,15-24
4,2017,44898,15-24


med_inc_age.to_csv('../data/single_parent/census/historical_income_families/f11_med_inc_age.csv', index = False)