In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime

%matplotlib inline

#### FM2 - All Parent/Child Situations, by Type, Race, and Hispanic Origin of the Householder or Reference Person: 1970 to Present (Numbers in thousands)
Source:  U.S. Census Bureau, Current Population Survey, March and Annual Social and Economic Supplements, 2022 and earlier.

In [3]:
# Lift the row cap so displayed frames are rendered in full.
pd.set_option('display.max_rows', None)

# Load the first table of the workbook: skip the 14 rows above the header row
# ("Year", "Total", "Married", ...) and parse the next 48 data rows.
fm2 = pd.read_excel(
    '../data/single_parent/census/historical_family_tables/fm2.xls',
    skiprows=14,
    nrows=48,
)
fm2

Unnamed: 0,Year,Unnamed: 1,Unnamed: 2,Total,Married,Unmarried,Unnamed: 6,Total.1,Mother only,Father only
0,,,,,,,,,,
1,.All races,,,,,,,,,
2,..2022y,36994.0,,26105.0,24040.0,2065.0,,10889.0,8657.0,2232.0
3,"..2021y,r",37044.0,,25953.0,23992.0,1961.0,,11091.0,8797.0,2294.0
4,..2021y,36828.0,,25812.0,23855.0,1957.0,,11016.0,8765.0,2251.0
5,..2020y,36881.0,,26161.0,24477.0,1684.0,,10720.0,8625.0,2094.0
6,..2019y,37381.0,,26373.0,24509.0,1864.0,,11008.0,8880.0,2128.0
7,..2018x,37706.0,,26384.0,24555.0,1829.0,,11322.0,9232.0,2090.0
8,..2017x,37870.0,,26203.0,24465.0,1738.0,,11667.0,9497.0,2170.0
9,..2016x,38276.0,,26462.0,24638.0,1824.0,,11814.0,9781.0,2033.0


In [4]:
# Flatten the two-level spreadsheet header into plain snake_case column names.
fm2 = fm2.rename(
    columns={
        'Year': 'year',
        'Unnamed: 1': 'all_total',
        'Total': '2P_total',
        'Married': 'married',
        'Unmarried': 'unmarried',
        'Total.1': '1P_total',
        'Mother only': 'mother_only',
        'Father only': 'father_only',
    }
)

In [5]:
# Remove the two empty spacer columns, then the rows that are not wanted:
# 0 is blank, 1 is the ".All races" label, and 4 / 15 are the original
# estimates for years that also have a revised row (the revised row is kept).
# The index is renumbered afterwards so it is contiguous again.
fm2 = (
    fm2
    .drop(columns=['Unnamed: 2', 'Unnamed: 6'])
    .drop(index=[0, 1, 4, 15])
    .reset_index(drop=True)
)
fm2

Unnamed: 0,year,all_total,2P_total,married,unmarried,1P_total,mother_only,father_only
0,..2022y,36994.0,26105.0,24040.0,2065.0,10889.0,8657.0,2232.0
1,"..2021y,r",37044.0,25953.0,23992.0,1961.0,11091.0,8797.0,2294.0
2,..2020y,36881.0,26161.0,24477.0,1684.0,10720.0,8625.0,2094.0
3,..2019y,37381.0,26373.0,24509.0,1864.0,11008.0,8880.0,2128.0
4,..2018x,37706.0,26384.0,24555.0,1829.0,11322.0,9232.0,2090.0
5,..2017x,37870.0,26203.0,24465.0,1738.0,11667.0,9497.0,2170.0
6,..2016x,38276.0,26462.0,24638.0,1824.0,11814.0,9781.0,2033.0
7,..2015x,38642.0,26862.0,24857.0,2005.0,11780.0,9891.0,1889.0
8,"..2014x, s",38586.0,26712.0,24775.0,1937.0,11874.0,9929.0,1945.0
9,..2013x,38576.0,26569.0,24677.0,1892.0,12007.0,10007.0,2000.0


In [6]:
# Strip the leading ".." and any footnote characters after the 4-digit year,
# e.g. "..2021y,r" -> "2021".
# regex=True is passed explicitly: older pandas only warned that the default was
# going to change, and from pandas 2.0 the default is regex=False, which would
# treat the pattern as a literal string and leave every label unchanged.
fm2['year'] = fm2['year'].str.replace(r'^\.{2}(\d{4}).*$', r'\1', regex=True)

fm2

  fm2['year'] = fm2['year'].str.replace(r'^\.{2}(\d{4}).*$', r'\1')


Unnamed: 0,year,all_total,2P_total,married,unmarried,1P_total,mother_only,father_only
0,2022,36994.0,26105.0,24040.0,2065.0,10889.0,8657.0,2232.0
1,2021,37044.0,25953.0,23992.0,1961.0,11091.0,8797.0,2294.0
2,2020,36881.0,26161.0,24477.0,1684.0,10720.0,8625.0,2094.0
3,2019,37381.0,26373.0,24509.0,1864.0,11008.0,8880.0,2128.0
4,2018,37706.0,26384.0,24555.0,1829.0,11322.0,9232.0,2090.0
5,2017,37870.0,26203.0,24465.0,1738.0,11667.0,9497.0,2170.0
6,2016,38276.0,26462.0,24638.0,1824.0,11814.0,9781.0,2033.0
7,2015,38642.0,26862.0,24857.0,2005.0,11780.0,9891.0,1889.0
8,2014,38586.0,26712.0,24775.0,1937.0,11874.0,9929.0,1945.0
9,2013,38576.0,26569.0,24677.0,1892.0,12007.0,10007.0,2000.0


In [7]:
# Inspect dtypes and non-null counts before handling missing values and casting.
fm2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 44 entries, 0 to 43
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   year         44 non-null     object 
 1   all_total    44 non-null     float64
 2   2P_total     44 non-null     float64
 3   married      44 non-null     float64
 4   unmarried    16 non-null     float64
 5   1P_total     44 non-null     float64
 6   mother_only  44 non-null     float64
 7   father_only  44 non-null     float64
dtypes: float64(7), object(1)
memory usage: 2.9+ KB


In [8]:
#replace NaN values with 0 for conversion of unmarried column to int64
# NOTE(review): 'unmarried' has only 16 non-null values out of 44 rows, so after
# this fill a year with no reported value is indistinguishable from a real count
# of 0. fillna(0) also applies to every column, not just 'unmarried'. Confirm this
# is acceptable, or consider a nullable integer dtype that keeps the gaps.
fm2 = fm2.fillna(0)
fm2

Unnamed: 0,year,all_total,2P_total,married,unmarried,1P_total,mother_only,father_only
0,2022,36994.0,26105.0,24040.0,2065.0,10889.0,8657.0,2232.0
1,2021,37044.0,25953.0,23992.0,1961.0,11091.0,8797.0,2294.0
2,2020,36881.0,26161.0,24477.0,1684.0,10720.0,8625.0,2094.0
3,2019,37381.0,26373.0,24509.0,1864.0,11008.0,8880.0,2128.0
4,2018,37706.0,26384.0,24555.0,1829.0,11322.0,9232.0,2090.0
5,2017,37870.0,26203.0,24465.0,1738.0,11667.0,9497.0,2170.0
6,2016,38276.0,26462.0,24638.0,1824.0,11814.0,9781.0,2033.0
7,2015,38642.0,26862.0,24857.0,2005.0,11780.0,9891.0,1889.0
8,2014,38586.0,26712.0,24775.0,1937.0,11874.0,9929.0,1945.0
9,2013,38576.0,26569.0,24677.0,1892.0,12007.0,10007.0,2000.0


In [9]:
# Cast every count column from float64 to int64; 'year' is left as it is.
fm2 = fm2.astype(
    dict.fromkeys(
        [
            'all_total',
            '2P_total',
            'married',
            'unmarried',
            '1P_total',
            'mother_only',
            'father_only',
        ],
        'int64',
    )
)
fm2.head(2)

Unnamed: 0,year,all_total,2P_total,married,unmarried,1P_total,mother_only,father_only
0,2022,36994,26105,24040,2065,10889,8657,2232
1,2021,37044,25953,23992,1961,11091,8797,2294


In [10]:
# Re-check dtypes and non-null counts after the cast.
fm2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 44 entries, 0 to 43
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   year         44 non-null     object
 1   all_total    44 non-null     int64 
 2   2P_total     44 non-null     int64 
 3   married      44 non-null     int64 
 4   unmarried    44 non-null     int64 
 5   1P_total     44 non-null     int64 
 6   mother_only  44 non-null     int64 
 7   father_only  44 non-null     int64 
dtypes: int64(7), object(1)
memory usage: 2.9+ KB


# Write the cleaned table next to the source workbook, without the index column.
fm2.to_csv(
    '../data/single_parent/census/historical_family_tables/fm2_parent_child_situations.csv',
    index=False,
)

In [19]:
# Load the block whose header row is ".White": skip the first 64 sheet rows and
# parse 61 data rows. Year labels here carry footnote suffixes such as "a,y".
fm2_w = pd.read_excel(
    '../data/single_parent/census/historical_family_tables/fm2.xls',
    skiprows=64,
    nrows=61,
)
fm2_w

Unnamed: 0,.White,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,"..2022a,y",27500,,20645,19075,1569.0,,6855,5260,1596
1,"..2021a,y,r",27554,,20503,19060,1443.0,,7051,5381,1670
2,"..2021a,y",27399,,20400,18959,1440.0,,6999,5362,1637
3,"..2020a,y",27613,,20712,19425,1287.0,,6901,5341,1560
4,"..2019a,y",28120,,21004,19589,1415.0,,7116,5544,1572
5,"..2018a, x",28226,,21008,19630,1378.0,,7218,5639,1579
6,"..2017a, x",28489,,20925,19612,1313.0,,7564,5917,1647
7,"..2016a, x",28879,,21272,19899,1373.0,,7607,6019,1588
8,"..2016b, x",29534,,21663,20249,1414.0,,7871,6240,1631
9,"..2015a, x",29303,,21664,20188,1476.0,,7639,6177,1462


In [24]:
# Load the block whose header row is ".Black": skip the first 127 sheet rows and
# parse 61 data rows.
fm2_b = pd.read_excel(
    '../data/single_parent/census/historical_family_tables/fm2.xls',
    skiprows=127,
    nrows=61,
)
fm2_b

Unnamed: 0,.Black,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,"..2022a,y",5371,,2289,1952,337.0,,3083,2661,422
1,"..2021a,y,r",5383,,2269,1963,305.0,,3115,2693,422
2,"..2021a,y",5350,,2253,1949,304.0,,3097,2682,416
3,"..2020a,y",5239,,2263,2023,239.0,,2976,2622,354
4,"..2019a, y",5171,,2211,1934,277.0,,2960,2581,379
5,"..2018a, x",5395,,2183,1910,273.0,,3212,2855,357
6,"..2017a, x",5494,,2252,1996,256.0,,3242,2902,340
7,"..2016a, x",5467,,2159,1877,282.0,,3308,3007,301
8,"..2016b, x",5817,,2325,2030,295.0,,3492,3164,328
9,"..2015a, x",5471,,2209,1894,315.0,,3262,2971,291


In [25]:
# Load the block whose header row is ".Hispanic origin***": skip the first 190
# sheet rows and parse 45 data rows.
fm2_h = pd.read_excel(
    '../data/single_parent/census/historical_family_tables/fm2.xls',
    skiprows=190,
    nrows=45,
)
fm2_h

Unnamed: 0,.Hispanic origin***,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,..2022y,8216,,5544,4837,707.0,,2672,2204,468
1,"..2021y,r",8236,,5510,4854,656.0,,2727,2250,477
2,..2021y,8199,,5490,4836,655.0,,2708,2240,468
3,..2020y,8075,,5401,4851,550.0,,2673,2234,440
4,..2019y,8280,,5510,4938,572.0,,2770,2312,458
5,..2018x,8233,,5552,5014,538.0,,2681,2257,424
6,..2017x,8294,,5558,5044,514.0,,2736,2217,519
7,..2016x,8319,,5575,4964,611.0,,2744,2321,423
8,..2015x,8095,,5407,4783,624.0,,2688,2338,350
9,"..2014x, s",7996,,5230,4596,634.0,,2766,2429,337


In [31]:
# Load the next ".All races" block: skip the first 239 sheet rows and parse
# 28 data rows (the displayed rows start at 2006).
fm2_fam_all = pd.read_excel(
    '../data/single_parent/census/historical_family_tables/fm2.xls',
    skiprows=239,
    nrows=28,
)
fm2_fam_all

Unnamed: 0,.All races,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,..2006,36466,,25982,25982,,,10484,8389,2095
1,..2005,36211,,25919,25919,,,10291,8270,2021
2,..2004,35944,,25793,25793,,,10151,8221,1931
3,..2003,35968,,25914,25914,,,10054,8139,1915
4,..2002,35705,,25792,25792,,,9913,8010,1903
5,..2001,35355,,25980,25980,,,9375,7538,1836
6,..2000,34605,,25248,25248,,,9357,7571,1786
7,..1999,34613,,25066,25066,,,9547,7841,1706
8,..1998,34760,,25269,25269,,,9491,7693,1798
9,..1997,34665,,25083,25083,,,9583,7874,1709


In [36]:
# Load the matching ".White" block: skip the first 270 sheet rows and parse
# 32 data rows. Some years appear twice, labelled "a" and "b".
fm2_fam_w = pd.read_excel(
    '../data/single_parent/census/historical_family_tables/fm2.xls',
    skiprows=270,
    nrows=32,
)
fm2_fam_w

Unnamed: 0,.White,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,..2006a,28937,,22010,22010,,,6927,5283,1644
1,..2006b,29394,,22285,22285,,,7110,5424,1686
2,..2005a,28824,,21902,21902,,,6922,5328,1594
3,..2005b,29306,,22202,22202,,,7104,5475,1629
4,..2004a,28410,,21769,21769,,,6641,5203,1438
5,..2004b,28869,,22052,22052,,,6817,5330,1487
6,..2003a,28641,,21980,21980,,,6661,5155,1506
7,..2003b,29064,,22238,22238,,,6826,5272,1554
8,..2002,28537,,21978,21978,,,6559,5052,1507
9,..2001,28567,,22220,22220,,,6347,4870,1476


In [43]:
# Load the matching ".Black" block: skip the first 305 sheet rows and parse
# 32 data rows.
fm2_fam_b = pd.read_excel(
    '../data/single_parent/census/historical_family_tables/fm2.xls',
    skiprows=305,
    nrows=32,
)
fm2_fam_b

Unnamed: 0,.Black,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,..2006a,4967,,2021,2021,,,2947,2639,308
1,..2006b,5138,,2100,2100,,,3039,2712,327
2,..2005a,4825,,2023,2023,,,2803,2512,291
3,..2005b,4945,,2074,2074,,,2871,2572,299
4,..2004a,4973,,2035,2035,,,2938,2582,356
5,..2004b,5104,,2102,2102,,,3002,2640,362
6,..2003a,4958,,2082,2082,,,2876,2591,285
7,..2003b,5068,,2139,2139,,,2929,2630,299
8,..2002,5065,,2148,2148,,,2917,2593,324
9,..2001,4760,,2107,2107,,,2653,2372,281


In [48]:
# Load the matching ".Hispanic origin***" block: skip the first 340 sheet rows
# and parse 28 data rows.
fm2_fam_h = pd.read_excel(
    '../data/single_parent/census/historical_family_tables/fm2.xls',
    skiprows=340,
    nrows=28,
)
fm2_fam_h

Unnamed: 0,.Hispanic origin***,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,..2006,6119.0,,4334.0,4334.0,,,1785.0,1470.0,315.0
1,..2005,5993.0,,4164.0,4164.0,,,1828.0,1504.0,324.0
2,..2004,5837.0,,4086.0,4086.0,,,1751.0,1422.0,329.0
3,..2003,5704.0,,4001.0,4001.0,,,1703.0,1357.0,346.0
4,..2002,5343.0,,3754.0,3754.0,,,1589.0,1259.0,330.0
5,..2001,5062.0,,3628.0,3628.0,,,1434.0,1153.0,281.0
6,..2000,4814.0,,3423.0,3423.0,,,1391.0,1145.0,246.0
7,..1999,4614.0,,3218.0,3218.0,,,1396.0,1174.0,222.0
8,..1998,4475.0,,3121.0,3121.0,,,1354.0,1121.0,233.0
9,..1997,4305.0,,2962.0,2962.0,,,1342.0,1138.0,204.0


In [49]:
# Load the following ".All races" block: skip the first 371 sheet rows and
# parse 28 data rows.
fm2_rel_sub_all = pd.read_excel(
    '../data/single_parent/census/historical_family_tables/fm2.xls',
    skiprows=371,
    nrows=28,
)
fm2_rel_sub_all

Unnamed: 0,.All races,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,..2006,2425,,472,472,,,1953,1606,347
1,..2005,2619,,528,528,,,2091,1704,387
2,..2004,2578,,571,571,,,2006,1710,296
3,..2003,2375,,519,519,,,1856,1596,260
4,..2002,2322,,465,465,,,1857,1588,269
5,..2001,2452,,536,536,,,1916,1665,251
6,..2000,2346,,512,512,,,1834,1633,201
7,..1999,2328,,456,456,,,1872,1591,281
8,..1998,2348,,425,425,,,1923,1673,250
9,..1997,2360,,465,465,,,1895,1651,244


In [51]:
# Load the matching ".White" block: skip the first 402 sheet rows and parse
# 32 data rows.
fm2_rel_sub_w = pd.read_excel(
    '../data/single_parent/census/historical_family_tables/fm2.xls',
    skiprows=402,
    nrows=32,
)
fm2_rel_sub_w

Unnamed: 0,.White,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,..2006a,1616,,383,383,,,1233,1009,224
1,..2006b,1689,,395,395,,,1294,1060,234
2,..2005a,1733,,383,383,,,1349,1102,247
3,..2005b,1780,,392,392,,,1388,1131,257
4,..2004a,1696,,419,419,,,1277,1081,196
5,..2004b,1755,,435,435,,,1320,1118,202
6,..2003a,1595,,417,417,,,1178,1003,175
7,..2003b,1634,,426,426,,,1208,1030,178
8,..2002,1638,,391,391,,,1247,1060,187
9,..2001,1648,,430,430,,,1218,1051,167


In [53]:
# Load the matching ".Black" block: skip the first 437 sheet rows and parse
# 32 data rows.
fm2_rel_sub_b = pd.read_excel(
    '../data/single_parent/census/historical_family_tables/fm2.xls',
    skiprows=437,
    nrows=32,
)
fm2_rel_sub_b

Unnamed: 0,.Black,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,..2006a,588,,29,29,,,559,460,99
1,..2006b,613,,33,33,,,581,478,103
2,..2005a,615,,43,43,,,572,476,96
3,..2005b,641,,44,44,,,598,492,106
4,..2004a,628,,56,56,,,572,493,79
5,..2004b,646,,59,59,,,588,506,82
6,..2003a,576,,39,39,,,537,475,62
7,..2003b,593,,44,44,,,549,486,63
8,..2002,525,,30,30,,,495,437,58
9,..2001,637,,38,38,,,599,526,73


In [54]:
# Load the matching ".Hispanic origin***" block: skip the first 472 sheet rows
# and parse 28 data rows.
fm2_rel_sub_h = pd.read_excel(
    '../data/single_parent/census/historical_family_tables/fm2.xls',
    skiprows=472,
    nrows=28,
)
fm2_rel_sub_h

Unnamed: 0,.Hispanic origin***,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,..2006,687.0,,182.0,182.0,,,505.0,410.0,95.0
1,..2005,658.0,,166.0,166.0,,,492.0,384.0,108.0
2,..2004,655.0,,179.0,179.0,,,476.0,389.0,88.0
3,..2003,674.0,,200.0,200.0,,,474.0,390.0,84.0
4,..2002,633.0,,173.0,173.0,,,460.0,389.0,70.0
5,..2001,661.0,,181.0,181.0,,,480.0,415.0,66.0
6,..2000,596.0,,194.0,194.0,,,402.0,347.0,55.0
7,..1999,497.0,,129.0,129.0,,,368.0,319.0,49.0
8,..1998,483.0,,100.0,100.0,,,384.0,342.0,42.0
9,..1997,477.0,,137.0,137.0,,,340.0,297.0,43.0


In [57]:
# Load the last ".All races" block: skip the first 503 sheet rows and parse
# 28 data rows.
fm2_unrel_sub_all = pd.read_excel(
    '../data/single_parent/census/historical_family_tables/fm2.xls',
    skiprows=503,
    nrows=28,
)
fm2_unrel_sub_all

Unnamed: 0,.All races,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,..2006,483,,14,14,,,468,409,59
1,..2005,488,,35,35,,,453,392,61
2,..2004,480,,13,13,,,467,406,61
3,..2003,503,,12,12,,,491,407,84
4,..2002,446,,15,15,,,431,371,59
5,..2001,538,,14,14,,,524,455,70
6,..2000,544,,11,11,,,534,477,57
7,..1999,488,,16,16,,,472,408,64
8,..1998,549,,14,14,,,535,463,72
9,..1997,594,,29,29,,,564,487,77


In [60]:
# Load the matching ".White" block: skip the first 534 sheet rows and parse
# 32 data rows.
fm2_unrel_sub_w = pd.read_excel(
    '../data/single_parent/census/historical_family_tables/fm2.xls',
    skiprows=534,
    nrows=32,
)
fm2_unrel_sub_w

Unnamed: 0,.White,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,..2006a,394.0,,11.0,11.0,,,383.0,337.0,46.0
1,..2006b,408.0,,11.0,11.0,,,398.0,349.0,49.0
2,..2005a,403.0,,34.0,34.0,,,369.0,317.0,52.0
3,..2005b,419.0,,34.0,34.0,,,384.0,331.0,53.0
4,..2004a,363.0,,11.0,11.0,,,352.0,308.0,44.0
5,..2004b,381.0,,12.0,12.0,,,370.0,326.0,44.0
6,..2003a,403.0,,12.0,12.0,,,391.0,313.0,78.0
7,..2003b,418.0,,12.0,12.0,,,406.0,328.0,78.0
8,..2002,375.0,,12.0,12.0,,,363.0,311.0,51.0
9,..2001,454.0,,13.0,13.0,,,441.0,387.0,54.0


In [62]:
# Load the matching ".Black" block: skip the first 569 sheet rows and parse
# 32 data rows. Several cells in this block hold "-" instead of a number.
fm2_unrel_sub_b = pd.read_excel(
    '../data/single_parent/census/historical_family_tables/fm2.xls',
    skiprows=569,
    nrows=32,
)
fm2_unrel_sub_b

Unnamed: 0,.Black,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,..2006a,53.0,,1,1,,,53.0,45.0,8
1,..2006b,56.0,,1,1,,,55.0,47.0,8
2,..2005a,55.0,,-,-,,,55.0,49.0,6
3,..2005b,56.0,,-,-,,,56.0,50.0,6
4,..2004a,87.0,,-,-,,,87.0,70.0,17
5,..2004b,94.0,,-,-,,,94.0,78.0,17
6,..2003a,64.0,,-,-,,,64.0,58.0,6
7,..2003b,69.0,,-,-,,,69.0,63.0,6
8,..2002,45.0,,1,1,,,44.0,37.0,7
9,..2001,61.0,,-,-,,,61.0,49.0,12


In [64]:
# Load the matching ".Hispanic origin***" block: skip the first 604 sheet rows
# and parse 28 data rows.
# Stored under its own `_h` name (same suffix scheme as the other groups) so it
# no longer overwrites the ".Black" block held in fm2_unrel_sub_b.
fm2_unrel_sub_h = pd.read_excel(
    '../data/single_parent/census/historical_family_tables/fm2.xls',
    skiprows=604,
    nrows=28,
)
fm2_unrel_sub_h

Unnamed: 0,.Hispanic origin***,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,..2006,99.0,,8.0,8.0,,,91.0,77.0,14
1,..2005,101.0,,15.0,15.0,,,86.0,76.0,10
2,..2004,80.0,,8.0,8.0,,,72.0,60.0,12
3,..2003,90.0,,9.0,9.0,,,81.0,61.0,20
4,..2002,74.0,,8.0,8.0,,,66.0,56.0,10
5,..2001,87.0,,10.0,10.0,,,77.0,76.0,1
6,..2000,92.0,,9.0,9.0,,,84.0,73.0,11
7,..1999,82.0,,7.0,7.0,,,75.0,67.0,8
8,..1998,63.0,,12.0,12.0,,,51.0,37.0,14
9,..1997,88.0,,15.0,15.0,,,72.0,64.0,8


# NOTE(review): `fm1_fam_type` is not defined anywhere in the visible notebook, so
# this cell cannot run on a fresh kernel. It looks like it was carried over from
# an FM1 notebook — confirm which frame should be exported here, or remove the cell.
fm1_fam_type.to_csv('../data/single_parent/census/historical_family_tables/fm1_fam_type.csv', index = False)

NameError: name 'fm2' is not defined