In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime

%matplotlib inline

#### FM1 - Families by Presence of Own Children Under 18: 1950 to Present (Numbers in thousands)
Source:  U.S. Census Bureau, Current Population Survey, March and Annual Social and Economic Supplements, 2022 and earlier.

In [2]:
#read the FM-1 worksheet: skip the 7 lines above the header row and keep the next 80 rows
fm1 = pd.read_excel(
    '../data/single_parent/census/historical_family_tables/fm1.xls',
    skiprows=7,
    nrows=80,
)
#lift the row display limit so the whole table is rendered
pd.set_option('display.max_rows', None)
fm1

Unnamed: 0,Year,All families,Families with own children under 18,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6
0,,,Total,Married couple,One parent families,,
1,,,,,Total,Mother only,Father only
2,,,,,,,
3,2022,84265.0,33924,23345,10579,7906,2673
4,2021r,83711.0,33748,23222,10526,7859,2667
5,2021,83907.0,33579,23089,10490,7829,2661
6,2020,83677.0,33464,23618,9846,7472,2374
7,2019,83482.0,33942,23735,10207,7707,2500
8,2018,83088.0,34452,23812,10640,8156,2484
9,2017,82827.0,34292,23651,10641,8246,2395


In [3]:
#flatten the multi-row header in a single chain:
#  1. give the 'Unnamed: N' columns the labels that sit in the sub-header rows
#  2. drop the leftover sub-header rows (0, 1, 2) and the original rows superseded by a revised row (5, 16, 48, 59)
#  3. renumber the index from 0
#  4. trim stray whitespace from every column header
fm1 = (
    fm1
    .rename(columns={
        'Unnamed: 3': 'Married couple',
        'Unnamed: 4': 'One parent families',
        'Unnamed: 5': 'Mother only',
        'Unnamed: 6': 'Father only',
    })
    .drop(labels=[0, 1, 2, 5, 16, 48, 59])
    .reset_index(drop=True)
    .rename(columns=lambda header: header.strip())
)

fm1

Unnamed: 0,Year,All families,Families with own children under 18,Married couple,One parent families,Mother only,Father only
0,2022,84265.0,33924,23345,10579,7906,2673
1,2021r,83711.0,33748,23222,10526,7859,2667
2,2020,83677.0,33464,23618,9846,7472,2374
3,2019,83482.0,33942,23735,10207,7707,2500
4,2018,83088.0,34452,23812,10640,8156,2484
5,2017,82827.0,34292,23651,10641,8246,2395
6,2016,82184.0,34769,23772,10997,8525,2472
7,2015,81716.0,34979,24040,10939,8551,2388
8,2014s,81353.0,34955,23933,11022,8550,2472
9,2013,80902.0,35058,23870,11187,8627,2560


In [4]:
#every count column is cast to a 64-bit integer; Year stays text because some values carry a letter suffix (e.g. '2021r')
count_columns = [
    'All families',
    'Families with own children under 18',
    'Married couple',
    'One parent families',
    'Mother only',
    'Father only',
]
column_dtypes = {'Year': 'str'}
column_dtypes.update({column: 'int64' for column in count_columns})
fm1 = fm1.astype(column_dtypes)
fm1

Unnamed: 0,Year,All families,Families with own children under 18,Married couple,One parent families,Mother only,Father only
0,2022,84265,33924,23345,10579,7906,2673
1,2021r,83711,33748,23222,10526,7859,2667
2,2020,83677,33464,23618,9846,7472,2374
3,2019,83482,33942,23735,10207,7707,2500
4,2018,83088,34452,23812,10640,8156,2484
5,2017,82827,34292,23651,10641,8246,2395
6,2016,82184,34769,23772,10997,8525,2472
7,2015,81716,34979,24040,10939,8551,2388
8,2014s,81353,34955,23933,11022,8550,2472
9,2013,80902,35058,23870,11187,8627,2560


In [5]:
#locate rows whose Year begins with four digits, optionally followed by a letter suffix (e.g. '2021r', '2014s')
#raw strings keep '\d' as a regex token instead of an invalid string escape
year_rows = fm1['Year'].str.match(r'\d{4}[a-zA-Z]?', na=False)
#take an explicit copy so the Year assignment below writes to an independent frame
#rather than to a slice of fm1 (avoids pandas' SettingWithCopyWarning)
fm1_fam_type = fm1[year_rows].copy()
#extract only the year and store it as an integer
fm1_fam_type['Year'] = fm1_fam_type['Year'].str.extract(r'(\d{4})', expand=False).astype(int)
#sort by descending
fm1_fam_type = fm1_fam_type.sort_values('Year', ascending=False)

fm1_fam_type

Unnamed: 0,Year,All families,Families with own children under 18,Married couple,One parent families,Mother only,Father only
0,2022,84265,33924,23345,10579,7906,2673
1,2021,83711,33748,23222,10526,7859,2667
2,2020,83677,33464,23618,9846,7472,2374
3,2019,83482,33942,23735,10207,7707,2500
4,2018,83088,34452,23812,10640,8156,2484
5,2017,82827,34292,23651,10641,8246,2395
6,2016,82184,34769,23772,10997,8525,2472
7,2015,81716,34979,24040,10939,8551,2388
8,2014,81353,34955,23933,11022,8550,2472
9,2013,80902,35058,23870,11187,8627,2560


In [6]:
#print the cleaned frame's entry count, per-column non-null counts and dtypes
fm1_fam_type.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 73 entries, 0 to 72
Data columns (total 7 columns):
 #   Column                               Non-Null Count  Dtype
---  ------                               --------------  -----
 0   Year                                 73 non-null     int32
 1   All families                         73 non-null     int64
 2   Families with own children under 18  73 non-null     int64
 3   Married couple                       73 non-null     int64
 4   One parent families                  73 non-null     int64
 5   Mother only                          73 non-null     int64
 6   Father only                          73 non-null     int64
dtypes: int32(1), int64(6)
memory usage: 4.3 KB


In [None]:
#write the cleaned table next to the source workbook, leaving the DataFrame index out of the file
fm1_fam_type.to_csv(
    '../data/single_parent/census/historical_family_tables/fm1_fam_type.csv',
    index=False,
)

By default, pandas displays at most 20 columns of a DataFrame in a Jupyter notebook.
#### syntax to force notebook to show all columns
pd.set_option('display.max_columns', None)
#### display all column names in df
print(df.columns.tolist())
#### syntax to reset columns
pd.reset_option('display.max_columns')