In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime

%matplotlib inline

#### ch1 - Living Arrangements of Children Under 18 Years Old: 1960 to Present (Numbers in thousands)
Note: Excludes householders, subfamily reference persons, and their spouses. Data are based on the Current Population Survey (CPS) unless otherwise indicated.

In [2]:
# Read the CH-1 workbook into a DataFrame. The first 9 rows of the sheet are
# skipped and only the following 63 rows are parsed (presumably title lines
# above and footnotes below the table — confirm against the workbook).
ch1_xls_path = '../data/single_parent/census/historical_living_arrangements_of_children/ch1_living_arrangements_of_children_1960_pres.xls'
ch1 = pd.read_excel(
    ch1_xls_path,
    skiprows=9,
    nrows=63,
)

# Remove the row-display cap so the whole table renders below; the positional
# row labels are needed to decide which rows to drop in the cleaning step.
pd.set_option('display.max_rows', None)
ch1

Unnamed: 0.1,Unnamed: 0,Total children under 18,Two parents,Unnamed: 3,One parent,Unnamed: 5,Unnamed: 6,Unnamed: 7,No parents,Unnamed: 9
0,,,,,Total,Mother only,Father only,,Other relatives,Non-relatives
1,Year,,,,,,,,,
2,2022z,73289.0,51450.0,,19224,15782,3442,,1948,667
3,"2021z, r",73817.0,51434.0,,19538,15900,3638,,2241,603
4,2021z,72568.0,50609.0,,19172,15607,3565,,2196,592
5,2020z,72901.0,51334.0,,18580,15310,3270,,2323,663
6,2019z,73525.0,51561.0,,18998,15764,3234,,2319,647
7,2018y,73741.0,50953.0,,19646,16395,3251,,2386,755
8,2017y,73781.0,50827.0,,19973,16767,3206,,2421,560
9,2016y,73745.0,50679.0,,20229,17223,3006,,2279,558


In [3]:
# Clean the raw CH-1 sheet: normalise the headers, drop spacer columns and
# unwanted rows, and trim whitespace from every string cell.

# Strip stray whitespace from the column headers *before* renaming: the rename
# below matches keys exactly and silently skips any header it cannot find, so
# a padded header would otherwise keep its spreadsheet name.
ch1 = ch1.rename(columns=lambda label: label.strip() if isinstance(label, str) else label)

# Replace the spreadsheet's merged/unnamed headers with descriptive names.
ch1 = ch1.rename(columns={
    'Unnamed: 0': 'year',
    'Total children under 18': 'total_children',
    'Two parents': 'two_parents',
    'One parent': 'total_one_parent',
    'Unnamed: 5': 'mother_only',
    'Unnamed: 6': 'father_only',
    'No parents': 'with_relatives',
    'Unnamed: 9': 'with_non_relatives',
})

# Drop empty columns or columns not needed.
ch1 = ch1.drop(columns=['Unnamed: 3', 'Unnamed: 7'])

# Drop the two leftover sub-header rows (labels 0 and 1) and the original
# estimates that have a revised counterpart (e.g. label 4, "2021z", is
# superseded by "2021z, r"). The remaining labels were presumably chosen the
# same way from the full table display — verify if the source file changes.
# The index is then renumbered from 0.
ch1 = ch1.drop(labels=[0, 1, 4, 15, 20, 48, 59]).reset_index(drop=True)


def _strip_if_str(value):
    """Return ``value`` with surrounding whitespace removed when it is a string."""
    return value.strip() if isinstance(value, str) else value


# Remove extra spaces over all strings. `DataFrame.applymap` is deprecated as
# of pandas 2.1, so map each column element-wise instead; this form works on
# both older and newer pandas releases.
ch1 = ch1.apply(lambda column: column.map(_strip_if_str))

ch1.head(2)

Unnamed: 0,year,total_children,two_parents,total_one_parent,mother_only,father_only,with_relatives,with_non_relatives
0,2022z,73289.0,51450.0,19224,15782,3442,1948,667
1,"2021z, r",73817.0,51434.0,19538,15900,3638,2241,603


In [4]:
# Cast the year label to text and the listed count columns to 64-bit integers.
# NOTE(review): 'with_non_relatives' is not cast here and keeps whatever dtype
# it was read with — confirm whether that omission is intentional.
count_columns = [
    'total_children',
    'two_parents',
    'total_one_parent',
    'mother_only',
    'father_only',
    'with_relatives',
]
target_dtypes = {'year': 'str'}
target_dtypes.update({column: 'int64' for column in count_columns})

ch1 = ch1.astype(target_dtypes)
ch1

Unnamed: 0,year,total_children,two_parents,total_one_parent,mother_only,father_only,with_relatives,with_non_relatives
0,2022z,73289,51450,19224,15782,3442,1948,667
1,"2021z, r",73817,51434,19538,15900,3638,2241,603
2,2020z,72901,51334,18580,15310,3270,2323,663
3,2019z,73525,51561,18998,15764,3234,2319,647
4,2018y,73741,50953,19646,16395,3251,2386,755
5,2017y,73781,50827,19973,16767,3206,2421,560
6,2016y,73745,50679,20229,17223,3006,2279,558
7,2015y,73623,50966,19757,17006,2751,2430,469
8,"2014y, s",73692,50602,20258,17410,2848,2271,562
9,2013y,73910,50646,20531,17532,2999,2121,612


In [7]:
#locate additional letters in year column
ch1_by_children = ch1[ch1['year'].str.match('\d{4}[a-zA-Z]?', na=False)]
#extract only the year
ch1_by_children['year'] = ch1_by_children['year'].str.extract('(\d{4})', expand=False).astype(int)
#sort by descending
ch1_by_children = ch1_by_children.sort_values('year', ascending=False)

ch1_by_children.head(2)

Unnamed: 0,year,total_children,two_parents,total_one_parent,mother_only,father_only,with_relatives,with_non_relatives
0,2022,73289,51450,19224,15782,3442,1948,667
1,2021,73817,51434,19538,15900,3638,2241,603


In [None]:
# Print a summary of the cleaned frame (columns, non-null counts, dtypes) as a
# sanity check before it is written to CSV.
ch1_by_children.info()

In [9]:
# Write the cleaned per-year table to CSV; the positional index is omitted
# from the file.
ch1_by_children_csv_path = '../data/single_parent/census/historical_living_arrangements_of_children/ch1_by_children.csv'
ch1_by_children.to_csv(ch1_by_children_csv_path, index=False)