In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime

%matplotlib inline

#### ch7 - Grandchildren Living with Grandparents: 1970 to 2014 (Numbers in thousands)
Source:  U.S. Census Bureau, Current Population Survey, March and Annual Social and Economic Supplements, 2014 and earlier. Source of Decennial Census data:  1980 Census of Population, PC80-2-4B, "Living Arrangements of Children and Adults," Table 1.  1970 Census of Population, PC(2)-4B, "Persons by Family Characteristics," Table 1.

In [20]:
# Lift the row-display cap so frames are rendered in full.
# NOTE: this is a global pandas option and stays in effect for every later cell.
pd.set_option('display.max_rows', None)

# Read the CH-7 worksheet: skip the first 8 lines of the sheet and read at
# most 30 rows after the header row (presumably title/notes above and
# footnotes below -- confirm against the workbook).
ch7 = pd.read_excel(
    '../data/single_parent/census/historical_living_arrangements_of_children/ch7_grandchildren_living_with_grandparents_1970_2014.xls',
    skiprows=8,
    nrows=30,
)

ch7

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Both parents present,Mother only present,Father only present,No parent(s) present
0,,,,Total,,,,
1,,,,,,,,
2,"2014x, s",73692.0,,4834,926.0,2078.0,240.0,1591.0
3,2013x,73910.0,,4556,868.0,1984.0,273.0,1431.0
4,2012x,73817.0,,4573,832.0,2035.0,252.0,1455.0
5,"2011x, r",74002.0,,4907,992.0,2051.0,207.0,1656.0
6,2011x,74630.0,,4968,1013.0,2093.0,213.0,1648.0
7,2010x,74718.0,,4851,823.0,2137.0,236.0,1655.0
8,2009x,74230.0,,4474,760.0,1965.0,210.0,1538.0
9,2008x,74104.0,,4350,761.0,1858.0,221.0,1510.0


In [21]:
# Clean the raw CH-7 frame: normalise headers, give columns analysis-friendly
# names, drop unused columns/rows, and trim whitespace from every string cell.
#
# NOTE: this cell rebinds `ch7` and drops rows by their original index labels,
# so it must run exactly once after the load cell. Re-running it on an already
# cleaned frame raises KeyError at the 'Unnamed: 2' drop instead of silently
# removing real data rows.


def _strip_if_str(value):
    """Return `value` without surrounding whitespace if it is a str, else unchanged."""
    return value.strip() if isinstance(value, str) else value


ch7 = (
    ch7
    # remove extra spaces in column headers first, so the exact-name rename
    # below cannot silently miss a header that carries padding
    .rename(columns=lambda header: header.strip())
    # rename columns
    .rename(columns={
        'Unnamed: 0': 'year',
        'Unnamed: 1': 'total_children',
        'Unnamed: 3': 'living_with_gp',
        'Both parents present': 'parents_present',
        'Mother only present': 'm_only_present',
        'Father only present': 'f_only_present',
        'No parent(s) present': 'no_parents_present',
    })
    # drop empty columns or columns not needed
    .drop(columns=['Unnamed: 2'])
    # rows 0 and 1 are leftover sub-header/blank rows; row 6 is the original
    # "2011x" estimate, superseded by the revised "2011x, r" row that is kept
    .drop(labels=[0, 1, 6])
    .reset_index(drop=True)
)

# remove extra spaces over all strings; column-wise Series.map is used instead
# of DataFrame.applymap, which is deprecated in recent pandas releases
ch7 = ch7.apply(lambda column: column.map(_strip_if_str))

ch7.head(2)

Unnamed: 0,year,total_children,living_with_gp,parents_present,m_only_present,f_only_present,no_parents_present
0,"2014x, s",73692.0,4834,926.0,2078.0,240.0,1591.0
1,2013x,73910.0,4556,868.0,1984.0,273.0,1431.0


In [23]:
# Cast the label column to str and every count column to int64.
# astype raises if a count column still holds missing or non-numeric values.
count_columns = [
    'total_children',
    'living_with_gp',
    'parents_present',
    'm_only_present',
    'f_only_present',
    'no_parents_present',
]
column_dtypes = {'year': 'str'}
column_dtypes.update({name: 'int64' for name in count_columns})

ch7 = ch7.astype(column_dtypes)
ch7

Unnamed: 0,year,total_children,living_with_gp,parents_present,m_only_present,f_only_present,no_parents_present
0,"2014x, s",73692,4834,926,2078,240,1591
1,2013x,73910,4556,868,1984,273,1431
2,2012x,73817,4573,832,2035,252,1455
3,"2011x, r",74002,4907,992,2051,207,1656
4,2010x,74718,4851,823,2137,236,1655
5,2009x,74230,4474,760,1965,210,1538
6,2008x,74104,4350,761,1858,221,1510
7,2007x,73746,4013,709,1793,204,1307
8,2006,73664,3731,412,1650,184,1484
9,2005,73494,4141,486,1821,240,1595


In [24]:
# Build the export frame with a clean integer `year` column.
#
# Raw strings are used for the regex patterns so that `\d` is not treated as an
# (invalid) Python string escape, and `.assign` is used on the filtered frame
# so the new column is written to a fresh DataFrame rather than to a slice of
# `ch7` (which triggers SettingWithCopyWarning).
ch7_children_w_gp = (
    # keep rows whose label starts with a four-digit year, optionally followed
    # by a footnote letter (e.g. "2013x", "2011x, r"); missing labels are excluded
    ch7[ch7['year'].str.match(r'\d{4}[a-zA-Z]?', na=False)]
    # extract only the year
    .assign(year=lambda frame: frame['year'].str.extract(r'(\d{4})', expand=False).astype(int))
    # sort by descending
    .sort_values('year', ascending=False)
)

ch7_children_w_gp.head(2)

Unnamed: 0,year,total_children,living_with_gp,parents_present,m_only_present,f_only_present,no_parents_present
0,2014,73692,4834,926,2078,240,1591
1,2013,73910,4556,868,1984,273,1431


In [25]:
# Write the cleaned table to CSV next to the source workbook; the DataFrame
# index is not written to the file.
ch7_children_w_gp.to_csv(
    '../data/single_parent/census/historical_living_arrangements_of_children/ch7_children_w_gp.csv',
    index=False,
)