In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the synthetic persons table; the checks in this notebook focus on persons.
persons = pd.read_csv('synthetic_persons.csv')
persons.head(n=5)

Unnamed: 0,tract,mgra,household_id,SPORDER,AGEP,SEX,ESR,COW,WKHP,SCHG,RAC1P,HISP,MIL,isWorker
0,18100.0,245,1,1,52,2,1.0,1.0,40.0,,1,2,4.0,1
1,18100.0,245,1,2,39,1,3.0,1.0,40.0,,1,2,4.0,0
2,18100.0,245,1,3,79,2,6.0,,,,1,2,4.0,0
3,18100.0,245,2,1,25,2,4.0,5.0,50.0,15.0,6,2,1.0,1
4,18100.0,245,2,2,24,1,1.0,6.0,20.0,15.0,1,1,2.0,1


In [3]:
# Load the synthetic households table and preview its first rows.
households = pd.read_csv('synthetic_households.csv')
households.head(n=5)


Unnamed: 0,household_id,tract,mgra,NP,HHADJINC,ADJINC,HHT,WIF,HUPAC,VEH,numWorkers,GQ_type
0,1,18100.0,245,3,121500.0,1010145,3.0,2.0,4.0,3.0,1.0,
1,2,18100.0,245,3,75400.0,1010145,1.0,2.0,1.0,2.0,2.0,
2,3,18100.0,245,3,45800.0,1010145,3.0,1.0,4.0,1.0,1.0,
3,4,18100.0,245,3,70000.0,1010145,1.0,2.0,1.0,2.0,2.0,
4,5,18100.0,245,1,35700.0,1010145,6.0,,4.0,1.0,1.0,


11.1 All ESR b, 3, 6 have 'isWorker'=0 
- If a person is under 16 years old (b), unemployed (3), or not in the labor force (6), their `isWorker` code should be 0.

In [54]:
# 11.1: distinct isWorker values where ESR is missing (b), 3, or 6; expect only 0.
persons.loc[persons['ESR'].isna() | persons['ESR'].isin([3, 6]), 'isWorker'].unique()

array([0], dtype=int64)

True

11.2 All ESR b have age < 16 
- If ESR is b (under 16), AGEP should also be less than 16.

In [57]:
# Persons whose ESR is missing (code b), i.e. the "under 16" group per the note above.
less_than_16 = persons.loc[persons['ESR'].isna()]
less_than_16.head(n=5)

Unnamed: 0,tract,mgra,household_id,SPORDER,AGEP,SEX,ESR,COW,WKHP,SCHG,RAC1P,HISP,MIL,isWorker
5,18100.0,245,2,3,4,2,,,,1.0,6,2,,0
11,18100.0,245,4,3,5,2,,,,2.0,1,3,,0
16,18100.0,245,7,3,1,1,,,,,3,2,,0
19,18100.0,245,8,3,10,2,,,,7.0,1,1,,0
20,18100.0,245,8,4,9,1,,,,6.0,1,1,,0


In [56]:
# 11.2: True only if every ESR-b person has AGEP strictly below 16.
less_than_16['AGEP'].lt(16).all()

True

True

11.3 All ESR b have COW of b
- If under 16 (ESR b), COW should also be "not in universe" (b).

In [8]:
# 11.3: distinct COW values among ESR-b persons; expect only NaN (b).
less_than_16.loc[:, 'COW'].unique()

array([nan])

True

11.4 All ESR 3 or 6 have 'isWorker'= 0 
- if unemployed (3) or not in labor force (6), they're not workers (0)

In [9]:
# 11.4: distinct isWorker values for ESR 3 or 6; expect only 0.
persons.loc[persons['ESR'].isin([3, 6]), 'isWorker'].unique()

array([0], dtype=int64)

True

11.5 All ESR 3 or 6 have COW of b or 9 
- if unemployed (3) or not in labor force (6), they're not in universe (b) or unemployed (9)

In [10]:
# 11.5: distinct COW values for ESR 3 or 6; the expectation is only b (NaN) or 9.
persons.loc[persons['ESR'].isin([3, 6]), 'COW'].unique()

array([ 1., nan,  6.,  3.,  2.,  4.,  9.,  7.,  5.,  8.])

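#### False: some persons marked unemployed (3) or not in the labor force (6) in ESR carry an employed class-of-worker code (1–8) in COW

Note (to confirm against the PUMS data dictionary): COW may describe a person's most recent job rather than their current one, in which case these combinations would not be inconsistent.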

In [30]:
# Dump every person with ESR 3 or 6 to CSV for manual inspection.
persons.loc[persons['ESR'].isin([3, 6])].to_csv('persons_with_ESR_3_or_6.csv')

see output to learn more about persons whose ESR = 3 or 6

11.6 Range of hours worked for (ESR 1 or 4) and (ESR 2 or 5) is reasonable

In [31]:
# 11.6: smallest WKHP among persons with ESR 1 or 4.
persons.loc[persons['ESR'].isin([1, 4]), 'WKHP'].min()

1.0

In [32]:
# 11.6: largest WKHP among persons with ESR 1 or 4.
persons.loc[persons['ESR'].isin([1, 4]), 'WKHP'].max()

99.0

In [33]:
# 11.6: smallest WKHP among persons with ESR 2 or 5.
persons.loc[persons['ESR'].isin([2, 5]), 'WKHP'].min()

1.0

In [34]:
# 11.6: largest WKHP among persons with ESR 2 or 5.
persons.loc[persons['ESR'].isin([2, 5]), 'WKHP'].max()

99.0

A maximum of 99 hours per week is high, but it is possible for someone working two jobs at roughly 14 hours a day, so this check passes.

11.7 ESR 4 or 5 have MIL of 1 or 3
- if in armed forces (4, 5), MIL says on active duty (1, 3) 

In [13]:
# 11.7: distinct MIL values for armed-forces persons (ESR 4 or 5); expect only 1 or 3.
persons.loc[persons['ESR'].isin([4, 5]), 'MIL'].unique()

array([1., 3.])

11.8 Is there a pattern in COW values for those with MIL code != b or 4
- If a person has served in the military (MIL 1, 2, or 3), how does their work setting (COW) vary?

In [39]:
# 11.8: distinct COW values for persons whose MIL is neither missing (b) nor 4.
persons.loc[persons['MIL'].notna() & (persons['MIL'] != 4), 'COW'].unique()

array([ 5.,  6.,  1., nan,  2.,  7.,  3.,  4.,  8.,  9.])

No clear pattern: they appear in every class of worker (private, public, self-employed, and so on).

11.9 All MIL b and ESR b have age < 17
- if under 17 (MIL.b) or under 16 (ESR.b), does age correctly identify them as under 17?

In [41]:
# 11.9: distinct ages of persons with missing MIL (b); expect all below 17.
persons.loc[persons['MIL'].isna(), 'AGEP'].unique()

array([ 4,  5,  1, 10,  9,  6,  2,  3, 16, 13,  8,  7,  0, 12, 15, 14, 11],
      dtype=int64)

In [42]:
# 11.9: distinct ages of persons with missing ESR (b); expect all below 16.
persons.loc[persons['ESR'].isna(), 'AGEP'].unique()

array([ 4,  5,  1, 10,  9,  6,  2,  3, 13,  8,  7,  0, 12, 15, 14, 11],
      dtype=int64)

True

11.10 All MIL 1 have ESR = 4 or 5  
- if in active duty (1), are they under armed forces (4, 5)?

In [25]:
# 11.10: distinct ESR values for persons on active duty (MIL 1); expect only 4 or 5.
persons.loc[persons['MIL'].eq(1), 'ESR'].unique()

array([4., 5.])

True

11.11 All ESR 4 or 5 have age <= 60
- if on armed forces (4,5), are they all younger than 61?

In [49]:
# 11.11: distinct ages of armed-forces persons (ESR 4 or 5); expect all <= 60.
# Stray trailing characters after the expression were removed; they made this
# cell a SyntaxError when the notebook is re-run from a fresh kernel.
persons[(persons['ESR'] == 4) | (persons['ESR'] == 5)]['AGEP'].unique()

array([25, 35, 34, 26, 27, 36, 46, 22, 21, 30, 23, 32, 33, 37, 29, 31, 40,
       28, 48, 24, 20, 52, 39, 47, 54, 41, 42, 38, 49, 50, 44, 19, 43, 45,
       60, 51, 57, 18, 17], dtype=int64)

True

11.12 All MIL 1 have isWorker 1 and ESR 4 or 5. Is there a pattern in their COW values?
- if on active duty (1), they're all working (1) and they are armed forces (4, 5). Plus, is there a variation in the type of job they perform (COWs)?

In [59]:
# 11.12: distinct isWorker values for persons on active duty (MIL 1); expect only 1.
persons.loc[persons['MIL'].eq(1), 'isWorker'].unique()

array([1], dtype=int64)

In [60]:
# 11.12: distinct ESR values for persons on active duty (MIL 1); expect only 4 or 5.
persons.loc[persons['MIL'].eq(1), 'ESR'].unique()

array([4., 5.])

In [62]:
# 11.12: distinct COW values for persons on active duty (MIL 1).
# The answer, 5, is the federal government employee code.
persons.loc[persons['MIL'].eq(1), 'COW'].unique()

array([5.])

True

11.13 All MIL = 1 or 3 have age <= 60
- if in active duty, aged 60 and below?

In [64]:
# 11.13: distinct ages of persons with MIL 1 or 3; the expectation is all <= 60.
persons.loc[persons['MIL'].isin([1, 3]), 'AGEP'].unique()

array([25, 35, 34, 26, 27, 36, 37, 72, 46, 83, 22, 21, 30, 23, 32, 33, 29,
       31, 40, 28, 65, 48, 80, 24, 59, 43, 82, 71, 20, 50, 76, 70, 74, 69,
       52, 89, 54, 67, 39, 75, 49, 86, 79, 47, 60, 42, 58, 68, 81, 73, 55,
       57, 78, 84, 90, 38, 41, 53, 66, 64, 19, 44, 77, 61, 45, 63, 94, 56,
       51, 87, 62, 85, 18, 17], dtype=int64)

We notice some who are older than 60. Is it related to those who are now in active duty (1) or only on active duty for training (3)?

In [65]:
# Distinct ages of persons now on active duty (MIL 1) only.
persons.loc[persons['MIL'].eq(1), 'AGEP'].unique()

array([25, 35, 34, 26, 27, 36, 46, 22, 21, 30, 23, 32, 33, 37, 29, 31, 40,
       28, 48, 24, 20, 52, 39, 47, 54, 41, 42, 38, 49, 50, 44, 19, 43, 45,
       60, 51, 57, 18, 17], dtype=int64)

In [66]:
# Distinct ages of persons on active duty for training only (MIL 3).
persons.loc[persons['MIL'].eq(3), 'AGEP'].unique()

array([37, 72, 83, 34, 27, 25, 65, 80, 33, 59, 43, 82, 71, 30, 50, 76, 70,
       74, 69, 89, 54, 67, 75, 49, 86, 46, 79, 60, 42, 58, 23, 68, 81, 73,
       55, 32, 48, 57, 20, 47, 40, 78, 21, 84, 90, 38, 31, 53, 66, 64, 39,
       19, 44, 35, 77, 52, 61, 28, 63, 29, 26, 94, 56, 87, 36, 62, 85, 45],
      dtype=int64)

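The persons older than 60 all have MIL = 3 (active duty for training only); everyone with MIL = 1 (now on active duty) is aged 60 or below. Note that MIL = 3 also includes many persons aged 60 or below.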

In [28]:
# Number of persons whose COW code is 6, 7, or 8.
len(persons.loc[persons['COW'].isin([6, 7, 8])])

252655