
This file contains code for use with "Think Stats",<br>
by Allen B. Downey, available from greenteapress.com<br>
Copyright 2014 Allen B. Downey<br>
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html<br>


In [None]:
from __future__ import print_function, division

In [None]:
import numpy as np
import sys

In [None]:
import nsfg
import thinkstats2

In [None]:
import os

Set the working directory to the local ThinkStats2 `code` folder (edit this path to match your machine)

In [None]:
# NOTE(review): absolute, machine-specific Windows path; os.chdir raises if
# the directory does not exist, so adjust it before running elsewhere.
# Presumably needed so the relative default file names in ReadFemResp
# resolve against this folder -- confirm.
os.chdir(r"C:\Users\champ\OneDrive\Documents\DSC530 Data Exploration\ThinkStats2\code")

Verify the current working directory

In [None]:
# Show the current working directory so the result of the chdir call
# can be checked by eye.
print(os.getcwd())

In [None]:
def ReadFemResp(dct_file='2002FemResp.dct',
                dat_file='2002FemResp.dat.gz',
                nrows=None):
    """Loads the NSFG respondent file into a DataFrame.

    dct_file: string name of the dictionary file given to ReadStataDct
    dat_file: string name of the data file (read with gzip compression)
    nrows: forwarded unchanged to ReadFixedWidth; defaults to None

    returns: DataFrame, after CleanFemResp has been called on it
    """
    dictionary = thinkstats2.ReadStataDct(dct_file)

    read_options = dict(compression='gzip', nrows=nrows)
    resp = dictionary.ReadFixedWidth(dat_file, **read_options)

    # CleanFemResp is called for its effect on the frame; its return
    # value is not used.
    CleanFemResp(resp)
    return resp

In [None]:
def CleanFemResp(df):
    """Placeholder for recoding variables in the respondent frame.

    Currently does nothing: df is left untouched.

    df: DataFrame

    returns: None
    """
    return None

In [None]:
def ValidatePregnum(resp):
    """Checks pregnum in the respondent file against the pregnancy file.

    For each respondent, pregnum must equal the number of pregnancy-frame
    indices that the pregnancy map holds for that respondent's caseid.

    resp: respondent DataFrame (its caseid and pregnum columns are read)

    returns: True if every respondent matches; otherwise prints
             (caseid, number of records found, pregnum) for the first
             mismatch and returns False
    """
    # map from caseid to list of indices into the pregnancy frame
    pregnancies_by_case = nsfg.MakePregMap(nsfg.ReadFemPreg())

    for row_label, expected in resp.pregnum.items():
        case = resp.caseid[row_label]
        found = len(pregnancies_by_case[case])

        if found == expected:
            continue

        # report the first disagreement and stop checking
        print(case, found, expected)
        return False

    return True

In [None]:
def main(script):
    """Runs this module's self-checks on the respondent data.

    Reads the respondent frame with the default file names, checks its
    length and the count of respondents with pregnum == 1, then runs
    ValidatePregnum.

    script: string script name, used in the success message
    """
    respondents = ReadFemResp()

    # counts expected for the default respondent file
    assert len(respondents) == 7643

    pregnum_counts = respondents.pregnum.value_counts()
    assert pregnum_counts[1] == 1267

    assert ValidatePregnum(respondents)

    print('%s: All tests passed.' % script)

In [None]:
if __name__ == '__main__':
    # Pass only the script name. main() accepts exactly one argument, so
    # unpacking all of sys.argv raised TypeError whenever extra arguments
    # were present (for example the arguments a notebook kernel is
    # launched with).
    main(sys.argv[0])