In [1]:
import os
import numpy as np
from collections import defaultdict
import pandas as pd
from scipy.optimize import minimize
import matplotlib.pyplot as plt
import networkx as nx

from data_conversions import selectData, transformData
from data_conversions import floatifyData, boolifyData, quantizeData
from sampling import mkSamps, toHisto, toProbV
from sampling import createSparseBinner as createBinner
from metropolis import mutualInfo, genMetropolisSamples
from agent_based_model import createWeightedSamplesGenerator, select_subset, createWeightSer, lnLik
from agent_based_model import load_dataset

In [2]:
    # load_dataset() hands back the working frame together with six column/age
    # lists; only some of them are used in this cell.
    subDF, acesL, boolColL, scalarColL, fixedL, passiveL, ageL = load_dataset()

    ageMin, ageMax = int(min(ageL)), int(max(ageL))

    # AGE is dropped from every per-age frame below, so it must not be passed
    # to quantizeData as one of the scalar columns.
    scalarColL.remove('AGE')
    print('scalar columns: ', scalarColL)

    # Quantize each age cohort on its own and keep the results keyed by age.
    ageDFD = {}
    range_d = None
    for age in range(ageMin, ageMax+1):
        ageDF = subDF.loc[subDF['AGE'] == age].drop(columns=['AGE', 'FIPSST'])
        ageDFD[age], _, _, _, dct = quantizeData(ageDF, acesL, boolColL, scalarColL)
        # The fifth value returned for the first cohort becomes the reference;
        # every later cohort must produce an equal one.
        if range_d is not None:
            assert dct == range_d, 'Quantized ranges do not match?'
        else:
            range_d = dct


Index(['FIPSST', 'HHID', 'FORMTYPE', 'TOTKIDS_R', 'HHLANGUAGE', 'SC_AGE_YEARS',
       'SC_SEX', 'K2Q35A_1_YEARS', 'MOMAGE', 'K6Q41R_STILL',
       ...
       'BIRTHWT', 'CBSAFP_YN', 'FWC', 'CURRINS', 'INSTYPE', 'HOUSE_GEN',
       'HHSIZE_I', 'HHSIZE_IF', 'FPL_IF', 'INSGAP'],
      dtype='object', length=422)
scalar columns:  ['BIRTHORDER', 'FPL', 'MOMAGE', 'TOTACES', 'TOTCSHCN', 'TOTKIDS']


In [3]:
# Select the prototype record: the row(s) of the youngest age group's frame
# whose RECIDX equals the hand-picked id 42694.
df = ageDFD[ageMin]
df = df[df.RECIDX==42694]
proto = df
# A bare expression is only rendered when it is the last statement of a cell,
# so the display has to come after the assignment.
proto

In [5]:
# Overwrite the prototype selected in the previous cell with a record loaded
# from a pickle file on disk (the file is not created in the visible cells).
# NOTE(review): unpickling can execute arbitrary code; only load pickle files
# that come from a trusted source.
proto = pd.read_pickle('poor_black_male.pkl')
proto

Unnamed: 0,FWC,RECIDX,DRUGSALCOHOL,MENTALILL,PARENTDIED,PARENTDIVORCED,PARENTJAIL,RACISM,SEEPUNCH,VIOLENCE,...,SC_RACE_WHITE,SPORTSTEAMS,VISIONCARE,BIRTHORDER_minusone,FPL,MOMAGE_LT_20,MOMAGE_GT_39,TOTACES,TOTCSHCN,TOTKIDS_minusone
0,1.0,-1,0,0,0,0,0,0,0,0,...,0,1,0,2,0,0,0,0,1,2


In [6]:
# Build a new record (rec_d: column name -> value) by walking the columns of
# the youngest age group's frame.
#   * Columns present in range_d are asked for interactively. The prompt shows
#     the accepted interval 0 .. range_d[col]-1 and the prototype's value.
#     An empty answer keeps the prototype's value; a non-integer or
#     out-of-range answer is rejected and the question is asked again.
#   * Columns absent from range_d get a fixed value from fixed_vals; a column
#     that is in neither place raises KeyError with an explanatory message.
# NOTE(review): the resulting record depends on what is typed at the prompts,
# so this cell is not reproducible under "Restart & Run All".
fixed_vals = {'FWC': 1.0, 'RECIDX': -1}
rec_d = {}
for col in ageDFD[ageMin].columns:
    if col not in range_d:
        print('%s: no range' % col)
        if col not in fixed_vals:
            raise KeyError('%s has no quantized range and no fixed value' % col)
        rec_d[col] = fixed_vals[col]
        continue

    upper = range_d[col]-1
    # proto is a one-row frame; take the scalar explicitly rather than relying
    # on implicit Series -> int conversion inside the %d format.
    proto_val = int(proto[col].iloc[0])
    prompt = '%s (0 - %d, proto is %d):' % (col, upper, proto_val)
    while True:
        val = input(prompt).strip()
        if not val:
            # Blank answer: fall back to the prototype's value.
            rec_d[col] = proto_val
            break
        try:
            val = int(val)
        except ValueError:
            print('%s: %r is not an integer' % (col, val))
            continue
        if 0 <= val <= upper:
            rec_d[col] = val
            break
        print('%s: %d is outside 0 - %d' % (col, val, upper))

FWC: no range
RECIDX: no range
DRUGSALCOHOL (0 - 1, proto is 0):0
MENTALILL (0 - 1, proto is 0):0
PARENTDIED (0 - 1, proto is 0):0
PARENTDIVORCED (0 - 1, proto is 0):0
PARENTJAIL (0 - 1, proto is 0):0
RACISM (0 - 1, proto is 0):0
SEEPUNCH (0 - 1, proto is 0):0
VIOLENCE (0 - 1, proto is 0):0
BIRTHWT_L (0 - 1, proto is 0):0
BIRTHWT_VL (0 - 1, proto is 0):0
CLUBS (0 - 1, proto is 1):1
DENTALCARE (0 - 1, proto is 1):1
DOCTORVISIT (0 - 1, proto is 1):1
HHLANGUAGE_ENGLISH (0 - 1, proto is 1):1
HHLANGUAGE_SPANISH (0 - 1, proto is 0):0
PREMATURE (0 - 1, proto is 0):0
SC_CSHCN (0 - 1, proto is 0):0
SC_FEMALE (0 - 1, proto is 0):0
SC_RACE_ASIAN (0 - 1, proto is 0):0
SC_RACE_BLACK (0 - 1, proto is 1):0
SC_RACE_HISPANIC (0 - 1, proto is 0):0
SC_RACE_ISLANDS (0 - 1, proto is 0):0
SC_RACE_MIXED (0 - 1, proto is 0):0
SC_RACE_NATIVE (0 - 1, proto is 0):0
SC_RACE_OTHER (0 - 1, proto is 0):0
SC_RACE_WHITE (0 - 1, proto is 0):1
SPORTSTEAMS (0 - 1, proto is 1):1
VISIONCARE (0 - 1, proto is 0):0
BIRTHORDER

In [7]:
# Turn the collected values in rec_d into a one-row frame (index 0) whose
# columns follow the order of the youngest age group's frame, then display it.
df = pd.DataFrame(rec_d, columns=ageDFD[ageMin].columns, index=[0])
df

Unnamed: 0,FWC,RECIDX,DRUGSALCOHOL,MENTALILL,PARENTDIED,PARENTDIVORCED,PARENTJAIL,RACISM,SEEPUNCH,VIOLENCE,...,SC_RACE_WHITE,SPORTSTEAMS,VISIONCARE,BIRTHORDER_minusone,FPL,MOMAGE_LT_20,MOMAGE_GT_39,TOTACES,TOTCSHCN,TOTKIDS_minusone
0,1.0,-1,0,0,0,0,0,0,0,0,...,1,1,0,2,0,0,0,0,1,2


In [8]:
# Save the hand-built record frame as a pickle; the path is relative to the
# current working directory.
df.to_pickle('poor_white_male.pkl')