In [None]:
import sys
import numpy as np
import pandas as pd
import scipy.stats
import matplotlib.pyplot as plt
from matplotlib.legend_handler import HandlerTuple
import cPickle as pickle
import glob

In [None]:
# Load the survey CSV. reset_index() turns the original row labels into an
# 'index' column, which the column selection in a later cell relies on.
# NOTE(review): the absolute path is machine-specific -- consider a configurable data dir.
fullDF = pd.read_csv('/home/welling/git/synecoace/data/nsch_2016_topical.csv').reset_index()
print(fullDF.columns)

In [None]:
# Keep only the columns used by the analysis and drop every row that has a
# missing value in any of them.  The explicit .copy() makes subDF an
# independent frame, so adding columns to it later does not trigger pandas'
# SettingWithCopyWarning (subDF is otherwise derived from a slice of fullDF).
subDF = fullDF[[
    'ACE1', 'ACE3', 'ACE4', 'ACE5', 'ACE6', 'ACE7', 'ACE8', 'ACE9', 'ACE10',
    'FWC', 'index', 'YEAR', 'FPL', 'SC_AGE_YEARS',
    'K4Q32X01', 'K7Q30', 'K7Q31', 'AGEPOS4',
]].dropna().copy()
# Number of complete rows that remain.
print(len(subDF))


In [None]:
# ACETOT = ACE1 + 15 - (ACE3 + ACE4 + ... + ACE10), evaluated row by row.
subtractedCols = ['ACE3', 'ACE4', 'ACE5', 'ACE6', 'ACE7', 'ACE8', 'ACE9', 'ACE10']
subDF['ACETOT'] = subDF['ACE1'] + 15 - sum(subDF[col] for col in subtractedCols)

In [None]:
def mkSamps(df, nSamp, rng=None):
    """Draw nSamp rows from df with replacement, weighted by the 'FWC' column.

    Each row is chosen with probability FWC / sum(FWC).  The returned frame
    holds the selected rows (original index labels preserved, so labels may
    repeat) with the 'FWC' column removed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an 'FWC' column of non-negative weights.
    nSamp : int
        Number of rows to draw.
    rng : object with a numpy-style ``choice`` method, optional
        Random source, e.g. ``np.random.RandomState(seed)``, for reproducible
        draws.  When omitted the global ``np.random`` state is used, exactly
        as before.

    Raises
    ------
    ValueError
        If the weights do not have a positive, finite-comparable total
        (empty frame, all-zero weights, or NaN weights).
    """
    sampler = np.random if rng is None else rng
    weights = np.asarray(df['FWC'], dtype=float)
    total = weights.sum()
    # 'not total > 0' also catches a NaN total, which would otherwise surface
    # as a less helpful error from inside numpy.
    if not total > 0:
        raise ValueError("mkSamps: 'FWC' weights must have a positive sum")
    choices = sampler.choice(len(df), nSamp, p=weights / total)
    return df.iloc[choices].drop(columns=['FWC'])
# Quick look at a small weighted draw from the full subset.
nSamp = 10
print(mkSamps(subDF, nSamp))


In [None]:
# Map column name -> position (and back) for frames produced by mkSamps, so
# that samples stored as plain arrays can be indexed by column name.
sampleColumns = mkSamps(subDF, 1).columns
COLUMN_DICT = dict((name, pos) for pos, name in enumerate(sampleColumns))
print(COLUMN_DICT)
INV_COLUMN_DICT = dict((pos, name) for name, pos in COLUMN_DICT.items())
print(INV_COLUMN_DICT)

In [None]:
# Split subDF into one frame per value of SC_AGE_YEARS (6 through 17) and
# report how many rows each one holds.
ageDFD = {age: subDF[subDF['SC_AGE_YEARS'] == age] for age in range(6, 18)}
for age in sorted(ageDFD):
    print('%s: %s' % (age, len(ageDFD[age])))

In [None]:
# Histogram of the FPL column over all retained rows.
plt.hist(subDF.FPL)
plt.show()

In [None]:
# For every age, keep two slices of that age's frame: rows with FPL <= 100
# and rows with FPL >= 300.  Rows strictly between the two cut-offs are in
# neither dictionary.
lowFPLageDFD, highFPLageDFD = {}, {}

for age in range(6, 18):
    df = ageDFD[age]
    print('%s %s' % (age, df.columns))
    lowFPLageDFD[age] = df.loc[df['FPL'] <= 100]
    highFPLageDFD[age] = df.loc[df['FPL'] >= 300]

In [None]:
# Display the FPL <= 100 rows of the age-6 frame for a visual check.
lowFPLageDFD[6]

In [None]:
# Mean ACETOT of a 1000-row weighted resample, per age, for each FPL band.
ageL = range(6, 18)
lowL, highL = [], []
for age in ageL:
    # Low band is sampled before high band at every age, so the random
    # stream is consumed in the same order as a straight-line version.
    for frameD, meanL in ((lowFPLageDFD, lowL), (highFPLageDFD, highL)):
        samps = mkSamps(frameD[age], 1000)
        meanL.append(samps['ACETOT'].mean())
for meanL, bandLabel in ((lowL, 'FPL <= 100'), (highL, 'FPL >= 300')):
    plt.plot(ageL, meanL, label=bandLabel)
plt.legend()
plt.show()


In [None]:
# Pass bbox_inches=None to the inline backend when it renders figures.
# NOTE(review): presumably this disables the backend's tight-bbox cropping -- confirm.
%config InlineBackend.print_figure_kwargs = {'bbox_inches':None}
# Default size for figures created from here on.
plt.rcParams["figure.figsize"] = [16, 10]

def calcRange(samps):
    """Return (mean - std, mean, mean + std) for the 'ACETOT' column of samps.

    The standard deviation is the value of ``std()`` called with its default
    arguments on that column.
    """
    aceTot = samps['ACETOT']
    center = aceTot.mean()
    spread = aceTot.std()
    return center - spread, center, center + spread

# For each age, take a 10000-row weighted resample of each FPL band and plot
# the mean of ACETOT against age, with a shaded band of one standard
# deviation either side of the mean.
ageL = range(6, 18)
lowL = []
lowLBL = []
lowHBL = []
highL = []
highLBL = []
highHBL = []
artistPairL = []
labelL = []
for age in ageL:
    # Low band first, then high band, at every age: this keeps the order in
    # which random numbers are consumed unchanged.
    for frameD, midL, loL, hiL in ((lowFPLageDFD, lowL, lowLBL, lowHBL),
                                   (highFPLageDFD, highL, highLBL, highHBL)):
        vL, v, vH = calcRange(mkSamps(frameD[age], 10000))
        midL.append(v)
        loL.append(vL)
        hiL.append(vH)
fig, axes = plt.subplots(1, 1)
# The second label used to read 'mean += stdv'; it now matches the first.
for midL, loL, hiL, bandLabel in ((lowL, lowLBL, lowHBL, 'FPL <= 100 mean +- stdv'),
                                  (highL, highLBL, highHBL, 'FPL >= 300 mean +- stdv')):
    lineArtist, = axes.plot(ageL, midL, '-')
    fillArtist = axes.fill_between(ageL, loL, hiL, alpha=0.4)
    # Each legend entry shows the line and its shaded band together.
    artistPairL.append((lineArtist, fillArtist))
    labelL.append(bandLabel)
axes.legend(artistPairL, labelL, handler_map={tuple: HandlerTuple()})
axes.set_title('Total ACEs Rise Over Time')
axes.set_xlabel('Age')
axes.set_ylabel('Total ACEs')
plt.show()


In [None]:
# Load the saved samples; later cells index the result by the integers 6..17.
# Pickle data is a byte stream, so the file is opened in binary mode: the
# previous 'rU' mode rewrote '\r' bytes while reading, which can corrupt a
# binary-protocol pickle.
# NOTE(review): pickle.load can execute arbitrary code -- only load files
# from a trusted source.
with open('sav.pkl', 'rb') as f:
    sampsByYearD = pickle.load(f)
print(sampsByYearD.keys())

In [None]:
# Column labels of the samples stored under key 6.
print(sampsByYearD[6].columns)

In [None]:
# For every key 6..17, print the mean of the ACETOT and FPL columns of the
# stored samples.  Samples held as a DataFrame are converted to a plain array
# first, so columns are addressed by position through COLUMN_DICT.
for year in range(6, 18):
    samps = sampsByYearD[year]
    if isinstance(samps, pd.DataFrame):
        samps = samps.values
    print('%s %s' % ('year ', year))
    #print samps[:, COLUMN_DICT['index']]
    for colName in ('ACETOT', 'FPL'):
        print(samps[:, COLUMN_DICT[colName]].mean())

In [None]:
# Load every pickle file matching the pattern into sampDL, one entry per file.
# Matches are sorted so the order of sampDL does not depend on directory
# listing order.  Files are opened in binary mode because pickle data is a
# byte stream; the previous 'rU' mode rewrote '\r' bytes while reading.
# NOTE(review): the pattern contains no wildcard, so it matches at most one
# file -- confirm that is intended.
# NOTE(review): pickle.load can execute arbitrary code -- only load files
# from a trusted source.
sampDL = []
for fname in sorted(glob.glob('sav_tweaked.pkl')):
    print(fname)
    with open(fname, 'rb') as f:
        sampDL.append(pickle.load(f))

In [None]:
# Rebuild sampsByYearD as {key: [mean ACETOT of that key's samples, one value
# per loaded file]}.  Note that this replaces the dictionary of raw samples
# that the name referred to in earlier cells.
sampsByYearD = {}
for dct in sampDL:
    for year in range(6, 18):
        yearMeans = sampsByYearD.setdefault(year, [])
        samps = dct[year]
        if isinstance(samps, pd.DataFrame):
            samps = samps.values
        yearMeans.append(samps[:, COLUMN_DICT['ACETOT']].mean())
print(sampsByYearD)

In [None]:
# Display the entry keyed 10 of the fourth loaded sample set.
# NOTE(review): sampDL is filled from glob.glob('sav_tweaked.pkl'), a pattern
# without wildcards, so index 3 looks out of range on a fresh run -- confirm.
d = sampDL[3]
samps = d[10]
samps

In [None]:
# Display the ACETOT column of the entry keyed 11 of the fourth loaded sample set.
# NOTE(review): sampDL is filled from glob.glob('sav_tweaked.pkl'), a pattern
# without wildcards, so index 3 looks out of range on a fresh run -- confirm.
# NOTE(review): the positional [:, col] indexing presumes samps is a NumPy
# array rather than a DataFrame -- confirm.
d = sampDL[3]
samps = d[11]
samps[:, COLUMN_DICT['ACETOT']]

In [None]:
# Pass bbox_inches=None to the inline backend when it renders figures.
# NOTE(review): presumably this disables the backend's tight-bbox cropping -- confirm.
%config InlineBackend.print_figure_kwargs = {'bbox_inches':None}
# Default size for figures created from here on.
plt.rcParams["figure.figsize"] = [16, 10]

def calcRange(samps):
    """Return (mean - std, mean, mean + std) for the 'ACETOT' column of samps.

    The standard deviation is the value of ``std()`` called with its default
    arguments on that column.
    """
    aceTot = samps['ACETOT']
    center = aceTot.mean()
    spread = aceTot.std()
    return center - spread, center, center + spread

# For each age, take a 10000-row weighted resample of each FPL band and plot
# the mean of ACETOT against age, with a shaded band of one standard
# deviation either side of the mean.  The per-age average of the values in
# sampsByYearD is drawn on top as an 'x-' line (it has no legend entry).
ageL = range(6, 18)
lowL = []
lowLBL = []
lowHBL = []
highL = []
highLBL = []
highHBL = []
artistPairL = []
labelL = []
for age in ageL:
    # Low band first, then high band, at every age: this keeps the order in
    # which random numbers are consumed unchanged.
    for frameD, midL, loL, hiL in ((lowFPLageDFD, lowL, lowLBL, lowHBL),
                                   (highFPLageDFD, highL, highLBL, highHBL)):
        vL, v, vH = calcRange(mkSamps(frameD[age], 10000))
        midL.append(v)
        loL.append(vL)
        hiL.append(vH)
fig, axes = plt.subplots(1, 1)
# The second label used to read 'mean += stdv'; it now matches the first.
for midL, loL, hiL, bandLabel in ((lowL, lowLBL, lowHBL, 'FPL <= 100 mean +- stdv'),
                                  (highL, highLBL, highHBL, 'FPL >= 300 mean +- stdv')):
    lineArtist, = axes.plot(ageL, midL, '-')
    fillArtist = axes.fill_between(ageL, loL, hiL, alpha=0.4)
    # Each legend entry shows the line and its shaded band together.
    artistPairL.append((lineArtist, fillArtist))
    labelL.append(bandLabel)
axes.plot(ageL, [np.mean(sampsByYearD[age]) for age in ageL], 'x-')
#axes.boxplot([sampsByYearD[age] for age in ageL], positions=ageL)
axes.legend(artistPairL, labelL, handler_map={tuple: HandlerTuple()})
axes.set_title('Total ACEs Rise Over Time')
axes.set_xlabel('Age')
axes.set_ylabel('Total ACEs')
plt.show()


In [None]:
    for gpVal, label in zip(gpValL, labelL):
        dayV = allSelMedian[allSelMedian[gpKey]==gpVal][dayKey]
        prevV = allSelMedian[allSelMedian[gpKey]==gpVal][valKey]
        q1V = allSelQ1[allSelQ1[gpKey]==gpVal][valKey]
        q3V = allSelQ3[allSelQ3[gpKey]==gpVal][valKey]
        lineArtist, = axes.plot(dayV, prevV, '-')
        fillArtist = axes.fill_between(dayV, q1V, q3V, alpha=0.4)
        artistPairs.append((lineArtist, fillArtist))
    return artistPairs, [(lbl + ' median and IQR') for lbl in labelL]
