In [3]:
import os
import numpy as np
import pandas as pd
from pathlib import PosixPath
from torch.utils.data import Dataset, DataLoader
from fastai.tabular import *

# GO TO CHECK POINT

# 1. Cohort dataset

In [2]:
cohort = pd.read_csv('/data/knee_mri3/demoInfo/enrollee01.txt', header = 1, 
                     low_memory=False, sep='\t')

In [3]:
cohort.shape

(43164, 26)

In [4]:
cohort.columns

Index(['collection_id', 'enrollee01_id', 'dataset_id',
       'The NDAR Global Unique Identifier (GUID) for research subject',
       'Subject ID how it's defined in lab/project',
       'Date on which the interview/genetic test/sampling/imaging/biospecimen was completed. MM/DD/YYYY',
       'Age in months at the time of the interview/test/sampling/imaging.',
       'Sex of the subject', 'Age in years', 'Ethnicity of participant',
       'Visit name', 'Subcohort assignment', 'Participant in Image Group A',
       'Participant in Image Group B', 'Participant in Image Group C',
       'Participant in Image Group D', 'Participant in Image Group E',
       'Participant in Image Group F', 'Participant in Image Group G',
       'Race of study subject',
       'Participant in first half of cohort, or second half of cohort',
       'Version/code of assessment', 'Site',
       'Indicator variable for having had an interim 6-month visit (18- or 30-month visit)',
       'collection_title', 'promo

In [5]:
cohort_small = cohort.loc[cohort['Visit name'] == 'V00', 
                         [ "Subject ID how it's defined in lab/project",
                          'Sex of the subject',
                         'Age in months at the time of the interview/test/sampling/imaging.',
                         'Ethnicity of participant',
                         'Race of study subject']
                         ]

In [6]:
cohort_small.columns = ['ID', 'sex', 'age_months', 'ethnicity', 'race']

In [7]:
cohort_small.head()

Unnamed: 0,ID,sex,age_months,ethnicity,race
9592,9370237,M,636.0,Not Hispanic or Latino,White
9593,9370366,F,600.0,Not Hispanic or Latino,White
9594,9371094,M,912.0,Not Hispanic or Latino,White
9595,9371267,F,912.0,Not Hispanic or Latino,White
9596,9371355,M,804.0,Not Hispanic or Latino,White


In [8]:
cohort_small.shape

(4796, 5)

In [9]:
cohort_small.to_pickle('/data/knee_mri3/labels/cohort_small.pickle')

In [10]:
cohort_small = pd.read_pickle('/data/knee_mri3/labels/cohort_small.pickle')

In [9]:
cohort_small.loc[cohort_small.ID == 9176485]

Unnamed: 0,ID,sex,age_months,ethnicity,race
9890,9176485,M,900.0,Not Hispanic or Latino,White


In [10]:
900 / 12

75.0

# 2. Clinical dataset 

## 2.1 Relevant clinical, function, symptomatic variables at baseline 

In [89]:
predictors = pd.read_excel('data/OAI_TDA_DataDictionary.xlsx')

In [71]:
predictors.head()

Unnamed: 0,row,variable name,category,oai visit,oai name,oai dataset,unit,data type,specific meaning,comments
0,,id,demographic,,ID,AllClinical,,,,
1,,v00age,demographic,V00,AGE,AllClinical,years,numeric,participant age in years at visit,
2,,v01age,demographic,V01,AGE,AllClinical,years,numeric,participant age in years at visit,
3,,v02age,demographic,V02,AGE,AllClinical,years,numeric,participant age in years at visit,
4,,v03age,demographic,V03,AGE,AllClinical,years,numeric,participant age in years at visit,


In [72]:
predictors = predictors.loc[predictors['oai visit'].isin(['V00', 'P01', 'P02'])]

In [73]:
predictors = predictors.loc[predictors['oai dataset'].isin(['AllClinical', 'Enrollees'])]

In [74]:
predictors.columns

Index(['row', 'variable name', 'category ', 'oai visit', 'oai name',
       'oai dataset', 'unit ', 'data type ', 'specific meaning', 'comments'],
      dtype='object')

In [75]:
predictors = predictors.dropna(subset=['data type '])

In [76]:
predictors['keys'] = predictors['oai visit'] + predictors['oai name']

In [77]:
predictors = predictors.loc[:, ['keys', 'category ', 'specific meaning', 'comments']]

In [78]:
predictors.shape

(770, 4)

In [79]:
predictors.head(20)

Unnamed: 0,keys,category,specific meaning,comments
1,V00AGE,demographic,participant age in years at visit,
13,P02SEX,demographic,participant sex,1=male; 2=female
14,P02RACE,demographic,racial background,Int 0:3
15,P02HISP,demographic,hispanic or latino background,
16,V00EDCV,subject characteristics,highest grade of school completed,Int 0:5
17,V00MARITST,subject characteristics,marital status,Int 1:5
21,V00LIVENO,subject characteristics,how many people in household,Int 0:6
25,V00LIVE1,subject characteristics,live with spouse,
29,V00LIVE2,subject characteristics,live with romantic partner,
33,V00LIVE3,subject characteristics,live with children,


In [80]:
left = predictors.loc[np.logical_or(predictors['specific meaning'].str.contains('left knee'),
                            np.logical_or(predictors['specific meaning'].str.contains('left hip'),
                                          np.logical_or(predictors['specific meaning'].str.contains('left ankle'),
                                                       predictors['specific meaning'].str.contains('left leg'))
                                         )), :]

In [81]:
left.head()

Unnamed: 0,keys,category,specific meaning,comments
132,P02KPNLCV,knee symptoms,"left knee pain, aching or stiffness on most da...",
151,P01KPNLEV,knee symptoms,"left knee pain, aching or stiffness: ever had ...",
152,P01KPNLEVY,knee symptoms,"how long ago was the start of left knee pain, ...",
153,P01KPNL12,knee symptoms,"any left knee pain, aching or stiffness in pas...",
154,P01KPL12CV,knee symptoms,"left knee pain, aching or stiffness on most da...",


In [82]:
right = predictors.loc[np.logical_or(predictors['specific meaning'].str.contains('right knee'),
                            np.logical_or(predictors['specific meaning'].str.contains('right hip'),
                                          np.logical_or(predictors['specific meaning'].str.contains('right ankle'),
                                                       predictors['specific meaning'].str.contains('right leg'))
                                         )), :]

In [83]:
predictors.loc[predictors['category '] == 'knee pain']

Unnamed: 0,keys,category,specific meaning,comments
275,V00KOOSKPR,knee pain,KOOS right knee pain score,
276,V00KPRKN1,knee pain,KOOS right knee pain during twisting/pivoting ...,
277,V00KPRKN2,knee pain,KOOS right knee pain during straightening knee...,
278,V00KPRKN3,knee pain,KOOS right knee pain during bending knee fully...,
279,V00WPRKN1,knee pain,KOOS/WOMAC right knee pain during walking in l...,
280,V00WPRKN2,knee pain,KOOS/WOMAC right knee pain going up or down st...,
281,V00WPRKN3,knee pain,KOOS/WOMAC right knee pain while in bed in las...,
282,V00WPRKN4,knee pain,KOOS/WOMAC right knee pain sitting or lying in...,
283,V00WPRKN5,knee pain,KOOS/WOMAC right knee pain standing upright in...,
284,V00P7RKFR,knee pain,KOOS right knee how often have pain,


In [59]:
predictors['category '].value_counts()

medical history            108
nutrition                  106
strength measures           85
medication                  57
knee function               56
performance measures        56
baseline risk factors       56
physical activity           38
knee symptoms               35
other joint pain            26
knee exam                   24
knee pain                   24
demographic history         18
hip pain                    16
global function             14
back pain                   12
hand exam                   12
subject characteristics     11
demographic                  9
health care access           3
physical exam                3
med                          1
Name: category , dtype: int64

In [84]:
predictors.loc[np.logical_and(predictors['category '] == 'medical history',
                             predictors['specific meaning'].str.contains('knee pain'))]

Unnamed: 0,keys,category,specific meaning,comments


In [85]:
pain_names = predictors.loc[predictors['specific meaning'].str.contains('knee pain')]

In [86]:
pain_names

Unnamed: 0,keys,category,specific meaning,comments
131,P02KPNRCV,knee symptoms,"right knee pain, aching or stiffness on most d...",
132,P02KPNLCV,knee symptoms,"left knee pain, aching or stiffness on most da...",
141,P01KPACDCV,knee symptoms,"days of limited activities due to knee pain, a...",
142,P01KPA30CV,knee symptoms,avoided or changed activities to reduce knee p...,
143,P01KPNREV,knee symptoms,"right knee pain, aching or stiffness: ever had...",
144,P01KPNREVY,knee symptoms,"how long ago was the start of right knee pain,...","Int 1,2,5"
145,P01KPNR12,knee symptoms,"any right knee pain, aching or stiffness in pa...",
146,P01KPR12CV,knee symptoms,"right knee pain, aching or stiffness on most d...",
147,P01KPNR12M,knee symptoms,"how many months with right knee pain, aching o...",
148,P01RKP30CV,knee symptoms,"any right knee pain, aching or stiffness in pa...",


In [87]:
predictors.to_pickle('/data/knee_mri3/JL/predict_pain/data/oai_selected_labels.pickle')

## 2.2 Filter them in AllClinical00 dataset

In [90]:
var_names = predictors['oai visit'] + predictors['oai name']

In [91]:
label_dir = PosixPath('/data/knee_mri3/demoInfo/all_clinical/')

In [92]:
ac = pd.read_csv(label_dir/'AllClinical00.txt', sep='|')

In [93]:
var_names = [v for v in var_names if v in ac.columns]

In [94]:
ac = ac.loc[:, ['ID'] + var_names]

In [95]:
ac.shape

(4796, 733)

In [67]:
ac.head()

Unnamed: 0,ID,V00AGE,V00EDCV,V00MARITST,V00LIVENO,V00LIVE1,V00LIVE2,V00LIVE3,V00LIVE4,V00LIVE5,...,V00VIT4,V00VIT5,V00VIT8,V00VIT9,V00VIT10,V00VIT11,V00VIT12,V00VIT6,V00VIT13,V00VIT7
0,9000099,59,5: Graduate degree,1: Married,2: Two,1: Yes,.: Missing Form/Incomplete Workbook,1: Yes,.: Missing Form/Incomplete Workbook,.: Missing Form/Incomplete Workbook,...,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No
1,9000296,69,2: Some college,1: Married,1: One,1: Yes,.: Missing Form/Incomplete Workbook,.: Missing Form/Incomplete Workbook,.: Missing Form/Incomplete Workbook,.: Missing Form/Incomplete Workbook,...,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No
2,9000622,71,1: High school graduate,1: Married,1: One,1: Yes,.: Missing Form/Incomplete Workbook,.: Missing Form/Incomplete Workbook,.: Missing Form/Incomplete Workbook,.: Missing Form/Incomplete Workbook,...,0: No,0: No,0: No,1: Yes,0: No,0: No,0: No,0: No,1: Yes,0: No
3,9000798,56,2: Some college,1: Married,1: One,1: Yes,.: Missing Form/Incomplete Workbook,.: Missing Form/Incomplete Workbook,.: Missing Form/Incomplete Workbook,.: Missing Form/Incomplete Workbook,...,0: No,0: No,0: No,0: No,0: No,0: No,0: No,1: Yes,0: No,0: No
4,9001104,72,2: Some college,2: Widowed,0: Zero,.: Missing Form/Incomplete Workbook,.: Missing Form/Incomplete Workbook,.: Missing Form/Incomplete Workbook,.: Missing Form/Incomplete Workbook,.: Missing Form/Incomplete Workbook,...,0: No,0: No,0: No,1: Yes,1: Yes,0: No,0: No,0: No,1: Yes,0: No


In [68]:
for n, t in zip(ac.columns, ac.dtypes):
    if t == 'O':
#         ac[n] = [e.split(':')[0] for e in ac[n]]
        ac[n] = ac[n].replace({'.: Missing Form/Incomplete Workbook': np.nan})
        ac[n] = ac[n].astype('O')

In [69]:
ac.head()

Unnamed: 0,ID,V00AGE,V00EDCV,V00MARITST,V00LIVENO,V00LIVE1,V00LIVE2,V00LIVE3,V00LIVE4,V00LIVE5,...,V00VIT4,V00VIT5,V00VIT8,V00VIT9,V00VIT10,V00VIT11,V00VIT12,V00VIT6,V00VIT13,V00VIT7
0,9000099,59,5: Graduate degree,1: Married,2: Two,1: Yes,,1: Yes,,,...,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No
1,9000296,69,2: Some college,1: Married,1: One,1: Yes,,,,,...,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No
2,9000622,71,1: High school graduate,1: Married,1: One,1: Yes,,,,,...,0: No,0: No,0: No,1: Yes,0: No,0: No,0: No,0: No,1: Yes,0: No
3,9000798,56,2: Some college,1: Married,1: One,1: Yes,,,,,...,0: No,0: No,0: No,0: No,0: No,0: No,0: No,1: Yes,0: No,0: No
4,9001104,72,2: Some college,2: Widowed,0: Zero,,,,,,...,0: No,0: No,0: No,1: Yes,1: Yes,0: No,0: No,0: No,1: Yes,0: No


In [70]:
ac.to_pickle('/data/knee_mri3/JL/predict_pain/data/AllClinical00_small.pickle')

In [96]:
ac = pd.read_pickle('/data/knee_mri3/JL/predict_pain/data/AllClinical00_small.pickle')

In [97]:
ac.iloc[:10, :10]

Unnamed: 0,ID,V00AGE,V00EDCV,V00MARITST,V00LIVENO,V00LIVE1,V00LIVE2,V00LIVE3,V00LIVE4,V00LIVE5
0,9000099,59,5: Graduate degree,1: Married,2: Two,1: Yes,,1: Yes,,
1,9000296,69,2: Some college,1: Married,1: One,1: Yes,,,,
2,9000622,71,1: High school graduate,1: Married,1: One,1: Yes,,,,
3,9000798,56,2: Some college,1: Married,1: One,1: Yes,,,,
4,9001104,72,2: Some college,2: Widowed,0: Zero,,,,,
5,9001400,75,4: Some graduate school,2: Widowed,0: Zero,,,,,
6,9001695,52,5: Graduate degree,1: Married,2: Two,1: Yes,,1: Yes,,
7,9001897,72,4: Some graduate school,1: Married,1: One,1: Yes,,,,
8,9002116,61,2: Some college,3: Divorced,0: Zero,,,,,
9,9002316,76,5: Graduate degree,2: Widowed,0: Zero,,,,,


# 3. Combine allClinical and cohort

In [100]:
cohort_small.head()

Unnamed: 0,index,ID,sex,age_months,ethnicity,race
0,9592,9370237,M,636.0,Not Hispanic or Latino,White
1,9593,9370366,F,600.0,Not Hispanic or Latino,White
2,9594,9371094,M,912.0,Not Hispanic or Latino,White
3,9595,9371267,F,912.0,Not Hispanic or Latino,White
4,9596,9371355,M,804.0,Not Hispanic or Latino,White


In [99]:
cohort_small = cohort_small.reset_index()

In [101]:
pred_df = pd.merge(left=cohort_small, 
               right=ac,
               how='left',
               on='ID')

In [73]:
pred_df.shape

(4796, 734)

In [104]:
pred_df = pred_df.drop('index', 1)

In [105]:
pred_df.iloc[:10, :10]

Unnamed: 0,ID,sex,age_months,ethnicity,race,V00AGE,V00EDCV,V00MARITST,V00LIVENO,V00LIVE1
0,9370237,M,636.0,Not Hispanic or Latino,White,53,3: College graduate,1: Married,1: One,1: Yes
1,9370366,F,600.0,Not Hispanic or Latino,White,50,2: Some college,1: Married,1: One,1: Yes
2,9371094,M,912.0,Not Hispanic or Latino,White,76,5: Graduate degree,1: Married,1: One,1: Yes
3,9371267,F,912.0,Not Hispanic or Latino,White,76,2: Some college,2: Widowed,1: One,
4,9371355,M,804.0,Not Hispanic or Latino,White,67,2: Some college,1: Married,1: One,1: Yes
5,9372194,F,936.0,Not Hispanic or Latino,White,78,3: College graduate,3: Divorced,0: Zero,
6,9372219,F,612.0,Not Hispanic or Latino,White,51,4: Some graduate school,2: Widowed,0: Zero,
7,9372390,F,624.0,Not Hispanic or Latino,White,52,3: College graduate,1: Married,3: Three,1: Yes
8,9372474,F,852.0,Not Hispanic or Latino,Black or African American,71,5: Graduate degree,1: Married,1: One,1: Yes
9,9372977,F,,Not Hispanic or Latino,White,56,2: Some college,3: Divorced,0: Zero,


# 5. Load FPCA scores 

In [106]:
eta_scores = pd.read_csv('/data/knee_mri3/JL/pain_trajectory_clustering/data/f_scores_2.csv')

In [107]:
eta_scores = eta_scores.iloc[:, 1:]

In [108]:
eta_scores.head()

Unnamed: 0,combined_id,V1,V2,V3,V4
0,9000099_LEFT,11.184261,-1.695476,-0.338173,0.835294
1,9000296_LEFT,-5.014966,-0.059305,0.079088,0.159873
2,9000296_RIGHT,-4.335862,0.390152,-0.175779,0.229023
3,9000622_LEFT,-2.017891,-0.356249,0.196591,0.129263
4,9000798_RIGHT,-5.327715,-0.223267,0.094702,0.081452


In [18]:
eta_scores.tail()

Unnamed: 0,combined_id,V1,V2,V3,V4
4074,9999510_RIGHT,-1.206465,0.25879,1.056888,0.501807
4075,9999862_LEFT,-4.549688,0.103468,0.278241,-0.076329
4076,9999862_RIGHT,-4.844343,0.067763,0.109085,-0.071602
4077,9999865_LEFT,1.21497,-1.186112,2.288722,-0.400837
4078,9999865_RIGHT,-4.191619,-0.914949,-0.056639,-0.044809


In [109]:
eta_scores.columns = ['ID_side', 'eta_0', 'eta_1', 'eta_2', 'eta_3']

In [110]:
eta_scores.head()

Unnamed: 0,ID_side,eta_0,eta_1,eta_2,eta_3
0,9000099_LEFT,11.184261,-1.695476,-0.338173,0.835294
1,9000296_LEFT,-5.014966,-0.059305,0.079088,0.159873
2,9000296_RIGHT,-4.335862,0.390152,-0.175779,0.229023
3,9000622_LEFT,-2.017891,-0.356249,0.196591,0.129263
4,9000798_RIGHT,-5.327715,-0.223267,0.094702,0.081452


In [36]:
eta_mean = eta_scores.loc[:, ['eta_0', 'eta_1', 'eta_2', 'eta_3']].apply(np.mean)
eta_std = eta_scores.loc[:, ['eta_0', 'eta_1', 'eta_2', 'eta_3']].apply(np.std)

eta_scores['std_eta_0'] = (eta_scores['eta_0'] - eta_scores['eta_0'].mean()) / eta_scores['eta_0'].std()
eta_scores['std_eta_1'] = (eta_scores['eta_1'] - eta_scores['eta_1'].mean()) / eta_scores['eta_1'].std()
eta_scores['std_eta_2'] = (eta_scores['eta_2'] - eta_scores['eta_2'].mean()) / eta_scores['eta_2'].std()
eta_scores['std_eta_3'] = (eta_scores['eta_3'] - eta_scores['eta_3'].mean()) / eta_scores['eta_3'].std()

In [37]:
eta_scores.head()

Unnamed: 0,ID_side,eta_0,eta_1,eta_2,eta_3,std_eta_0,std_eta_1,std_eta_2,std_eta_3
0,9000099_LEFT,11.184261,-1.695476,-0.338173,0.835294,1.677402,-0.996826,-0.331102,1.429741
1,9000296_LEFT,-5.014966,-0.059305,0.079088,0.159873,-0.789935,-0.022829,0.082186,0.278651
2,9000296_RIGHT,-4.335862,0.390152,-0.175779,0.229023,-0.686499,0.244728,-0.170255,0.3965
3,9000622_LEFT,-2.017891,-0.356249,0.196591,0.129263,-0.333444,-0.199597,0.198569,0.226484
4,9000798_RIGHT,-5.327715,-0.223267,0.094702,0.081452,-0.83757,-0.120434,0.097651,0.145002


In [70]:
eta_scores.describe()

Unnamed: 0,eta_0,eta_1,eta_2,eta_3,std_eta_0,std_eta_1,std_eta_2,std_eta_3
count,4079.0,4079.0,4079.0,4079.0,4079.0,4079.0,4079.0,4079.0
mean,0.171327,-0.020955,-0.003888,-0.00363,-9.632457000000001e-17,-7.049467e-18,-7.797963e-18,2.1315110000000003e-17
std,6.565469,1.679853,1.009614,0.586766,1.0,1.0,1.0,1.0
min,-5.368141,-12.222689,-7.175451,-3.560293,-0.8437278,-7.263571,-7.103269,-6.061465
25%,-4.320304,-0.530117,-0.38693,-0.204506,-0.6841295,-0.303099,-0.3793948,-0.3423435
50%,-2.249909,-0.121479,0.061959,0.040602,-0.3687835,-0.05984084,0.0652195,0.07538315
75%,2.256998,0.559492,0.274174,0.17552,0.3176727,0.3455345,0.2754131,0.3053177
max,51.734381,11.519074,6.118943,3.156959,7.853674,6.869664,6.064523,5.386453


# 6. Combine posterior probability labels and predictors

In [9]:
labels_df = pd.read_pickle('data/traj_labels_df.pickle')

In [3]:
labels_df.set.value_counts()

0.0    2665
2.0     812
1.0     600
Name: set, dtype: int64

In [5]:
812 / (2665 + 812 + 600)

0.19916605347068922

In [83]:
labels_df.columns

Index(['ids', 'side', 'dess_path', 'pain_meds', 'narcot', 'womac', 'koos',
       'Inferred_KLG', 'oa_status', 'ID', 'pp_0', 'pp_1', 'pp_2',
       'most_likely', 'set'],
      dtype='object')

In [84]:
labels_df.columns = ['ID', 'side', 'dess_path', 'pain_meds', 'narcot', 'womac', 'koos',
       'Inferred_KLG', 'oa_status', 'ID_side', 'pp_0', 'pp_1', 'pp_2',
       'most_likely', 'set']

In [85]:
labels_df.head()

Unnamed: 0,ID,side,dess_path,pain_meds,narcot,womac,koos,Inferred_KLG,oa_status,ID_side,pp_0,pp_1,pp_2,most_likely,set
0,9477175,LEFT,9477175_LEFT_0.mat,0,0,0.0,100.0,1,0,9477175_LEFT,0.999999,5.458956e-07,6.750792e-08,0,2
1,9794339,LEFT,9794339_LEFT_0.mat,0,0,0.0,97.2,1,0,9794339_LEFT,0.999981,1.873607e-05,7.424682e-07,0,0
2,9282182,RIGHT,9282182_RIGHT_0.mat,0,0,0.0,100.0,0,0,9282182_RIGHT,0.999759,0.0002402764,3.842015e-07,0,0
3,9359202,LEFT,9359202_LEFT_0.mat,0,0,0.0,100.0,3,1,9359202_LEFT,0.999816,0.0001823857,1.635602e-06,0,1
4,9754338,RIGHT,9754338_RIGHT_0.mat,0,0,0.0,100.0,1,0,9754338_RIGHT,0.99982,0.0001789554,8.681482e-07,0,2


In [86]:
labels_df = pd.merge(left=labels_df, 
             right=pred_df,
              how='left',
              on='ID')

In [87]:
labels_df = pd.merge(left = labels_df, 
              right = eta_scores,
              how='right',
              on=['ID_side'])

In [88]:
labels_df.head()

Unnamed: 0,ID,side,dess_path,pain_meds,narcot,womac,koos,Inferred_KLG,oa_status,ID_side,...,V00VIT10,V00VIT11,V00VIT12,V00VIT6,V00VIT13,V00VIT7,eta_0,eta_1,eta_2,eta_3
0,9477175.0,LEFT,9477175_LEFT_0.mat,0,0,0.0,100.0,1,0,9477175_LEFT,...,0: No,0: No,0: No,1: Yes,1: Yes,0: No,-5.05388,0.005395,0.030075,0.127305
1,9794339.0,LEFT,9794339_LEFT_0.mat,0,0,0.0,97.2,1,0,9794339_LEFT,...,0: No,0: No,0: No,0: No,1: Yes,0: No,-2.059586,-0.30649,0.064717,-0.017907
2,9282182.0,RIGHT,9282182_RIGHT_0.mat,0,0,0.0,100.0,0,0,9282182_RIGHT,...,1: Yes,0: No,0: No,0: No,0: No,1: Yes,-1.092739,-0.772949,0.426054,-0.045561
3,9359202.0,LEFT,9359202_LEFT_0.mat,0,0,0.0,100.0,3,1,9359202_LEFT,...,1: Yes,0: No,0: No,1: Yes,0: No,1: Yes,0.971453,0.365539,0.921799,0.319177
4,9754338.0,RIGHT,9754338_RIGHT_0.mat,0,0,0.0,100.0,1,0,9754338_RIGHT,...,0: No,0: No,0: No,0: No,0: No,0: No,1.309819,-3.678719,-1.595404,0.126684


In [89]:
labels_df.shape

(4079, 752)

In [94]:
labels_df = labels_df.dropna(subset=['eta_0', 'eta_1', 'eta_2', 'eta_3', 'dess_path'])

In [95]:
labels_df.shape

(4077, 752)

In [96]:
with open('data/posterior_probabilities_wide_df.pickle', 'wb') as f:
    pickle.dump([labels_df, eta_mean, eta_std], f)

In [97]:
pd.DataFrame(eta_mean).to_csv('data/eta_mean.csv')

In [98]:
pd.DataFrame(eta_std).to_csv('data/eta_std.csv')

In [99]:
labels_df.to_csv('data/standardized_eta_wide_df.csv')

# Checkpoint

# 7 Make data bunch object - standardization and imputation

In [80]:
# with open('data/posterior_probabilities_wide_df.pickle', 'rb') as f:
#     labels_df, eta_mean, eta_std = pickle.load(f)

In [111]:
labels_df = pd.read_csv('data/standardized_eta_wide_df.csv', low_memory=False)

In [112]:
labels_df.loc[:, ['eta_0', 'eta_1', 'eta_2', 'eta_3']].describe()

Unnamed: 0,eta_0,eta_1,eta_2,eta_3
count,4077.0,4077.0,4077.0,4077.0
mean,0.166389,-0.020825,-0.003408,-0.003697
std,6.558821,1.680246,1.009422,0.586829
min,-5.368141,-12.222689,-7.175451,-3.560293
25%,-4.320991,-0.52963,-0.386317,-0.204464
50%,-2.250943,-0.121479,0.061959,0.040602
75%,2.256715,0.561046,0.274333,0.175475
max,51.734381,11.519074,6.118943,3.156959


In [113]:
labels_df = labels_df.iloc[:, 1:]
labels_df.head()

Unnamed: 0,ID,side,dess_path,pain_meds,narcot,womac,koos,Inferred_KLG,oa_status,ID_side,...,V00VIT10,V00VIT11,V00VIT12,V00VIT6,V00VIT13,V00VIT7,eta_0,eta_1,eta_2,eta_3
0,9477175.0,LEFT,9477175_LEFT_0.mat,0,0.0,0.0,100.0,1.0,0.0,9477175_LEFT,...,0: No,0: No,0: No,1: Yes,1: Yes,0: No,-5.05388,0.005395,0.030075,0.127305
1,9794339.0,LEFT,9794339_LEFT_0.mat,0,0.0,0.0,97.2,1.0,0.0,9794339_LEFT,...,0: No,0: No,0: No,0: No,1: Yes,0: No,-2.059586,-0.30649,0.064717,-0.017907
2,9282182.0,RIGHT,9282182_RIGHT_0.mat,0,0.0,0.0,100.0,0.0,0.0,9282182_RIGHT,...,1: Yes,0: No,0: No,0: No,0: No,1: Yes,-1.092739,-0.772949,0.426054,-0.045561
3,9359202.0,LEFT,9359202_LEFT_0.mat,0,0.0,0.0,100.0,3.0,1.0,9359202_LEFT,...,1: Yes,0: No,0: No,1: Yes,0: No,1: Yes,0.971453,0.365539,0.921799,0.319177
4,9754338.0,RIGHT,9754338_RIGHT_0.mat,0,0.0,0.0,100.0,1.0,0.0,9754338_RIGHT,...,0: No,0: No,0: No,0: No,0: No,0: No,1.309819,-3.678719,-1.595404,0.126684


In [114]:
tmp = labels_df.copy()

for l in left['keys']:
#     assert l in tmp.columns, "{} not in labels_df".format(l)
    if l in labels_df.columns:
        c_name = l + '_C'
        i_name = l + '_I'
        tmp[c_name] = np.nan
        tmp[i_name] = np.nan
        tmp.loc[tmp.side == 'LEFT', i_name] = tmp.loc[tmp.side == 'LEFT', l]
        tmp.loc[tmp.side == 'RIGHT', c_name] = tmp.loc[tmp.side == 'RIGHT', l]

In [115]:
for l in right['keys']:
#     assert l in tmp.columns, "{} not in labels_df".format(l)
    if l in labels_df.columns:
        c_name = l + '_C'
        i_name = l + '_I'
        tmp[c_name] = np.nan
        tmp[i_name] = np.nan
        tmp.loc[tmp.side == 'LEFT', c_name] = tmp.loc[tmp.side == 'LEFT', l]
        tmp.loc[tmp.side == 'RIGHT', i_name] = tmp.loc[tmp.side == 'RIGHT', l]

In [25]:
col_to_drop = list(left['keys']) + list(right['keys'])
col_to_drop = [c for c in col_to_drop if c in labels_df.columns]

In [244]:
col_to_drop

['P02KPNLCV',
 'P01KPNLEV',
 'P01KPNLEVY',
 'P01KPNL12',
 'P01KPL12CV',
 'P01KPNL12M',
 'P01LKP30CV',
 'P01KPL30CV',
 'P01PMLKRCV',
 'V00KOOSKPL',
 'V00KPLKN1',
 'V00KPLKN2',
 'V00KPLKN3',
 'V00WPLKN1',
 'V00WPLKN2',
 'V00WPLKN3',
 'V00WPLKN4',
 'V00WPLKN5',
 'V00P7LKFR',
 'V00P7LKRCV',
 'V00WOMKPL',
 'V00KOOSYML',
 'V00KSXLKN1',
 'V00KSXLKN2',
 'V00KSXLKN3',
 'V00KSXLKN4',
 'V00KSXLKN5',
 'V00WOMSTFL',
 'V00WSLKN1',
 'V00WSLKN2',
 'V00WOMADLL',
 'V00DILKN1',
 'V00DILKN2',
 'V00DILKN3',
 'V00DILKN4',
 'V00DILKN5',
 'V00DILKN6',
 'V00DILKN7',
 'V00DILKN8',
 'V00DILKN9',
 'V00DILKN10',
 'V00DILKN11',
 'V00DILKN12',
 'V00DILKN13',
 'V00DILKN14',
 'V00DILKN15',
 'V00DILKN16',
 'V00DILKN17',
 'V00WOMTSL',
 'V00LKABPN',
 'V00LKLTTPN',
 'V00LKMTTPN',
 'V00LKRFXPN',
 'V00LKPATPN',
 'V00LKPGDPN',
 'V00LKPFCRE',
 'V00LKEFFB',
 'V00LKEFFPT',
 'V00LKFHDEG',
 'V00LKALNMT',
 'P01HPNL12',
 'P01HPL12CV',
 'P01HPNLIL',
 'P01HPNLOL',
 'P01HPNLFL',
 'P01HPNLB',
 'P01HPNLLB',
 'P01HPNLDK',
 'P01OJPNLA',
 

In [26]:
tmp = tmp.drop(columns=col_to_drop, axis=1)

In [27]:
labels_df_w_pain_names = tmp.copy()

In [28]:
pain_names.head()

Unnamed: 0,keys,category,specific meaning,comments
131,P02KPNRCV,knee symptoms,"right knee pain, aching or stiffness on most d...",
132,P02KPNLCV,knee symptoms,"left knee pain, aching or stiffness on most da...",
141,P01KPACDCV,knee symptoms,"days of limited activities due to knee pain, a...",
142,P01KPA30CV,knee symptoms,avoided or changed activities to reduce knee p...,
143,P01KPNREV,knee symptoms,"right knee pain, aching or stiffness: ever had...",


In [29]:
pain_keys = pain_names['keys'].tolist()
pain_keys += [k+ '_C' for k in pain_keys if k in left['keys'].tolist()]  
pain_keys += [k+ '_I' for k in pain_keys if k in left['keys'].tolist()] 
pain_keys += [k+ '_C' for k in pain_keys if k in right['keys'].tolist()]  
pain_keys += [k+ '_I' for k in pain_keys if k in right['keys'].tolist()] 

In [30]:
len(pain_keys)

184

In [31]:
col_to_drop = [c for c in pain_keys if c in tmp.columns]

# labels_df_wo_pain_names = tmp.drop(columns=col_to_drop, axis=1)  

In [32]:
print(labels_df_w_pain_names.shape)

(4077, 994)


NameError: name 'labels_df_wo_pain_names' is not defined

In [251]:
labels_df_wo_pain_names.iloc[:5, :20]

Unnamed: 0,ID,side,dess_path,pain_meds,narcot,womac,koos,Inferred_KLG,oa_status,ID_side,pp_0,pp_1,pp_2,most_likely,set,sex,age_months,ethnicity,race,V00AGE
0,9477175.0,LEFT,9477175_LEFT_0.mat,0,0.0,0.0,100.0,1.0,0.0,9477175_LEFT,0.999999,5.458956e-07,6.750792e-08,0,2.0,F,864.0,Not Hispanic or Latino,White,72.0
1,9794339.0,LEFT,9794339_LEFT_0.mat,0,0.0,0.0,97.2,1.0,0.0,9794339_LEFT,0.999981,1.873607e-05,7.424682e-07,0,0.0,F,744.0,Not Hispanic or Latino,White,61.0
2,9282182.0,RIGHT,9282182_RIGHT_0.mat,0,0.0,0.0,100.0,0.0,0.0,9282182_RIGHT,0.999759,0.0002402764,3.842015e-07,0,0.0,F,648.0,Not Hispanic or Latino,White,54.0
3,9359202.0,LEFT,9359202_LEFT_0.mat,0,0.0,0.0,100.0,3.0,1.0,9359202_LEFT,0.999816,0.0001823857,1.635602e-06,0,1.0,M,828.0,Not Hispanic or Latino,White,69.0
4,9754338.0,RIGHT,9754338_RIGHT_0.mat,0,0.0,0.0,100.0,1.0,0.0,9754338_RIGHT,0.99982,0.0001789554,8.681482e-07,0,2.0,F,744.0,Not Hispanic or Latino,White,62.0


## First prepare labels dataframe with pain variables

In [258]:
labels_df_w_pain_names = labels_df_w_pain_names.drop(['ID', 'side', 'ID_side', 'most_likely',
                                                      'pp_0', 'pp_1', 'pp_2',
                                                      'womac', 'koos', 'V00KOOSQOL', 'age_months'] , axis=1)

In [259]:
labels_df_w_pain_names = labels_df_w_pain_names.set_index('dess_path')

In [260]:
labels_df_w_pain_names = labels_df_w_pain_names.dropna(axis=1, thresh=3000)
labels_df_w_pain_names.shape

(4077, 369)

In [261]:
labels_df_w_pain_names.iloc[:5, :10]

Unnamed: 0_level_0,Unnamed: 0,pain_meds,narcot,Inferred_KLG,oa_status,set,sex,ethnicity,race,V00AGE
dess_path,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
9477175_LEFT_0.mat,0,0,0,1,0,2.0,F,Not Hispanic or Latino,White,72.0
9794339_LEFT_0.mat,1,0,0,1,0,0.0,F,Not Hispanic or Latino,White,61.0
9282182_RIGHT_0.mat,2,0,0,0,0,0.0,F,Not Hispanic or Latino,White,54.0
9359202_LEFT_0.mat,3,0,0,3,1,1.0,M,Not Hispanic or Latino,White,69.0
9754338_RIGHT_0.mat,4,0,0,1,0,2.0,F,Not Hispanic or Latino,White,62.0


# CHECKPOINT

In [4]:
# labels_df_w_pain_names.to_pickle('data/labels_df_w_pain_names_ncols_369.pickle')

In [5]:
labels_df_w_pain_names = pd.read_pickle('data/labels_df_w_pain_names_ncols_369.pickle')

In [6]:
# labels_df_w_pain_names = labels_df_w_pain_names.set_index('dess_path')
cat_names = []
cont_names = []
label_names = ['set', 'eta_0', 'eta_1', 'eta_2', 'eta_3','Unnamed: 0']

for n, t in zip(labels_df_w_pain_names.columns, labels_df_w_pain_names.dtypes):
    if t == 'O' and n not in label_names:
        cat_names.append(n)
    elif t != 'O' and n not in label_names:
        cont_names.append(n)
    else:
        print("Not assigning types: {}".format(n))

Not assigning types: set
Not assigning types: eta_0
Not assigning types: eta_1
Not assigning types: eta_2
Not assigning types: eta_3


In [7]:
cont_names

['V00AGE',
 'P01HEIGHT',
 'V00ABCIRC',
 'V00HT25MM',
 'V00WT25KG',
 'V00WTMAXKG',
 'V00WTMINKG',
 'P01KPACDCV',
 'V00KOOSFSR',
 'P01BPTOT',
 'P01BPBEDCV',
 'P01BPDAYCV',
 'V00BPSYS',
 'V00BPDIAS',
 'V00RPAVG',
 'V00CSPACE',
 'V00CSTIME1',
 'V00CSTIME2',
 'V0020MPACE',
 'V00STEPST1',
 'V00TIMET1',
 'V00STEPST2',
 'V00TIMET2',
 'V00HRB4WLK',
 'V00NUMSTOP',
 'V00400MTR',
 'V00400MTIM',
 'V00HR400WK',
 'V00PASE',
 'V00WKHR7CV',
 'V00HSPSS',
 'V00HSMSS',
 'V00COMORB',
 'V00CESD',
 'V00SMKPKYR',
 'V00PSMKYR',
 'V00DTACAR',
 'V00DTAIU',
 'V00DTANZN',
 'V00DTARE',
 'V00DTB1',
 'V00DTB12',
 'V00DTB6',
 'V00DTBCAR',
 'V00DTCAFFN',
 'V00DTCALC',
 'V00DTCARB',
 'V00DTCHOL',
 'V00DTCRYP',
 'V00DTCYST',
 'V00DTDAID',
 'V00DTDFIB',
 'V00DTFAT',
 'V00DTFE',
 'V00DTFOL',
 'V00DTGEN',
 'V00DTKCAL',
 'V00DTLIN',
 'V00DTLUT',
 'V00DTLYC',
 'V00DTMETH',
 'V00DTMG',
 'V00DTNA',
 'V00DTNIAC',
 'V00DTOLEC',
 'V00DTPHOS',
 'V00DTPOTA',
 'V00DTPROA',
 'V00DTPROT',
 'V00DTRET',
 'V00DTRIBO',
 'V00DTSFAT',
 'V00D

In [8]:
cat_names

['pain_meds',
 'narcot',
 'Inferred_KLG',
 'oa_status',
 'sex',
 'ethnicity',
 'race',
 'V00EDCV',
 'V00MARITST',
 'V00LIVENO',
 'V00INCOME',
 'V00CUREMP',
 'V00CEMPLOY',
 'V00HANDED',
 'P02JBMPCV',
 'P02FAMHXKR',
 'P02CNCR3',
 'P02IKPRISK',
 'P02KPN',
 'P02KINJ',
 'P02KSURG',
 'P02ACTRISK',
 'P02PA1',
 'P02PA2',
 'P02PA3',
 'P02PA4',
 'P01KPACT30',
 'P01KPA30CV',
 'P01SXKOA',
 'P01SVXRELK',
 'V00KOOSFX1',
 'V00KOOSFX4',
 'V00KOOSFX5',
 'V00KQOL1',
 'V00KQOL2',
 'V00KQOL3',
 'V00KQOL4',
 'V00KGLRS',
 'P01TMJE6M',
 'P01TJE30CV',
 'P01TJE30WC',
 'P01TMJF6M',
 'P01TJF30CV',
 'P01TJF30WC',
 'P01BP30',
 'P01BPACTCV',
 'P01RHBE',
 'P01RH1CV',
 'P01RH2CV',
 'P01RH3CV',
 'P01RH4CV',
 'P01RH5CV',
 'P01LHBE',
 'P01LH1CV',
 'P01LH2CV',
 'P01LH3CV',
 'P01LH4CV',
 'P01LH5CV',
 'V00KIKBALL',
 'V00CSTSGL',
 'V00CS5',
 'V00CSTREP1',
 'V00CSTREP2',
 'V00WLK20T1',
 'V00WLK20T2',
 'V00WLKAID',
 'V00WALKER',
 'V00HOSPSUR',
 'V00SAFEWLK',
 'V00CANEUSE',
 'V00400MCMP',
 'V00COMP10',
 'V00400EXCL',
 'V00DISC

In [9]:
with open('data/cont_cat_names_labels_df_w_pain_names.pickle', 'wb') as f:
    pickle.dump([cont_names, cat_names], f)

In [10]:
train_df = labels_df_w_pain_names.loc[labels_df_w_pain_names.set.isin([0, 1])].sort_values(by='set')

In [11]:
val_idx = [i for i, s in enumerate(train_df.set) if s == 1]

In [12]:
test_df = labels_df_w_pain_names.loc[labels_df_w_pain_names.set == 2]

In [13]:
test_tl = (TabularList.from_df(test_df, 
                        cat_names=cat_names,
                        cont_names=cont_names)
          )

In [19]:
test_tdb = (TabularList.from_df(test_df, 
                        cat_names=cat_names,
                        cont_names=cont_names,
                        procs=[FillMissing, Categorify, Normalize])
           .split_none()
           .label_from_df(['eta_0', 'eta_1', 'eta_2', 'eta_3'], label_cls=FloatList, log=False)
           .databunch()
          )

In [20]:
with open('data/test_tabular_tdb.pickle', 'wb') as f:
    pickle.dump(test_tdb, f)

In [17]:
tdb = (TabularList.from_df(train_df, 
                        cat_names=cat_names,
                        cont_names=cont_names,
                        procs=[FillMissing, Categorify, Normalize])
        .split_by_idx(val_idx)
        .label_from_df(['eta_0', 'eta_1', 'eta_2', 'eta_3'], label_cls=FloatList, log=False)
        .add_test(TabularList.from_df(test_df, 
                        cat_names=cat_names,
                        cont_names=cont_names))
        .databunch())

In [18]:
with open('data/train_tabular_tdb.pickle', 'wb') as f:
    pickle.dump(tdb, f)

In [173]:
tdb.show_batch(5)

pain_meds,narcot,Inferred_KLG,oa_status,sex,ethnicity,race,V00EDCV,V00MARITST,V00LIVENO,V00INCOME,V00CUREMP,V00CEMPLOY,V00HANDED,P02JBMPCV,P02FAMHXKR,P02CNCR3,P02IKPRISK,P02KPN,P02KINJ,P02KSURG,P02ACTRISK,P02PA1,P02PA2,P02PA3,P02PA4,P01KPACT30,P01KPA30CV,P01SXKOA,P01SVXRELK,V00KOOSFX1,V00KOOSFX4,V00KOOSFX5,V00KQOL1,V00KQOL2,V00KQOL3,V00KQOL4,V00KGLRS,P01TMJE6M,P01TJE30CV,P01TJE30WC,P01TMJF6M,P01TJF30CV,P01TJF30WC,P01BP30,P01BPACTCV,P01RHBE,P01RH1CV,P01RH2CV,P01RH3CV,P01RH4CV,P01RH5CV,P01LHBE,P01LH1CV,P01LH2CV,P01LH3CV,P01LH4CV,P01LH5CV,V00KIKBALL,V00CSTSGL,V00CS5,V00CSTREP1,V00CSTREP2,V00WLK20T1,V00WLK20T2,V00WLKAID,V00WALKER,V00HOSPSUR,V00SAFEWLK,V00CANEUSE,V00400MCMP,V00COMP10,V00400EXCL,V00DISCOMF,V00400PAIN,V00PASE1,V00PASE1HR,V00PASE2,V00PASE2HR,V00PASE3,V00PASE4,V00PASE5,V00PASE6,V00HOUACT1,V00HOUACT2,V00HOUACT3,V00HOUACT4,V00HOUACT5,V00HOUACT6,V00WORK7,V00PA130,V00PA130CV,V00PA130NM,V00PA230,V00PA230CV,V00PA330,V00PA330CV,V00PA430,V00PA430CV,V00PA530,V00PA530CV,V00SF1,V00SF2,V00SF3,V00SF4,V00SF5,V00SF6,V00SF7,V00SF8,V00SF9,V00SF10,V00SF11,V00SF12,V00HLTHCAR,V00HLTHCOV,V00MEDINS,P01RASTASV,P01RAIA,P01ARTHOTH,P01OADEGCV,P01OAHIPCV,P01OAHNDCV,P01OABCKCV,P01OAOTHCV,P01GOUTCV,P01OTARTCV,P01ARTDOC,P01ARTDRCV,P02KPMED,P02KPMEDCV,V00TYLEN,V00NSAIDS,V00NSAIDRX,V00COXIBS,V00NARCOT,V00SAME,V00MSM,V00DOXYCYC,V00PNMEDT,V00CHON,V00CHNFQCV,V00GLUC,V00GLCFQCV,V00KNINJ,V00HYINJCV,V00STINJCV,V00RXACTM,V00RXANALG,V00RXASPRN,V00RXBISPH,V00RXCHOND,V00RXCLCTN,V00RXCLCXB,V00RXCOX2,V00RXFLUOR,V00RXGLCSM,V00RXIHYAL,V00RXISTRD,V00RXMSM,V00RXNARC,V00RXNSAID,V00RXNTRAT,V00RXOSTRD,V00RXOTHAN,V00RXRALOX,V00RXRFCXB,V00RXSALIC,V00RXSAME,V00RXTPRTD,V00RXVIT_D,V00RXVLCXB,V00GNRH,V00PTH,V00BISPHOS,V00BISPTYP,V00HRTAT,V00HRTFAIL,V00BYPLEG,V00STROKE,V00ASTHMA,V00LUNG,V00ULCER,V00DIAB,V00KIDFXN,V00KIDTRAN,V00RA,V00POLYRH,V00LIVDAM,V00CANCER,V00CESD1,V00CESD2,V00CESD3,V00CESD4,V00CESD5,V00CESD6,V00CESD7,V00CESD8,V00CESD9,V00CESD10,V00CESD11,V00CESD12,V00CESD13,V00CESD14,V00CESD15,V00CESD16,V00CESD17,V00CESD18,V00CESD19,V00CESD20,V00FALL,V00FALLCV,V00BONEFX,V00SPNFX,V00SMOKE,V00SMOKER,V00PIPE,V00PSMOKER,V00DRNKAMT,V00DRKMORE,V00FFQFLG1,V00FFQFLG2,V00FFQFLG3,V00FFQFLG4,V00FFQFLG5,V00FFQ73,V00VIT1,V00VIT2,V00VIT3,V00VIT4,V00VIT5,V00VIT8,V00VIT9,V00VIT10,V00VIT11,V00VIT12,V00VIT6,V00VIT13,V00VIT7,P01HEIGHT_na,V00ABCIRC_na,V00HT25MM_na,V00WT25KG_na,V00WTMAXKG_na,V00WTMINKG_na,P01KPACDCV_na,V00KOOSFSR_na,P01BPTOT_na,P01BPBEDCV_na,P01BPDAYCV_na,V00RPAVG_na,V00CSPACE_na,V00CSTIME1_na,V00CSTIME2_na,V0020MPACE_na,V00STEPST1_na,V00TIMET1_na,V00STEPST2_na,V00TIMET2_na,V00HRB4WLK_na,V00NUMSTOP_na,V00400MTR_na,V00400MTIM_na,V00HR400WK_na,V00PASE_na,V00WKHR7CV_na,V00HSPSS_na,V00HSMSS_na,V00COMORB_na,V00CESD_na,V00SMKPKYR_na,V00PSMKYR_na,V00DTACAR_na,V00DTAIU_na,V00DTANZN_na,V00DTARE_na,V00DTB1_na,V00DTB12_na,V00DTB6_na,V00DTBCAR_na,V00DTCAFFN_na,V00DTCALC_na,V00DTCARB_na,V00DTCHOL_na,V00DTCRYP_na,V00DTCYST_na,V00DTDAID_na,V00DTDFIB_na,V00DTFAT_na,V00DTFE_na,V00DTFOL_na,V00DTGEN_na,V00DTKCAL_na,V00DTLIN_na,V00DTLUT_na,V00DTLYC_na,V00DTMETH_na,V00DTMG_na,V00DTNA_na,V00DTNIAC_na,V00DTOLEC_na,V00DTPHOS_na,V00DTPOTA_na,V00DTPROA_na,V00DTPROT_na,V00DTRET_na,V00DTRIBO_na,V00DTSFAT_na,V00DTVITC_na,V00DTVITD_na,V00DTVITE_na,V00DTVITK_na,V00DTZINC_na,V00DTSF_na,V00PCTALCH_na,V00PCTCARB_na,V00PCTFAT_na,V00PCTPROT_na,V00PCTSWT_na,V00BAPCARB_na,V00BAPFAT_na,V00BAPPROT_na,V00FIBBEAN_na,V00FIBGRN_na,V00FIBVGFR_na,V00SRVDRY_na,V00SRVFAT_na,V00SRVFRT_na,V00SRVGRN_na,V00SRVMEAT_na,V00SRVVEG_na,V00SUPB1_na,V00SUPB12_na,V00SUPB2_na,V00SUPB6_na,V00SUPBCAR_na,V00SUPCA_na,V00SUPCU_na,V00SUPFE_na,V00SUPFOL_na,V00SUPMG_na,V00SUPNIAC_na,V00SUPSE_na,V00SUPVITA_na,V00SUPVITC_na,V00SUPVITD_na,V00SUPVITE_na,V00SUPZINC_na,V00NERRORS_na,V00NFDSDAY_na,V00NNOSERV_na,V00NWARNS_na,V00PCTCOL1_na,V00PCTCOL9_na,V00PCTLARG_na,V00PCTMEDS_na,V00PCTSMAL_na,V00PCTXLS_na,V00AGE,P01HEIGHT,V00ABCIRC,V00HT25MM,V00WT25KG,V00WTMAXKG,V00WTMINKG,P01KPACDCV,V00KOOSFSR,P01BPTOT,P01BPBEDCV,P01BPDAYCV,V00BPSYS,V00BPDIAS,V00RPAVG,V00CSPACE,V00CSTIME1,V00CSTIME2,V0020MPACE,V00STEPST1,V00TIMET1,V00STEPST2,V00TIMET2,V00HRB4WLK,V00NUMSTOP,V00400MTR,V00400MTIM,V00HR400WK,V00PASE,V00WKHR7CV,V00HSPSS,V00HSMSS,V00COMORB,V00CESD,V00SMKPKYR,V00PSMKYR,V00DTACAR,V00DTAIU,V00DTANZN,V00DTARE,V00DTB1,V00DTB12,V00DTB6,V00DTBCAR,V00DTCAFFN,V00DTCALC,V00DTCARB,V00DTCHOL,V00DTCRYP,V00DTCYST,V00DTDAID,V00DTDFIB,V00DTFAT,V00DTFE,V00DTFOL,V00DTGEN,V00DTKCAL,V00DTLIN,V00DTLUT,V00DTLYC,V00DTMETH,V00DTMG,V00DTNA,V00DTNIAC,V00DTOLEC,V00DTPHOS,V00DTPOTA,V00DTPROA,V00DTPROT,V00DTRET,V00DTRIBO,V00DTSFAT,V00DTVITC,V00DTVITD,V00DTVITE,V00DTVITK,V00DTZINC,V00DTSF,V00PCTALCH,V00PCTCARB,V00PCTFAT,V00PCTPROT,V00PCTSWT,V00BAPCARB,V00BAPFAT,V00BAPPROT,V00FIBBEAN,V00FIBGRN,V00FIBVGFR,V00SRVDRY,V00SRVFAT,V00SRVFRT,V00SRVGRN,V00SRVMEAT,V00SRVVEG,V00SUPB1,V00SUPB12,V00SUPB2,V00SUPB6,V00SUPBCAR,V00SUPCA,V00SUPCU,V00SUPFE,V00SUPFOL,V00SUPMG,V00SUPNIAC,V00SUPSE,V00SUPVITA,V00SUPVITC,V00SUPVITD,V00SUPVITE,V00SUPZINC,V00NERRORS,V00NFDSDAY,V00NNOSERV,V00NSKIP,V00NWARNS,V00PCTCOL1,V00PCTCOL9,V00PCTLARG,V00PCTMEDS,V00PCTSMAL,V00PCTXLS,target
#na#,#na#,#na#,#na#,F,Not Hispanic or Latino,White,5: Graduate degree,1: Married,2: Two,4: $50K to < $100K,1: Yes,1: Works for pay,1: Right handed,0: No bumps either hand,0: No,0: No,2: Frequent pain at least one knee at IEI,1: Yes,0: No,0: No,1: Yes,1: Yes,0: No,0: No,1: Yes,1: Yes,1: Yes,3: Both knees,3: Both,1: Mild,1: Mild,1: Mild,2: Weekly,0: Not at all,0: Not at all,1: Mild,1: 1,0: No,0: No,0: Never,0: No,0: No,0: Never,1: Yes,0: No,0: 0,0: Normal,#na#,0: Normal,0: Normal,0: Normal,0: 0,0: Normal,0: Normal,0: Normal,0: Normal,0: Normal,1: Right,1: Stands without using arms,1: Yes,1: Completes 5 stands without using arms,1: Completes 5 stands without using arms,1: Completed,1: Completed,0: No,0: No,0: Does not meet criterion,1: Yes,0: No,1: Completed test without stopping,1: Yes,0: Not excluded,1: Yes,3: Both,3: Often (5-7 days),3: 2-4 hours,3: Often (5-7 days),2: 1 hour but less than 2 hours,0: Never,0: Never,0: Never,0: Never,1: Yes,1: Yes,0: No,0: No,0: No,1: Yes,1: Yes,1: Yes,2: 2-3 days per week,5: More than 6 flights,1: Yes,1: One day per week or less,0: No,0: None,1: Yes,4: Nearly every day or every day,0: No,0: None,2: Very Good,3: Not limited at all,3: Not limited at all,5: None of the time,5: None of the time,5: None of the time,5: None of the time,2: A little bit,2: Most of the time,2: Most of the time,5: None of the time,5: None of the time,1: Private Doctor,1: Yes,1: Yes,0: Does not report RA/inflam arth,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,1: Yes,0: No,0: No,0: No,0: No,0: No,0: No,0: No,1: Yes,0: No,0: No,1: Yes,4: Nearly every day or every day,1: Yes,4: Nearly every day or every day,0: No,0: No,0: No,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: No,0: No,0: No,0: No bisphosphonates taken,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),4: Most or all of the time (5-7 days),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),4: Most or all of the time (5-7 days),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),3: Much of the time (3-4 days),4: Most or all of the time (5-7 days),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),4: Most or all of the time (5-7 days),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),0: No,0: None,0: No,0: No,0: No,0: Never,0: No,0: Never,2: 1-3 drinks/week,0: No,0: No,0: No,0: No,1: Yes,0: No,"0: No, not regularly",0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,-0.9136,0.0476,1.4592,0.0932,-0.3509,0.4957,0.0485,-0.1409,-0.7584,-0.2208,-0.076,-0.2174,-0.2928,-0.9196,2.4547,-0.3484,-0.0283,0.074,2.738,-1.3895,-1.8517,-1.2811,-1.727,1.0078,-0.0582,0.0674,-1.2732,0.9137,1.5697,1.8369,0.1053,0.5347,-0.4204,-0.547,-0.5287,-0.2455,1.9016,2.2366,0.2417,2.3958,1.0082,1.3868,1.3839,2.1246,-0.3112,1.9323,1.5752,1.5276,-0.8705,1.4379,-0.1927,2.6016,1.5074,1.6462,2.0934,-0.1927,1.636,1.6796,1.7373,1.1784,1.6161,2.4656,2.4014,1.5404,1.4491,2.3197,2.1371,2.133,1.7031,1.6991,1.9774,1.4022,0.7529,0.2387,2.2849,2.4304,1.6804,2.7526,-0.2824,-0.0637,0.3608,0.1475,-0.1173,-0.0701,0.2455,0.0293,1.5276,1.9393,2.3395,1.2551,0.0254,0.0279,1.6538,1.0604,3.3503,-0.6243,-1.0488,-0.6595,-0.9658,-0.3962,-1.0312,-1.2564,-0.892,-1.1867,-1.255,-0.7093,-0.5304,-1.1841,-0.6073,-1.3241,-0.6506,-0.9113,-0.1793,2.6986,-0.228,-0.2281,-0.4243,0.9267,-0.4142,0.8142,0.2569,-1.1976,0.7502,[ 6.899352 1.313701 -1.579637 0.306282]
0,0.0,2.0,1.0,F,Not Hispanic or Latino,White,5: Graduate degree,3: Divorced,0: Zero,5: $100K or greater,1: Yes,1: Works for pay,2: Left handed,0: No bumps either hand,0: No,0: No,1: Infrequent knee pain only at IEI,1: Yes,1: Yes,0: No,1: Yes,1: Yes,0: No,0: No,0: No,0: No,0: No,0: Neither,3: Both,2: Moderate,1: Mild,0: None,1: Monthly,0: Not at all,0: Not at all,0: None,0: Very good,0: No,0: No,0: Never,0: No,0: No,0: Never,0: No,0: No,0: 0,0: Normal,0: Normal,0: Normal,0: Normal,0: Normal,0: 0,0: Normal,0: Normal,0: Normal,0: Normal,0: Normal,3: Both right and left,1: Stands without using arms,1: Yes,1: Completes 5 stands without using arms,1: Completes 5 stands without using arms,1: Completed,1: Completed,0: No,0: No,0: Does not meet criterion,1: Yes,0: No,1: Completed test without stopping,1: Yes,0: Not excluded,0: No,0: Neither,3: Often (5-7 days),2: 1 hour but less than 2 hours,3: Often (5-7 days),2: 1 hour but less than 2 hours,1: Seldom (1-2 days),0: Never,0: Never,0: Never,1: Yes,1: Yes,0: No,1: Yes,1: Yes,1: Yes,1: Yes,1: Yes,3: 4-5 days per week,5: More than 6 flights,1: Yes,1: One day per week or less,0: No,0: None,1: Yes,1: One day per week or less,0: No,0: None,2: Very Good,3: Not limited at all,3: Not limited at all,5: None of the time,5: None of the time,5: None of the time,5: None of the time,1: Not at all,2: Most of the time,2: Most of the time,5: None of the time,5: None of the time,3: HMO,1: Yes,1: Yes,0: Does not report RA/inflam arth,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: Not used,0: No,0: Not used,0: No,0: No,0: No,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: No,0: No,0: No,0: No bisphosphonates taken,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),4: Most or all of the time (5-7 days),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),4: Most or all of the time (5-7 days),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),4: Most or all of the time (5-7 days),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),4: Most or all of the time (5-7 days),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),0: No,0: None,0: No,0: No,1: Yes,2: Former,0: No,0: Never,1: <1/week,1: Yes,0: No,0: No,0: No,0: No,0: No,"1: Yes, fairly regularly",1: Yes,0: No,0: No,0: No,0: No,0: No,1: Yes,0: No,0: No,0: No,1: Yes,1: Yes,0: No,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,-0.3713,-0.4222,2.2672,-0.4363,-0.3509,1.454,-0.5493,-0.3074,-0.4597,-0.2208,-0.076,-0.2174,-0.2928,-0.711,0.4981,-0.1412,-0.25,-0.1439,0.0959,-0.0341,-0.2633,0.0862,-0.1774,0.311,-0.0582,0.0674,1.0362,0.4073,2.2885,1.8369,0.5532,0.3975,-0.4204,-0.8734,-0.3943,-0.2455,0.2427,0.3483,-0.2581,0.2044,-0.6116,-0.5043,0.0115,0.2904,-0.3965,-0.8218,-0.6593,0.3988,0.0058,-0.2423,-0.1927,0.3506,-0.0743,-0.3954,-0.1385,-0.1927,-0.3869,0.5776,0.1982,0.2169,0.02,-0.3494,-0.3099,-0.1789,-0.196,-0.4723,-0.0704,0.4198,-0.0053,-0.8077,-0.7407,-0.3445,0.7338,-0.6971,0.7797,0.8886,-0.4185,0.5763,-0.0979,-0.8102,0.7567,0.9344,-0.8915,-0.8298,0.7093,0.8815,1.2687,-1.0812,1.1495,-1.0101,-0.2559,0.4903,-0.9653,0.5812,1.375,-0.0948,0.3238,-0.0652,0.2285,-0.1103,1.3677,0.9399,0.3339,0.3788,0.9362,-0.0213,-0.0623,0.7065,0.6183,1.4478,-0.5211,0.2687,-0.1793,0.1121,-0.228,-0.2281,-0.4243,-0.4335,-0.8974,0.6272,0.3687,-0.9445,0.2731,[-5.197373 -0.136162 0.023061 0.040672]
0,0.0,0.0,0.0,F,Not Hispanic or Latino,White,3: College graduate,1: Married,1: One,4: $50K to < $100K,0: No,3: Not working in part due to health,1: Right handed,3: Both hands,0: No,0: No,0: No knee pain either knee at IEI,0: No,0: No,0: No,1: Yes,1: Yes,0: No,0: No,0: No,0: No,0: No,0: Neither,3: Both,0: None,1: Mild,0: None,0: Never,0: Not at all,0: Not at all,0: None,0: Very good,0: No,0: No,0: Never,0: No,0: No,0: Never,1: Yes,0: No,5: 5,1: Bony enlargement,1: Bony enlargement,1: Bony enlargement,1: Bony enlargement,1: Bony enlargement,3: 3,1: Bony enlargement,1: Bony enlargement,1: Bony enlargement,0: Normal,0: Normal,2: Left,2: Stands using arms,0: No,#na#,#na#,1: Completed,1: Completed,0: No,0: No,0: Does not meet criterion,1: Yes,1: Yes,1: Completed test without stopping,1: Yes,0: Not excluded,0: No,0: Neither,3: Often (5-7 days),3: 2-4 hours,3: Often (5-7 days),2: 1 hour but less than 2 hours,0: Never,0: Never,0: Never,2: Sometimes (3-4 days),1: Yes,1: Yes,0: No,0: No,1: Yes,0: No,0: No,0: No,0: None,2: 1 to 2 flights,0: No,0: None,0: No,0: None,0: No,0: None,1: Yes,1: One day per week or less,3: Good,"2: Yes, limited a little",3: Not limited at all,3: Some of the time,4: A little of the time,2: Most of the time,1: All of the time,1: Not at all,1: All of the time,2: Most of the time,3: Some of the time,4: A little of the time,1: Private Doctor,1: Yes,1: Yes,0: Does not report RA/inflam arth,0: No,1: Yes,0: No,0: No,1: Yes,0: No,0: No,0: No,0: No,1: Yes,0: No,0: No,0: No,0: No,1: Yes,1: Yes,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: Not used,0: No,0: Not used,0: No,0: No,0: No,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: No,0: No,1: Yes,1: Alendronate only,0: No,0: No,0: No,1: Yes,0: No,0: No,0: No,0: No,0: No,0: No,0: No,#na#,0: No,0: No,3: Much of the time (3-4 days),2: Some of the time (1-2 days),4: Most or all of the time (5-7 days),2: Some of the time (1-2 days),2: Some of the time (1-2 days),4: Most or all of the time (5-7 days),4: Most or all of the time (5-7 days),3: Much of the time (3-4 days),#na#,1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),2: Some of the time (1-2 days),4: Most or all of the time (5-7 days),4: Most or all of the time (5-7 days),2: Some of the time (1-2 days),3: Much of the time (3-4 days),4: Most or all of the time (5-7 days),4: Most or all of the time (5-7 days),1: Rarely/none of the time (<1 day),2: Some of the time (1-2 days),0: No,0: None,0: No,0: No,0: No,0: Never,0: No,0: Never,1: <1/week,1: Yes,0: No,0: No,0: No,0: No,0: No,"1: Yes, fairly regularly",1: Yes,#na#,#na#,#na#,#na#,0: No,0: No,0: No,0: No,0: No,1: Yes,0: No,0: No,False,False,False,False,False,False,False,False,False,False,False,False,True,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0.8216,-1.7332,-1.1432,-1.76,-1.4054,-1.9694,-1.4156,-0.3074,0.4364,-0.2208,-0.076,-0.2174,0.4909,0.5406,-1.4585,-0.1412,-0.1324,-0.1185,0.5944,0.237,-0.7877,0.6331,-0.4095,-0.8504,-0.0582,0.0674,0.4776,0.0907,-0.5743,-1.0317,0.2421,-2.2079,0.9277,4.8388,-0.5287,-0.2455,-0.7722,-0.8753,-0.9501,-0.9872,-0.0717,-0.9108,-1.2236,-0.8794,-0.4353,-0.5937,-0.4586,-1.2089,4.5139,-0.945,-0.1927,-1.0889,-1.1876,-1.2677,-0.13,-0.1927,-0.9836,-1.1191,-0.3402,-1.0926,-1.148,-0.5646,-1.4317,-1.2755,-1.1923,-0.97,-0.2443,-0.8088,-1.1213,-0.971,-1.0427,-1.1421,2.4608,-1.1376,-0.7918,-0.7287,-1.1181,-0.9718,-0.5377,1.8518,-1.5841,-0.763,-0.9826,1.7859,-1.7384,-0.9441,-0.7436,-0.6767,-0.9444,-0.3923,0.3067,-0.4345,-0.5833,-1.3358,-0.9145,-0.0948,0.3238,-0.0652,0.2285,-0.1103,-0.1485,0.9399,0.3339,0.3788,0.9362,-0.0213,-0.0623,0.7065,0.6183,0.5238,-0.2126,0.2687,-0.1793,-1.7242,-0.228,-0.2281,2.2426,0.9267,0.5521,-0.2724,-0.8234,1.2063,-0.5985,[2.226434 0.285417 0.021913 0.169428]
0,0.0,0.0,0.0,F,Not Hispanic or Latino,White,5: Graduate degree,1: Married,3: Three,5: $100K or greater,1: Yes,1: Works for pay,3: Ambidextrous,0: No bumps either hand,0: No,0: No,1: Infrequent knee pain only at IEI,1: Yes,0: No,0: No,1: Yes,1: Yes,0: No,0: No,0: No,0: No,1: Yes,0: Neither,3: Both,1: Mild,1: Mild,#na#,2: Weekly,0: Not at all,0: Not at all,1: Mild,0: Very good,0: No,0: No,0: Never,0: No,0: No,0: Never,1: Yes,0: No,1: 1,0: Normal,0: Normal,0: Normal,1: Bony enlargement,#na#,1: 1,0: Normal,0: Normal,0: Normal,0: Normal,1: Bony enlargement,1: Right,1: Stands without using arms,1: Yes,1: Completes 5 stands without using arms,1: Completes 5 stands without using arms,1: Completed,1: Completed,0: No,0: No,0: Does not meet criterion,1: Yes,0: No,1: Completed test without stopping,1: Yes,0: Not excluded,0: No,0: Neither,3: Often (5-7 days),3: 2-4 hours,3: Often (5-7 days),2: 1 hour but less than 2 hours,0: Never,1: Seldom (1-2 days),1: Seldom (1-2 days),0: Never,1: Yes,0: No,1: Yes,1: Yes,0: No,0: No,1: Yes,1: Yes,4: Nearly every day or every day,5: More than 6 flights,0: No,0: None,0: No,0: None,0: No,0: None,0: No,0: None,1: Excellent,3: Not limited at all,3: Not limited at all,5: None of the time,5: None of the time,2: Most of the time,3: Some of the time,1: Not at all,3: Some of the time,3: Some of the time,3: Some of the time,5: None of the time,1: Private Doctor,1: Yes,1: Yes,0: Does not report RA/inflam arth,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,1: Yes,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: Not used,0: No,0: Not used,0: No,0: No,0: No,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: No,0: No,0: No,0: No bisphosphonates taken,0: No,0: No,0: No,0: No,1: Yes,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),2: Some of the time (1-2 days),2: Some of the time (1-2 days),2: Some of the time (1-2 days),2: Some of the time (1-2 days),3: Much of the time (3-4 days),3: Much of the time (3-4 days),2: Some of the time (1-2 days),1: Rarely/none of the time (<1 day),3: Much of the time (3-4 days),2: Some of the time (1-2 days),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),2: Some of the time (1-2 days),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),3: Much of the time (3-4 days),0: No,0: None,0: No,0: No,0: No,0: Never,0: No,0: Never,2: 1-3 drinks/week,0: No,0: No,0: No,0: No,0: No,0: No,"0: No, not regularly",0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,-1.1305,-0.9138,-0.65,-0.9658,-0.705,-1.1497,-0.3501,-0.3074,-0.382,-0.2208,-0.076,-0.2174,-1.2072,0.332,0.0633,1.0328,-0.5523,-0.9934,-0.6469,0.237,0.7203,-0.1873,0.2277,-0.5407,-0.0582,0.0674,0.5367,-0.9856,-0.4008,0.8807,2.0368,-2.8562,0.9277,1.9011,-0.5287,-0.2455,-0.7702,-0.8995,-0.681,-1.0288,-1.1515,-0.742,-1.2236,-0.7802,-0.4353,-1.2702,-1.2573,-0.8232,-0.6854,-0.988,-0.1927,-1.0758,-0.5319,-0.9707,-1.0612,-0.1927,-1.0305,-0.1221,-0.379,-0.4617,-0.9915,-1.3224,-0.8823,-0.8016,-0.5525,-1.3786,-1.3077,-0.8223,-1.0411,-1.0921,-1.1937,-0.6474,-1.1316,-1.0721,-0.6148,-0.5144,-0.9082,-1.2556,0.0014,-1.0757,1.3279,-0.2896,-1.0054,-1.1621,1.3263,-0.3202,-0.5817,-0.485,-1.1889,-1.1131,-0.1621,-1.2436,-0.4196,-0.6649,-1.0043,-0.6243,-1.0488,-0.6595,-0.9658,-0.3962,-1.0312,-1.2564,-0.892,-1.1867,-1.255,-0.7093,-0.5304,-1.1841,-0.6073,-1.3241,-0.6506,-0.9113,-0.1793,-1.1577,-0.228,-0.2281,2.2426,-0.7739,-0.8974,-0.9965,0.4883,0.6941,-0.5985,[ 1.137611 1.259054 -0.068522 -0.546473]
0,0.0,1.0,0.0,M,Not Hispanic or Latino,White,5: Graduate degree,3: Divorced,0: Zero,3: $25K to < $50K,1: Yes,1: Works for pay,1: Right handed,0: No bumps either hand,0: No,0: No,0: No knee pain either knee at IEI,0: No,0: No,0: No,1: Yes,1: Yes,0: No,0: No,0: No,0: No,0: No,0: Neither,3: Both,0: None,#na#,0: None,1: Monthly,0: Not at all,0: Not at all,0: None,0: Very good,0: No,0: No,0: Never,0: No,0: No,0: Never,1: Yes,0: No,0: 0,0: Normal,0: Normal,0: Normal,0: Normal,0: Normal,0: 0,0: Normal,0: Normal,0: Normal,0: Normal,0: Normal,1: Right,1: Stands without using arms,1: Yes,1: Completes 5 stands without using arms,1: Completes 5 stands without using arms,1: Completed,1: Completed,0: No,0: No,0: Does not meet criterion,1: Yes,0: No,1: Completed test without stopping,1: Yes,0: Not excluded,0: No,0: Neither,3: Often (5-7 days),3: 2-4 hours,1: Seldom (1-2 days),1: Less than 1 hour,0: Never,0: Never,0: Never,0: Never,1: Yes,0: No,0: No,0: No,1: Yes,0: No,1: Yes,1: Yes,4: Nearly every day or every day,5: More than 6 flights,1: Yes,1: One day per week or less,0: No,0: None,1: Yes,2: 2-3 days per week,1: Yes,1: One day per week or less,3: Good,3: Not limited at all,3: Not limited at all,5: None of the time,5: None of the time,4: A little of the time,5: None of the time,1: Not at all,2: Most of the time,3: Some of the time,4: A little of the time,5: None of the time,1: Private Doctor,1: Yes,1: Yes,0: Does not report RA/inflam arth,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: Not used,0: No,0: Not used,0: No,0: No,0: No,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: No,0: No,0: No,0: No bisphosphonates taken,0: No,0: No,0: No,0: No,0: No,0: No,#na#,0: No,0: No,0: No,0: No,0: No,0: No,0: No,1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),3: Much of the time (3-4 days),2: Some of the time (1-2 days),2: Some of the time (1-2 days),1: Rarely/none of the time (<1 day),3: Much of the time (3-4 days),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),3: Much of the time (3-4 days),3: Much of the time (3-4 days),2: Some of the time (1-2 days),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),3: Much of the time (3-4 days),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Yes,1: One,0: No,0: No,0: No,0: Never,0: No,0: Never,2: 1-3 drinks/week,1: Yes,0: No,0: No,0: No,0: No,0: No,"1: Yes, fairly regularly",1: Yes,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,-0.1544,1.8558,1.2466,1.9464,2.2739,2.0024,2.1709,-0.3074,0.3647,-0.2208,-0.076,-0.2174,-0.8153,0.7492,2.0199,-0.7628,0.2404,0.811,-1.5791,-0.0341,1.5432,0.0862,1.6435,1.6272,-0.0582,0.0674,0.7442,-0.0992,-1.4542,1.3588,0.4758,-0.6524,-0.4204,0.5954,-0.5287,-0.2455,-0.5828,-0.7678,-1.027,-0.8621,-1.3315,-0.9625,-1.3609,-0.8204,-0.3527,-0.7288,-1.2838,-1.1729,0.0058,-1.492,-0.1927,-1.1543,-1.2524,-1.2491,-1.2152,-0.1927,-1.388,-1.364,-0.6206,-0.7737,-1.2799,-1.4479,-1.4823,-1.3296,-1.2105,-1.294,-1.2825,-0.7108,-1.3812,-0.8203,-1.1937,-1.0715,-0.2649,-0.7263,-0.8804,-0.5783,-1.328,-1.0542,0.6824,0.2852,-0.7708,-0.3891,-0.2539,0.6247,-0.6009,-0.17,-0.7382,-1.2624,-0.63,-0.2894,-1.1936,-0.3189,-1.3472,-1.24,-0.69,-0.0948,0.3238,-0.0652,0.2285,-0.1103,-0.7552,0.9399,0.3339,0.3788,0.9362,-0.0213,-0.0623,0.7065,-0.4759,0.5238,-0.5211,0.2687,5.5764,-1.7711,0.3336,-0.2281,-0.4243,0.4162,-0.8974,0.829,0.3209,-0.9832,0.0371,[ 0.820574 -1.245313 1.110883 -0.132854]


In [174]:
with open('data/labels_df_w_pain_names_ncols_362.pickle', 'wb') as f:
    pickle.dump(tdb, f)

# Now labels dataframe that doesn't contain current pain-related var

In [34]:
labels_df_wo_pain_names = tmp.drop(['ID', 'side', 'ID_side', 'most_likely',
                                                      'pp_0', 'pp_1', 'pp_2',
                                                      'womac', 'koos', 'V00KOOSQOL', 'age_months'] , axis=1)

In [35]:
labels_df_wo_pain_names = labels_df_wo_pain_names.set_index('dess_path')

In [36]:
labels_df_wo_pain_names.iloc[:5, :10]

Unnamed: 0_level_0,pain_meds,narcot,Inferred_KLG,oa_status,set,sex,ethnicity,race,V00AGE,V00EDCV
dess_path,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
9477175_LEFT_0.mat,0,0.0,1.0,0.0,2.0,F,Not Hispanic or Latino,White,72.0,4: Some graduate school
9794339_LEFT_0.mat,0,0.0,1.0,0.0,0.0,F,Not Hispanic or Latino,White,61.0,5: Graduate degree
9282182_RIGHT_0.mat,0,0.0,0.0,0.0,0.0,F,Not Hispanic or Latino,White,54.0,3: College graduate
9359202_LEFT_0.mat,0,0.0,3.0,1.0,1.0,M,Not Hispanic or Latino,White,69.0,5: Graduate degree
9754338_RIGHT_0.mat,0,0.0,1.0,0.0,2.0,F,Not Hispanic or Latino,White,62.0,2: Some college


In [37]:
labels_df_wo_pain_names = labels_df_wo_pain_names.dropna(axis=1, thresh=3000)
labels_df_wo_pain_names.shape

(4077, 368)

In [38]:
labels_df_wo_pain_names.to_pickle('data/labels_df_wo_pain_names_ncols_368.pickle')

In [176]:
labels_df_wo_pain_names = pd.read_pickle('data/labels_df_wo_pain_names_ncols_368.pickle')

In [183]:
tmp = pd.read_pickle('data/labels_df_wo_pain_names_ncols_368.pickle')
tmp = tmp.sort_values(by='P01BMI', ascending=False)
tmp.iloc[40:60]

Unnamed: 0_level_0,pain_meds,narcot,Inferred_KLG,oa_status,set,sex,ethnicity,race,V00AGE,V00EDCV,...,V00VIT10,V00VIT11,V00VIT12,V00VIT6,V00VIT13,V00VIT7,eta_0,eta_1,eta_2,eta_3
dess_path,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
9450507_RIGHT_0.mat,0.0,0.0,0.0,0.0,0.0,F,Not Hispanic or Latino,White,53.0,3: College graduate,...,0: No,0: No,0: No,0: No,0: No,0: No,9.234454,0.247862,-0.41083,0.337508
9878596_LEFT_0.mat,0.0,0.0,3.0,1.0,0.0,F,Not Hispanic or Latino,Black or African American,66.0,5: Graduate degree,...,0: No,0: No,0: No,0: No,0: No,0: No,2.071753,2.372712,-0.517709,1.22566
9878596_RIGHT_0.mat,0.0,0.0,3.0,1.0,0.0,F,Not Hispanic or Latino,Black or African American,66.0,5: Graduate degree,...,0: No,0: No,0: No,0: No,0: No,0: No,7.431667,-8.058194,-1.015473,-1.031921
9847829_RIGHT_0.mat,,,,,0.0,M,Not Hispanic or Latino,White,59.0,5: Graduate degree,...,0: No,0: No,0: No,0: No,0: No,0: No,24.902867,-3.02733,2.353961,0.564103
9355016_RIGHT_0.mat,0.0,0.0,0.0,0.0,0.0,F,Not Hispanic or Latino,White,58.0,5: Graduate degree,...,0: No,0: No,0: No,0: No,0: No,0: No,-4.731232,-0.203121,0.40039,-0.059551
9064631_LEFT_0.mat,0.0,0.0,1.0,0.0,1.0,F,Not Hispanic or Latino,White,54.0,2: Some college,...,0: No,0: No,0: No,1: Yes,0: No,0: No,7.117206,-3.486272,-0.530693,-0.261248
9355016_LEFT_0.mat,0.0,0.0,1.0,0.0,0.0,F,Not Hispanic or Latino,White,58.0,5: Graduate degree,...,0: No,0: No,0: No,0: No,0: No,0: No,5.09189,0.927082,3.584981,0.67656
9464409_RIGHT_0.mat,0.0,0.0,2.0,1.0,0.0,F,Not Hispanic or Latino,White,58.0,5: Graduate degree,...,0: No,0: No,0: No,1: Yes,1: Yes,0: No,-5.197373,-0.136162,0.023061,0.040672
9464409_LEFT_0.mat,0.0,0.0,2.0,1.0,0.0,F,Not Hispanic or Latino,White,58.0,5: Graduate degree,...,0: No,0: No,0: No,1: Yes,1: Yes,0: No,-3.853341,0.956026,-0.837154,-0.320773
9064631_RIGHT_0.mat,0.0,0.0,1.0,0.0,1.0,F,Not Hispanic or Latino,White,54.0,2: Some college,...,0: No,0: No,0: No,1: Yes,0: No,0: No,11.54183,-5.315825,1.420181,-0.440288


In [188]:
tmp.loc['9990698_RIGHT_0.mat']

pain_meds                                         0
narcot                                            0
Inferred_KLG                                      0
oa_status                                         0
set                                               0
sex                                               F
ethnicity                    Not Hispanic or Latino
race                                          White
V00AGE                                           51
V00EDCV                          5: Graduate degree
V00MARITST                               1: Married
V00LIVENO                                    1: One
V00INCOME                        4: $50K to < $100K
V00CUREMP                                    1: Yes
V00CEMPLOY                         1: Works for pay
V00HANDED                           1: Right handed
P01BMI                                         16.9
P01HEIGHT                                      1643
P01WEIGHT                                      45.7
V00ABCIRC   

In [20]:
labels_df_wo_pain_names.iloc[:5, :15]

Unnamed: 0_level_0,pain_meds,narcot,Inferred_KLG,oa_status,set,sex,ethnicity,race,V00AGE,V00EDCV,V00MARITST,V00LIVENO,V00INCOME,V00CUREMP,V00CEMPLOY
dess_path,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
9477175_LEFT_0.mat,0,0,1,0,2.0,F,Not Hispanic or Latino,White,72.0,4: Some graduate school,1: Married,1: One,5: $100K or greater,1: Yes,1: Works for pay
9794339_LEFT_0.mat,0,0,1,0,0.0,F,Not Hispanic or Latino,White,61.0,5: Graduate degree,1: Married,1: One,,1: Yes,1: Works for pay
9282182_RIGHT_0.mat,0,0,0,0,0.0,F,Not Hispanic or Latino,White,54.0,3: College graduate,1: Married,2: Two,5: $100K or greater,1: Yes,1: Works for pay
9359202_LEFT_0.mat,0,0,3,1,1.0,M,Not Hispanic or Latino,White,69.0,5: Graduate degree,1: Married,1: One,4: $50K to < $100K,0: No,4: Not working other reasons
9754338_RIGHT_0.mat,0,0,1,0,2.0,F,Not Hispanic or Latino,White,62.0,2: Some college,1: Married,1: One,4: $50K to < $100K,1: Yes,1: Works for pay


In [21]:
labels_df_wo_pain_names.loc[:, ['narcot', 'Inferred_KLG', 'oa_status']] = labels_df_wo_pain_names.loc[:, 
                                                                          ['narcot', 'Inferred_KLG', 'oa_status']].astype('O')

In [22]:
labels_df_wo_pain_names.to_pickle('data/labels_df_wo_pain_names_ncols_368.pickle')

In [53]:
labels_df_wo_pain_names = pd.read_pickle('data/labels_df_wo_pain_names_ncols_368.pickle')

In [54]:
labels_df_wo_pain_names = labels_df_wo_pain_names.drop(['P01BMI', 'P01WEIGHT'], 1)

In [55]:
# labels_df_wo_pain_names = labels_df_wo_pain_names.set_index('dess_path')
cat_names = []
cont_names = []
label_names = ['set', 'eta_0', 'eta_1', 'eta_2', 'eta_3']

for n, t in zip(labels_df_wo_pain_names.columns, labels_df_wo_pain_names.dtypes):
    if t == 'O' and n not in label_names:
        cat_names.append(n)
    elif t != 'O' and n not in label_names:
        cont_names.append(n)
    else:
        print("Not assigning types: {}".format(n))

Not assigning types: set
Not assigning types: eta_0
Not assigning types: eta_1
Not assigning types: eta_2
Not assigning types: eta_3


In [56]:
cont_names

['V00AGE',
 'P01HEIGHT',
 'V00ABCIRC',
 'V00HT25MM',
 'V00WT25KG',
 'V00WTMAXKG',
 'V00WTMINKG',
 'P01KPACDCV',
 'V00KOOSFSR',
 'P01BPTOT',
 'P01BPBEDCV',
 'P01BPDAYCV',
 'V00BPSYS',
 'V00BPDIAS',
 'V00RPAVG',
 'V00CSPACE',
 'V00CSTIME1',
 'V00CSTIME2',
 'V0020MPACE',
 'V00STEPST1',
 'V00TIMET1',
 'V00STEPST2',
 'V00TIMET2',
 'V00HRB4WLK',
 'V00NUMSTOP',
 'V00400MTR',
 'V00400MTIM',
 'V00HR400WK',
 'V00PASE',
 'V00WKHR7CV',
 'V00HSPSS',
 'V00HSMSS',
 'V00COMORB',
 'V00CESD',
 'V00SMKPKYR',
 'V00PSMKYR',
 'V00DTACAR',
 'V00DTAIU',
 'V00DTANZN',
 'V00DTARE',
 'V00DTB1',
 'V00DTB12',
 'V00DTB6',
 'V00DTBCAR',
 'V00DTCAFFN',
 'V00DTCALC',
 'V00DTCARB',
 'V00DTCHOL',
 'V00DTCRYP',
 'V00DTCYST',
 'V00DTDAID',
 'V00DTDFIB',
 'V00DTFAT',
 'V00DTFE',
 'V00DTFOL',
 'V00DTGEN',
 'V00DTKCAL',
 'V00DTLIN',
 'V00DTLUT',
 'V00DTLYC',
 'V00DTMETH',
 'V00DTMG',
 'V00DTNA',
 'V00DTNIAC',
 'V00DTOLEC',
 'V00DTPHOS',
 'V00DTPOTA',
 'V00DTPROA',
 'V00DTPROT',
 'V00DTRET',
 'V00DTRIBO',
 'V00DTSFAT',
 'V00D

In [57]:
cat_names

['pain_meds',
 'narcot',
 'Inferred_KLG',
 'oa_status',
 'sex',
 'ethnicity',
 'race',
 'V00EDCV',
 'V00MARITST',
 'V00LIVENO',
 'V00INCOME',
 'V00CUREMP',
 'V00CEMPLOY',
 'V00HANDED',
 'P02JBMPCV',
 'P02FAMHXKR',
 'P02CNCR3',
 'P02IKPRISK',
 'P02KPN',
 'P02KINJ',
 'P02KSURG',
 'P02ACTRISK',
 'P02PA1',
 'P02PA2',
 'P02PA3',
 'P02PA4',
 'P01KPACT30',
 'P01KPA30CV',
 'P01SXKOA',
 'P01SVXRELK',
 'V00KOOSFX1',
 'V00KOOSFX4',
 'V00KOOSFX5',
 'V00KQOL1',
 'V00KQOL2',
 'V00KQOL3',
 'V00KQOL4',
 'V00KGLRS',
 'P01TMJE6M',
 'P01TJE30CV',
 'P01TJE30WC',
 'P01TMJF6M',
 'P01TJF30CV',
 'P01TJF30WC',
 'P01BP30',
 'P01BPACTCV',
 'P01RHBE',
 'P01RH1CV',
 'P01RH2CV',
 'P01RH3CV',
 'P01RH4CV',
 'P01RH5CV',
 'P01LHBE',
 'P01LH1CV',
 'P01LH2CV',
 'P01LH3CV',
 'P01LH4CV',
 'P01LH5CV',
 'V00KIKBALL',
 'V00CSTSGL',
 'V00CS5',
 'V00CSTREP1',
 'V00CSTREP2',
 'V00WLK20T1',
 'V00WLK20T2',
 'V00WLKAID',
 'V00WALKER',
 'V00HOSPSUR',
 'V00SAFEWLK',
 'V00CANEUSE',
 'V00400MCMP',
 'V00COMP10',
 'V00400EXCL',
 'V00DISC

In [58]:
with open('data/cont_cat_names_labels_df_wo_pain_names.pickle', 'wb') as f:
    pickle.dump([cont_names, cat_names], f)

In [59]:
train_df = labels_df_wo_pain_names.loc[labels_df_wo_pain_names.set.isin([0, 1])].sort_values(by='set')

In [60]:
val_idx = [i for i, s in enumerate(train_df.set) if s == 1]

In [61]:
test_df = labels_df_wo_pain_names.loc[labels_df_wo_pain_names.set == 2]

In [62]:
test_tl = (TabularList.from_df(test_df, 
                        cat_names=cat_names,
                        cont_names=cont_names)
          )

In [63]:
tdb = (TabularList.from_df(train_df, 
                        cat_names=cat_names,
                        cont_names=cont_names,
                        procs=[FillMissing, Categorify, Normalize])
        .split_by_idx(val_idx)
        .label_from_df(['eta_0', 'eta_1', 'eta_2', 'eta_3'], label_cls=FloatList, log=False)
        .add_test(TabularList.from_df(test_df, 
                        cat_names=cat_names,
                        cont_names=cont_names))
        .databunch())

In [64]:
tdb.show_batch(5)

pain_meds,narcot,Inferred_KLG,oa_status,sex,ethnicity,race,V00EDCV,V00MARITST,V00LIVENO,V00INCOME,V00CUREMP,V00CEMPLOY,V00HANDED,P02JBMPCV,P02FAMHXKR,P02CNCR3,P02IKPRISK,P02KPN,P02KINJ,P02KSURG,P02ACTRISK,P02PA1,P02PA2,P02PA3,P02PA4,P01KPACT30,P01KPA30CV,P01SXKOA,P01SVXRELK,V00KOOSFX1,V00KOOSFX4,V00KOOSFX5,V00KQOL1,V00KQOL2,V00KQOL3,V00KQOL4,V00KGLRS,P01TMJE6M,P01TJE30CV,P01TJE30WC,P01TMJF6M,P01TJF30CV,P01TJF30WC,P01BP30,P01BPACTCV,P01RHBE,P01RH1CV,P01RH2CV,P01RH3CV,P01RH4CV,P01RH5CV,P01LHBE,P01LH1CV,P01LH2CV,P01LH3CV,P01LH4CV,P01LH5CV,V00KIKBALL,V00CSTSGL,V00CS5,V00CSTREP1,V00CSTREP2,V00WLK20T1,V00WLK20T2,V00WLKAID,V00WALKER,V00HOSPSUR,V00SAFEWLK,V00CANEUSE,V00400MCMP,V00COMP10,V00400EXCL,V00DISCOMF,V00400PAIN,V00PASE1,V00PASE1HR,V00PASE2,V00PASE2HR,V00PASE3,V00PASE4,V00PASE5,V00PASE6,V00HOUACT1,V00HOUACT2,V00HOUACT3,V00HOUACT4,V00HOUACT5,V00HOUACT6,V00WORK7,V00PA130,V00PA130CV,V00PA130NM,V00PA230,V00PA230CV,V00PA330,V00PA330CV,V00PA430,V00PA430CV,V00PA530,V00PA530CV,V00SF1,V00SF2,V00SF3,V00SF4,V00SF5,V00SF6,V00SF7,V00SF8,V00SF9,V00SF10,V00SF11,V00SF12,V00HLTHCAR,V00HLTHCOV,V00MEDINS,P01RASTASV,P01RAIA,P01ARTHOTH,P01OADEGCV,P01OAHIPCV,P01OAHNDCV,P01OABCKCV,P01OAOTHCV,P01GOUTCV,P01OTARTCV,P01ARTDOC,P01ARTDRCV,P02KPMED,P02KPMEDCV,V00TYLEN,V00NSAIDS,V00NSAIDRX,V00COXIBS,V00NARCOT,V00SAME,V00MSM,V00DOXYCYC,V00PNMEDT,V00CHON,V00CHNFQCV,V00GLUC,V00GLCFQCV,V00KNINJ,V00HYINJCV,V00STINJCV,V00RXACTM,V00RXANALG,V00RXASPRN,V00RXBISPH,V00RXCHOND,V00RXCLCTN,V00RXCLCXB,V00RXCOX2,V00RXFLUOR,V00RXGLCSM,V00RXIHYAL,V00RXISTRD,V00RXMSM,V00RXNARC,V00RXNSAID,V00RXNTRAT,V00RXOSTRD,V00RXOTHAN,V00RXRALOX,V00RXRFCXB,V00RXSALIC,V00RXSAME,V00RXTPRTD,V00RXVIT_D,V00RXVLCXB,V00GNRH,V00PTH,V00BISPHOS,V00BISPTYP,V00HRTAT,V00HRTFAIL,V00BYPLEG,V00STROKE,V00ASTHMA,V00LUNG,V00ULCER,V00DIAB,V00KIDFXN,V00KIDTRAN,V00RA,V00POLYRH,V00LIVDAM,V00CANCER,V00CESD1,V00CESD2,V00CESD3,V00CESD4,V00CESD5,V00CESD6,V00CESD7,V00CESD8,V00CESD9,V00CESD10,V00CESD11,V00CESD12,V00CESD13,V00CESD14,V00CESD15,V00CESD16,V00CESD17,V00CESD18,V00CESD19,V00CESD20,V00FALL,V00FALLCV,V00BONEFX,V00SPNFX,V00SMOKE,V00SMOKER,V00PIPE,V00PSMOKER,V00DRNKAMT,V00DRKMORE,V00FFQFLG1,V00FFQFLG2,V00FFQFLG3,V00FFQFLG4,V00FFQFLG5,V00FFQ73,V00VIT1,V00VIT2,V00VIT3,V00VIT4,V00VIT5,V00VIT8,V00VIT9,V00VIT10,V00VIT11,V00VIT12,V00VIT6,V00VIT13,V00VIT7,P01HEIGHT_na,V00ABCIRC_na,V00HT25MM_na,V00WT25KG_na,V00WTMAXKG_na,V00WTMINKG_na,P01KPACDCV_na,V00KOOSFSR_na,P01BPTOT_na,P01BPBEDCV_na,P01BPDAYCV_na,V00RPAVG_na,V00CSPACE_na,V00CSTIME1_na,V00CSTIME2_na,V0020MPACE_na,V00STEPST1_na,V00TIMET1_na,V00STEPST2_na,V00TIMET2_na,V00HRB4WLK_na,V00NUMSTOP_na,V00400MTR_na,V00400MTIM_na,V00HR400WK_na,V00PASE_na,V00WKHR7CV_na,V00HSPSS_na,V00HSMSS_na,V00COMORB_na,V00CESD_na,V00SMKPKYR_na,V00PSMKYR_na,V00DTACAR_na,V00DTAIU_na,V00DTANZN_na,V00DTARE_na,V00DTB1_na,V00DTB12_na,V00DTB6_na,V00DTBCAR_na,V00DTCAFFN_na,V00DTCALC_na,V00DTCARB_na,V00DTCHOL_na,V00DTCRYP_na,V00DTCYST_na,V00DTDAID_na,V00DTDFIB_na,V00DTFAT_na,V00DTFE_na,V00DTFOL_na,V00DTGEN_na,V00DTKCAL_na,V00DTLIN_na,V00DTLUT_na,V00DTLYC_na,V00DTMETH_na,V00DTMG_na,V00DTNA_na,V00DTNIAC_na,V00DTOLEC_na,V00DTPHOS_na,V00DTPOTA_na,V00DTPROA_na,V00DTPROT_na,V00DTRET_na,V00DTRIBO_na,V00DTSFAT_na,V00DTVITC_na,V00DTVITD_na,V00DTVITE_na,V00DTVITK_na,V00DTZINC_na,V00DTSF_na,V00PCTALCH_na,V00PCTCARB_na,V00PCTFAT_na,V00PCTPROT_na,V00PCTSWT_na,V00BAPCARB_na,V00BAPFAT_na,V00BAPPROT_na,V00FIBBEAN_na,V00FIBGRN_na,V00FIBVGFR_na,V00SRVDRY_na,V00SRVFAT_na,V00SRVFRT_na,V00SRVGRN_na,V00SRVMEAT_na,V00SRVVEG_na,V00SUPB1_na,V00SUPB12_na,V00SUPB2_na,V00SUPB6_na,V00SUPBCAR_na,V00SUPCA_na,V00SUPCU_na,V00SUPFE_na,V00SUPFOL_na,V00SUPMG_na,V00SUPNIAC_na,V00SUPSE_na,V00SUPVITA_na,V00SUPVITC_na,V00SUPVITD_na,V00SUPVITE_na,V00SUPZINC_na,V00NERRORS_na,V00NFDSDAY_na,V00NNOSERV_na,V00NWARNS_na,V00PCTCOL1_na,V00PCTCOL9_na,V00PCTLARG_na,V00PCTMEDS_na,V00PCTSMAL_na,V00PCTXLS_na,V00AGE,P01HEIGHT,V00ABCIRC,V00HT25MM,V00WT25KG,V00WTMAXKG,V00WTMINKG,P01KPACDCV,V00KOOSFSR,P01BPTOT,P01BPBEDCV,P01BPDAYCV,V00BPSYS,V00BPDIAS,V00RPAVG,V00CSPACE,V00CSTIME1,V00CSTIME2,V0020MPACE,V00STEPST1,V00TIMET1,V00STEPST2,V00TIMET2,V00HRB4WLK,V00NUMSTOP,V00400MTR,V00400MTIM,V00HR400WK,V00PASE,V00WKHR7CV,V00HSPSS,V00HSMSS,V00COMORB,V00CESD,V00SMKPKYR,V00PSMKYR,V00DTACAR,V00DTAIU,V00DTANZN,V00DTARE,V00DTB1,V00DTB12,V00DTB6,V00DTBCAR,V00DTCAFFN,V00DTCALC,V00DTCARB,V00DTCHOL,V00DTCRYP,V00DTCYST,V00DTDAID,V00DTDFIB,V00DTFAT,V00DTFE,V00DTFOL,V00DTGEN,V00DTKCAL,V00DTLIN,V00DTLUT,V00DTLYC,V00DTMETH,V00DTMG,V00DTNA,V00DTNIAC,V00DTOLEC,V00DTPHOS,V00DTPOTA,V00DTPROA,V00DTPROT,V00DTRET,V00DTRIBO,V00DTSFAT,V00DTVITC,V00DTVITD,V00DTVITE,V00DTVITK,V00DTZINC,V00DTSF,V00PCTALCH,V00PCTCARB,V00PCTFAT,V00PCTPROT,V00PCTSWT,V00BAPCARB,V00BAPFAT,V00BAPPROT,V00FIBBEAN,V00FIBGRN,V00FIBVGFR,V00SRVDRY,V00SRVFAT,V00SRVFRT,V00SRVGRN,V00SRVMEAT,V00SRVVEG,V00SUPB1,V00SUPB12,V00SUPB2,V00SUPB6,V00SUPBCAR,V00SUPCA,V00SUPCU,V00SUPFE,V00SUPFOL,V00SUPMG,V00SUPNIAC,V00SUPSE,V00SUPVITA,V00SUPVITC,V00SUPVITD,V00SUPVITE,V00SUPZINC,V00NERRORS,V00NFDSDAY,V00NNOSERV,V00NSKIP,V00NWARNS,V00PCTCOL1,V00PCTCOL9,V00PCTLARG,V00PCTMEDS,V00PCTSMAL,V00PCTXLS,target
#na#,#na#,#na#,#na#,F,Not Hispanic or Latino,White,3: College graduate,5: Never married,0: Zero,3: $25K to < $50K,0: No,4: Not working other reasons,#na#,0: No bumps either hand,0: No,0: No,2: Frequent pain at least one knee at IEI,1: Yes,1: Yes,0: No,1: Yes,1: Yes,0: No,0: No,0: No,0: No,0: No,2: Left knee only,3: Both,0: None,0: None,0: None,0: Never,0: Not at all,0: Not at all,0: None,0: Very good,0: No,0: No,0: Never,0: No,0: No,0: Never,0: No,0: No,1: 1,1: Bony enlargement,0: Normal,0: Normal,0: Normal,0: Normal,1: 1,1: Bony enlargement,0: Normal,0: Normal,0: Normal,0: Normal,1: Right,1: Stands without using arms,1: Yes,1: Completes 5 stands without using arms,1: Completes 5 stands without using arms,1: Completed,1: Completed,0: No,0: No,0: Does not meet criterion,1: Yes,0: No,1: Completed test without stopping,1: Yes,0: Not excluded,1: Yes,0: Neither,3: Often (5-7 days),3: 2-4 hours,1: Seldom (1-2 days),1: Less than 1 hour,0: Never,0: Never,3: Often (5-7 days),0: Never,1: Yes,1: Yes,0: No,0: No,1: Yes,0: No,0: No,0: No,0: None,5: More than 6 flights,1: Yes,#na#,1: Yes,#na#,1: Yes,2: 2-3 days per week,1: Yes,1: One day per week or less,2: Very Good,3: Not limited at all,3: Not limited at all,5: None of the time,5: None of the time,3: Some of the time,3: Some of the time,2: A little bit,3: Some of the time,2: Most of the time,2: Most of the time,4: A little of the time,1: Private Doctor,1: Yes,1: Yes,0: Does not report RA/inflam arth,0: No,1: Yes,0: No,1: Yes,0: No,1: Yes,1: Yes,0: No,0: No,0: No,0: No,1: Yes,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: Not used,0: No,0: Not used,0: No,0: No,0: No,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: No,0: No,1: Yes,2: Risedronate only,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,1: Yes,2: Some of the time (1-2 days),1: Rarely/none of the time (<1 day),2: Some of the time (1-2 days),4: Most or all of the time (5-7 days),2: Some of the time (1-2 days),2: Some of the time (1-2 days),1: Rarely/none of the time (<1 day),4: Most or all of the time (5-7 days),4: Most or all of the time (5-7 days),2: Some of the time (1-2 days),2: Some of the time (1-2 days),2: Some of the time (1-2 days),1: Rarely/none of the time (<1 day),2: Some of the time (1-2 days),1: Rarely/none of the time (<1 day),2: Some of the time (1-2 days),1: Rarely/none of the time (<1 day),3: Much of the time (3-4 days),4: Most or all of the time (5-7 days),1: Rarely/none of the time (<1 day),0: No,0: None,0: No,0: No,1: Yes,1: Current,0: No,0: Never,1: <1/week,1: Yes,0: No,0: No,0: No,0: No,0: No,"0: No, not regularly",0: No,0: No,0: No,0: No,0: No,0: No,1: Yes,0: No,0: No,0: No,0: No,0: No,0: No,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0.1709,-0.5642,-0.4714,-0.4363,-1.0513,-1.1196,-1.329,-0.3074,0.7351,-0.2208,-0.076,-0.2174,-0.1622,-0.0852,-0.4802,0.3422,-0.5825,-0.5723,2.6781,0.237,-2.1923,0.3596,-1.2354,0.0787,-0.0582,0.0674,-0.3177,-0.5424,-0.4132,-1.0317,1.4533,-2.8257,2.2758,2.2275,0.009,-0.2455,0.1543,-0.1993,-1.0655,-0.2426,-0.9715,-0.8764,-1.0864,-0.1876,-0.4316,-0.8585,-1.1695,-0.3608,-0.8088,-1.0972,-0.1927,-0.6832,-1.1948,-0.4139,-0.6048,-0.1927,-1.3513,-1.329,-0.2006,-0.265,-1.1252,-1.028,-1.064,-1.0859,-1.1283,-1.1163,-1.344,-0.1758,-1.2169,-0.3196,-0.8917,-1.0715,-1.0838,-0.4014,-1.146,-0.5302,-1.0481,-0.8111,-0.5661,0.6828,-0.5715,0.3254,-0.0262,0.5749,-0.7581,0.1015,0.2059,-0.3747,-0.5885,-0.7012,-1.0061,-1.3592,-0.0923,-0.9524,-0.286,-0.6243,-1.0488,-0.6595,-0.9658,-0.3962,-1.0312,-1.2564,-0.892,-1.1867,-1.255,-0.7093,-0.5304,-1.1841,-0.6073,-1.3241,-0.6506,-0.9113,-0.1793,-1.3921,-0.228,-0.2281,2.2426,1.6063,-0.8974,2.0289,-1.0719,-0.5585,-0.5985,[ 3.342992 2.253831 -0.972666 0.17061 ]
0,0.0,2.0,1.0,M,Not Hispanic or Latino,White,3: College graduate,1: Married,4: Four,2: $10K to < $25K,1: Yes,1: Works for pay,1: Right handed,0: No bumps either hand,0: No,0: No,2: Frequent pain at least one knee at IEI,1: Yes,1: Yes,1: Yes,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: Neither,3: Both,#na#,#na#,#na#,4: Constantly,4: Totally,1: Mildly,3: Severe,2: 2,0: No,0: No,0: Never,0: No,0: No,0: Never,0: No,0: No,1: 1,0: Normal,1: Bony enlargement,0: Normal,0: Normal,0: Normal,3: 3,0: Normal,1: Bony enlargement,1: Bony enlargement,0: Normal,1: Bony enlargement,1: Right,1: Stands without using arms,1: Yes,1: Completes 5 stands without using arms,1: Completes 5 stands without using arms,1: Completed,1: Completed,0: No,0: No,0: Does not meet criterion,1: Yes,0: No,1: Completed test without stopping,1: Yes,0: Not excluded,0: No,0: Neither,3: Often (5-7 days),3: 2-4 hours,1: Seldom (1-2 days),3: 2-4 hours,0: Never,0: Never,0: Never,2: Sometimes (3-4 days),1: Yes,1: Yes,0: No,1: Yes,0: No,0: No,1: Yes,0: No,0: None,5: More than 6 flights,0: No,0: None,0: No,0: None,1: Yes,1: One day per week or less,0: No,0: None,3: Good,"2: Yes, limited a little","2: Yes, limited a little",4: A little of the time,3: Some of the time,5: None of the time,5: None of the time,2: A little bit,2: Most of the time,4: A little of the time,5: None of the time,5: None of the time,1: Private Doctor,0: No,0: No,0: Does not report RA/inflam arth,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,1: Yes,0: No,0: No,1: Yes,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: Not used,0: No,0: Not used,0: No,0: No,0: No,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: No,0: No,0: No,0: No bisphosphonates taken,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),4: Most or all of the time (5-7 days),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),2: Some of the time (1-2 days),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),4: Most or all of the time (5-7 days),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),4: Most or all of the time (5-7 days),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Yes,1: One,0: No,0: No,0: No,0: Never,0: No,0: Never,1: <1/week,0: No,0: No,0: No,0: No,0: No,0: No,"0: No, not regularly",1: Yes,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,-0.8051,0.6595,-0.9986,0.6227,-0.0045,-0.6012,0.239,-0.3074,0.3647,-0.2208,-0.076,-0.2174,1.9278,1.5835,1.4764,-0.8318,0.7174,0.7311,-1.5293,-0.0341,1.4016,0.3596,1.6514,1.1627,-0.0582,0.0674,2.8137,-0.0359,-0.723,0.7373,-2.0254,0.6857,-0.4204,-0.547,-0.5287,-0.2455,-0.7234,-0.9596,-0.335,-1.1276,-0.9715,-0.5181,-1.0864,-0.8212,0.2894,-1.3179,-0.9966,-0.6668,-0.9914,-0.916,-0.1927,-0.9449,-0.503,-0.8779,-1.2018,-0.1927,-0.8747,-0.8743,-0.6905,0.7921,-0.8544,-1.2317,-0.8364,-0.6257,-0.2874,-1.1658,-1.1622,-0.8605,-0.7659,-1.365,-1.3447,-0.4253,-1.3526,-1.0584,-1.0132,-0.8649,-0.605,-0.9159,-0.6087,-0.5794,0.825,0.1958,0.7481,-0.7032,0.5856,-0.0371,-0.0423,-0.6715,-0.7348,-1.1131,-0.631,-1.5904,-0.6925,-0.3773,-0.5105,-0.6243,-1.0488,-0.6595,-0.9658,-0.3962,-1.0312,-1.2564,-0.892,-1.1867,-1.255,-0.7093,-0.5304,-1.1841,-0.6073,-1.3241,-0.6506,-0.9113,-0.1793,-1.6383,-0.228,-0.2281,2.2426,-0.0932,-0.8974,1.3963,-1.413,-0.5539,1.5089,[22.18066 -2.56101 3.635217 0.921759]
0,0.0,3.0,1.0,M,Not Hispanic or Latino,White,3: College graduate,2: Widowed,0: Zero,3: $25K to < $50K,0: No,4: Not working other reasons,1: Right handed,1: Right hand,0: No,1: Yes,0: No knee pain either knee at IEI,0: No,1: Yes,0: No,1: Yes,0: No,0: No,0: No,1: Yes,0: No,0: No,0: Neither,3: Both,#na#,0: None,2: Moderate,1: Monthly,0: Not at all,0: Not at all,1: Mild,1: 1,0: No,0: No,0: Never,0: No,0: No,0: Never,0: No,0: No,3: 3,0: Normal,1: Bony enlargement,1: Bony enlargement,1: Bony enlargement,0: Normal,4: 4,1: Bony enlargement,1: Bony enlargement,1: Bony enlargement,0: Normal,1: Bony enlargement,1: Right,1: Stands without using arms,1: Yes,1: Completes 5 stands without using arms,1: Completes 5 stands without using arms,1: Completed,1: Completed,0: No,0: No,0: Does not meet criterion,1: Yes,0: No,1: Completed test without stopping,1: Yes,0: Not excluded,0: No,0: Neither,3: Often (5-7 days),3: 2-4 hours,2: Sometimes (3-4 days),3: 2-4 hours,0: Never,0: Never,0: Never,0: Never,1: Yes,1: Yes,0: No,1: Yes,1: Yes,1: Yes,1: Yes,0: No,0: None,5: More than 6 flights,0: No,0: None,0: No,0: None,1: Yes,1: One day per week or less,0: No,0: None,2: Very Good,3: Not limited at all,"2: Yes, limited a little",5: None of the time,4: A little of the time,5: None of the time,5: None of the time,1: Not at all,2: Most of the time,2: Most of the time,4: A little of the time,5: None of the time,1: Private Doctor,1: Yes,0: No,0: Does not report RA/inflam arth,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: Not used,0: No,0: Not used,0: No,0: No,0: No,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: No,0: No,0: No,0: No bisphosphonates taken,0: No,0: No,0: No,0: No,0: No,0: No,0: No,1: Yes,0: No,0: No,0: No,0: No,0: No,0: No,1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),4: Most or all of the time (5-7 days),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),4: Most or all of the time (5-7 days),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),4: Most or all of the time (5-7 days),1: Rarely/none of the time (<1 day),2: Some of the time (1-2 days),1: Rarely/none of the time (<1 day),4: Most or all of the time (5-7 days),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),0: No,0: None,1: Yes,0: No,1: Yes,2: Former,1: Yes,2: Former,4: 8-14 drinks/week,0: No,0: No,0: No,0: No,0: No,0: No,"1: Yes, fairly regularly",1: Yes,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,1.5807,-0.7445,0.4471,-0.4363,-0.0045,-0.0528,-0.3501,-0.3074,0.3647,-0.2208,-0.076,-0.2174,-0.6847,-0.711,0.2807,-0.2793,-0.2802,0.4915,0.066,-0.0341,-0.2021,0.0862,-0.1892,0.6207,-0.0582,0.0674,0.4184,0.5339,0.3056,-0.7448,0.0112,0.3394,0.9277,-0.7102,0.8155,-0.2455,-0.5575,-0.5633,-0.2965,-0.4616,-0.6116,-0.3631,-0.4002,-0.6701,-0.4002,0.4437,-0.6467,-0.4522,-0.919,-0.6149,-0.1927,-0.7356,-0.2112,0.1429,-0.5525,-0.1927,-0.3804,-0.227,-0.5074,-0.659,-0.4524,-0.2441,-0.517,-0.1924,-0.1595,-0.0832,-0.4207,-0.6114,-0.4257,0.5125,0.3163,-0.1527,-0.7769,0.6423,-0.0171,-0.3771,-0.5584,-0.5533,0.9094,-0.7929,0.3461,-0.2203,-0.6638,-0.5052,0.6887,0.0958,0.2167,-0.8238,-0.6671,0.8432,-1.0998,-0.5501,-0.747,-0.569,-0.4656,-0.0948,0.3238,-0.0652,0.2285,-0.1103,-0.7552,0.9399,0.3339,0.3788,0.9362,-0.0213,-0.0623,0.7065,-0.4759,0.5238,-0.5211,0.2687,-0.1793,-0.3567,-0.228,-0.2281,-0.4243,-0.6037,0.5521,0.1957,-0.6166,0.5112,-0.3136,[ 6.827854 2.262962 -1.075746 0.697151]
0,0.0,1.0,0.0,F,Not Hispanic or Latino,White,3: College graduate,1: Married,1: One,5: $100K or greater,1: Yes,1: Works for pay,1: Right handed,2: Left hand,1: Yes,0: No,1: Infrequent knee pain only at IEI,1: Yes,1: Yes,1: Yes,1: Yes,0: No,0: No,0: No,1: Yes,0: No,0: No,0: Neither,3: Both,2: Moderate,1: Mild,2: Moderate,2: Weekly,1: Mildly,1: Mildly,1: Mild,0: Very good,0: No,0: No,0: Never,0: No,0: No,0: Never,0: No,0: No,5: 5,1: Bony enlargement,1: Bony enlargement,1: Bony enlargement,1: Bony enlargement,1: Bony enlargement,5: 5,1: Bony enlargement,1: Bony enlargement,1: Bony enlargement,1: Bony enlargement,1: Bony enlargement,1: Right,1: Stands without using arms,1: Yes,1: Completes 5 stands without using arms,1: Completes 5 stands without using arms,1: Completed,1: Completed,0: No,0: No,0: Does not meet criterion,1: Yes,0: No,1: Completed test without stopping,1: Yes,0: Not excluded,0: No,1: Right,3: Often (5-7 days),4: More than 4 hours,3: Often (5-7 days),3: 2-4 hours,3: Often (5-7 days),3: Often (5-7 days),2: Sometimes (3-4 days),2: Sometimes (3-4 days),1: Yes,0: No,0: No,1: Yes,1: Yes,1: Yes,1: Yes,1: Yes,1: One day per week or less,4: 5 to 6 flights,0: No,0: None,0: No,0: None,1: Yes,1: One day per week or less,1: Yes,2: 2-3 days per week,1: Excellent,3: Not limited at all,3: Not limited at all,5: None of the time,5: None of the time,5: None of the time,5: None of the time,1: Not at all,2: Most of the time,2: Most of the time,5: None of the time,5: None of the time,1: Private Doctor,1: Yes,1: Yes,0: Does not report RA/inflam arth,0: No,1: Yes,1: Yes,0: No,1: Yes,1: Yes,0: No,0: No,0: No,0: No,0: No,1: Yes,0: No,0: No,0: No,1: Yes,0: No,0: No,0: No,1: Yes,0: No,1: Yes,0: No,0: Not used,1: Yes,4: Nearly every day or every day,0: No,0: No,0: No,0: Not used in last 30 days,1: Used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,1: Used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: No,0: No,0: No,0: No bisphosphonates taken,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),4: Most or all of the time (5-7 days),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),4: Most or all of the time (5-7 days),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),2: Some of the time (1-2 days),4: Most or all of the time (5-7 days),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),4: Most or all of the time (5-7 days),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),0: No,0: None,0: No,0: No,1: Yes,2: Former,0: No,0: Never,2: 1-3 drinks/week,0: No,0: No,0: No,0: No,0: No,0: No,"1: Yes, fairly regularly",1: Yes,1: Yes,1: Yes,0: No,0: No,0: No,1: Yes,1: Yes,0: No,1: Yes,0: No,0: No,0: No,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,-0.3713,-1.0886,-1.713,-0.9658,-0.882,-1.228,-0.8612,-0.3074,-1.9531,-0.2208,-0.076,-0.2174,-0.8153,0.332,-0.1541,-0.6937,0.3614,0.4806,1.8905,-1.1184,-1.3886,-0.7342,-1.3927,-0.9278,-0.0582,0.0674,-0.9752,-2.0618,2.3505,0.4026,0.7758,0.3837,-0.4204,-0.7102,0.2106,-0.2455,0.1103,-0.0896,-1.1424,-0.2828,-1.1515,-0.8729,-1.0864,-0.1759,-0.4088,-1.0701,-1.0942,-1.1079,-0.5179,-1.0386,-0.1927,-0.5131,-1.1335,-0.9707,-0.7835,-0.1927,-1.2149,-0.8568,-0.1368,-0.507,-0.9027,-0.9083,-1.1834,-0.8423,-1.0826,-1.1114,-0.9902,0.0122,-0.9914,-1.2592,-1.1937,-1.2229,-0.3937,-0.8631,-0.2827,0.1131,-1.1181,-0.4546,0.1291,0.3372,-0.7253,0.7294,-0.6638,0.4688,-0.7181,0.7688,-0.6195,-0.6636,-0.0907,-0.9072,-0.9123,0.1435,-0.4742,-0.7607,0.2527,3.4351,3.0691,3.4305,3.2142,0.6046,-0.1485,0.9399,0.3339,1.9442,0.9362,3.4186,4.9701,0.7065,1.1654,1.4478,0.4714,4.2019,-0.1793,-0.0246,0.3336,-0.2281,-0.4243,1.4362,-0.4142,-1.3489,1.3341,0.2654,-0.5985,[-4.460113 -0.094947 0.58811 -0.017589]
0,0.0,0.0,0.0,F,Hispanic or Latino,White,3: College graduate,1: Married,1: One,#na#,0: No,4: Not working other reasons,1: Right handed,0: No bumps either hand,0: No,0: No,0: No knee pain either knee at IEI,0: No,#na#,0: No,1: Yes,1: Yes,1: Yes,0: No,0: No,0: No,0: No,0: Neither,3: Both,0: None,#na#,0: None,0: Never,0: Not at all,0: Not at all,0: None,0: Very good,0: No,0: No,0: Never,0: No,0: No,0: Never,0: No,0: No,0: 0,0: Normal,0: Normal,0: Normal,0: Normal,0: Normal,1: 1,0: Normal,0: Normal,1: Bony enlargement,0: Normal,0: Normal,1: Right,1: Stands without using arms,1: Yes,1: Completes 5 stands without using arms,1: Completes 5 stands without using arms,1: Completed,1: Completed,0: No,0: No,0: Does not meet criterion,1: Yes,0: No,1: Completed test without stopping,1: Yes,0: Not excluded,0: No,0: Neither,3: Often (5-7 days),3: 2-4 hours,0: Never,#na#,0: Never,0: Never,0: Never,0: Never,1: Yes,1: Yes,0: No,1: Yes,1: Yes,1: Yes,0: No,0: No,0: None,5: More than 6 flights,1: Yes,2: 2-3 days per week,0: No,0: None,1: Yes,1: One day per week or less,1: Yes,2: 2-3 days per week,3: Good,3: Not limited at all,3: Not limited at all,5: None of the time,5: None of the time,5: None of the time,5: None of the time,1: Not at all,2: Most of the time,2: Most of the time,4: A little of the time,5: None of the time,1: Private Doctor,1: Yes,1: Yes,0: Does not report RA/inflam arth,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: Not used,0: No,0: Not used,0: No,0: No,0: No,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: Not used in last 30 days,0: No,0: No,0: No,0: No bisphosphonates taken,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,0: No,2: Some of the time (1-2 days),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),4: Most or all of the time (5-7 days),1: Rarely/none of the time (<1 day),2: Some of the time (1-2 days),2: Some of the time (1-2 days),3: Much of the time (3-4 days),1: Rarely/none of the time (<1 day),2: Some of the time (1-2 days),2: Some of the time (1-2 days),4: Most or all of the time (5-7 days),1: Rarely/none of the time (<1 day),2: Some of the time (1-2 days),1: Rarely/none of the time (<1 day),3: Much of the time (3-4 days),1: Rarely/none of the time (<1 day),2: Some of the time (1-2 days),1: Rarely/none of the time (<1 day),1: Rarely/none of the time (<1 day),1: Yes,1: One,0: No,0: No,0: No,0: Never,0: No,0: Never,3: 4-7 drinks/week,0: No,0: No,0: No,0: No,0: No,0: No,"1: Yes, fairly regularly",0: No,1: Yes,1: Yes,0: No,0: No,1: Yes,0: No,0: No,0: No,0: No,0: No,0: No,0: No,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,1.0385,-0.4112,-0.0121,-0.1716,-0.882,-1.1497,-0.9392,-0.3074,0.3647,-0.2208,-0.076,-0.2174,2.9727,0.9578,-0.8063,0.0659,-0.0081,-0.3472,-1.4246,1.3214,1.8188,1.18,1.0024,-1.3149,-0.0582,0.0674,1.0304,-2.2518,-0.2892,-1.0317,0.3666,0.0111,-0.4204,0.5954,-0.5287,-0.2455,2.1537,3.4218,0.78,3.2632,1.9081,0.3155,1.6583,3.9306,-0.4316,1.3396,1.1069,0.0083,3.5342,1.7677,-0.1927,2.6932,0.0338,1.739,3.1885,-0.1927,0.7598,0.77,5.6238,-0.4527,1.2824,2.9376,1.0034,0.9989,-0.3057,1.4405,2.7737,3.4735,1.5425,-0.0008,1.3734,-0.1527,4.49,-0.4955,1.7979,5.4883,1.4938,3.0308,-0.1405,0.5777,-0.9661,1.3655,-1.1648,0.6398,-1.0337,1.2974,0.1897,1.3379,3.6168,0.2254,-0.5372,1.5306,0.8353,0.5812,4.338,-0.6243,-1.0488,-0.6595,-0.9658,0.3187,-1.0312,-1.2564,-0.892,0.3788,-1.255,-0.7093,-0.1793,-1.1841,-0.0601,-1.3241,0.2125,-0.9113,-0.1793,2.0109,0.3336,-0.2281,-0.4243,0.5864,1.9984,1.023,-0.4909,-0.3242,-0.2748,[ 1.361042 -0.013139 0.426783 0.422741]


In [65]:
with open('data/labels_df_wo_pain_names_ncols_365.pickle', 'wb') as f:
    pickle.dump(tdb, f)

In [122]:
# tmp = labels_df.loc[cat_names[0]].apply(lambda x:x.cat.codes)

In [6]:
def preprocess(labels_df, cat_names, cont_names):
    # preprocess categorical variables

    c = Categorify(cat_names, cont_names)
    c(labels_df)

    labels_df[cat_names] = labels_df[cat_names].apply(lambda x: x.cat.codes)
    
    
    # preprocess continuous vaiables
    f = FillMissing(cat_names, cont_names)
    n = Normalize(cat_names, cont_names)
    f(labels_df)
    n(labels_df)
    
    for t, cname in zip(labels_df.dtypes, labels_df.columns):
        if t == bool:
            labels_df[cname] = labels_df[cname].astype('int8')
    
    return labels_df

In [7]:
labels_df = preprocess(labels_df, cat_names, cont_names)

## Let's make dataset and dataloader

In [8]:
labels_df = labels_df.reset_index()

In [21]:
train_set = TabularData(labels_df, 'train')
val_set = TabularData(labels_df, 'val')

In [22]:
trainset_loader = DataLoader(train_set, 
                             batch_size=32,
                             shuffle=True,
                             num_workers=1,
                             drop_last=True)

valset_loader = DataLoader(val_set, 
                             batch_size=32,
                             shuffle=False,
                             num_workers=1,
                             drop_last=True)

In [27]:
from fastai.tabular import data

In [47]:
def def_emb_sz(classes, n, sz_dict={}):
    "Pick an embedding size for `n` depending on `classes` if not given in `sz_dict`."
    n_cat = len(classes[n].unique())
    sz = sz_dict.get(n, int(data.emb_sz_rule(n_cat)))  # rule of thumb
    return n_cat,sz

In [53]:
emb_sz = [def_emb_sz(train_set.labels_df, n) for n in cat_names]

In [54]:
model = TabularModel(emb_sz, len(cont_names), 
                     out_sz=3, 
                     layers=[256,128], 
                     ps=None, 
                     emb_drop=0.,
                     y_range=torch.tensor([-0.1, 1.1], device=defaults.device), 
                     use_bn=True)

In [63]:
learn = Learner(tdb, 
               model, 
               metrics=[mean_absolute_error])

In [None]:
tdb.

In [64]:
learn.model.

LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.


TypeError: forward() missing 1 required positional argument: 'x_cont'

In [None]:
def tabular_learner(data:DataBunch, layers:Collection[int], emb_szs:Dict[str,int]=None, metrics=None,
        ps:Collection[float]=None, emb_drop:float=0., y_range:OptRange=None, use_bn:bool=True, **learn_kwargs):
    "Get a `Learner` using `data`, with `metrics`, including a `TabularModel` created using the remaining params."
    emb_szs = data.get_emb_szs(ifnone(emb_szs, {}))
    model = TabularModel(emb_szs, len(data.cont_names), out_sz=data.c, layers=layers, ps=ps, emb_drop=emb_drop,
                         y_range=y_range, use_bn=use_bn)
    return Learner(data, model, metrics=metrics, **learn_kwargs)

In [13]:
xb, yb = tdb.one_batch()

In [59]:
data_dir = PosixPath('data/')
dep_vars = ['pp_0', 'pp_1', 'pp_2']

In [60]:
procs = [FillMissing, Categorify, Normalize]    

In [63]:
test_tdb[0][:5]

pain_meds         0
narcot            0
womac             0
koos            100
Inferred_KLG      1
Name: 9477175_LEFT_0.mat, dtype: object

In [89]:
ItemList??

In [85]:
train_df.head()

Unnamed: 0_level_0,pain_meds,narcot,womac,koos,Inferred_KLG,oa_status,pp_0,pp_1,pp_2,sex,...,V00KQOL2,V00KQOL3,V00KQOL4,V00KGLRS,P02KPNRCV,P02KPNLCV,P01KPR30CV,P01KPL30CV,P01KPACTCV,P01HPR12CV
dess_path,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
9794339_LEFT_0.mat,0,0,0.0,97.2,1,0,0.999981,1.873607e-05,7.424682e-07,F,...,0: Not at all,1: Mildly,1: Mild,0: Very good,0: No,1: Yes,0: No,0: No,0: No Limits or avoidance,0: No
9282182_RIGHT_0.mat,0,0,0.0,100.0,0,0,0.999759,0.0002402764,3.842015e-07,F,...,0: Not at all,0: Not at all,1: Mild,0: Very good,0: No,0: No,0: No,1: Yes,0: No Limits or avoidance,0: No
9359202_LEFT_0.mat,0,0,0.0,100.0,3,1,0.999816,0.0001823857,1.635602e-06,M,...,0: Not at all,0: Not at all,0: None,0: Very good,0: No,0: No,0: No,0: No,0: No Limits or avoidance,0: No
9371094_LEFT_0.mat,0,0,0.0,100.0,0,0,0.999998,5.268256e-07,1.587867e-06,M,...,0: Not at all,0: Not at all,0: None,0: Very good,0: No,0: No,0: No,0: No,0: No Limits or avoidance,0: No
9362978_LEFT_0.mat,1,0,0.0,97.2,2,1,0.87574,0.1242253,3.452085e-05,F,...,1: Mildly,0: Not at all,1: Mild,2: 2,0: No,0: No,0: No,0: No,1: Avoids,0: No


In [88]:
tmp = ((TabularList.from_df(train_df,
                           path=data_dir,
                           cat_names=cat_names,
                           cont_names=cont_names,
                           procs=procs))
        .split_by_idx(list(range(2665, len(train_df))))
        ._label_from_list(labels = train_df.loc[:, ['pp_0', 'pp_1']].values)
        .add_test(test_tdb)
        .databunch()
      )

KeyError: 'pp_0'

In [83]:
tmp2 = tmp._label_from_list(labels = train_df.loc[:, ['pp_0', 'pp_1']])

Exception: Your data isn't split, if you don't want a validation set, please use `split_none`.

In [27]:
small_data = ((TabularList.from_df(train_df,
                           path=data_dir,
                           cat_names=cat_names,
                           cont_names=cont_names,
                           procs=procs))
        .split_by_idx(list(range(2665, len(train_df))))
        .label_from_df(cols=dep_vars)
        .add_test(test_tdb)
        .databunch())

In [53]:
xb, yb = small_data.one_batch()