In [3]:
import pickle
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.feature_selection import RFECV
from sklearn.svm import SVR
from sklearn.pipeline import Pipeline
from sklearn.ensemble import ExtraTreesRegressor
import operator

In [10]:
def adjust_to_day(steps):
    """Restrict a step series to 9:00-21:00 and total it in 30-minute bins.

    Parameters
    ----------
    steps : pandas.Series
        Step counts indexed by a DatetimeIndex (``between_time`` and
        ``resample`` both require one).

    Returns
    -------
    pandas.Series
        Timezone-naive series holding the sum of steps per 30-minute bin.
        Missing values count as 0.
    """
    daytime = steps.between_time('9:00', '21:00')
    # '30min' replaces the '30T' alias, which pandas deprecated in 2.2; both
    # spellings mean the same 30-minute frequency.
    # NOTE(review): resample() emits every bin between the first and last
    # timestamp, so the hours dropped by between_time() come back as zero-sum
    # bins and are included in any later mean - confirm this is intended.
    return daytime.fillna(0).tz_localize(None).resample('30min').sum().fillna(0)

In [2]:
def adjust_to_thirty_minute(steps):
    """Total a step series in 30-minute bins.

    Parameters
    ----------
    steps : pandas.Series
        Step counts indexed by a DatetimeIndex.

    Returns
    -------
    pandas.Series
        Timezone-naive series holding the sum of steps per 30-minute bin.
        Missing values and empty bins count as 0.
    """
    # '30min' replaces the '30T' alias, which pandas deprecated in 2.2; both
    # spellings mean the same 30-minute frequency.
    return steps.fillna(0).tz_localize(None).resample('30min').sum().fillna(0)

In [14]:
def get_processed(file_name,
                  root='../../../../Volumes/dav/HeartSteps/pooling_rl_shared_data/processed/'):
    """Load a pickled object from the processed-data directory.

    Parameters
    ----------
    file_name : str
        Base name of the pickle, without the ``.pkl`` extension.
    root : str, optional
        Directory containing the pickle. Defaults to the path this notebook
        has always used, so existing calls are unaffected; override it when
        the data lives elsewhere.

    Returns
    -------
    object
        Whatever object was stored in ``<root>/<file_name>.pkl``.

    Raises
    ------
    FileNotFoundError
        If the pickle does not exist under ``root``.
    """
    import os  # local import: the notebook's import cell does not bring in os

    path = os.path.join(root, '{}.pkl'.format(file_name))
    # SECURITY: pickle.load can execute arbitrary code; only point this at
    # files produced by a trusted pipeline.
    with open(path, 'rb') as f:
        return pickle.load(f)

In [33]:
def make_feature_matrix():
    """Assemble one feature row per participant from the processed pickles.

    Returns
    -------
    tuple
        ``(rows, feature_names, pids)`` where ``rows[i]`` holds the values
        for ``pids[i]`` in ``feature_names`` order: every key of the
        'other_activity_no_missing' record, followed by the three aggregate
        measures.
    """
    activity = get_processed('other_activity_no_missing')
    conscientiousness = get_processed('agg_conscientiousness_measures')
    self_efficacy = get_processed('agg_self_efficacy_measures')
    activity_choice = get_processed('agg_activity_choice_measures')

    # The record stored under key 1 supplies the list of activity columns.
    # (An earlier variant restricted this to
    # ['vigact.days','vigact.hrs','vigact.min','modact.days'].)
    activity_columns = list(activity[1].keys())
    feature_names = activity_columns + ['conscientiousness', 'self_efficacy', 'activity_choice']

    # NOTE(review): key 4 is excluded; the reason is not visible here.
    pids = [pid for pid in activity.keys() if pid != 4]

    rows = [
        [activity[pid][column] for column in activity_columns]
        + [conscientiousness[pid], self_efficacy[pid], activity_choice[pid]]
        for pid in pids
    ]
    return rows, feature_names, pids

In [5]:
# Load the 'merged_est' pickle; the next cell indexes it by key and then 'steps'.
merged = get_processed('merged_est')

In [6]:
# Grab the 'steps' entry stored under key 1 for inspection.
# NOTE(review): `test` is not referenced by any later cell shown here.
test  = merged[1]['steps']

In [34]:
def fix(x):
    """Convert one raw feature row into numeric values.

    Conversion rules, applied to each element:

    * ``'no'`` and ``'X'`` become 0, ``'yes'`` becomes 1;
    * any other string is parsed with ``float``;
    * missing values (``None``, NaN, or a string that parses to NaN) become 0;
    * every other non-string value is kept unchanged.

    Parameters
    ----------
    x : iterable
        Raw values for one participant.

    Returns
    -------
    list
        Values in the same order as ``x``, containing no NaN.

    Raises
    ------
    ValueError
        If a string is neither a known code nor parseable as a float.
    """
    coded = {'no': 0, 'yes': 1, 'X': 0}
    to_return = []
    for i in x:
        # isinstance (not type(i) == str) so that str subclasses such as
        # numpy.str_ are treated as strings instead of failing in float().
        if isinstance(i, str):
            if i in coded:
                to_return.append(coded[i])
                continue
            value = float(i)
            if pd.isnull(value):
                # Still report the raw entry, but store 0 so a string NaN is
                # handled the same way as a non-string NaN.
                print(i)
                value = 0
            to_return.append(value)
        elif i is None or pd.isnull(float(i)):
            # None is checked first because float(None) raises TypeError.
            to_return.append(0)
        else:
            to_return.append(i)
    return to_return

In [35]:
# Raw feature rows (X), feature names (fn) and the participant ids (pids) the rows belong to.
X,fn,pids = make_feature_matrix()

In [36]:
# Convert every row to numeric values with fix(). This rebinds X, so the raw
# rows are only recoverable by re-running make_feature_matrix().
X = [fix(x) for x in X]

In [37]:
# Standardize the feature matrix with sklearn's preprocessing.scale (default arguments).
X_scaled = preprocessing.scale(X)

In [38]:
def get_training_data_step_counts(pids):
    """Collect the binned daytime step series for each requested participant.

    Parameters
    ----------
    pids : iterable
        Participant ids to look up in the 'merged_est' pickle.

    Returns
    -------
    dict
        Maps each pid found in the merged data to the result of
        ``adjust_to_day`` applied to its 'steps' column. Ids missing from
        the merged data are silently skipped.
    """
    merged_by_pid = get_processed('merged_est')
    return {
        pid: adjust_to_day(merged_by_pid[pid]['steps'])
        for pid in pids
        if pid in merged_by_pid
    }

In [39]:
# Binned step series keyed by pid; only pids present in the merged data appear.
steps = get_training_data_step_counts(pids)

In [40]:
# Map each pid to its position in pids, i.e. its row index in X / X_scaled.
pid_lookup = {pids[i]:i for i in range(len(pids))}

In [41]:
def make_matrix_for_first_analysis(steps,pid_lookup,X):
    """Pair each participant's feature row with their mean step count.

    Parameters
    ----------
    steps : dict
        Maps pid to an object exposing ``.mean()`` (the step series).
    pid_lookup : dict
        Maps pid to that participant's row index in ``X``.
    X : sequence
        Feature rows, indexable by the integers in ``pid_lookup``.

    Returns
    -------
    tuple of list
        ``(feature_rows, targets)``, both in the iteration order of
        ``steps``: one feature row and one mean per participant.
    """
    feature_rows = [X[pid_lookup[pid]] for pid in steps]
    targets = [series.mean() for series in steps.values()]
    return feature_rows, targets
        

In [42]:
# bx: scaled feature row per participant with step data; by: that participant's mean binned step count.
bx,by = make_matrix_for_first_analysis(steps,pid_lookup,X_scaled)

In [43]:
# Standardized copy of the targets.
# NOTE(review): by_scaled is not used in the cells shown here; the regressor is fit on the unscaled `by`.
by_scaled = preprocessing.scale(by)

In [44]:
#selector.support_

In [49]:
# Extra-trees regressor with 10 trees and a fixed seed for repeatable results.
# max_features=9 is hard-coded to the feature count (len(fn) evaluates to 9 in
# this notebook); it must be updated if the feature set changes.
et = ExtraTreesRegressor(n_estimators=10, max_features=9,
                                       random_state=0)

In [50]:
# Number of features; this is the value hard-coded as max_features in the regressor.
len(fn)

9

In [51]:
# Fit on the scaled feature rows against the raw (unscaled) mean step counts.
et.fit(bx, by)

ExtraTreesRegressor(bootstrap=False, criterion='mse', max_depth=None,
          max_features=9, max_leaf_nodes=None, min_impurity_decrease=0.0,
          min_impurity_split=None, min_samples_leaf=1, min_samples_split=2,
          min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
          oob_score=False, random_state=0, verbose=0, warm_start=False)

In [52]:
def nice_dict(feats,fns):
    """Rank named scores from highest to lowest.

    Parameters
    ----------
    feats : sequence
        Scores (e.g. feature importances).
    fns : sequence
        Names, where ``fns[i]`` labels ``feats[i]``.

    Returns
    -------
    list of tuple
        ``(name, score)`` pairs sorted by score in descending order. Despite
        the function name, the result is a list, not a dict.
    """
    # Go through a dict first so a repeated name keeps only its last score.
    score_by_name = {fns[position]: score for position, score in enumerate(feats)}
    ranked = list(score_by_name.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked

In [53]:
# Rank the features by the fitted model's importances, highest first.
nice_dict(et.feature_importances_,fn)

[('modact.days', 0.30184954240302453),
 ('conscientiousness', 0.16214303207770994),
 ('vigact.min', 0.15988951810608681),
 ('vigact.days', 0.10980673670251151),
 ('activity_choice', 0.10237091330875173),
 ('self_efficacy', 0.07667607541177926),
 ('fittracker', 0.039956318641505083),
 ('fitapp', 0.030317289372331302),
 ('vigact.hrs', 0.016990573976299751)]