In [10]:
run FunctionsLoader.ipynb

0.2.0_4
0.2.0_4


In [11]:
class Predictions():
    """Pipeline that loads mobility + PHQ data, builds feature sets, fits models and plots them.

    Intended call order: loadData() -> prepareInput() -> computePredictions()
    -> plotPredictions(). Each step stores its result on the instance and the
    next step reads it from there.

    Relies on names brought into the notebook namespace by FunctionsLoader.ipynb
    (LocationData, PhqData, PhqLocMerge, approachAvg2/3/4,
    ClassifierMultiDimenHelper, ClassifierPersonalisedHelper, Plots).
    """

    # Value passed as `iteration=` to every compute_svm_accuracy call.
    _ITERATIONS = 20

    def __init__(self, min_days=30):
        """Store the threshold used both for data filtering and for user filtering.

        min_days: forwarded to LocationData.filterUser / PhqData.computeScores and
                  to the classifier helpers; also the (exclusive) minimum number of
                  feature rows a user needs in computePredictions.
        """
        self.min_days = min_days

    def loadData(self, time_window, sd_ratio, t_hist):
        """Read mobility and PHQ data, merge them and keep the result in self.data.

        time_window: length of one time-series step, in minutes (e.g. 30).
        sd_ratio:    should be between 0 and 1 (e.g. 0.25 for using 1/4 of sd);
                     forwarded to PhqData.convertTo2Labels.
        t_hist:      forwarded to PhqLocMerge.mergeData.
                     NOTE(review): presumably the length of mobility history
                     attached to each PHQ score - confirm against PhqLocMerge.
        """
        ### read mobility data ###
        ld = LocationData()
        ld.readLocationData(precision_value=4)  # read file
        ld.filterUser(self.min_days)  # filter users using the min_days threshold
        # minutes -> milliseconds
        ld.convertToEqualTimeSeries(time_window * 60 * 1000)
        dt = ld.createUserDailyTrajectoryLists()  # create daily trajectories of users

        ### read mood data ###
        # Named `phq` rather than `pd` so the conventional pandas alias is not shadowed.
        phq = PhqData()
        phq.readPhqData()
        phq.computeScores(ld.users, self.min_days)
        phq.computeDivergence()
        phq.convertTo2Labels(sd_ratio=sd_ratio)

        ### merge data ###
        mlm = PhqLocMerge()
        mlm.mergeData(dt, phq.phq_scores, t_hist)

        ### set data as an instance variable ###
        self.data = mlm.data

    def prepareInput(self):
        """Derive three feature representations from self.data and store their combinations.

        Sets self.data1 .. self.data7, each a list of one to three representations:
        [dt2], [dt3], [dt4], [dt2,dt3], [dt2,dt4], [dt3,dt4], [dt2,dt3,dt4].
        Requires loadData() to have been called first.
        """
        ###  transform mobility data ###
        users = list(set([m[0] for m in self.data]))
        dt2 = approachAvg2(lm_data=self.data, users=users)  # Computing displacement changes
        dt3 = approachAvg3(lm_data=self.data, users=users)  # Computing displacement [normalised]
        # NOTE(review): the original comment here duplicated the one for dt3;
        # what approachAvg4 computes with topn=20 is not visible here - confirm.
        dt4 = approachAvg4(lm_data=self.data, users=users, topn=20)

        ###  combinations of data for using different combinations of features ###
        self.data1 = [dt2]
        self.data2 = [dt3]
        self.data3 = [dt4]
        self.data4 = [dt2, dt3]
        self.data5 = [dt2, dt4]
        self.data6 = [dt3, dt4]
        self.data7 = [dt2, dt3, dt4]

    def computePredictions(self, is_generic=False, predict_raw_score=False, layers=5, act_func='', dropout_rate=0.20):
        """Fit one model per feature combination and store them in self.models.

        is_generic:        True  -> ClassifierMultiDimenHelper.compute_svm_accuracy
                           False -> ClassifierPersonalisedHelper.compute_svm_accuracy
        predict_raw_score, layers, act_func, dropout_rate:
                           forwarded only to the personalised helper
                           (ignored when is_generic is True).

        self.models is ordered like self.data1 .. self.data7.
        Requires prepareInput() to have been called first.
        """
        ### filter users ###
        # Keep users that have strictly more than min_days rows in the first
        # representation (self.data1[0]); row[0] is compared against the user id.
        users = list(set([m[0] for m in self.data]))
        filtered_users = []
        for u in users:
            row_count = sum(1 for d in self.data1[0] if d[0] == u)
            if row_count > self.min_days:
                filtered_users.append(u)
        # Single pre-formatted argument: same output under Python 2 and Python 3.
        print('Number of users %d' % len(filtered_users))

        ### computing prediction accuracy ###
        if is_generic:
            helper = ClassifierMultiDimenHelper
            extra_args = {}
        else:
            helper = ClassifierPersonalisedHelper
            extra_args = dict(predict_raw_score=predict_raw_score, layers=layers,
                              act_func=act_func, dropout_rate=dropout_rate)

        feature_sets = [self.data1, self.data2, self.data3, self.data4,
                        self.data5, self.data6, self.data7]
        self.models = [
            helper.compute_svm_accuracy(feature_set, iteration=self._ITERATIONS,
                                        users=filtered_users, min_days=self.min_days,
                                        **extra_args)
            for feature_set in feature_sets
        ]

    def plotPredictions(self, include_mt=False, t_hist=14, file_path=''):
        """Hand this instance's models to Plots.multiModel.

        include_mt, t_hist, file_path: forwarded unchanged to Plots.multiModel.
        Requires computePredictions() to have been called first.
        """
        # Names follow the order of self.models (data1 .. data7).
        model_names = ['IR 1', 'IR 2', 'IR 3', 'IR 1+2', 'IR 1+3', 'IR 2+3', 'IR 1+2+3']
        label_dict = {1: 'Sensitivity', 0: 'Specificity'}
        # Use self.models: the previous code read the notebook-global `p`, which
        # plotted the wrong instance (or failed) whenever `p` was not this object.
        Plots.multiModel(models=self.models, model_names=model_names, label_dict=label_dict, include_mt=include_mt, t_hist=t_hist, file_path=file_path)
        
        


In [12]:
# all_models = [] # store models of all iterations (for debugging)
# for layers in [5,7,9]:
#     for time_window in [10,30,60]:
#         for t_hist in [1,7,14]:
#             print 'Iteration values:', time_window, t_hist
#             p = Predictions(30)
#             p.loadData(time_window=time_window, sd_ratio=0.25, t_hist=t_hist)
#             p.prepareInput()
#             p.computePredictions(is_generic=False, predict_raw_score=False, layers=layers,act_func='tanh')
#             all_models = p.models
#             file_path = 'plots/classification/tanh/prediction_time_window_' + str(time_window) + '_t_hist_' + str(t_hist)  + '_ae_' + str(layers)  + '.pdf'
#             p.plotPredictions(include_mt=True, t_hist=t_hist, file_path=file_path)

In [None]:
time_window = 10
t_hist = 14
layers=5
#for dropout_rate in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]:
for dropout_rate in [0.0, 0.7,0.8, 0.9,1]:
    print 'dropout_rate', dropout_rate
    p = Predictions(30)
    p.loadData(time_window=time_window, sd_ratio=0.25, t_hist=t_hist)
    p.prepareInput()
    p.computePredictions(is_generic=False, predict_raw_score=False, layers=layers,act_func='elu', dropout_rate=dropout_rate)
    file_path = 'plots/svm/classification/dropouts/prediction_dropout_rate_' + str(int(dropout_rate*100))  + '.pdf'
    p.plotPredictions(include_mt=True, t_hist=t_hist, file_path=file_path)

dropout_rate 0.0
Initial user count 6705
User count 5616
Min time ratio 0.500023134225
Number of users 44
Pos count: 900
Neg count: 1489
Number of users 24
[0, 1]
dropout_rate 0.7
Initial user count 6705
User count 5616
Min time ratio 0.500023134225
Number of users 44
Pos count: 900
Neg count: 1489
Number of users 24
