## Importing the associated modules

In [45]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import matplotlib as plot
import sklearn as sk
from sklearn import metrics, linear_model, ensemble, neural_network, svm, dummy
from helpers import *

# Importing the different features
from ema_features import get_EMA_features_and_target_for_patient
from module_features import get_module_features_for_patient
from context_features import get_weekend_days

# Importing the machine learning module
from predicting import train_algorithms, test_algorithms, eval_algorithms, plot_algorithms, make_algorithms
from feature_selection import backward_selection, forward_selection, correlate_features, precalculated_feature_selection

# Importing multiple patient wrapper
from individual_wrappers import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Initializing constants

In [2]:
# Notebook-wide configuration.

# NOTE(review): presumably the rolling-window length in days (the feature names
# shown further down end in "_7_days"); not referenced in the visible cells — confirm.
SLIDING_WINDOW = 7
# Candidate regularisation strengths; passed as `alphas` to LassoCV and RidgeCV
# in the ml_algorithms cell.
CV_ALPHAS = (0.1, 0.3, 0.5, 0.7, 0.9)
# Passed as the only argument to get_proper_patients().
MAX_PATIENTS = 10
# NOTE(review): relative path, not referenced in the visible cells — presumably
# read by the imported feature/wrapper modules; confirm.
FEATURE_PATH = "data/features/"

## Defining our Machine Learning Algorithms

In [3]:
# Fixed seed for the only stochastic estimator below (the random forest), so
# that a fresh-kernel re-run yields the same fitted model and the same scores.
RANDOM_STATE = 42

# Each entry pairs a display name with an unfitted scikit-learn regressor.
# The list is passed to learn_patients_setups() further down.
ml_algorithms = [
    {
        # L1-regularised linear model; alpha is chosen by cross-validation
        # among CV_ALPHAS.
        "name": "Lasso",
        "model": linear_model.LassoCV(alphas=CV_ALPHAS)
    },
    {
        # L2-regularised linear model; alpha is chosen by cross-validation
        # among CV_ALPHAS.
        "name": "Ridge",
        "model": linear_model.RidgeCV(alphas=CV_ALPHAS)
    },
    {
        # Many very shallow trees (depth 2). Bootstrap sampling and feature
        # sub-sampling are random, hence the explicit random_state.
        "name": "Random Forest",
        "model": ensemble.RandomForestRegressor(
            n_estimators=1000,
            max_depth=2,
            random_state=RANDOM_STATE
        )
    },
    {
        # Baseline: DummyRegressor's default strategy predicts the mean of
        # the training targets.
        "name": "Dummy Mean Regressor",
        "model": dummy.DummyRegressor()
    },
    {
        # SVR uses the RBF kernel by default, which is what the name refers to.
        "name": "SVR RBF",
        "model": svm.SVR()
    }
]

## Initializing patients

In [4]:
# Fetch the patients to work with; MAX_PATIENTS is the only argument passed.
# NOTE(review): get_proper_patients arrives via a star import, so its selection
# criteria are not visible here — presumably MAX_PATIENTS caps the number of
# patients returned; confirm.
proper_patients = get_proper_patients(MAX_PATIENTS)

## Init-Step: Getting Features of the patients

In [8]:
# Work on a subset: elements 1..5 of proper_patients (at most five entries;
# element 0 is skipped).
# NOTE(review): the name is singular but holds a slice, and the reason for
# excluding element 0 is not stated — confirm.
sample_patient = proper_patients[1:6]

# Disabled call, kept for reference.
# NOTE(review): presumably the computation whose results are loaded from disk
# in the next cell — confirm.
# sample_scores = get_patients_scores_and_features(sample_patient, ml_algorithms, 20)

In [20]:
# sample_patient
# Load the stored objects for the sampled patients, identified by the
# '_top5_featureselection_' tag.
# NOTE(review): storage location and format are decided inside
# load_patients_object (star-imported) — confirm before relying on them.
sample_scores = load_patients_object(sample_patient, '_top5_featureselection_')

In [29]:
# Disabled inspection snippet: looks up 'top_features' on the first element of
# sample_scores.
# NOTE(review): the list stored as this cell's output was presumably produced
# by this expression before it was commented out — re-enable it to refresh.
# sample_scores[0]['top_features']

['avg_average_ema_q_3_7_days',
 'std_prior_engagement_7_days',
 'max_mod_total_pages_7_days',
 'avg_average_ema_q_2_7_days',
 'avg_mod_total_time_7_days',
 'std_mod_total_time_7_days',
 'max_mod_total_time_7_days',
 'min_count_ema_q_4_7_days',
 'avg_mod_total_pages_7_days',
 'min_average_ema_q_2_7_days',
 'max_average_ema_q_3_7_days',
 'avg_mod_nr_sessions_7_days',
 'std_average_ema_q_5_7_days',
 'weekendDay',
 'min_average_ema_q_3_7_days',
 'max_count_ema_q_2_7_days',
 'std_count_ema_q_1_7_days',
 'avg_average_ema_q_5_7_days',
 'max_prior_engagement_7_days',
 'std_count_ema_q_6_7_days']

In [111]:
# Run the learning setups on the loaded patient objects with every model in
# ml_algorithms.
# NOTE(review): max_features=20 is a magic number — presumably an upper bound
# on the number of features used per patient; confirm in the star-imported
# learn_patients_setups.
end_results = learn_patients_setups(sample_scores, ml_algorithms, max_features=20)



In [115]:
# Display the score for the first element of end_results.
# NOTE(review): presumably the MAE averaged over patients for one setup; what
# index 0 stands for is not visible here — confirm against learn_patients_setups.
get_patients_mean_MAE_score(end_results[0])

2.3157797341587703

In [116]:
# Display the score for the second element of end_results.
# NOTE(review): presumably the MAE averaged over patients for one setup; what
# index 1 stands for is not visible here — confirm against learn_patients_setups.
get_patients_mean_MAE_score(end_results[1])

2.4262548123130991

In [117]:
# Display the score for the third element of end_results.
# NOTE(review): presumably the MAE averaged over patients for one setup; what
# index 2 stands for is not visible here — confirm against learn_patients_setups.
get_patients_mean_MAE_score(end_results[2])

2.347204656122031

In [85]:
# Display the stored object for patient id '800005' under the
# '_top5_featureselection_' tag.
# NOTE(review): this shows the whole object, so the stored output is very long
# and contains the patient id — consider displaying only the fields of interest.
load_patient_object('800005', '_top5_featureselection_')

array({'patient_id': '800005', 'pearson_correlated_features': (['max_average_ema_q_2_7_days', 'std_average_ema_q_2_7_days', 'avg_average_ema_q_3_7_days', 'avg_average_ema_q_4_7_days', 'avg_average_ema_q_2_7_days', 'avg_count_ema_q_3_7_days', 'avg_count_ema_q_4_7_days', 'avg_count_ema_q_5_7_days', 'avg_count_ema_q_7_7_days', 'avg_count_ema_q_6_7_days', 'avg_prior_engagement_7_days', 'avg_average_ema_q_7_7_days', 'avg_average_ema_q_5_7_days', 'avg_average_ema_q_6_7_days', 'avg_count_ema_q_1_7_days', 'avg_average_ema_q_1_7_days', 'max_mod_nr_sessions_7_days', 'max_mod_total_pages_7_days', 'std_mod_nr_sessions_7_days', 'std_mod_total_pages_7_days'], [('avg_average_ema_q_3_7_days', 0.43236881354859064), ('avg_average_ema_q_4_7_days', 0.41824607805789676), ('avg_count_ema_q_3_7_days', 0.41558938215021424), ('avg_count_ema_q_4_7_days', 0.40237054667133393), ('avg_count_ema_q_5_7_days', 0.4014495026435872), ('avg_count_ema_q_7_7_days', 0.39923853779194346), ('avg_count_ema_q_6_7_days', 0.39767

In [13]:
# Display the stored object for patient id '102066' under the
# '_top5_featureselection_' tag.
# NOTE(review): this shows the whole object, so the stored output is very long
# and contains the patient id — consider displaying only the fields of interest.
load_patient_object('102066', '_top5_featureselection_')

array({'patient_id': '102066', 'pearson_correlated_features': (['avg_average_ema_q_3_7_days', 'avg_count_ema_q_4_7_days', 'avg_prior_engagement_7_days', 'avg_count_ema_q_3_7_days', 'avg_count_ema_q_7_7_days', 'avg_count_ema_q_5_7_days', 'avg_count_ema_q_6_7_days', 'avg_average_ema_q_4_7_days', 'avg_count_ema_q_1_7_days', 'avg_average_ema_q_7_7_days', 'avg_average_ema_q_5_7_days', 'avg_average_ema_q_1_7_days', 'std_count_ema_q_1_7_days', 'avg_average_ema_q_6_7_days', 'max_average_ema_q_3_7_days', 'std_average_ema_q_2_7_days', 'std_average_ema_q_1_7_days', 'max_average_ema_q_2_7_days', 'max_count_ema_q_4_7_days', 'max_count_ema_q_3_7_days'], [('avg_average_ema_q_3_7_days', 0.42338790457125247), ('avg_count_ema_q_4_7_days', 0.42120187016615135), ('avg_prior_engagement_7_days', 0.42103477289876234), ('avg_count_ema_q_3_7_days', 0.41936879136045457), ('avg_count_ema_q_7_7_days', 0.41580217951290915), ('avg_count_ema_q_5_7_days', 0.39789289802679201), ('avg_count_ema_q_6_7_days', 0.394801678