## Importing the associated modules

In [237]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import matplotlib as plot
import sklearn as sk
from sklearn import metrics, linear_model, ensemble, neural_network, svm, dummy
from helpers import *

# Importing the different features
from ema_features import get_EMA_features_and_target_for_patient
from module_features import get_module_features_for_patient
from context_features import get_weekend_days

# Importing the machine learning module
from predicting import train_algorithms, test_algorithms, eval_algorithms, plot_algorithms, make_algorithms
from feature_selection import backward_selection, forward_selection, correlate_features, precalculated_feature_selection

# Importing the wrappers that run the pipeline over multiple patients
from individual_wrappers import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Initializing constants

In [225]:
# Size of the sliding window; presumably days, since the feature names in the
# outputs below end in `_7_days` — TODO confirm.
SLIDING_WINDOW = 7

# Regularisation strengths offered to RidgeCV as its `alphas` grid.
RIDGE_ALPHAS = (
    0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05,
    0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7,
)

# Regularisation strengths for the Lasso cross-validation grid.
LASSO_ALPHAS = (
    0.0001, 0.0005, 0.001, 0.005,
    0.01, 0.05, 0.1, 0.5,
)

# Argument handed to get_proper_patients when the patients are initialised.
MAX_PATIENTS = 10

# Location of the feature files, relative to the notebook's working directory.
# Not referenced by the visible cells; presumably read by the helper modules.
FEATURE_PATH = "data/features/"

## Defining our Machine Learning Algorithms

In [226]:
# Regressors that are compared against each other. Every entry pairs a display
# name with a freshly constructed (not yet fitted) scikit-learn estimator; the
# list is passed to learn_patients_setups and calc_avg_performance_from_models
# in the cells below.
ml_algorithms = [
    {
        "name": "Lasso",
        # Fixed: this used the name `CV_ALPHAS`, which no visible cell defines.
        # The Lasso grid declared in the constants cell is `LASSO_ALPHAS`.
        "model": linear_model.LassoCV(alphas=LASSO_ALPHAS)
    },
    {
        "name": "Ridge",
        "model": linear_model.RidgeCV(alphas=RIDGE_ALPHAS)
    },
    {
        "name": "Random Forest",
        # NOTE(review): no random_state is given, so the forest (and every
        # score derived from it) differs between runs. Consider pinning a seed.
        "model": ensemble.RandomForestRegressor(n_estimators=1000, max_depth=2)
    },
    {
        "name": "Dummy Mean Regressor",
        # Baseline: DummyRegressor's default strategy predicts the training mean.
        "model": dummy.DummyRegressor()
    },
    {
        "name": "SVR RBF",
        # svm.SVR uses the RBF kernel by default.
        "model": svm.SVR()
    }
]

## Initializing patients

In [233]:
# Select the patients to analyse. get_proper_patients is not defined in this
# notebook; it presumably arrives through one of the wildcard imports.
# NOTE(review): the recorded output of `len(proper_patients)` further down is
# 162 although MAX_PATIENTS is 10 — confirm what this argument controls.
proper_patients = get_proper_patients(MAX_PATIENTS)

## Init step: getting the features of the patients

In [228]:
# Work on a small subset of proper_patients: the slice 1:6, i.e. five entries
# starting at the second one.
# NOTE(review): the first entry (position 0) is skipped — confirm this is intended.
sample_patient = proper_patients[1:6]

# Disabled: computes scores and features directly; the next cell loads a stored
# object instead (presumably the saved result of this call — TODO confirm).
# sample_scores = get_patients_scores_and_features(sample_patient, ml_algorithms, 20)

In [229]:
# Load the stored object for the sample patients. The second argument
# presumably identifies which stored variant to read — TODO confirm against
# load_patients_object.
sample_scores = load_patients_object(sample_patient, '_top5_featureselection_')

In [173]:
# sample_scores[0]['top_features']

In [230]:
# Run learn_patients_setups on the loaded sample with every entry of
# ml_algorithms, using max_features=20.
# The result is indexed 0, 1 and 2 in the cells below; judging by the variable
# names used there these are the top-feature, correlation-feature and
# all-feature setups — TODO confirm the ordering in learn_patients_setups.
end_results = learn_patients_setups(sample_scores, ml_algorithms, max_features=20)



### Convert the performance results to one DataFrame

In [248]:
# Average the per-model performance separately for each feature setup.
# NOTE(review): end_results is assumed to be ordered as
# [top features, correlation-selected features, all features] — confirm
# against learn_patients_setups.
topf_avg_performance = calc_avg_performance_from_models(ml_algorithms, end_results[0])
fcorrelation_avg_performance = calc_avg_performance_from_models(ml_algorithms, end_results[1])
allf_avg_performance = calc_avg_performance_from_models(ml_algorithms, end_results[2])

# Tag every row with the setup it belongs to. A scalar is broadcast to all
# rows, so the label cannot end up as NaN through index misalignment. The
# previous code built a separate pd.Series with its own RangeIndex, sized from
# len() of the raw result rather than from the frame.
topf_df = pd.DataFrame(topf_avg_performance).assign(feature_setup='top_features')
allf_df = pd.DataFrame(allf_avg_performance).assign(feature_setup='all_features')
corrf_df = pd.DataFrame(fcorrelation_avg_performance).assign(feature_setup='corr_features')

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the frames in
# the same order and keeps the original row indices, exactly as append did.
grouped_performances = pd.concat([topf_df, allf_df, corrf_df])

# numeric_only=True keeps the mean restricted to numeric columns. Older pandas
# did this implicitly; pandas 2.x raises on non-numeric columns otherwise.
groupby_statement = grouped_performances.groupby('feature_setup').mean(numeric_only=True)
groupby_statement

Unnamed: 0_level_0,average_mae
feature_setup,Unnamed: 1_level_1
all_features,2.5342
corr_features,2.477782
top_features,2.491556


In [117]:
# Display the value returned by get_patients_mean_MAE_score for end_results[2]
# (the entry labelled `all_features` in the table above); by its name, a mean
# MAE over the patients.
# NOTE(review): the recorded output belongs to execution In [117], which is
# older than the run that produced the table above, so the two numbers need
# not agree. Re-run top to bottom before comparing them.
get_patients_mean_MAE_score(end_results[2])

2.347204656122031

In [235]:
# Number of entries returned by get_proper_patients.
len(proper_patients)

162

In [238]:
# Fetch the unfiltered patient table. The next cell filters it with a boolean
# mask on the 'xEmaNRatings' column, so it is presumably a pandas DataFrame.
all_patients = get_all_patients()

In [246]:
# Count the rows of all_patients whose 'xEmaNRatings' value is greater than 7.
# NOTE(review): 7 equals SLIDING_WINDOW; if that is the intended threshold,
# reference the constant instead of the literal.
len(all_patients[all_patients['xEmaNRatings'] > 7])
# Alternative (disabled): total number of rows without the filter.
# len(all_patients)

311

In [270]:
from functools import reduce

# get_top_features returns two parallel sequences: `x` is used as the table's
# data and `y` as the per-feature scores (feature names and selection scores,
# judging by the printed table).
x, y = get_top_features(sample_scores)

# Sum of all scores; the lambda arguments are named so they do not shadow the
# cell-level `x` and `y`.
total_score = reduce(lambda running_total, score: running_total + score, y)

# Normalise every score to its share of the total.
y_2 = np.array(y) / total_score

# The shares become the index and the entries of `x` the single data column,
# which is the layout of the LaTeX table printed below.
print(
    pd.DataFrame(x, index=y_2).to_latex()
)

\begin{tabular}{ll}
\toprule
{} &                            0 \\
\midrule
0.061905 &   min\_average\_ema\_q\_2\_7\_days \\
0.056190 &  avg\_prior\_engagement\_7\_days \\
0.052381 &  max\_prior\_engagement\_7\_days \\
0.051429 &  std\_prior\_engagement\_7\_days \\
0.043810 &     min\_count\_ema\_q\_4\_7\_days \\
0.037143 &   max\_average\_ema\_q\_2\_7\_days \\
0.032381 &     std\_count\_ema\_q\_1\_7\_days \\
0.030476 &   max\_mod\_total\_pages\_7\_days \\
0.030476 &     min\_count\_ema\_q\_7\_7\_days \\
0.029524 &    std\_mod\_total\_time\_7\_days \\
0.028571 &                   weekendDay \\
0.027619 &   max\_average\_ema\_q\_1\_7\_days \\
0.024762 &     min\_count\_ema\_q\_6\_7\_days \\
0.024762 &   std\_average\_ema\_q\_1\_7\_days \\
0.023810 &   avg\_average\_ema\_q\_3\_7\_days \\
0.023810 &   avg\_average\_ema\_q\_1\_7\_days \\
0.022857 &   avg\_mod\_total\_pages\_7\_days \\
0.022857 &     min\_count\_ema\_q\_3\_7\_days \\
0.020952 &   min\_average\_ema\_q\_4\_7\_days \\
0.020000 