**Load data**

<a href="https://colab.research.google.com/github/Blistt/bp-recommender/blob/Romasa/BP_Recommendation_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
# Directory that holds the experiment CSVs. The trailing slash is required
# because each file name is appended to this prefix directly.
path = 'data/'

# One DataFrame per dataset variant: the unaugmented baseline and the three
# augmented versions used by the experiments further down.
baseline = pd.read_csv(f'{path}baseline.csv')
augmented_k = pd.read_csv(f'{path}augmented_k.csv')
augmented_inter = pd.read_csv(f'{path}augmented_inter.csv')
augmented_intra = pd.read_csv(f'{path}augmented_intra.csv')

**Experiments with non-personalized model**

In [2]:
from trainers.train_nonpersonalized import train_nonpersonazlied

# ------------------------------------------------------------------------------------------
# MODEL PARAMETERS
# ------------------------------------------------------------------------------------------
model = 'xgb'            # Which model to train: 'rf' (Random Forest) or 'xgb' (XGBoost)
ntrees = 100             # How many trees the model uses
N = 5                    # How many of the most important features to display
historical = True        # Use historical BP (True) or leave it out (False)
second_run = False       # Run a second time restricted to the top N features
bootstrap = True         # Train on bootstrap samples
bootstrap_size = 0.8     # Portion of the dataset drawn for each bootstrap sample

# ------------------------------------------------------------------------------------------
# DATASET PARAMETERS
# ------------------------------------------------------------------------------------------
key = ['healthCode', 'date']          # Columns used as the key
target = ['systolic', 'diastolic']    # Columns to predict
log_path = 'exp_log.csv'              # File that experiment results are logged to


############################################# EXPERIMENTS #############################################
# Each row is one experiment: (label printed before the run, dataset to train on,
# augmentation name forwarded to the trainer as `aug`).
# All runs share the model/dataset parameters defined above, so they differ only
# in these three values. The columns predicted are whatever `target` lists
# (both systolic and diastolic here), not systolic alone.
# NOTE(review): earlier comments described these runs as using "non NaN values";
# that filtering is not visible in this cell - confirm it happens in the trainer
# or when the CSVs were produced.
experiments = [
    ('BASELINE - NO AUGMENTATION', baseline, 'None'),
    ('K-ROLL AUGMENTATION', augmented_k, 'K-roll'),
    ('KNN INTRA AUGMENTATION', augmented_intra, 'KNN-intra'),
    ('KNN INTER AUGMENTATION', augmented_inter, 'KNN-inter'),
]

# `dataset` and `aug` are deliberately used as the loop variables so that, after
# the loop, they hold the same values the copy-pasted version left behind.
for label, dataset, aug in experiments:
    print(label)
    train_nonpersonazlied(dataset, model, ntrees, N, key, target, log_path=log_path, second_run=second_run,
                          bootstrap=bootstrap, bootstrap_size=bootstrap_size, aug=aug, historical=historical)
    print('--------------------------------------------------------------------------------')


BASELINE - NO AUGMENTATION
dataset size: 224, model: xgb, ntrees: 100, sys_mae: 10.25,
           dias_mae: 8.192, top_n: awake_count; wo_calories; sleep_minutes; steps; active_calories, second run: False, bootstrap: True, 
           historical: True
--------------------------------------------------------------------------------
K-ROLL AUGMENTATION
dataset size: 275, model: xgb, ntrees: 100, sys_mae: 10.273,
           dias_mae: 8.029, top_n: awake_count; wo_calories; steps; bed_time; active_calories, second run: False, bootstrap: True, 
           historical: True
--------------------------------------------------------------------------------
KNN INTRA AUGMENTATION
dataset size: 224, model: xgb, ntrees: 100, sys_mae: 10.625,
           dias_mae: 7.183, top_n: awake_count; wo_calories; bed_time; steps; active_calories, second run: False, bootstrap: True, 
           historical: True
--------------------------------------------------------------------------------
KNN INTER AUGMENTATI

**Experiments with personalized model**

In [3]:
from trainers.train_personalized import train_personalized

# ------------------------------------------------------------------------------------------
# PARAMETERS
# ------------------------------------------------------------------------------------------
# Model settings
ntrees = 100             # How many trees the model uses
N = 5                    # How many of the most important features to display
historical = True        # Use historical BP (True) or leave it out (False)
bootstrap = True         # Train on bootstrap samples
bootstrap_size = 0.8     # Portion of the dataset drawn for each bootstrap sample

# Dataset settings
key = ['healthCode', 'date']          # Columns used as the key
target = ['systolic', 'diastolic']    # Columns to predict
log_path = 'exp_log.csv'              # File that experiment results are logged to


############################################# EXPERIMENTS #############################################
# Each row is one experiment: (label printed before the run, dataset to train on,
# augmentation name forwarded to the trainer as `aug`).
# Every run trains the 'xgb' model with the parameters defined above and predicts
# the columns listed in `target`; only the dataset and its label change.
# NOTE(review): earlier comments described these runs as using "non NaN values";
# that filtering is not visible in this cell - confirm it happens in the trainer
# or when the CSVs were produced.
experiments = [
    ('BASELINE - NO AUGMENTATION', baseline, 'None'),
    ('K-ROLL AUGMENTATION', augmented_k, 'K-roll'),
    ('KNN INTRA AUGMENTATION', augmented_intra, 'KNN-intra'),
    ('KNN INTER AUGMENTATION', augmented_inter, 'KNN-inter'),
]

# `dataset` and `aug` are deliberately used as the loop variables so that, after
# the loop, they hold the same values the copy-pasted version left behind.
for label, dataset, aug in experiments:
    print(label)
    train_personalized(dataset, 'xgb', ntrees, N, key, target, log_path=log_path, bootstrap=bootstrap,
                       bootstrap_size=bootstrap_size, aug=aug, historical=historical)
    print('--------------------------------------------------------------------------------')


BASELINE - NO AUGMENTATION
dataset size: 193, model: xgb, ntrees: 100, sys_mae: 15.0,
           dias_mae: 1.0, top_n: N/A, second run: False, bootstrap: True, 
           historical: True
--------------------------------------------------------------------------------
K-ROLL AUGMENTATION
dataset size: 245, model: xgb, ntrees: 100, sys_mae: 9.0,
           dias_mae: 2.0, top_n: N/A, second run: False, bootstrap: True, 
           historical: True
--------------------------------------------------------------------------------
KNN INTRA AUGMENTATION
dataset size: 193, model: xgb, ntrees: 100, sys_mae: 12.19,
           dias_mae: 7.357, top_n: N/A, second run: False, bootstrap: True, 
           historical: True
--------------------------------------------------------------------------------
KNN INTER AUGMENTATION
dataset size: 193, model: xgb, ntrees: 100, sys_mae: 9.713,
           dias_mae: 7.831, top_n: N/A, second run: False, bootstrap: True, 
           historical: True
-----------

In [5]:
from recommenders.per_recommender import get_per_recommendations
# Personalized recommendations for a single user from the testing set.
# NOTE(review): the saved execution counts show this cell (In [5]) was run after
# the cell that follows it (In [4]); re-run the notebook top to bottom to
# confirm the outputs are reproducible in document order.

# Which test-set user to look at
entry = 26                            # Index of the user; choose from 0 to 58

# Data layout
key = ['healthCode', 'date']          # Columns used as the key
target = ['systolic', 'diastolic']    # Columns to predict

# Recommendation options
n = 5                                 # How many of the most important features to display
var_adjust = False                    # Adjust recs according to total variance explained, or not
verbose = True                        # Print the recommendations and the user's training entries

recs = get_per_recommendations(
    entry,
    key,
    target,
    n=n,
    var_adjust=var_adjust,
    verbose=verbose,
)

Predicted value: [139.04291]
Predicted value: [89.75199]
Weighted correction: [6.734179]

 Recommendations:
Activity: active_calories  -   Value: 0.0   -  imp_score: 0.09586142748594284-  Rec: 0.0
Activity: awake_count  -   Value: 2.0   -  imp_score: 0.09347192943096161-  Rec: 0.18694385886192322
Activity: sleep_minutes  -   Value: 367.0   -  imp_score: 0.09103571623563766-  Rec: 33.41010785847902
Activity: bed_time  -   Value: 36.03333333333333   -  imp_score: 0.08908497542142868-  Rec: 3.210028614352147
Activity: wo_calories  -   Value: 0.0   -  imp_score: 0.06027833744883537-  Rec: 0.0

 Target user training entries:
                               healthCode        date  distance_cycling  \
99   3d9522ed-bc28-485a-a4ad-27b4d94343b4  2015-07-28               0.0   
100  3d9522ed-bc28-485a-a4ad-27b4d94343b4  2015-07-24               0.0   
101  3d9522ed-bc28-485a-a4ad-27b4d94343b4  2015-07-26               0.0   

     distance_walking  floors  active_calories    steps  heart_rate  \


In [4]:
import pandas as pd
from recommenders.nonper_recommender import get_nonper_recommendations

# Feature values for one hand-written input entry.
# Activity-related features
distance_cycling = 0.0
distance_walking = 0.0
floors = 20.0
active_calories = 300.0
steps = 7000.0
heart_rate = 0.0
active_minutes = 0.0
wo_calories = 0.0

# Sleep-related features
sleep_minutes = 300.0
awake_count = 0.0
bed_time = 0.0

# Blood-pressure history features (presumably earlier readings - TODO confirm)
systolic_hist = 140.0
diastolic_hist = 80.0

# Name -> value mapping. The insertion order below fixes the column order of
# the DataFrame, so each name sits right next to the value it labels.
entry_dict = {
    'distance_cycling': distance_cycling,
    'distance_walking': distance_walking,
    'floors': floors,
    'active_calories': active_calories,
    'steps': steps,
    'heart_rate': heart_rate,
    'sleep_minutes': sleep_minutes,
    'awake_count': awake_count,
    'bed_time': bed_time,
    'active_minutes': active_minutes,
    'wo_calories': wo_calories,
    'systolic_hist': systolic_hist,
    'diastolic_hist': diastolic_hist,
}

# Parallel views of the same mapping: feature names and their values, in order.
keys = list(entry_dict)
entry = list(entry_dict.values())

# Single-row DataFrame with one column per feature.
entry_df = pd.DataFrame([entry_dict])

# Settings for the non-personalized recommender
n = 5                                 # Value passed to the recommender as `n`
target = ['systolic', 'diastolic']    # Passed as the target argument
key = ['healthCode', 'date']          # Passed as the key argument

# Kept as the bare final expression so the notebook displays the returned value
# in addition to whatever the call prints with verbose=True.
get_nonper_recommendations(
    entry_df,
    key,
    target,
    n=n,
    var_adjust=False,
    verbose=True,
)

Predicted value: [130.96594]
Predicted value: [85.26246]
Weighted correction: [3.7719262]

 Recommendations:
Activity: steps  -   Value: 7000.0   -  imp_score: 0.09366404265165329-  Rec: 655.648298561573
Activity: wo_calories  -   Value: 0.0   -  imp_score: 0.08854963630437851-  Rec: 0.0
Activity: awake_count  -   Value: 0.0   -  imp_score: 0.08681640028953552-  Rec: 0.0
Activity: sleep_minutes  -   Value: 300.0   -  imp_score: 0.08606331795454025-  Rec: 25.818995386362076
Activity: bed_time  -   Value: 0.0   -  imp_score: 0.07801978290081024-  Rec: 0.0


{'steps': 0    655.648299
 Name: steps, dtype: float64,
 'wo_calories': 0    0.0
 Name: wo_calories, dtype: float64,
 'awake_count': 0    0.0
 Name: awake_count, dtype: float64,
 'sleep_minutes': 0    25.818995
 Name: sleep_minutes, dtype: float64,
 'bed_time': 0    0.0
 Name: bed_time, dtype: float64}