## Splitting Evaluation
The objective of this notebook is to obtain a standard way of evaluating models. For each person, the model is expected to predict a probability.  The referal is then evaluated under what time period it occurs. 




In [1]:
%reload_ext autoreload
%autoreload 2
#ignore warnings.
def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn



In [2]:
import sys, torch, datetime, os
from pathlib import Path
sys.path.append('../modules')
import pandas as pd
import numpy as np
from sklearn.metrics import precision_score, accuracy_score, recall_score, balanced_accuracy_score, f1_score, roc_auc_score, log_loss, roc_curve
#import evaluation as e
pd.set_option('display.max_columns', 9999)

In [3]:
#This could be used for benchmarking data
df=pd.read_csv('https://raw.githubusercontent.com/HealthINCITE/patient_panel/master/data/processed/test.csv')
df.head()

Unnamed: 0,person_id,myr,cad0,cad1,dv9,date,lab_1,lab_2,lab_3,ref_m,ref,class,labels,lag1_lab_1,lag1_lab_2,lag1_lab_3,sum_lab_1,sum_lab_2,sum_lab_3,win6_lab_1,win6_lab_2,win6_lab_3
0,1000,201601,1,1,0,2016-01-01,0,0,0,0,0,h,0,0,0,0,0,0,0,0,0,0
1,1000,201602,1,1,0,2016-02-01,0,0,0,0,0,h,0,0,0,0,0,0,0,0,0,0
2,1000,201603,1,1,0,2016-03-01,0,0,0,0,0,h,0,0,0,0,0,0,0,0,0,0
3,1000,201604,1,1,0,2016-04-01,0,0,0,0,0,h,0,0,0,0,0,0,0,0,0,0
4,1000,201605,1,1,0,2016-05-01,0,0,0,0,0,h,0,0,0,0,0,0,0,0,0,0


## Train test split based on time window.


In [4]:
def train_test_split(df, date_col, date_format, split_time):
    """
    Provide an train/test split based on a timestamp.
    df = Dataframe (Pandas dataframe).
    date_col = Date column (string).
    date_format = The date format.
    split_time = A specific place to date. (date format)
    """
    split =pd.Timestamp(split_time)
    #Let's convert this to datetime while we are at it.
    date = pd.to_datetime(df[date_col], format=date_format)
    train=df.loc[date<=split]
    test=df.loc[date>split]
    return train, test

date_col='myr'
date_format='%Y%m'
split_time=datetime.date(2016, 12,30)

train, test = train_test_split(df, date_col, date_format, split_time) 
test.head()

Unnamed: 0,person_id,myr,cad0,cad1,dv9,date,lab_1,lab_2,lab_3,ref_m,ref,class,labels,lag1_lab_1,lag1_lab_2,lag1_lab_3,sum_lab_1,sum_lab_2,sum_lab_3,win6_lab_1,win6_lab_2,win6_lab_3
12,1000,201701,1,1,0,2017-01-01,0,0,0,0,0,h,0,0,0,0,0,0,0,0,0,0
13,1000,201702,1,1,0,2017-02-01,0,0,0,0,0,h,0,0,0,0,0,0,0,0,0,0
14,1000,201703,1,1,0,2017-03-01,0,0,0,0,0,h,0,0,0,0,0,0,0,0,0,0
15,1000,201704,1,1,0,2017-04-01,0,0,0,0,0,h,0,0,0,0,0,0,0,0,0,0
16,1000,201705,1,1,0,2017-05-01,0,0,0,0,0,h,0,0,0,0,0,0,0,0,0,0


### Predictions 
The predictions are easy to assess for the toy model. 
For the toy model:
    - The first 4 individuals are not referrals.
    - The next 2 are positive in the first three months.
    - The next 2 are positive in the first 6 months.
    - The final 2 are positive in the 12th month. 

We set the windows according to the following. 
`windows= [[0,3], [0,6], [0,12]]`

In [12]:
#currently assumes predictions are in a dataframe. 
#With the person and category.
pred_df=pd.read_csv('https://raw.githubusercontent.com/HealthINCITE/patient_panel/master/data/predictions/test.csv')
pred_df

Unnamed: 0,person_id,ref,class
0,1000,0.0,healthy
1,1001,0.0,healthy
2,1002,0.0,healthy
3,1003,0.0,healthy
4,1004,0.7,diabetes
5,1005,0.7,diabetes
6,1006,0.8,liver
7,1007,0.8,liver
8,1008,0.9,pnemonia
9,1009,0.9,pnemonia


In [13]:
from sklearn.metrics import precision_score, accuracy_score, recall_score, balanced_accuracy_score, f1_score
from sklearn.metrics import roc_auc_score, log_loss
from sklearn.metrics import roc_curve
def score_landmarks(exp, per_col, date_col, df, pred_df, capacity, landmarks, results_file, target, save=True, append=True):
    """
    exp = Experiment name.
    per_col = Person Column 
    date_col= Date Col
    df = Evaluation dataframe
    capacity = The maximum number of the positive class.
    landmarks = Specific windows, such that [[0,3], [0,6], [0,12]] coresponds to col 0-3, 0-6, 0-12. 

    """
    #This creates a wide version of the dataset with columns equal to dates and rows equal to personid
    ir=df.pivot_table(index=per_col, columns=date_col, values=target, aggfunc='sum')
    c=ir.columns
    #Initialize a results data frame. 
    results=pd.DataFrame() #final results
    row=0
    #Sort by capacity and set positive class based on top probabilities
    pred_df=pred_df.sort_values(by=[target], ascending=False)
    pred_df['ref_bin']=0
    pred_df['ref_bin'][0:capacity]=1
    pred_df=pred_df.sort_values(by=[per_col])
    
    #Filter out people we don't have a prediction for
    ir=ir[ir.index.isin(pred_df[per_col])]
    
    # Loop through the windows
    for w in landmarks:
        sl=slice(w[0],w[1])
        y= ir.iloc[:,sl].sum(axis=1) #take slice based on window
        #If more than 1 referral in window, recode to 1
        y[y>1]=1
        if pred_df.shape[0]!=len(y):
            print("df with ",y, " people;",pred_df.shape[0], " predictions" )
            exit
        label=str(c[w[0]])+'-'+ str(c[w[1]-1])
        #label=c[w[0]].strftime('%Y%m')+'-'+c[w[1]-1].strftime('%Y%m')
        results.loc[row, 'experiment']=exp
        results.loc[row, 'date']=pd.Timestamp.now(tz=None)
        results.loc[row, 'n'] = pred_df.shape[0]
        results.loc[row, 'range']=label
        results.loc[row, 'log_loss'] = log_loss(y, pred_df[target])
        results.loc[row, 'precision']=precision_score(y, pred_df['ref_bin'])
        results.loc[row, 'recall']=recall_score(y, pred_df['ref_bin'])
        results.loc[row, 'accuracy']=accuracy_score(y, pred_df['ref_bin'])
        results.loc[row, 'balanced_accuracy']=balanced_accuracy_score(y, pred_df['ref_bin'])
        results.loc[row, 'f1']=f1_score(y, pred_df['ref_bin'])
        row=row+1
    if save:
        if append and os.path.exists(results_file):
            with open(results_file, 'a') as f:
                results.to_csv(f, header=False) 
        else: 
            results.to_csv(results_file, index = False)
           
    return results

#define the windows.  For example [0,3] is including between 0-3 months.
exp="Test"
results_file='results.csv'
target='ref'
capacity=6 #Capacity is used the set the conversion from prob to class prediction
per_col='person_id'

landmarks= [[0,3], [0,6], [0,12]]
#Score windows
results=score_landmarks(exp, per_col, date_col, test, pred_df, capacity, landmarks, results_file,  target, save=True, append=True)
results

Unnamed: 0,experiment,date,n,range,log_loss,precision,recall,accuracy,balanced_accuracy,f1
0,Test,2019-12-04 12:09:42.857590,12.0,201701-201703,0.770895,0.333333,1.0,0.666667,0.8,0.5
1,Test,2019-12-04 12:09:42.883250,12.0,201701-201706,0.539846,0.666667,1.0,0.833333,0.875,0.8
2,Test,2019-12-04 12:09:42.898967,12.0,201701-201712,0.173642,1.0,1.0,1.0,1.0,1.0


In [7]:
pred_df['ref_bin']=0
pred_df.loc[0:capacity,'ref_bin']=1
pred_df

Unnamed: 0,person_id,ref,class,ref_bin
0,1000,0.0,healthy,1
1,1001,0.0,healthy,1
2,1002,0.0,healthy,1
3,1003,0.0,healthy,1
4,1004,0.7,diabetes,1
5,1005,0.7,diabetes,1
6,1006,0.8,liver,1
7,1007,0.8,liver,0
8,1008,0.9,pnemonia,0
9,1009,0.9,pnemonia,0


In [8]:
#This creates a wide version of the dataset with columns equal to dates and rows equal to personid
ir=test.pivot_table(index=per_col, columns=date_col, values=target, aggfunc='sum')
ir

myr,201701,201702,201703,201704,201705,201706,201707,201708,201709,201710,201711,201712
person_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1004,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1005,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1006,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1007,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1008,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1009,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [14]:
#This gives the truth
sl=slice(0,12)
y=ir.iloc[:,sl].sum(axis=1)
y[y>1]=1
y

person_id
1000    0.0
1001    0.0
1002    0.0
1003    0.0
1004    1.0
1005    1.0
1006    1.0
1007    1.0
1008    1.0
1009    1.0
2000    0.0
2001    0.0
dtype: float64