# Test Evaluation

In [49]:
import os
import pandas as pd
import numpy as np
from scipy.stats import pearsonr
from pathlib import Path
# Display configuration: show every row and column and never truncate cell text,
# so the sampled sentence pairs further down are rendered in full.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.max_colwidth", None)

## Train

In [50]:
# Resolve the dataset and experiment directories from the environment.
# os.environ[...] is used instead of .get(): an unset variable then fails with a
# KeyError naming the variable, rather than a TypeError raised by Path(None).
path_data = Path(os.environ['NLP_RAW_DATA']) / 'n2c2'
path_results = Path(os.environ['NLP_EXPERIMENT_PATH']) / 'submission_generation' / '03_12_2020_20_18_37_original_data'

# Training sentence pairs with their gold score (tab-separated, no header row).
df_train = pd.read_csv(path_data / 'clinicalSTS2019.train.txt', delimiter='\t', names=['sentence_a', 'sentence_b', 'score_true'])

# Predictions are the first column of header-less CSV files and are attached
# positionally via to_numpy(); assumes the files keep the row order of the
# training file — TODO confirm.
df_train['score_step2'] = pd.read_csv(path_results / 'normal' / 'step2_train_scores.csv', header=None)[0].to_numpy()
df_train['score_step4'] = pd.read_csv(path_results / 'normal' / 'step4_train_scores.csv', header=None)[0].to_numpy()

# Absolute error of step 4 minus absolute error of step 2:
# negative values mean step 4 is closer to the gold score than step 2.
df_train['score_diff'] = (abs(df_train['score_step4'] - df_train['score_true'])) - abs((df_train['score_step2'] - df_train['score_true']))

# Rows whose prediction moved by more than 0.001 between step 2 and step 4.
# A tolerance is used rather than an exact `!=` comparison of the two columns
# (presumably to ignore floating-point noise — verify).
df_train_med = df_train[np.abs(df_train['score_step2'] - df_train['score_step4']) > 0.001]

In [51]:
# (mean, standard deviation) of the gold scores over all training pairs;
# Series.std() uses the pandas default of ddof=1.
df_train['score_true'].pipe(lambda gold: (gold.mean(), gold.std()))

(2.787484774665043, 1.388712509029767)

In [69]:
# (number of rows in df_train_med, that number as a percentage of all training rows)
(df_train_med.shape[0], 100 * df_train_med.shape[0] / df_train.shape[0])

(147, 8.95249695493301)

In [53]:
# (mean, standard deviation) of the gold scores restricted to df_train_med;
# Series.std() uses the pandas default of ddof=1.
df_train_med['score_true'].pipe(lambda gold: (gold.mean(), gold.std()))

(2.0285714285714285, 1.048857826269141)

In [54]:
# Mean squared error of the step-2 predictions against the gold scores on df_train_med.
df_train_med['score_step2'].sub(df_train_med['score_true']).abs().pow(2).sum() / len(df_train_med)

0.6959206986788156

In [55]:
# Mean squared error of the step-4 predictions against the gold scores on df_train_med.
df_train_med['score_step4'].sub(df_train_med['score_true']).abs().pow(2).sum() / len(df_train_med)

0.5808822487618591

In [56]:
# Show 5 example rows from df_train_med, reproducibly.
# The seed is passed to sample() itself instead of calling np.random.seed():
# pandas builds a private RandomState(5) from it, so the same rows are drawn
# while the global NumPy RNG state is left untouched by this cell.
df_train_med.sample(5, random_state=5)

Unnamed: 0,sentence_a,sentence_b,score_true,score_step2,score_step4,score_diff
1351,Prozac 20 mg capsule 1 capsule by mouth one time daily.,ibuprofen [ADVIL] 200 mg tablet 3 tablets by mouth one time daily as needed.,1.5,1.71823,1.699896,-0.018334
1289,ondansetron [ZOFRAN] 4 mg tablet 1 tablet by mouth three times a day as needed.,amoxicillin [AMOXIL] 500 mg capsule 2 capsules by mouth three times a day.,3.0,1.683182,1.699137,-0.015955
873,hydrochlorothiazide 25 mg tablet one-half tablet by mouth every morning.,ibuprofen [MOTRIN] 600 mg tablet 1 tablet by mouth four times a day.,1.5,1.590499,1.697998,0.107499
479,Prozac 20 mg capsule 3 capsules by mouth one time daily.,Aleve 220 mg tablet 1 tablet by mouth two times a day.,0.5,2.019769,1.678388,-0.341381
1253,amlodipine [NORVASC] 5 mg tablet 2 tablets by mouth one time daily.,hydrochlorothiazide 12.5 mg tablet 1 tablet by mouth one time daily.,1.5,1.880972,1.700071,-0.180902


## Test

In [57]:
# Test sentence pairs (tab-separated, no header row), extended in one chain with:
#   score_step2 / score_step4 - predictions, first column of header-less CSV files
#   score_true                - gold scores, first column of the .gs.sim file
#   score_diff                - |step4 - true| - |step2 - true| (negative: step 4 is closer)
# The score columns are attached positionally via to_numpy(); assumes every file
# keeps the row order of the test file — TODO confirm.
df_test = pd.read_csv(path_data / 'clinicalSTS2019.test.txt', delimiter='\t', names=['sentence_a', 'sentence_b']).assign(
    score_step2=pd.read_csv(path_results / 'normal' / 'step2_test_scores.csv', header=None)[0].to_numpy(),
    score_step4=pd.read_csv(path_results / 'normal' / 'step4_test_scores.csv', header=None)[0].to_numpy(),
    score_true=pd.read_csv(path_data / 'clinicalSTS2019.test.gs.sim.txt', header=None)[0].to_numpy(),
    score_diff=lambda frame: (frame['score_step4'] - frame['score_true']).abs() - (frame['score_step2'] - frame['score_true']).abs(),
)

# Rows whose prediction moved by more than 0.001 between step 2 and step 4
# (tolerance comparison rather than exact inequality of the two columns).
df_test_med = df_test.loc[(df_test['score_step2'] - df_test['score_step4']).abs() > 0.001]

In [58]:
# (mean, standard deviation) of the gold scores over all test pairs;
# Series.std() uses the pandas default of ddof=1.
df_test['score_true'].pipe(lambda gold: (gold.mean(), gold.std()))

(1.7645631067961165, 1.5208707178893903)

In [70]:
# (number of rows in df_test_med, that number as a percentage of all test rows)
(df_test_med.shape[0], 100 * df_test_med.shape[0] / df_test.shape[0])

(94, 22.815533980582526)

In [60]:
# (mean, standard deviation) of the gold scores restricted to df_test_med;
# Series.std() uses the pandas default of ddof=1.
df_test_med['score_true'].pipe(lambda gold: (gold.mean(), gold.std()))

(1.0957446808510638, 0.5041579573299865)

In [61]:
# Mean squared error of the step-2 predictions against the gold scores on df_test_med.
df_test_med['score_step2'].sub(df_test_med['score_true']).abs().pow(2).sum() / len(df_test_med)

2.432894107188975

In [62]:
# Mean squared error of the step-4 predictions against the gold scores on df_test_med.
df_test_med['score_step4'].sub(df_test_med['score_true']).abs().pow(2).sum() / len(df_test_med)

0.5612688029859126

In [77]:
# Show 5 example rows from df_test_med, reproducibly.
# The seed is passed to sample() itself instead of calling np.random.seed():
# pandas builds a private RandomState(9) from it, so the same rows are drawn
# while the global NumPy RNG state is left untouched by this cell.
df_test_med.sample(5, random_state=9)

Unnamed: 0,sentence_a,sentence_b,score_step2,score_step4,score_true,score_diff
30,Qsymia 3.75-23 mg capsule multiphasic release 24 hour 1 capsule by mouth one time daily.,Aleve 220 mg tablet 2 tablets by mouth one time daily as needed.,2.324836,1.66135,0.0,-0.663486
205,Aleve 220 mg tablet 1 tablet by mouth two times a day.,acetaminophen [TYLENOL] 500 mg tablet 2 tablets by mouth three times a day.,2.736895,1.680362,1.5,-1.056533
117,lisinopril [PRINIVIL/ZESTRIL] 10 mg tablet 2 tablets by mouth one time daily.,naproxen [NAPROSYN] 500 mg tablet 1 tablet by mouth two times a day.,2.287069,1.691543,1.0,-0.595525
338,Tylenol Extra Strength 500 mg tablet 1 tablet by mouth as directed by prescriber as needed.,furosemide [LASIX] 20 mg tablet 3 tablets by mouth one time daily.,1.877145,1.694849,1.0,-0.182296
121,ibuprofen [MOTRIN] 600 mg tablet 1 tablet by mouth every 6 hours as needed.,ibuprofen [ADVIL] 200 mg tablet 2-3 tablets by mouth every 4 hours as needed.,3.907524,4.261679,3.0,0.354155


## M-Heads

In [64]:
# Training sentence pairs with gold scores (tab-separated, no header row), extended
# with the predictions stored under the 'heads' results folder and the change in
# absolute error between them (negative score_diff: step 4 is closer to the gold score).
# The score columns are attached positionally via to_numpy(); assumes the files
# keep the row order of the training file — TODO confirm.
# NOTE(review): the column is named score_step2 but is read from
# step1_train_scores.csv, whereas the 'normal' cells read step2_* files —
# confirm that step 1 is the intended baseline here.
df_trainh = pd.read_csv(path_data / 'clinicalSTS2019.train.txt', delimiter='\t', names=['sentence_a', 'sentence_b', 'score_true']).assign(
    score_step2=pd.read_csv(path_results / 'heads' / 'step1_train_scores.csv', header=None)[0].to_numpy(),
    score_step4=pd.read_csv(path_results / 'heads' / 'step4_train_scores.csv', header=None)[0].to_numpy(),
    score_diff=lambda frame: (frame['score_step4'] - frame['score_true']).abs() - (frame['score_step2'] - frame['score_true']).abs(),
)

# Rows whose two predictions differ by more than 0.001
# (tolerance comparison rather than exact inequality of the two columns).
df_trainh_med = df_trainh.loc[(df_trainh['score_step2'] - df_trainh['score_step4']).abs() > 0.001]

In [65]:
# Number of rows in df_trainh_med.
df_trainh_med.shape[0]

146

In [66]:
# Mean squared error of the score_step2 column against the gold scores on df_trainh_med.
df_trainh_med['score_step2'].sub(df_trainh_med['score_true']).abs().pow(2).sum() / len(df_trainh_med)

0.6101250129135304

In [67]:
# Mean squared error of the score_step4 column against the gold scores on df_trainh_med.
df_trainh_med['score_step4'].sub(df_trainh_med['score_true']).abs().pow(2).sum() / len(df_trainh_med)

0.5969832368522427