# Make Predictions with the SkimLit Model

In [14]:
import pandas as pd
import tensorflow as tf

In [7]:
# Dataset location. Forward slashes are used for every separator because they
# resolve on Windows, Linux and macOS alike, whereas a '\\' separator only
# works on Windows.
DATA_DIR = 'pubmed-rct/PubMed_20k_RCT_numbers_replaced_with_at_sign'
train_path = DATA_DIR + '/train.txt'
test_path = DATA_DIR + '/test.txt'

## Preprocessing

In [3]:
# Read in all lines of the target text file.
def get_lines(filepath, encoding='utf-8'):
    '''
    Read a text file and return its lines as a list.

    Parameters
    ----------
    filepath : str or path-like
        Location of the text file to read.
    encoding : str, default 'utf-8'
        Encoding used to decode the file. It is passed explicitly so the
        result does not depend on the platform's locale default.

    Returns
    -------
    list of str
        One entry per line, exactly as produced by ``file.readlines()``
        (line terminators are kept).
    '''
    with open(filepath, 'r', encoding=encoding) as file:
        return file.readlines()

In [4]:
def preprocess_text(filepath):
    '''
    Parse an abstracts text file into one record per abstract sentence.

    Three kinds of input lines are recognised:

    * a line starting with '###' is the heading of a new abstract,
    * a whitespace-only line marks the end of the current abstract,
    * any other line is content made of tab-separated fields, the first
      being the target label and the second the sentence text.

    An abstract is emitted when its closing blank line is reached, and also
    when the next heading or the end of the file is reached first, so a
    missing blank line no longer drops the abstract. The buffer is emptied
    after each emit, so repeated blank lines do not duplicate records.

    Parameters
    ----------
    filepath : str
        Path of the file to parse; it is read through ``get_lines``.

    Returns
    -------
    list of dict
        One dict per content line, in file order, with the keys
        'line_number' (zero-based position inside its abstract),
        'target' (first tab-separated field),
        'text' (second tab-separated field, lower-cased) and
        'total_lines' (number of lines in the abstract minus one).

    Raises
    ------
    IndexError
        If a content line contains no tab character.
    '''
    abstract_samples = []
    pending_lines = []  # content lines of the abstract currently being read

    def flush_pending():
        # Turn the buffered abstract into records, then empty the buffer so
        # the same abstract can never be emitted twice.
        last_index = len(pending_lines) - 1  # line numbers start from zero
        for abstract_line_number, abstract_line in enumerate(pending_lines):
            target_text = abstract_line.split('\t')  # [target, text, ...]
            abstract_samples.append({
                'line_number': abstract_line_number,
                'target': target_text[0],
                'text': target_text[1].lower(),
                'total_lines': last_index,
            })
        pending_lines.clear()

    for line in get_lines(filepath):
        if line.startswith('###') or line.isspace():
            # Heading or blank separator: whatever is buffered is complete.
            flush_pending()
        else:
            # Keep lines in a list rather than concatenating and re-splitting,
            # so only the real line terminator separates sentences.
            pending_lines.append(line.rstrip('\r\n'))

    # The file may end without a closing blank line.
    flush_pending()
    return abstract_samples

In [10]:
# Turn a sentence into a string of space-separated characters.
def split_char(sentence):
    '''
    Return `sentence` with a single space placed between consecutive characters.

    Existing spaces are characters too, so they end up surrounded by the
    inserted separators. An empty input gives an empty string.
    '''
    characters = [char for char in sentence]
    return ' '.join(characters)

In [8]:
# Parse each split with preprocess_text and wrap the resulting list of
# per-sentence dicts in a DataFrame (dict keys become the columns).
train_df = pd.DataFrame(preprocess_text(train_path))
test_df = pd.DataFrame(preprocess_text(test_path))
test_df.head()  # preview the first rows of the test split

Unnamed: 0,line_number,target,text,total_lines
0,0,BACKGROUND,this study analyzed liver function abnormaliti...,8
1,1,RESULTS,a post hoc analysis was conducted with the use...,8
2,2,RESULTS,liver function tests ( lfts ) were measured at...,8
3,3,RESULTS,survival analyses were used to assess the asso...,8
4,4,RESULTS,the percentage of patients with abnormal lfts ...,8


In [16]:
# Token-level inputs: the sentences as plain Python lists.
train_sentences = train_df['text'].to_list()  # long list of sentences
test_sentences = test_df['text'].to_list()

# Character-level inputs: every sentence rewritten as space-separated characters.
test_char = [split_char(sentence) for sentence in test_sentences]

# Line position inside the abstract, one-hot encoded over 15 slots.
# Per the tf.one_hot documentation, an index outside [0, depth) gives an all-zero row.
# NOTE(review): depth presumably has to match the input width the saved model
# was trained with - confirm before changing it.
test_line_numbers_onehot = tf.one_hot(test_df['line_number'].to_numpy(), depth=15)

# Abstract length (total_lines), one-hot encoded over 18 slots.
# NOTE(review): same assumption about matching the saved model's input width.
test_total_lines = tf.one_hot(test_df['total_lines'].to_numpy(), depth=18)

# Labels
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# scikit-learn 1.2 renamed OneHotEncoder's `sparse` argument to `sparse_output`
# and 1.4 removed the old name. Try the new spelling first and fall back to the
# old one so this cell runs on both older and newer releases.
try:
    onehot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    onehot_encoder = OneHotEncoder(sparse=False)
label_encoder = LabelEncoder()

# Both encoders are fitted on the training targets only; the test targets are
# transformed with the already-fitted encoders so the class order is shared.
train_labels = onehot_encoder.fit_transform(train_df['target'].to_numpy().reshape(-1, 1))
train_labels_ = label_encoder.fit_transform(train_df['target'])
test_labels = onehot_encoder.transform(test_df['target'].to_numpy().reshape(-1, 1))
test_labels_ = label_encoder.transform(test_df['target'])

In [19]:
# Build a batched tf.data pipeline from four feature inputs and their labels.
def create_dataset(p1, p2, p3, p4, labels, batch_size=32):
    '''
    Combine four aligned feature inputs and their labels into a tf.data.Dataset.

    Parameters
    ----------
    p1, p2, p3, p4 : array-like or tensor
        Feature inputs sliced along their first dimension; they are grouped,
        in this order, into a 4-tuple per example.
    labels : array-like or tensor
        Labels sliced along their first dimension, aligned with the features.
    batch_size : int, default 32
        Number of consecutive examples per batch.

    Returns
    -------
    tf.data.Dataset
        Yields ``((p1, p2, p3, p4), labels)`` batches, with prefetching left
        to ``tf.data.AUTOTUNE``.
    '''
    features = tf.data.Dataset.from_tensor_slices((p1, p2, p3, p4))
    targets = tf.data.Dataset.from_tensor_slices(labels)
    paired = tf.data.Dataset.zip((features, targets))
    return paired.batch(batch_size).prefetch(tf.data.AUTOTUNE)

In [20]:
# Feature order passed here: line-number one-hot, total-lines one-hot,
# sentences, character strings; labels are the one-hot encoded targets.
# NOTE(review): presumably this order has to mirror the input order of the
# saved model - confirm against the training notebook.
test_token_char_pos_dataset = create_dataset(test_line_numbers_onehot, test_total_lines, test_sentences, test_char, test_labels)

## Predictions

In [18]:
# Load the previously saved Keras model from the relative path
# 'skimlit_tribid_model' (resolved against the current working directory).
model = tf.keras.models.load_model('skimlit_tribid_model')
model  # display the loaded object's repr

<keras.engine.functional.Functional at 0x224e15133a0>

In [21]:
# Run the model over the batched test dataset. In the displayed output each
# row holds 5 float32 scores, one per class.
proba = model.predict(test_token_char_pos_dataset)
proba[:10]  # preview the first ten rows only

array([[0.25670815, 0.02733543, 0.15132844, 0.471059  , 0.09356892],
       [0.0708793 , 0.01556053, 0.7628104 , 0.09931289, 0.0514369 ],
       [0.06359179, 0.03620072, 0.6777798 , 0.07469099, 0.14773674],
       [0.04317857, 0.04163879, 0.7901038 , 0.06620786, 0.05887096],
       [0.05092471, 0.04088417, 0.0657573 , 0.05181859, 0.79061526],
       [0.03276863, 0.02000857, 0.03530328, 0.03306977, 0.8788498 ],
       [0.01799538, 0.02992826, 0.03883842, 0.02532754, 0.88791037],
       [0.2066518 , 0.6249687 , 0.0338528 , 0.07197975, 0.06254692],
       [0.05673619, 0.75619805, 0.03417084, 0.06342678, 0.08946813],
       [0.34441224, 0.06553377, 0.03335064, 0.5231211 , 0.0335822 ]],
      dtype=float32)

In [22]:
# For every row of `proba`, take the column index of its largest score;
# that index is used as the predicted class. The result is a tf.Tensor.
preds = tf.argmax(proba, axis=1)
preds[:10]  # preview the first ten predicted indices

<tf.Tensor: shape=(10,), dtype=int64, numpy=array([3, 2, 2, 2, 4, 4, 4, 1, 1, 3], dtype=int64)>

In [24]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def evaluate_score(y_true, y_preds):
    '''
    Score predictions against the true labels.

    Parameters
    ----------
    y_true : array-like
        True class labels.
    y_preds : array-like
        Predicted class labels, aligned with `y_true`.

    Returns
    -------
    dict
        Keys 'accuracy', 'precision', 'recall' and 'f1_score'. The last three
        come from precision_recall_fscore_support with average='weighted'.
    '''
    accuracy = accuracy_score(y_true, y_preds)
    # The helper returns (precision, recall, fscore, support); support is not reported.
    weighted_precision, weighted_recall, weighted_f1, _ = precision_recall_fscore_support(
        y_true, y_preds, average='weighted'
    )
    return {
        'accuracy': accuracy,
        'precision': weighted_precision,
        'recall': weighted_recall,
        'f1_score': weighted_f1,
    }

In [25]:
# Compare the integer-encoded test labels with the predicted class indices.
score = evaluate_score(test_labels_, preds)
score  # display the metrics dict

{'accuracy': 0.8438692550190808,
 'precision': 0.8453540649207408,
 'recall': 0.8438692550190808,
 'f1_score': 0.8428440779148704}

## Find most wrong predictions

In [43]:
import numpy as np

# Largest score in each row of `proba`, i.e. the score of the predicted class.
confident = np.max(proba, axis=1)

# Class names in the label encoder's order; an encoded index looks up its name.
class_name = label_encoder.classes_

# One row per test sentence: the text, its true label, the predicted label
# and the score attached to that prediction.
df = pd.DataFrame(dict(
    sentence=test_sentences,
    label=[class_name[true_index] for true_index in test_labels_],
    predict=[class_name[predicted_index] for predicted_index in preds],
    confident=confident,
))

df.head(10)

Unnamed: 0,sentence,label,predict,confident
0,this study analyzed liver function abnormaliti...,BACKGROUND,OBJECTIVE,0.471059
1,a post hoc analysis was conducted with the use...,RESULTS,METHODS,0.76281
2,liver function tests ( lfts ) were measured at...,RESULTS,METHODS,0.67778
3,survival analyses were used to assess the asso...,RESULTS,METHODS,0.790104
4,the percentage of patients with abnormal lfts ...,RESULTS,RESULTS,0.790615
5,when mean hemodynamic profiles were compared i...,RESULTS,RESULTS,0.87885
6,multivariable analyses revealed that patients ...,RESULTS,RESULTS,0.88791
7,abnormal lfts are common in the adhf populatio...,CONCLUSIONS,CONCLUSIONS,0.624969
8,elevated meld-xi scores are associated with po...,CONCLUSIONS,CONCLUSIONS,0.756198
9,minimally invasive endovascular aneurysm repai...,BACKGROUND,OBJECTIVE,0.523121


In [49]:
# Most wrong predictions: keep the rows whose predicted label differs from the
# true label, ordered from the highest prediction score downwards.
most_wrong = (
    df.loc[df['label'].ne(df['predict'])]
    .sort_values(by='confident', ascending=False)
)
most_wrong.head(10)

Unnamed: 0,sentence,label,predict,confident
29294,"baseline measures included sociodemographics ,...",RESULTS,METHODS,0.95426
671,to determine whether the insulin resistance th...,METHODS,OBJECTIVE,0.945404
8527,participants were randomly allocated to a trea...,OBJECTIVE,METHODS,0.937138
5394,"a randomized , double-blind , crossover study ...",RESULTS,METHODS,0.935637
29005,"we designed a patient-level , randomized , con...",RESULTS,METHODS,0.929154
1221,data were collected prospectively for @ months...,RESULTS,METHODS,0.922035
4699,the primary efficacy end point was primary pat...,RESULTS,METHODS,0.917132
16840,the primary endpoint was a composite of cardio...,RESULTS,METHODS,0.917017
16347,to evaluate the effects of the lactic acid bac...,BACKGROUND,OBJECTIVE,0.917013
19048,a total of eighty migraineurs without aura wer...,RESULTS,METHODS,0.916828
