In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install simpletransformers

In [None]:
import pandas as pd
from pathlib import Path
import numpy as np

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from simpletransformers.classification import ClassificationModel, ClassificationArgs
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, roc_auc_score, precision_recall_curve, average_precision_score
import numpy as np
from pathlib import Path
from sklearn.metrics import roc_auc_score, roc_curve, average_precision_score, precision_recall_curve
import matplotlib.pyplot as plt

In [None]:
!pip install openai
!pip install tiktoken

In [None]:
import tiktoken
import openai
from openai import OpenAI

## Functions

In [None]:
def calculate_values(result, model):
  """Print binary-classification metrics derived from an evaluation dict and return F1.

  Args:
    result: Mapping that provides the confusion-matrix counts under the keys
      'tp', 'tn', 'fp' and 'fn', plus an 'auroc' value.
    model: Label inserted into every printed line.

  Returns:
    The F1 score. Any ratio whose denominator is zero (for example precision
    when nothing was predicted positive) is reported as 0.0 instead of raising
    or producing nan, so the caller's ``>= best_f1`` comparison stays valid.
  """

  def safe_ratio(numerator, denominator):
    # Undefined ratios collapse to 0.0 rather than ZeroDivisionError / nan.
    return numerator / denominator if denominator else 0.0

  tp, tn, fp, fn = result['tp'], result['tn'], result['fp'], result['fn']

  precision = safe_ratio(tp, tp + fp)
  recall = safe_ratio(tp, tp + fn)
  f1 = safe_ratio(2 * (precision * recall), precision + recall)

  # Calculate Accuracy
  accuracy = safe_ratio(tp + tn, tp + tn + fp + fn)

  # Calculate Specificity
  specificity = safe_ratio(tn, tn + fp)

  # AUC-ROC is taken as-is from the evaluation dict
  auc_roc = result['auroc']

  # Single ROC operating point implied by the confusion matrix
  roc = {
      'fpr': safe_ratio(fp, fp + tn),
      'tpr': recall
  }

  # Print the calculated metrics
  print("Precision:",model, ": ", precision)
  print("Recall:",model, ": ", recall)
  print("F1 Score:", model, ": ", f1)
  print("Accuracy:", model, ": ", accuracy)
  print("Sensitivity:", model, ": ", recall)
  print("Specificity:", model, ": ", specificity)
  print("AUC-ROC:", model, ": ", auc_roc)
  print("ROC:", model, ": ", roc)

  return f1

In [None]:
def draw_plots(predicted_probabilities, true_labels, model):
  """Show a ROC figure followed by a precision-recall figure for one model.

  Args:
    predicted_probabilities: Scores passed to the sklearn metric functions.
    true_labels: Ground-truth labels aligned with the scores.
    model: Name appended to both figure titles.
  """

  auc_score = roc_auc_score(true_labels, predicted_probabilities)
  fpr, tpr, _ = roc_curve(true_labels, predicted_probabilities)

  plt.figure()
  plt.plot(fpr, tpr, label=f"auroc = {auc_score:.2f}")
  plt.plot([0, 1], [0, 1], 'k--')  # Diagonal line representing random classification
  plt.xlabel('False Positive Rate')
  plt.ylabel('True Positive Rate')
  plt.title('Receiver Operating Characteristic Curve for ' + model)
  plt.legend(loc='lower right')
  plt.show()

  # Blank line between the two figures in the cell output
  print(" ")

  average_precision = average_precision_score(true_labels, predicted_probabilities)
  precision, recall, _ = precision_recall_curve(true_labels, predicted_probabilities)

  plt.figure()
  plt.plot(recall, precision, label=f"auprc = {average_precision:.2f}")
  plt.xlabel('Recall')
  plt.ylabel('Precision')
  plt.title('Precision-Recall Curve for ' + model)
  plt.legend(loc='upper right')
  plt.show()

In [None]:
plt.rcParams['figure.figsize'] = (8, 6)

def draw_roc_curve(predicted_probabilities, true_labels, model_name):
    """Add one model's ROC curve to the current matplotlib figure.

    The legend entry carries the model name and its AUC rounded to two decimals.
    """
    area_under_curve = roc_auc_score(true_labels, predicted_probabilities)
    false_positive_rate, true_positive_rate, _thresholds = roc_curve(
        true_labels, predicted_probabilities
    )
    legend_entry = f"{model_name} (AUC = {area_under_curve:.2f})"

    plt.plot(false_positive_rate, true_positive_rate, label=legend_entry)

def draw_precision_recall_curve(predicted_probabilities, true_labels, model_name):
    """Add one model's precision-recall curve to the current matplotlib figure.

    The legend entry carries the model name and its average precision rounded
    to two decimals.
    """
    mean_precision = average_precision_score(true_labels, predicted_probabilities)
    precision_values, recall_values, _thresholds = precision_recall_curve(
        true_labels, predicted_probabilities
    )
    legend_entry = f"{model_name} (AP = {mean_precision:.2f})"

    plt.plot(recall_values, precision_values, label=legend_entry)

def draw_plots_multiple(predicted_probabilities_list, true_labels_list, model_names):
    """Overlay ROC curves, then precision-recall curves, for several models.

    The three arguments are parallel sequences: entry ``k`` of each one
    describes the same model. Two figures are shown, one per curve type.
    """

    def overlay(draw_curve):
        # Draw one curve per model onto whichever figure is currently active.
        for position in range(len(predicted_probabilities_list)):
            scores = predicted_probabilities_list[position]
            name = model_names[position]
            labels = true_labels_list[position]
            draw_curve(scores, labels, name)

    tenth_ticks = np.arange(0, 1.1, 0.1)

    # --- ROC figure ---
    plt.figure()
    overlay(draw_roc_curve)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic Curve')
    plt.xticks(tenth_ticks)
    plt.yticks(tenth_ticks)
    plt.legend(loc='lower right')
    plt.show()

    # --- Precision-recall figure ---
    plt.figure()

    print(" ")

    overlay(draw_precision_recall_curve)
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall Curve')
    plt.legend(loc='lower left')
    plt.show()

## Mimic Data

### Data Pre-processing

In [None]:
df_renal = pd.read_csv('data_renal_failure.csv')

In [None]:
df_renal.head(3)

In [None]:
df_renal['renal_failure'].value_counts()

In [None]:
df_renal.shape

In [None]:
# Keep only rows that have both a note text and a label.
df_renal = df_renal.dropna(subset=['TEXT', 'renal_failure'])

In [None]:
df_renal.shape

In [None]:
# Remove newline characters from every note in one vectorised pass.
# Assigning the whole column avoids chained indexing (df['TEXT'][index] = ...),
# which triggers SettingWithCopyWarning and is a no-op under copy-on-write.
df_renal['TEXT'] = df_renal['TEXT'].str.replace('\n', '', regex=False)

In [None]:
# Collapse every run of whitespace to a single space and trim the ends.
# regex=True is required: without it newer pandas treats r'\s+' as a literal string.
df_renal['TEXT'] = df_renal['TEXT'].str.replace(r'\s+', ' ', regex=True).str.strip()

In [None]:
renal_text = df_renal['TEXT'].tolist()
renal_label = df_renal['renal_failure'].tolist()

In [None]:
renal_label = [int(x) for x in renal_label]

In [None]:
# Bucket note indices by which "hospital course" heading (if any) the
# lower-cased note contains; the first matching heading wins.
bhc_1, bhc_2, bhc_3, no_bhc = [], [], [], []
for i, note in enumerate(renal_text):
  t = note.lower()
  if 'brief summary of hospital course' in t:
    bhc_1.append(i)
  elif 'brief hospital course' in t:
    bhc_2.append(i)
  elif 'hospital course:' in t:
    bhc_3.append(i)
  else:
    no_bhc.append(i)

In [None]:
print(len(bhc_1))
print(len(bhc_2))
print(len(bhc_3))

In [None]:
def get_bhc(text, max_words=512):
  """Extract the leading sentences that follow a "hospital course" heading.

  The note is lower-cased and searched for the headings below, in order; the
  first one found decides where the excerpt starts.

  Args:
    text: Full note text.
    max_words: Word budget. Sentences are appended until the running count of
      space-separated tokens exceeds this value, so the sentence that crosses
      the budget is still included.

  Returns:
    The lower-cased excerpt with sentences re-joined by '. ', or ``text``
    unchanged (original casing) when no heading is present.
  """
  # (heading, number of characters to skip from the start of the heading)
  headings = (
      ('brief hospital course', 22),
      ('brief summary of hospital course', 33),
      ('hospital course:', 16),
  )

  lowered = text.lower()
  for heading, offset in headings:
    istart = lowered.find(heading)
    if istart != -1:
      break
  else:
    # No heading at all: hand the note back untouched.
    return text

  kept_sentences = []
  word_count = 0
  for sentence in lowered[istart + offset:].split('.'):
    if word_count > max_words:
      break
    word_count += len(sentence.split(' '))
    kept_sentences.append(sentence)

  return '. '.join(kept_sentences)

In [None]:
# One hospital-course excerpt per note, in the same order as renal_text.
bhc_list = [get_bhc(note) for note in renal_text]

In [None]:
train_X, test_X, train_y, test_y = train_test_split(bhc_list, renal_label, test_size=0.25, random_state=42)

In [None]:
train_data = {'text': train_X, 'labels': train_y}
test_data = {'text': test_X, 'labels': test_y}

df_train_data_mimic = pd.DataFrame(train_data)
df_test_data_mimic = pd.DataFrame(test_data)

In [None]:
df_test_data_mimic['labels'].value_counts()

In [None]:
df_train_data_mimic['labels'].value_counts()

## Mimic Model

In [None]:
# Epochs

n = 20

### Roberta

In [None]:
model_args = ClassificationArgs(num_train_epochs=1, overwrite_output_dir = True, max_seq_length=512)

model_roberta = ClassificationModel(
    "roberta", "roberta-base", args=model_args)

In [None]:
# Train RoBERTa on the MIMIC training frame for n rounds, evaluating after each round.
best_f1 = 0
# One [column-1 model outputs, test labels] pair per round.
roberta_mimic_graph_list = list()

for i in range(n):
  print('Epoch: ', i)
  # Each call continues training the same model object.
  model_roberta.train_model(df_train_data_mimic)
  result, model_outputs, wrong_predictions = model_roberta.eval_model(df_test_data_mimic)
  # Column 1 of model_outputs is presumably the positive-class score - TODO confirm.
  roberta_mimic_graph_list.append([model_outputs[:, 1], df_test_data_mimic['labels'].tolist()])
  print('Result')
  print(result)
  evaluation_metric_f1 = calculate_values(result, 'Roberta')
  print(" ")

  # Keep the outputs of the best round so far; ties go to the later round because of >=.
  # NOTE(review): the round is selected on the same frame used for reporting - confirm this is intended.
  if evaluation_metric_f1>=best_f1:
    best_f1 = evaluation_metric_f1
    print('Stored for Epoch: ', i)
    predicted_probabilities_roberta_mimic = model_outputs[:, 1]
    true_labels_roberta_mimic = np.array(df_test_data_mimic['labels'].tolist())

### BioClinical BERT

In [None]:
# NOTE(review): the RoBERTa configuration passes max_seq_length=512 but this one does not,
# so presumably the library default applies here - confirm the difference is intended.
model_args = ClassificationArgs(num_train_epochs=1, overwrite_output_dir = True)

# Bio_ClinicalBERT checkpoint loaded through the "bert" model type.
model_Bio_ClinicalBERT= ClassificationModel(
    "bert", "emilyalsentzer/Bio_ClinicalBERT", args=model_args)

In [None]:
# Train Bio_ClinicalBERT on the MIMIC training frame for n rounds, evaluating after each round.
best_f1 = 0
# One [column-1 model outputs, test labels] pair per round.
biobert_mimic_graph_list = list()

for i in range(n):
  print('Epoch: ', i)
  # Each call continues training the same model object.
  model_Bio_ClinicalBERT.train_model(df_train_data_mimic)
  result, model_outputs, wrong_predictions = model_Bio_ClinicalBERT.eval_model(df_test_data_mimic)
  biobert_mimic_graph_list.append([model_outputs[:, 1], df_test_data_mimic['labels'].tolist()])
  print('Result')
  print(result)
  # Label the printed metrics with the model actually being evaluated
  # (same 'BioBert' label the later Bio_ClinicalBERT cells use).
  evaluation_metric_f1 = calculate_values(result, 'BioBert')
  print(" ")

  # Keep the outputs of the best round so far; ties go to the later round because of >=.
  if evaluation_metric_f1>=best_f1:
    best_f1 = evaluation_metric_f1
    print('Stored for Epoch: ', i)
    predicted_probabilities_biobert_mimic = model_outputs[:, 1]
    true_labels_biobert_mimic = np.array(df_test_data_mimic['labels'].tolist())

### PubMedBert

In [None]:
model_args = ClassificationArgs(num_train_epochs=1, overwrite_output_dir = True)

model_microsoft_bio_pubmed =  ClassificationModel(
    "bert", "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext", args=model_args)

In [None]:
# Train PubMedBERT on the MIMIC training frame for n rounds, evaluating after each round.
best_f1 = 0
# One [column-1 model outputs, test labels] pair per round.
pubbert_mimic_graph_list = list()

for i in range(n):
  print('Epoch: ', i)
  # Each call continues training the same model object.
  model_microsoft_bio_pubmed.train_model(df_train_data_mimic)
  result, model_outputs, wrong_predictions = model_microsoft_bio_pubmed.eval_model(df_test_data_mimic)
  pubbert_mimic_graph_list.append([model_outputs[:, 1], df_test_data_mimic['labels'].tolist()])
  print('Result')
  print(result)
  # Label the printed metrics with the model actually being evaluated.
  evaluation_metric_f1 = calculate_values(result, 'PubMedBert')
  print(" ")

  # Keep the outputs of the best round so far; ties go to the later round because of >=.
  if evaluation_metric_f1>=best_f1:
    best_f1 = evaluation_metric_f1
    print('Stored for Epoch: ', i)
    predicted_probabilities_pubbert_mimic = model_outputs[:, 1]
    true_labels_pubbert_mimic = np.array(df_test_data_mimic['labels'].tolist())

## GPT Data

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.model_selection import train_test_split
import os
# Never store the key in the notebook. Reuse a key that is already configured in the
# environment and only prompt (without echo) when none is set; the previous
# unconditional assignment replaced any configured key with an empty string.
if not os.environ.get("OPENAI_API_KEY"):
  from getpass import getpass
  os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [None]:
client = OpenAI()

In [None]:
command_pos = """Create a patient summary with 15 sentences describing a patient's medical history who is planning to have any type of
surgery. Also describe patient having acute renal failure complication after surgery."""

In [None]:
command_neg = """create a patient summary with 15 sentences describing a patient's medical history who is planning to have any type of
surgery. Also describe patient 's postoperative course and if they had complications. Do not include acute renal failure as a complication."""

In [None]:
def generate_data(command, file_label, n, start_index=783, model="gpt-3.5-turbo"):
  """Request ``n`` completions and write each one to its own text file.

  Files are created in the current working directory and named
  ``<file_label><number>.txt`` with numbers ``start_index .. start_index + n - 1``.

  Args:
    command: Prompt text, sent as the single system message of each request.
    file_label: Prefix of every output file name.
    n: Number of completions to request.
    start_index: Number used for the first file; the default keeps the
      numbering this notebook used originally.
    model: Chat model name passed to the API.
  """
  for i in range(n):
    file_number = start_index + i
    message = [{"role": "system", "content": command}]
    response = client.chat.completions.create(model=model, messages=message)
    file_name = file_label + str(file_number) + '.txt'
    # Explicit encoding so non-ASCII characters in a completion cannot fail the write
    # on platforms whose default encoding is not UTF-8.
    with open(file_name, "w", encoding="utf-8") as file:
      file.write(response.choices[0].message.content)
    print('Done: ', file_number)

In [None]:
# %cd GPT_data_Positive_15_Sentences

# generate_data(command_pos, 'Positive', 100)

In [None]:
%cd GPT_data_Negative_15_Sentences

generate_data(command_neg, 'Negative',117)

In [None]:
def read_data(directory_path, X, y, keyword):
  """Append the text of every file in a directory to ``X`` and its label to ``y``.

  Args:
    directory_path: ``pathlib.Path`` of the directory to read.
    X: List that receives one newline-free string per file (mutated in place).
    y: List that receives one label per file (mutated in place).
    keyword: 'Positive' (label 1) or 'Negative' (label 0).

  Returns:
    The same ``X`` and ``y`` objects.

  Raises:
    ValueError: If ``keyword`` is neither 'Positive' nor 'Negative'. Previously
      the text was appended without a label, leaving ``X`` and ``y`` misaligned.
  """
  label_for_keyword = {'Positive': 1, 'Negative': 0}
  if keyword not in label_for_keyword:
    raise ValueError("keyword must be 'Positive' or 'Negative', got " + repr(keyword))
  label = label_for_keyword[keyword]

  # Word counts of the notes longer than 512 space-separated tokens (reported below).
  c = list()
  # Sorted so the row order, and therefore the seeded shuffle downstream, is reproducible.
  for file_path in sorted(directory_path.iterdir()):
    # Skip sub-directories (e.g. notebook checkpoint folders); open() would fail on them.
    if not file_path.is_file():
      continue
    with open(file_path, encoding="utf-8") as f:
      data = f.read().strip()
    # Removing every newline also covers the former separate "\n\n" pass.
    data = data.replace("\n", "")

    X.append(data)
    y.append(label)

    word_count = len(data.split(" "))
    if word_count > 512:
      c.append(word_count)

  print(keyword, "Labels greater than 512: ", len(c))
  print(c)

  return X, y

In [None]:
# Take 567 negative and 100 positive labels for train dataset to have same ratio as that of MIMIC

In [None]:
def get_train_test_gpt(keyword, n_negative=567, test_size=0.25):
  """Load the generated notes for one sentence-length setting and split them.

  Reads ``GPT_data_Negative_<keyword>_Sentences`` and
  ``GPT_data_Positive_<keyword>_Sentences`` (relative to the current working
  directory), shuffles the rows, keeps the first ``n_negative`` negative rows
  plus every positive row, and splits the result.

  Args:
    keyword: Sentence-count part of the directory names, e.g. "15".
    n_negative: Maximum number of negative rows to keep.
    test_size: Fraction of the selected rows placed in the test frame.

  Returns:
    ``(train_data, test_data)`` DataFrames with 'text' and 'labels' columns and
    freshly reset indices.
  """
  X = list()
  y = list()

  for polarity in ("Negative", "Positive"):
    directory_path = Path("GPT_data_" + polarity + "_" + keyword + "_Sentences")
    X, y = read_data(directory_path, X, y, polarity)

  df = pd.DataFrame({'text': X, 'labels': y})

  # Fixed seed so the shuffle, and therefore which negatives are kept, is repeatable.
  np.random.seed(0)
  df = df.sample(frac=1).reset_index(drop=True)

  selected_label_0 = df[df['labels'] == 0].head(n_negative)
  selected_label_1 = df[df['labels'] == 1]
  selected_rows = pd.concat([selected_label_0, selected_label_1])

  train_data, test_data = train_test_split(selected_rows, test_size=test_size, random_state=42)
  train_data = train_data.reset_index(drop = True)
  test_data = test_data.reset_index(drop = True)

  return train_data, test_data

## 15 Sentences

In [None]:
train_data, test_data = get_train_test_gpt("15")

In [None]:
print(train_data['labels'].value_counts())

print(test_data['labels'].value_counts())

In [None]:
# Epochs

n = 20

### Roberta

In [None]:
model_args = ClassificationArgs(num_train_epochs=1, overwrite_output_dir = True, max_seq_length=512)

model_roberta = ClassificationModel(
    "roberta", "roberta-base", args=model_args)

In [None]:
# Train RoBERTa on the 15-sentence generated notes for n rounds; evaluate on the MIMIC test frame.
best_f1 = 0
# One [column-1 model outputs, test labels] pair per round.
roberta_gpt15_graph_list = list()

for i in range(n):
  print('Epoch: ', i)
  # Each call continues training the same model object.
  model_roberta.train_model(train_data)
  result, model_outputs, wrong_predictions = model_roberta.eval_model(df_test_data_mimic)
  roberta_gpt15_graph_list.append([model_outputs[:, 1], df_test_data_mimic['labels'].tolist()])
  print('Result')
  print(result)
  evaluation_metric_f1 = calculate_values(result, 'Roberta')
  print(" ")

  # Keep the outputs of the best round so far; ties go to the later round because of >=.
  if evaluation_metric_f1>=best_f1:
    best_f1 = evaluation_metric_f1
    print('Stored for Epoch: ', i)
    predicted_probabilities_roberta_gpt15 = model_outputs[:, 1]
    true_labels_roberta_gpt15 = np.array(df_test_data_mimic['labels'].tolist())

### BioBert

In [None]:
model_args = ClassificationArgs(num_train_epochs=1, overwrite_output_dir = True)

model_Bio_ClinicalBERT= ClassificationModel(
    "bert", "emilyalsentzer/Bio_ClinicalBERT", args=model_args)

In [None]:
# Train Bio_ClinicalBERT on the 15-sentence generated notes for n rounds; evaluate on the MIMIC test frame.
best_f1 = 0
# One [column-1 model outputs, test labels] pair per round.
biobert_gpt15_graph_list = list()

for i in range(n):
  print('Epoch: ', i)
  # Each call continues training the same model object.
  model_Bio_ClinicalBERT.train_model(train_data)
  result, model_outputs, wrong_predictions = model_Bio_ClinicalBERT.eval_model(df_test_data_mimic)
  biobert_gpt15_graph_list.append([model_outputs[:, 1], df_test_data_mimic['labels'].tolist()])
  print('Result')
  print(result)
  evaluation_metric_f1 = calculate_values(result, 'BioBert')
  print(" ")

  # Keep the outputs of the best round so far; ties go to the later round because of >=.
  if evaluation_metric_f1>=best_f1:
    best_f1 = evaluation_metric_f1
    print('Stored for Epoch: ', i)
    predicted_probabilities_biobert_gpt15 = model_outputs[:, 1]
    # NOTE(review): this name breaks the true_labels_<model>_<dataset> pattern, but the
    # BioBert graph cell reads exactly this spelling, so rename both together or not at all.
    true_labels_bert_biogpt15 = np.array(df_test_data_mimic['labels'].tolist())

### PubMedBert

In [None]:
model_args = ClassificationArgs(num_train_epochs=1, overwrite_output_dir = True)

model_microsoft_bio_pubmed =  ClassificationModel(
    "bert", "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext", args=model_args)

In [None]:
# Train PubMedBERT on the 15-sentence generated notes for n rounds; evaluate on the MIMIC test frame.
best_f1 = 0
# One [column-1 model outputs, test labels] pair per round.
pubbert_gpt15_graph_list = list()

for i in range(n):
  print('Epoch: ', i)
  # Each call continues training the same model object.
  model_microsoft_bio_pubmed.train_model(train_data)
  result, model_outputs, wrong_predictions = model_microsoft_bio_pubmed.eval_model(df_test_data_mimic)
  pubbert_gpt15_graph_list.append([model_outputs[:, 1], df_test_data_mimic['labels'].tolist()])
  print('Result')
  print(result)
  # Label the printed metrics with the model actually being evaluated.
  evaluation_metric_f1 = calculate_values(result, 'PubMedBert')
  print(" ")

  # Keep the outputs of the best round so far; ties go to the later round because of >=.
  if evaluation_metric_f1>=best_f1:
    best_f1 = evaluation_metric_f1
    print('Stored for Epoch: ', i)
    predicted_probabilities_pubbert_gpt15 = model_outputs[:, 1]
    true_labels_pubbert_gpt15 = np.array(df_test_data_mimic['labels'].tolist())

## 30 Sentences

In [None]:
train_data, test_data = get_train_test_gpt("30")

In [None]:
print(train_data['labels'].value_counts())

print(test_data['labels'].value_counts())

In [None]:
# Epochs

n = 20

### Roberta

In [None]:
model_args = ClassificationArgs(num_train_epochs=1, overwrite_output_dir = True, max_seq_length=512)

model_roberta = ClassificationModel(
    "roberta", "roberta-base", args=model_args)

In [None]:
# Train RoBERTa on the 30-sentence generated notes for n rounds; evaluate on the MIMIC test frame.
best_f1 = 0
# One [column-1 model outputs, test labels] pair per round.
roberta_gpt30_graph_list = list()

for i in range(n):
  print('Epoch: ', i)
  # Each call continues training the same model object.
  model_roberta.train_model(train_data)
  result, model_outputs, wrong_predictions = model_roberta.eval_model(df_test_data_mimic)
  roberta_gpt30_graph_list.append([model_outputs[:, 1], df_test_data_mimic['labels'].tolist()])
  print('Result')
  print(result)
  evaluation_metric_f1 = calculate_values(result, 'Roberta')
  print(" ")

  # Keep the outputs of the best round so far; ties go to the later round because of >=.
  if evaluation_metric_f1>=best_f1:
    best_f1 = evaluation_metric_f1
    print('Stored for Epoch: ', i)
    predicted_probabilities_roberta_gpt30 = model_outputs[:, 1]
    true_labels_roberta_gpt30 = np.array(df_test_data_mimic['labels'].tolist())

### BioBert

In [None]:
model_args = ClassificationArgs(num_train_epochs=1, overwrite_output_dir = True)

model_Bio_ClinicalBERT= ClassificationModel(
    "bert", "emilyalsentzer/Bio_ClinicalBERT", args=model_args)

In [None]:
# Train Bio_ClinicalBERT on the 30-sentence generated notes for n rounds; evaluate on the MIMIC test frame.
best_f1 = 0
# One [column-1 model outputs, test labels] pair per round.
biobert_gpt30_graph_list = list()

for i in range(n):
  print('Epoch: ', i)
  # Each call continues training the same model object.
  model_Bio_ClinicalBERT.train_model(train_data)
  result, model_outputs, wrong_predictions = model_Bio_ClinicalBERT.eval_model(df_test_data_mimic)
  biobert_gpt30_graph_list.append([model_outputs[:, 1], df_test_data_mimic['labels'].tolist()])
  print('Result')
  print(result)
  evaluation_metric_f1 = calculate_values(result, 'BioBert')
  print(" ")

  # Keep the outputs of the best round so far; ties go to the later round because of >=.
  if evaluation_metric_f1>=best_f1:
    best_f1 = evaluation_metric_f1
    print('Stored for Epoch: ', i)
    predicted_probabilities_biobert_gpt30 = model_outputs[:, 1]
    # NOTE(review): "bioert" looks like a typo for "biobert", but the BioBert graph cell
    # reads exactly this spelling, so rename both together or not at all.
    true_labels_bioert_gpt30 = np.array(df_test_data_mimic['labels'].tolist())

### PubMedBert

In [None]:
model_args = ClassificationArgs(num_train_epochs=1, overwrite_output_dir = True)

model_microsoft_bio_pubmed =  ClassificationModel(
    "bert", "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext", args=model_args)

In [None]:
# Train PubMedBERT on the 30-sentence generated notes for n rounds; evaluate on the MIMIC test frame.
best_f1 = 0
# One [column-1 model outputs, test labels] pair per round.
pubbert_gpt30_graph_list = list()

for i in range(n):
  print('Epoch: ', i)
  # Each call continues training the same model object.
  model_microsoft_bio_pubmed.train_model(train_data)
  result, model_outputs, wrong_predictions = model_microsoft_bio_pubmed.eval_model(df_test_data_mimic)
  pubbert_gpt30_graph_list.append([model_outputs[:, 1], df_test_data_mimic['labels'].tolist()])
  print('Result')
  print(result)
  # Label the printed metrics with the model actually being evaluated.
  evaluation_metric_f1 = calculate_values(result, 'PubMedBert')
  print(" ")

  # Keep the outputs of the best round so far; ties go to the later round because of >=.
  if evaluation_metric_f1>=best_f1:
    best_f1 = evaluation_metric_f1
    print('Stored for Epoch: ', i)
    predicted_probabilities_pubbert_gpt30 = model_outputs[:, 1]
    true_labels_pubbert_gpt30 = np.array(df_test_data_mimic['labels'].tolist())

## 45 Sentences

In [None]:
train_data, test_data = get_train_test_gpt("45")

In [None]:
print(train_data['labels'].value_counts())

print(test_data['labels'].value_counts())

In [None]:
# Epochs

n = 20

### Roberta

In [None]:
model_args = ClassificationArgs(num_train_epochs=1, overwrite_output_dir = True, max_seq_length=512)

model_roberta = ClassificationModel(
    "roberta", "roberta-base", args=model_args)

In [None]:
# Train RoBERTa on the 45-sentence generated notes for n rounds; evaluate on the MIMIC test frame.
best_f1 = 0
# One [column-1 model outputs, test labels] pair per round.
roberta_gpt45_graph_list = list()

for i in range(n):
  print('Epoch: ', i)
  # Each call continues training the same model object.
  model_roberta.train_model(train_data)
  result, model_outputs, wrong_predictions = model_roberta.eval_model(df_test_data_mimic)
  roberta_gpt45_graph_list.append([model_outputs[:, 1], df_test_data_mimic['labels'].tolist()])
  print('Result')
  print(result)
  evaluation_metric_f1 = calculate_values(result, 'Roberta')
  print(" ")

  # Keep the outputs of the best round so far; ties go to the later round because of >=.
  if evaluation_metric_f1>=best_f1:
    best_f1 = evaluation_metric_f1
    print('Stored for Epoch: ', i)
    predicted_probabilities_roberta_gpt45 = model_outputs[:, 1]
    true_labels_roberta_gpt45 = np.array(df_test_data_mimic['labels'].tolist())

### BioBert

In [None]:
model_args = ClassificationArgs(num_train_epochs=1, overwrite_output_dir = True)

model_Bio_ClinicalBERT= ClassificationModel(
    "bert", "emilyalsentzer/Bio_ClinicalBERT", args=model_args)

In [None]:
# Train Bio_ClinicalBERT on the 45-sentence generated notes for n rounds; evaluate on the MIMIC test frame.
best_f1 = 0
# One [column-1 model outputs, test labels] pair per round.
biobert_gpt45_graph_list = list()

for i in range(n):
  print('Epoch: ', i)
  # Each call continues training the same model object.
  model_Bio_ClinicalBERT.train_model(train_data)
  result, model_outputs, wrong_predictions = model_Bio_ClinicalBERT.eval_model(df_test_data_mimic)
  biobert_gpt45_graph_list.append([model_outputs[:, 1], df_test_data_mimic['labels'].tolist()])
  print('Result')
  print(result)
  evaluation_metric_f1 = calculate_values(result, 'BioBert')
  print(" ")

  # Keep the outputs of the best round so far; ties go to the later round because of >=.
  if evaluation_metric_f1>=best_f1:
    best_f1 = evaluation_metric_f1
    print('Stored for Epoch: ', i)
    predicted_probabilities_biobert_gpt45 = model_outputs[:, 1]
    true_labels_biobert_gpt45 = np.array(df_test_data_mimic['labels'].tolist())

### PubMedBert

In [None]:
model_args = ClassificationArgs(num_train_epochs=1, overwrite_output_dir = True)

model_microsoft_bio_pubmed =  ClassificationModel(
    "bert", "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext", args=model_args)

In [None]:
# Train PubMedBERT on the 45-sentence generated notes for n rounds; evaluate on the MIMIC test frame.
best_f1 = 0
# One [column-1 model outputs, test labels] pair per round.
pubbert_gpt45_graph_list = list()

for i in range(n):
  print('Epoch: ', i)
  # Each call continues training the same model object.
  model_microsoft_bio_pubmed.train_model(train_data)
  result, model_outputs, wrong_predictions = model_microsoft_bio_pubmed.eval_model(df_test_data_mimic)
  pubbert_gpt45_graph_list.append([model_outputs[:, 1], df_test_data_mimic['labels'].tolist()])
  print('Result')
  print(result)
  # Label the printed metrics with the model actually being evaluated.
  evaluation_metric_f1 = calculate_values(result, 'PubMedBert')
  print(" ")

  # Keep the outputs of the best round so far; ties go to the later round because of >=.
  if evaluation_metric_f1>=best_f1:
    best_f1 = evaluation_metric_f1
    print('Stored for Epoch: ', i)
    predicted_probabilities_pubbert_gpt45 = model_outputs[:, 1]
    true_labels_pubbert_gpt45 = np.array(df_test_data_mimic['labels'].tolist())

## Graphs

### Roberta

In [None]:
# Compare RoBERTa across the four training sets, using the outputs stored for each run's
# best-F1 round. The three lists are parallel: position k describes the same run.
predicted_probabilities_list = [predicted_probabilities_roberta_mimic, predicted_probabilities_roberta_gpt15, predicted_probabilities_roberta_gpt30, predicted_probabilities_roberta_gpt45]
true_labels_list = [true_labels_roberta_mimic, true_labels_roberta_gpt15, true_labels_roberta_gpt30, true_labels_roberta_gpt45]
model_names = ["Mimic", "GPT-15 Sentences", "GPT-30 Sentences", "GPT-45 Sentences"]

draw_plots_multiple(predicted_probabilities_list, true_labels_list, model_names)

### Bert

In [None]:
# NOTE(review): predicted_probabilities_bert_mimic, true_labels_bert_mimic and the
# bert_gpt15/30/45_graph_list variables are not assigned in any cell visible in this notebook,
# so this cell presumably raises NameError on a fresh Restart & Run All. Confirm whether a plain
# BERT training section was removed, then either restore it or delete this cell.
# Unlike the other graph cells, the GPT entries here take the LAST round ([-1]) of each
# graph list rather than the best-F1 round.
predicted_probabilities_list = [predicted_probabilities_bert_mimic, bert_gpt15_graph_list[-1][0], bert_gpt30_graph_list[-1][0], bert_gpt45_graph_list[-1][0]]
true_labels_list = [true_labels_bert_mimic, bert_gpt15_graph_list[-1][1], bert_gpt30_graph_list[-1][1], bert_gpt45_graph_list[-1][1]]
model_names = ["Mimic", "GPT-15 Sentences", "GPT-30 Sentences", "GPT-45 Sentences"]

draw_plots_multiple(predicted_probabilities_list, true_labels_list, model_names)

### BioBert

In [None]:
# Compare Bio_ClinicalBERT across the four training sets, using the outputs stored for each
# run's best-F1 round. The three lists are parallel: position k describes the same run.
predicted_probabilities_list = [predicted_probabilities_biobert_mimic, predicted_probabilities_biobert_gpt15, predicted_probabilities_biobert_gpt30, predicted_probabilities_biobert_gpt45]
# The oddly spelled names (true_labels_bert_biogpt15, true_labels_bioert_gpt30) match the
# spellings assigned in the corresponding training cells.
true_labels_list = [true_labels_biobert_mimic, true_labels_bert_biogpt15, true_labels_bioert_gpt30, true_labels_biobert_gpt45]
model_names = ["Mimic", "GPT-15 Sentences", "GPT-30 Sentences", "GPT-45 Sentences"]

draw_plots_multiple(predicted_probabilities_list, true_labels_list, model_names)

### PubMedBert

In [None]:
# Compare PubMedBERT across the four training sets, using the outputs stored for each run's
# best-F1 round. The three lists are parallel: position k describes the same run.
# The last entry must be the pubbert variable: the 45-sentence PubMedBERT loop assigns
# predicted_probabilities_pubbert_gpt45, and no "..._bert_gpt45" variable exists.
predicted_probabilities_list = [predicted_probabilities_pubbert_mimic, predicted_probabilities_pubbert_gpt15, predicted_probabilities_pubbert_gpt30, predicted_probabilities_pubbert_gpt45]
true_labels_list = [true_labels_pubbert_mimic, true_labels_pubbert_gpt15, true_labels_pubbert_gpt30, true_labels_pubbert_gpt45]
model_names = ["Mimic", "GPT-15 Sentences", "GPT-30 Sentences", "GPT-45 Sentences"]

draw_plots_multiple(predicted_probabilities_list, true_labels_list, model_names)

## Data Comparison

In [None]:
import nltk
nltk.download('punkt')
from nltk.tokenize import sent_tokenize
import statistics
nltk.download('stopwords')
import numpy as np

In [None]:
# bhc_list

In [None]:
# get_train_test_gpt returns a (train, test) pair of DataFrames. Stitch the two halves back
# together so each variable is ONE DataFrame holding every selected generated note; indexing
# the raw tuple with ['text'] would raise TypeError.
gpt15 = pd.concat(get_train_test_gpt("15"), ignore_index=True)
gpt30 = pd.concat(get_train_test_gpt("30"), ignore_index=True)
gpt45 = pd.concat(get_train_test_gpt("45"), ignore_index=True)

In [None]:
gpt15 = gpt15['text'].tolist()
gpt30 = gpt30['text'].tolist()
gpt45 = gpt45['text'].tolist()

In [None]:
def calculate_average_sentence_count(note_list):
    """Return, for every note, how many tokenised sentences have at least 5 characters.

    Despite the name, no averaging happens here: the result is a list with one
    count per note, in input order.
    """
    return [
        sum(1 for sentence in sent_tokenize(note_content) if len(sentence) >= 5)
        for note_content in note_list
    ]

In [None]:
def remove_outliers(data):
    """Return the values of ``data`` that lie within 1.5 * IQR of the quartiles.

    Values equal to either bound are kept; input order is preserved.
    """
    # First and third quartiles
    q1 = np.percentile(data, 25)
    q3 = np.percentile(data, 75)

    # Whisker length: 1.5 times the interquartile range
    whisker = 1.5 * (q3 - q1)
    lowest_allowed = q1 - whisker
    highest_allowed = q3 + whisker

    return list(filter(lambda value: lowest_allowed <= value <= highest_allowed, data))

def calculate_average(data):
    """Return the arithmetic mean of ``data``, or None when it is empty."""
    item_count = len(data)
    if item_count == 0:
        return None
    return sum(data) / item_count

# Mean per-note sentence count for each corpus, after dropping IQR outliers.
# bhc_list holds the MIMIC hospital-course excerpts; gpt15/30/45 hold the generated notes.
average_without_outliers_mimic = calculate_average(remove_outliers(calculate_average_sentence_count(bhc_list)))
average_without_outliers_gpt15 = calculate_average(remove_outliers(calculate_average_sentence_count(gpt15)))
average_without_outliers_gpt30 = calculate_average(remove_outliers(calculate_average_sentence_count(gpt30)))
average_without_outliers_gpt45 = calculate_average(remove_outliers(calculate_average_sentence_count(gpt45)))

print("Average without outliers Mimic:", average_without_outliers_mimic)
print("Average without outliers GPT-15:", average_without_outliers_gpt15)
print("Average without outliers GPT-30:", average_without_outliers_gpt30)
print("Average without outliers GPT-45:", average_without_outliers_gpt45)

In [None]:
# Sample standard deviation of the per-note sentence counts for each corpus,
# after dropping IQR outliers.
std_deviation_mimic = statistics.stdev(remove_outliers(calculate_average_sentence_count(bhc_list)))
std_deviation_gpt15 = statistics.stdev(remove_outliers(calculate_average_sentence_count(gpt15)))
std_deviation_gpt30 = statistics.stdev(remove_outliers(calculate_average_sentence_count(gpt30)))
std_deviation_gpt45 = statistics.stdev(remove_outliers(calculate_average_sentence_count(gpt45)))

# Name the corpus on every line; four identical messages could not be told apart.
print(f'Standard deviation without outliers Mimic: {std_deviation_mimic:.2f}')
print(f'Standard deviation without outliers GPT-15: {std_deviation_gpt15:.2f}')
print(f'Standard deviation without outliers GPT-30: {std_deviation_gpt30:.2f}')
print(f'Standard deviation without outliers GPT-45: {std_deviation_gpt45:.2f}')