## Imports

In [1]:
import pandas as pd
from transformers import AutoTokenizer, DataCollatorWithPadding, AutoModelForSequenceClassification, TrainingArguments, Trainer, EarlyStoppingCallback
from datasets import load_dataset, load_metric
import datetime
import os
import time
import numpy as np
import evaluate
import accelerate
import torch

## Read In Records

In [2]:
# Load the test records. index_col=False tells pandas not to use the first CSV column as the index.
df = pd.read_csv('best_models_test.csv', index_col=False)

In [3]:
# Preview the first five rows of the loaded frame.
print(df.head())

   savsnet_consult_id                                          item_text  \
0             7334512  Bilateral otitis - Ring 1st Mobile number with...   
1             6105807  <<name>> has been shaking his head for the las...   
2             4794769  "Booster L4. Declined KC. Muzzled for exam. V ...   
3             4808820  "Treated for bilateral otitis externa- right e...   
4             8437569  left ear severe otitis-yellow discharge, smell...   

   Binary Class  Multi Class  
0             1            0  
1             0            1  
2             0            1  
3             0            1  
4             0            1  


In [5]:
# Drop previous classifications: everything after the first four columns
# (consult id, text and the two ground-truth label columns) is removed.
# The column labels are passed directly instead of a DataFrame slice, and
# `axis` is omitted because it is redundant once `columns=` is given.
df = df.drop(columns=df.columns[4:])

In [4]:
# Preview the first five rows again.
print(df.head())

   savsnet_consult_id                                          item_text  \
0             7334512  Bilateral otitis - Ring 1st Mobile number with...   
1             6105807  <<name>> has been shaking his head for the las...   
2             4794769  "Booster L4. Declined KC. Muzzled for exam. V ...   
3             4808820  "Treated for bilateral otitis externa- right e...   
4             8437569  left ear severe otitis-yellow discharge, smell...   

   Binary Class  Multi Class  
0             1            0  
1             0            1  
2             0            1  
3             0            1  
4             0            1  


In [5]:
# NOTE: this overwrites the same file that was read at the top of the notebook.
# The "Label Like Original Data" section re-reads this file, so it relies on this write.
df.to_csv('best_models_test.csv', index=False)

## Perform Inference

In [6]:
# Saved classifier directories; each one is passed to from_pretrained() for both the tokenizer and the model.
# NOTE: these are absolute, machine-specific paths. `mc_unbalanced_classifier` also holds a path,
# even though its name lacks the `_path` suffix used by the other two.
binary_balanced_classifier_path = '/opt/jupyterlab/notebooks/DogBERT/Classifiers/Pseudomonas_Otitis/Binary_Classifier/Balanced_Binary_Classifier/DogBERT_psoe_Classifier'
binary_unbalanced_classifier_path = '/opt/jupyterlab/notebooks/DogBERT/Classifiers/Pseudomonas_Otitis/Binary_Classifier/Binary_Weighted_Loss/DogBERT_PSOE_Binary_Classifier_Filtered_Unweighted'
mc_unbalanced_classifier = '/opt/jupyterlab/notebooks/DogBERT/Classifiers/Pseudomonas_Otitis/Multi_Class Classifier/Weighted Loss/DogBERT_PSOE_Multi_Class_Classifier_Weighted'

In [7]:
def predict_sentiment(text):
  """Classify a single text with the currently loaded tokenizer/model pair.

  Reads the notebook-level globals ``tokenizer`` and ``model``; whichever
  pair was assigned most recently is the one used.

  Args:
    text: The text to classify (one record at a time).

  Returns:
    A ``(predicted_class, confidence_score)`` tuple: the index of the
    highest-probability class and that class's softmax probability.
  """
  inputs = tokenizer(text, padding="max_length", truncation=True, return_tensors="pt")
  # Inference only: disable autograd so no computation graph is built per record.
  with torch.no_grad():
    outputs = model(**inputs)
  predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
  # argmax without `dim` works on the flattened tensor; for one input row that is the class index.
  predicted_class = torch.argmax(predictions).item()
  confidence_score = predictions.squeeze()[predicted_class].item()
  return predicted_class, confidence_score

In [8]:
# Rebind the notebook-level `tokenizer` and `model` that predict_sentiment reads.
tokenizer = AutoTokenizer.from_pretrained(binary_balanced_classifier_path)
model = AutoModelForSequenceClassification.from_pretrained(binary_balanced_classifier_path)

# Classify every 'item_text' entry; zip(*...) splits the (class, confidence) tuples into two columns.
df["DogBERT_balanced_binary_predicted_pseudomonas_otitis"], df["DogBERT_balanced_binary_confidence_score"] = zip(*df["item_text"].apply(predict_sentiment))

In [9]:
# Rebind the notebook-level `tokenizer` and `model` that predict_sentiment reads.
tokenizer = AutoTokenizer.from_pretrained(binary_unbalanced_classifier_path)
model = AutoModelForSequenceClassification.from_pretrained(binary_unbalanced_classifier_path)

# Classify every 'item_text' entry; zip(*...) splits the (class, confidence) tuples into two columns.
df["DogBERT_unbalanced_binary_predicted_pseudomonas_otitis"], df["DogBERT_unbalanced_binary_confidence_score"] = zip(*df["item_text"].apply(predict_sentiment))

In [10]:
# Rebind the notebook-level `tokenizer` and `model` that predict_sentiment reads.
tokenizer = AutoTokenizer.from_pretrained(mc_unbalanced_classifier)
model = AutoModelForSequenceClassification.from_pretrained(mc_unbalanced_classifier)

# Classify every 'item_text' entry; zip(*...) splits the (class, confidence) tuples into two columns.
df["DogBERT_unbalanced_multiclass_predicted_pseudomonas_otitis"], df["DogBERT_unbalanced_multiclass_confidence_score"] = zip(*df["item_text"].apply(predict_sentiment))

In [11]:
# Preview the first five rows, now including the prediction and confidence columns.
print(df.head())

   savsnet_consult_id                                          item_text  \
0             7334512  Bilateral otitis - Ring 1st Mobile number with...   
1             6105807  <<name>> has been shaking his head for the las...   
2             4794769  "Booster L4. Declined KC. Muzzled for exam. V ...   
3             4808820  "Treated for bilateral otitis externa- right e...   
4             8437569  left ear severe otitis-yellow discharge, smell...   

   Binary Class  Multi Class  \
0             1            0   
1             0            1   
2             0            1   
3             0            1   
4             0            1   

   DogBERT_balanced_binary_predicted_pseudomonas_otitis  \
0                                                  1      
1                                                  0      
2                                                  0      
3                                                  0      
4                                                  0   

In [21]:
"""
Check records are labelled the same for each classifier
"""

'\nCheck records are labelled the same for each classifier\n'

## Check Performance - Binary

In [36]:
def CreateMetricDict(preds, labels):
    """
    Compute accuracy, precision, recall and F1 for one set of predictions.

    Each score comes from the notebook-level `evaluate` metric object of the
    same name (`accuracy`, `precision`, `recall`, `f1`), called with its
    default settings (no `average` argument is passed).

    Args:
        preds: list of predicted class labels
        labels: list of ground truth labels, in the same order as `preds`

    Returns:
        dict with the keys "accuracy", "precision", "recall" and "f1"
    """
    scorers = {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1}
    return {
        name: scorer.compute(predictions=preds, references=labels)[name]
        for name, scorer in scorers.items()
    }

In [13]:
# Load the four `evaluate` metrics that the CreateMetricDict helpers read as globals.
accuracy, precision, recall, f1 = (
    evaluate.load(metric_name)
    for metric_name in ("accuracy", "precision", "recall", "f1")
)

In [14]:
# Pull the two binary models' predicted classes and the 'Binary Class' ground truth into plain lists.
# Note: the name `labels` is rebound by later cells (multi-class and original-labelling sections).
predictions_binary_balanced = list(df['DogBERT_balanced_binary_predicted_pseudomonas_otitis'])
predictions_binary_unbalanced = list(df['DogBERT_unbalanced_binary_predicted_pseudomonas_otitis'])
labels = list(df['Binary Class'])

In [34]:
# Class balance of the 'Binary Class' ground-truth column.
print(df['Binary Class'].value_counts())

Binary Class
0.0    122
1.0     27
Name: count, dtype: int64


In [15]:
# Score the balanced binary classifier against the 'Binary Class' labels.
balanced_binary_metrics = CreateMetricDict(predictions_binary_balanced, labels)

In [16]:
# Score the unbalanced binary classifier against the 'Binary Class' labels.
unbalanced_binary_metrics = CreateMetricDict(predictions_binary_unbalanced, labels)

In [17]:
# Show the balanced binary classifier's metrics.
print(balanced_binary_metrics)

{'accuracy': 0.84, 'precision': 0.5454545454545454, 'recall': 0.8571428571428571, 'f1': 0.6666666666666666}


In [18]:
# Show the unbalanced binary classifier's metrics.
print(unbalanced_binary_metrics)

{'accuracy': 0.8333333333333334, 'precision': 0.5348837209302325, 'recall': 0.8214285714285714, 'f1': 0.647887323943662}


## Check Performance - Multi Class

In [19]:
def CreateMetricDict(preds, labels):
    """
    Compute accuracy plus per-class, macro and weighted precision/recall/F1.

    NOTE: this definition reuses the name of the metric helper defined in the
    binary section, so from this cell onwards that earlier version is shadowed.

    Each score comes from the notebook-level `evaluate` metric object of the
    same name. Precision, recall and F1 are each computed three times, with
    `average=None` ("*_raw"), `average='macro'` and `average='weighted'`.

    Args:
        preds: list of predicted class labels
        labels: list of ground truth labels, in the same order as `preds`

    Returns:
        dict with "accuracy" followed by "<metric>_raw", "<metric>_macro" and
        "<metric>_weighted" for each of precision, recall and f1
    """
    scorers = (("precision", precision), ("recall", recall), ("f1", f1))
    averaging_modes = (("raw", None), ("macro", "macro"), ("weighted", "weighted"))

    metric_dict = {"accuracy": accuracy.compute(predictions=preds, references=labels)["accuracy"]}
    for suffix, average in averaging_modes:
        for name, scorer in scorers:
            scores = scorer.compute(predictions=preds, references=labels, average=average)
            metric_dict[f"{name}_{suffix}"] = scores[name]

    return metric_dict

In [20]:
# Score the multi-class model against the 'Multi Class' ground truth.
# Note: `labels` is rebound here; it previously held the 'Binary Class' column.
predictions = list(df['DogBERT_unbalanced_multiclass_predicted_pseudomonas_otitis'])
labels = list(df['Multi Class'])

unbalanced_mc_metrics = CreateMetricDict(predictions, labels)

In [21]:
# Show the multi-class metrics; the "*_raw" entries are per-class arrays.
print(unbalanced_mc_metrics)

{'accuracy': 0.8066666666666666, 'precision_raw': array([0.66666667, 0.84615385, 1.        , 1.        ]), 'recall_raw': array([0.66666667, 0.93617021, 0.35714286, 0.66666667]), 'f1_raw': array([0.66666667, 0.88888889, 0.52631579, 0.8       ]), 'precision_macro': 0.8782051282051282, 'recall_macro': 0.6566616008105369, 'f1_macro': 0.7204678362573098, 'precision_weighted': 0.8169230769230769, 'recall_weighted': 0.8066666666666666, 'f1_weighted': 0.7954931773879143}


## Label Like Original Data

In [23]:
# Re-read the test file (rewritten earlier in this notebook by df.to_csv) into a separate frame.
df_2 = pd.read_csv('best_models_test.csv', index_col=False)

In [24]:
# Print the re-loaded frame.
print(df_2)

     savsnet_consult_id                                          item_text  \
0               7334512  Bilateral otitis - Ring 1st Mobile number with...   
1               6105807  <<name>> has been shaking his head for the las...   
2               4794769  "Booster L4. Declined KC. Muzzled for exam. V ...   
3               4808820  "Treated for bilateral otitis externa- right e...   
4               8437569  left ear severe otitis-yellow discharge, smell...   
..                  ...                                                ...   
145            11185557  Presenting: otitis externa right ear (RODS). o...   
146            11186959  Rechcek ears. OR doing same. Left ear dirty, r...   
147             7280453  . Next appointment in 1 week. here for re-cehc...   
148            10353911  . Rescue from RSPCA. Had slight growth of pseu...   
149             6063132  In for chekc ears. Results confirm ear infecti...   

     Binary Class  Multi Class  
0               1            0

In [25]:
# Collapse the multi-class labels into a binary label:
# multi-class 1 becomes 0, while multi-class 0, 2 and 3 all become 1.
replacements = {0: 1, 1:0, 2:1, 3:1}
df_2['Binary Class (Original Labelling)'] = df_2['Multi Class'].replace(replacements)

In [26]:
# Print the frame with the new 'Binary Class (Original Labelling)' column.
print(df_2)

     savsnet_consult_id                                          item_text  \
0               7334512  Bilateral otitis - Ring 1st Mobile number with...   
1               6105807  <<name>> has been shaking his head for the las...   
2               4794769  "Booster L4. Declined KC. Muzzled for exam. V ...   
3               4808820  "Treated for bilateral otitis externa- right e...   
4               8437569  left ear severe otitis-yellow discharge, smell...   
..                  ...                                                ...   
145            11185557  Presenting: otitis externa right ear (RODS). o...   
146            11186959  Rechcek ears. OR doing same. Left ear dirty, r...   
147             7280453  . Next appointment in 1 week. here for re-cehc...   
148            10353911  . Rescue from RSPCA. Had slight growth of pseu...   
149             6063132  In for chekc ears. Results confirm ear infecti...   

     Binary Class  Multi Class  Binary Class (Original Labellin

## Classify Binary Class With Original Labelling

In [27]:
# Rebind the notebook-level `tokenizer` and `model` that predict_sentiment reads.
tokenizer = AutoTokenizer.from_pretrained(binary_balanced_classifier_path)
model = AutoModelForSequenceClassification.from_pretrained(binary_balanced_classifier_path)

# Classify every 'item_text' entry of df_2; zip(*...) splits the (class, confidence) tuples into two columns.
df_2["DogBERT_balanced_binary_predicted_pseudomonas_otitis"], df_2["DogBERT_balanced_binary_confidence_score"] = zip(*df_2["item_text"].apply(predict_sentiment))

In [28]:
# Rebind the notebook-level `tokenizer` and `model` that predict_sentiment reads.
tokenizer = AutoTokenizer.from_pretrained(binary_unbalanced_classifier_path)
model = AutoModelForSequenceClassification.from_pretrained(binary_unbalanced_classifier_path)

# Classify every 'item_text' entry of df_2; zip(*...) splits the (class, confidence) tuples into two columns.
df_2["DogBERT_unbalanced_binary_predicted_pseudomonas_otitis"], df_2["DogBERT_unbalanced_binary_confidence_score"] = zip(*df_2["item_text"].apply(predict_sentiment))

In [29]:
# Rebind the notebook-level `tokenizer` and `model` that predict_sentiment reads.
tokenizer = AutoTokenizer.from_pretrained(mc_unbalanced_classifier)
model = AutoModelForSequenceClassification.from_pretrained(mc_unbalanced_classifier)

# Classify every 'item_text' entry of df_2; zip(*...) splits the (class, confidence) tuples into two columns.
# NOTE(review): no later cell visible here scores these df_2 multi-class predictions; the reported
# multi-class metrics come from `unbalanced_mc_metrics`, which was computed on `df`.
df_2["DogBERT_unbalanced_multiclass_predicted_pseudomonas_otitis"], df_2["DogBERT_unbalanced_multiclass_confidence_score"] = zip(*df_2["item_text"].apply(predict_sentiment))

In [30]:
# Print df_2 with the prediction and confidence columns added.
print(df_2)

     savsnet_consult_id                                          item_text  \
0               7334512  Bilateral otitis - Ring 1st Mobile number with...   
1               6105807  <<name>> has been shaking his head for the las...   
2               4794769  "Booster L4. Declined KC. Muzzled for exam. V ...   
3               4808820  "Treated for bilateral otitis externa- right e...   
4               8437569  left ear severe otitis-yellow discharge, smell...   
..                  ...                                                ...   
145            11185557  Presenting: otitis externa right ear (RODS). o...   
146            11186959  Rechcek ears. OR doing same. Left ear dirty, r...   
147             7280453  . Next appointment in 1 week. here for re-cehc...   
148            10353911  . Rescue from RSPCA. Had slight growth of pseu...   
149             6063132  In for chekc ears. Results confirm ear infecti...   

     Binary Class  Multi Class  Binary Class (Original Labellin

In [32]:
# Binary predictions from df_2 plus the ground truth under the original labelling scheme.
# Note: `labels` is rebound again here; it previously held the 'Multi Class' column of df.
predictions_binary_balanced_ol = list(df_2['DogBERT_balanced_binary_predicted_pseudomonas_otitis'])
predictions_binary_unbalanced_ol = list(df_2['DogBERT_unbalanced_binary_predicted_pseudomonas_otitis'])
labels = list(df_2['Binary Class (Original Labelling)'])

In [37]:
# CreateMetricDict is defined twice in this notebook. On a top-to-bottom run the
# multi-class version is the live one here, and it returns "*_raw"/"*_macro"/"*_weighted"
# keys that the summary table further down cannot consume. The four metrics are
# therefore computed explicitly with their default settings, independent of which
# CreateMetricDict is currently bound.
def _binary_metric_dict(preds, refs):
    """Return accuracy, precision, recall and f1 computed with each metric's default settings."""
    scorers = {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1}
    return {
        name: scorer.compute(predictions=preds, references=refs)[name]
        for name, scorer in scorers.items()
    }

balanced_binary_metrics_ol = _binary_metric_dict(predictions_binary_balanced_ol, labels)
unbalanced_binary_metrics_ol = _binary_metric_dict(predictions_binary_unbalanced_ol, labels)

In [38]:
# Balanced binary classifier scored against the original labelling.
print(balanced_binary_metrics_ol)

{'accuracy': 0.8533333333333334, 'precision': 0.8863636363636364, 'recall': 0.6964285714285714, 'f1': 0.78}


In [39]:
# Unbalanced binary classifier scored against the original labelling.
print(unbalanced_binary_metrics_ol)

{'accuracy': 0.82, 'precision': 0.8372093023255814, 'recall': 0.6428571428571429, 'f1': 0.7272727272727273}


In [40]:
# Re-print the multi-class metrics computed earlier (on df) for comparison.
print(unbalanced_mc_metrics)

{'accuracy': 0.8066666666666666, 'precision_raw': array([0.66666667, 0.84615385, 1.        , 1.        ]), 'recall_raw': array([0.66666667, 0.93617021, 0.35714286, 0.66666667]), 'f1_raw': array([0.66666667, 0.88888889, 0.52631579, 0.8       ]), 'precision_macro': 0.8782051282051282, 'recall_macro': 0.6566616008105369, 'f1_macro': 0.7204678362573098, 'precision_weighted': 0.8169230769230769, 'recall_weighted': 0.8066666666666666, 'f1_weighted': 0.7954931773879143}


In [45]:
# Keep only the overall accuracy and the weighted-average scores from the multi-class metrics.
keys_to_keep = ['accuracy', 'precision_weighted', 'recall_weighted', 'f1_weighted']
unbalanced_mc_metrics_weighted = {
    name: unbalanced_mc_metrics[name]
    for name in unbalanced_mc_metrics
    if name in keys_to_keep
}
print(unbalanced_mc_metrics_weighted)

{'accuracy': 0.8066666666666666, 'precision_weighted': 0.8169230769230769, 'recall_weighted': 0.8066666666666666, 'f1_weighted': 0.7954931773879143}


In [46]:
# Rename values, make note that all values for this are weighted in final report
unbalanced_mc_metrics_weighted['precision'] = unbalanced_mc_metrics_weighted.pop('precision_weighted')
unbalanced_mc_metrics_weighted['recall'] = unbalanced_mc_metrics_weighted.pop('recall_weighted')
unbalanced_mc_metrics_weighted['f1'] = unbalanced_mc_metrics_weighted.pop('f1_weighted')

In [47]:
# Confirm the renamed keys.
print(unbalanced_mc_metrics_weighted)

{'accuracy': 0.8066666666666666, 'precision': 0.8169230769230769, 'recall': 0.8066666666666666, 'f1': 0.7954931773879143}


In [50]:
# Per-class precision at index 1 of the average=None array.
print(unbalanced_mc_metrics['precision_raw'][1])

0.8461538461538461


In [51]:
# Per-class precision/recall/F1 at index 1 of the multi-class results; accuracy is still the overall accuracy.
# NOTE(review): the "Label Like Original Data" cell maps multi-class label 1 to binary 0, yet this dict is
# named "positive_psoe". Confirm that index 1 really is the positive Pseudomonas class before reporting.
unbalanced_mc_metrics_positive_psoe_only = {
    'accuracy': unbalanced_mc_metrics['accuracy'],
    **{
        name: unbalanced_mc_metrics[f'{name}_raw'][1]
        for name in ('precision', 'recall', 'f1')
    },
}

In [52]:
# Show the class-index-1 metrics assembled above.
print(unbalanced_mc_metrics_positive_psoe_only)

{'accuracy': 0.8066666666666666, 'precision': 0.8461538461538461, 'recall': 0.9361702127659575, 'f1': 0.8888888888888888}


In [55]:
# Metric dicts to tabulate, in the same order as the 'model' labels below.
dicts = [balanced_binary_metrics_ol, unbalanced_binary_metrics_ol, unbalanced_mc_metrics_weighted, unbalanced_mc_metrics_positive_psoe_only]

metrics_dict = {'model':['DogBERT - Balanced Binary', 'DogBERT - Unbalanced Binary', 'DogBERT - Multi-Class', 'DogBERT Multi-Class - Positive Pseudomonas Classifications Only']}

# Build one column per metric across the four dicts. A comprehension is used instead of
# `for model in dicts:` because that loop rebound the notebook-level `model`, which
# predict_sentiment reads, to a plain dict.
metrics_dict.update({
    metric_name: [scores[metric_name] for scores in dicts]
    for metric_name in ('accuracy', 'precision', 'recall', 'f1')
})

metrics_df = pd.DataFrame(metrics_dict)
print(metrics_df)

                                               model  accuracy  precision  \
0                            DogBERT Balanced Binary  0.853333   0.886364   
1                          DogBERT Unbalanced Binary  0.820000   0.837209   
2                                DogBERT Multi-Class  0.806667   0.816923   
3  DogBERT Multi-Class - Positive Pseudomonas Cla...  0.806667   0.846154   

     recall        f1  
0  0.696429  0.780000  
1  0.642857  0.727273  
2  0.806667  0.795493  
3  0.936170  0.888889  


In [57]:
# Render the summary as a LaTeX table with floats rounded to two decimals.
# The previous `formatters={"name": str.upper}` argument was removed: the frame has no
# "name" column, so that formatter was never applied.
print(metrics_df.to_latex(index=False, float_format="{:.2f}".format))

\begin{tabular}{lrrrr}
\toprule
model & accuracy & precision & recall & f1 \\
\midrule
DogBERT Balanced Binary & 0.85 & 0.89 & 0.70 & 0.78 \\
DogBERT Unbalanced Binary & 0.82 & 0.84 & 0.64 & 0.73 \\
DogBERT Multi-Class & 0.81 & 0.82 & 0.81 & 0.80 \\
DogBERT Multi-Class - Positive Pseudomonas Classifications Only & 0.81 & 0.85 & 0.94 & 0.89 \\
\bottomrule
\end{tabular}

