## Imports

In [1]:
import pandas as pd
from transformers import AutoTokenizer, DataCollatorWithPadding, AutoModelForSequenceClassification, TrainingArguments, Trainer, EarlyStoppingCallback
from datasets import load_dataset, load_metric
import datetime
import os
import time
import numpy as np
import evaluate
import accelerate
import torch

## Import Data

In [2]:
print(os.getcwd())

/opt/jupyterlab/notebooks/DogBERT/Classifiers/Pseudomonas_Otitis/Multi_Class Classifier/Weighted Loss


In [3]:
os.chdir('../..')

In [4]:
df_psoe = pd.read_excel('adamwilliams-OtitisStudyPseudomonas (1).xls', sheet_name='Case Data', index_col=False)

In [5]:
os.chdir('../../../savsnet_resources/pickles')

In [6]:
df_narratives = pd.read_pickle('narrative_pickle.pkl.gz', compression='gzip').drop_duplicates(subset='savsnet_consult_id', keep='first')

In [7]:
os.chdir('../../DogBERT/Classifiers/Pseudomonas_Otitis/Multi_Class Classifier/Weighted Loss')

In [8]:
# Filter for just dog records
df_psoe = df_psoe[df_psoe.species == 'dog']

# Remove unclassified records
df_psoe = df_psoe.dropna(subset=['PseudomonasOtitis'])

"""
Create Dataset
"""
# Join in tick labels
df_dataset = pd.merge(df_narratives, df_psoe, on='savsnet_consult_id', how='inner')

In [9]:
print(df_dataset.head())

                                           item_text  consult_record       pk  \
0  "<<identifier>> otitis externa been really goo...          231003  2040703   
1  "HL issues and ears. been slowing up last few ...          231674  2041630   
2  "bilateral oe thickened ear canals cleaning is...          232326  2042546   
3  "left eear purulent otitis suspecrt pseudomona...          233568  2044364   
4  "Bilat OE again. Not purulent or ulcerated, so...          237069  2049428   

        consult_record_date  savsnet_consult_id  Unnamed: 0  \
0 2014-06-30 17:09:38+00:00               95721           0   
1 2014-07-26 11:24:00+00:00              129032           1   
2 2014-08-29 16:22:44+00:00              169684           2   
3 2014-07-16 19:29:10+00:00              117477           3   
4 2014-07-15 17:17:57+00:00              115552           4   

             searchterm__name  narrative_item_id PseudomonasOtitis  \
0  Pseudomonas and ear/otitis            2040703                

In [10]:
# Extract unique labels
unique_labels = df_dataset['PseudomonasOtitis'].unique()
print(unique_labels)
# Create a dictionary to map labels to numbers
label2id = {label: i for i, label in enumerate(unique_labels)}
print(label2id)

['?' '✓' '!' '⍉']
{'?': 0, '✓': 1, '!': 2, '⍉': 3}


In [11]:
id2label = {i:label for i, label in enumerate(unique_labels)}
print(id2label)

{0: '?', 1: '✓', 2: '!', 3: '⍉'}


In [12]:
# Label dataset   
df_dataset['PseudomonasOtitis_ID'] = df_dataset['PseudomonasOtitis'].map(label2id)

In [13]:
print(df_dataset.head())

                                           item_text  consult_record       pk  \
0  "<<identifier>> otitis externa been really goo...          231003  2040703   
1  "HL issues and ears. been slowing up last few ...          231674  2041630   
2  "bilateral oe thickened ear canals cleaning is...          232326  2042546   
3  "left eear purulent otitis suspecrt pseudomona...          233568  2044364   
4  "Bilat OE again. Not purulent or ulcerated, so...          237069  2049428   

        consult_record_date  savsnet_consult_id  Unnamed: 0  \
0 2014-06-30 17:09:38+00:00               95721           0   
1 2014-07-26 11:24:00+00:00              129032           1   
2 2014-08-29 16:22:44+00:00              169684           2   
3 2014-07-16 19:29:10+00:00              117477           3   
4 2014-07-15 17:17:57+00:00              115552           4   

             searchterm__name  narrative_item_id PseudomonasOtitis  \
0  Pseudomonas and ear/otitis            2040703                

In [14]:
df_dataset = df_dataset[["PseudomonasOtitis_ID", 'item_text']].reset_index()
# Rename columns
df_dataset = df_dataset.rename(columns={'PseudomonasOtitis_ID':'label', 'item_text':'text'})
df_dataset = df_dataset.drop("index", axis=1)
print(df_dataset.head())

   label                                               text
0      0  "<<identifier>> otitis externa been really goo...
1      1  "HL issues and ears. been slowing up last few ...
2      1  "bilateral oe thickened ear canals cleaning is...
3      1  "left eear purulent otitis suspecrt pseudomona...
4      2  "Bilat OE again. Not purulent or ulcerated, so...


In [15]:
print(df_dataset['label'].value_counts())

label
1    638
0    143
2     74
3     39
Name: count, dtype: int64


## Split Data

In [16]:
# Generate train/val/test split 
"""
Split Each Category into 80/10/10 train/val/test sets
"""
# for each label in the dataframe, randomly select the number of records in the group_size. Note each index selected and remove from test set
import random

# def proportional_split_train_val_test(df):
# get labels
labels = df_dataset['label'].unique()
    
# Set up train_set, val_set and test_set
train_set = []
val_set = []
test_set = []
    
#iterate over labels
for label in labels:
    # Get indexes of labels in a given group
    indexes = df_dataset[df_dataset['label'] == label].index.to_list()
    
    # Get size of group
    train_size = round(len(indexes) * 0.8)
    val_test_size = round(len(indexes) * 0.1)
    
    # Randomly sample train_size indexes to make train_set. Remove these indexes 
    train_indexes = random.sample(indexes, train_size)
    train_set += train_indexes
    
    # Remove train_indexes from overall indexes
    indexes = list(set(indexes) - set(train_indexes))
    
    # Randomly sample val_test_size indexes to make val_set. Remove these indexes
    val_indexes = random.sample(indexes, val_test_size)
    val_set += val_indexes
    
    # Remove train_indexes from overall indexes
    indexes = list(set(indexes) - set(val_indexes))
    test_set += indexes

print(len(train_set), len(val_set), len(test_set))
    # return train_set, val_set, test_set

714 89 91


In [17]:
# Create train, val and test dataframes
df_train = df_dataset.iloc[train_set]
df_val = df_dataset.iloc[val_set]
df_test = df_dataset.iloc[test_set]

## Calculate Class Weights

In [18]:
# iterate over each column and calculate the weight based on max value in column
def calculate_class_weights(train_dataset, label_cols):
    labels = list(label_cols.values())
    total_samples = len(train_dataset)
    label_lst = []
    class_weights = []
    train_dataset_vc = train_dataset['label'].value_counts()
    for label in labels:
        class_frequency = train_dataset_vc[label]
        weight = total_samples / (len(labels) * class_frequency)
        if weight < 1:
            weight = 1
        class_weights.append(weight)
        label_lst.append(label)            
    weights = torch.tensor(class_weights, dtype=torch.float32)
    return weights


weights = calculate_class_weights(df_train, label2id)
print(id2label)
print(weights)

{0: '?', 1: '✓', 2: '!', 3: '⍉'}
tensor([1.5658, 1.0000, 3.0254, 5.7581])


In [20]:
df_train.to_csv('train.csv')
df_val.to_csv('val.csv')
df_test.to_csv('test.csv')

In [21]:
datasets = load_dataset("csv", data_files={'train': "train.csv",
                                           'eval': "val.csv",
                                           'test':"test.csv"})

Generating train split: 0 examples [00:00, ? examples/s]

Generating eval split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

## Tokenize Text

In [22]:
# Load BERT Tokenizer
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

In [23]:
def tokenize_and_encode(examples):
    return tokenizer(
        examples["text"], truncation=True, padding="max_length", max_length=512
    )

In [24]:
tokenized_datasets = datasets.map(
    tokenize_and_encode, batched=True)

Map:   0%|          | 0/714 [00:00<?, ? examples/s]

Map:   0%|          | 0/89 [00:00<?, ? examples/s]

Map:   0%|          | 0/91 [00:00<?, ? examples/s]

## Create Unweighted BERT Classifier

In [35]:
model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=4, id2label=id2label, label2id=label2id)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [36]:
# Setup weights and biases stuff
os.environ["WANDB_PROJECT"]="Pseudomonas_Otitis_Classifier_MC_Post_Regex"
os.environ["WANDB_LOG_MODEL"] = "checkpoint"  # log all model checkpoints

In [37]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = model.to(device)

In [38]:
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
early_stopping_callback = EarlyStoppingCallback(early_stopping_patience=3)

In [39]:
def compute_metrics(eval_preds):
    accuracy = evaluate.load("accuracy")
    f1 = evaluate.load("f1")
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)
    metrics = {"accuracy": accuracy.compute(predictions=predictions, references=labels), "f1_weighted":f1.compute(predictions=predictions, references=labels, average='weighted')}
    return metrics

In [40]:
training_args = TrainingArguments(
    output_dir= "BERT_PSOE_Multi_Class_Classifier_Unweighted",
    learning_rate=2e-5,
    per_device_train_batch_size=12,
    per_device_eval_batch_size=12,
    num_train_epochs=30,
    weight_decay=0.01,
    save_strategy = "epoch",
    eval_strategy = "epoch",
    load_best_model_at_end=True,
    report_to="wandb",
    fp16 = True,
    run_name="BERT_Unweighted",
    save_total_limit=3
)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [41]:
trainer = Trainer(
     model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['eval'],
    tokenizer=tokenizer,
    data_collator=data_collator,
    callbacks=[early_stopping_callback],
    compute_metrics=compute_metrics
)
trainer.train()
trainer.save_model()

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1 Weighted
1,No log,0.876445,{'accuracy': 0.7191011235955056},{'f1': 0.6016009400014688}
2,No log,0.876661,{'accuracy': 0.7191011235955056},{'f1': 0.6016009400014688}
3,No log,0.899711,{'accuracy': 0.7191011235955056},{'f1': 0.6016009400014688}
4,No log,0.919575,{'accuracy': 0.7191011235955056},{'f1': 0.6016009400014688}


[34m[1mwandb[0m: Adding directory to artifact (./BERT_PSOE_Multi_Class_Classifier_Unweighted/checkpoint-30)... Done. 1.7s
[34m[1mwandb[0m: Adding directory to artifact (./BERT_PSOE_Multi_Class_Classifier_Unweighted/checkpoint-60)... Done. 1.7s
[34m[1mwandb[0m: Adding directory to artifact (./BERT_PSOE_Multi_Class_Classifier_Unweighted/checkpoint-90)... Done. 1.7s
[34m[1mwandb[0m: Adding directory to artifact (./BERT_PSOE_Multi_Class_Classifier_Unweighted/checkpoint-120)... Done. 1.7s


In [42]:
from numba import cuda
import gc
torch.cuda.empty_cache()
gc.collect()

0

## BERT Weighted Classifier

In [43]:
model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=4, id2label=id2label, label2id=label2id)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [44]:
class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        loss_fct = torch.nn.CrossEntropyLoss(
            weight=weights
        ).to("cuda")
        loss = loss_fct(logits, labels)
        return (loss, outputs) if return_outputs else loss

In [45]:
training_args = TrainingArguments(
    output_dir= "BERT_PSOE_Multi_Class_Classifier_Weighted",
    learning_rate=2e-5,
    per_device_train_batch_size=12,
    per_device_eval_batch_size=12,
    num_train_epochs=30,
    weight_decay=0.01,
    save_strategy = "epoch",
    eval_strategy = "epoch",
    load_best_model_at_end=True,
    report_to="wandb",
    fp16 = True,
    run_name="BERT_Weighted",
    save_total_limit=3
)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [46]:
trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['eval'],
    tokenizer=tokenizer,
    data_collator=data_collator,
    callbacks=[early_stopping_callback],
    compute_metrics=compute_metrics
)
trainer.train()
trainer.save_model()

  trainer = CustomTrainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1 Weighted
1,No log,1.079669,{'accuracy': 0.7078651685393258},{'f1': 0.5960969840331165}
2,No log,1.08708,{'accuracy': 0.7191011235955056},{'f1': 0.6016009400014688}
3,No log,1.053728,{'accuracy': 0.6629213483146067},{'f1': 0.5861105186947884}
4,No log,1.076201,{'accuracy': 0.7191011235955056},{'f1': 0.6150259195872709}
5,No log,1.04891,{'accuracy': 0.7078651685393258},{'f1': 0.6072980203317282}
6,No log,1.075109,{'accuracy': 0.6853932584269663},{'f1': 0.5930424806829301}
7,No log,1.063764,{'accuracy': 0.7078651685393258},{'f1': 0.6072980203317282}
8,No log,1.086668,{'accuracy': 0.7078651685393258},{'f1': 0.6388394155055691}


[34m[1mwandb[0m: Adding directory to artifact (./BERT_PSOE_Multi_Class_Classifier_Weighted/checkpoint-30)... Done. 1.7s
[34m[1mwandb[0m: Adding directory to artifact (./BERT_PSOE_Multi_Class_Classifier_Weighted/checkpoint-60)... Done. 1.7s
[34m[1mwandb[0m: Adding directory to artifact (./BERT_PSOE_Multi_Class_Classifier_Weighted/checkpoint-90)... Done. 1.7s
[34m[1mwandb[0m: Adding directory to artifact (./BERT_PSOE_Multi_Class_Classifier_Weighted/checkpoint-120)... Done. 1.7s
[34m[1mwandb[0m: Adding directory to artifact (./BERT_PSOE_Multi_Class_Classifier_Weighted/checkpoint-150)... Done. 1.7s
[34m[1mwandb[0m: Adding directory to artifact (./BERT_PSOE_Multi_Class_Classifier_Weighted/checkpoint-180)... Done. 1.7s
[34m[1mwandb[0m: Adding directory to artifact (./BERT_PSOE_Multi_Class_Classifier_Weighted/checkpoint-210)... Done. 1.7s
[34m[1mwandb[0m: Adding directory to artifact (./BERT_PSOE_Multi_Class_Classifier_Weighted/checkpoint-240)... Done. 1.7s


In [47]:
from numba import cuda
import gc
torch.cuda.empty_cache()
gc.collect()

572

## DogBERT Unweighted Classifier

In [53]:
# Load Model
model_dir = "/opt/jupyterlab/notebooks/DogBERT/Domain Adaptation/DogBERT v0.0.2"
model = AutoModelForSequenceClassification.from_pretrained(model_dir, num_labels=4, id2label=id2label, label2id=label2id)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /opt/jupyterlab/notebooks/DogBERT/Domain Adaptation/DogBERT v0.0.2 and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [54]:
training_args = TrainingArguments(
    output_dir= "DogBERT_PSOE_Multi_Class_Classifier_Unweighted",
    learning_rate=2e-5,
    per_device_train_batch_size=12,
    per_device_eval_batch_size=12,
    num_train_epochs=20,
    weight_decay=0.01,
    save_strategy = "epoch",
    eval_strategy = "epoch",
    load_best_model_at_end=True,
    report_to="wandb",
    fp16 = True,
    run_name="DogBERT_Unweighted",
    save_total_limit=3
)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [55]:
trainer = Trainer(
     model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['eval'],
    tokenizer=tokenizer,
    data_collator=data_collator,
    callbacks=[early_stopping_callback],
    compute_metrics=compute_metrics
)
trainer.train()
trainer.save_model()

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1 Weighted
1,No log,0.861975,{'accuracy': 0.7191011235955056},{'f1': 0.6016009400014688}
2,No log,0.704413,{'accuracy': 0.6966292134831461},{'f1': 0.6125353512191394}
3,No log,0.57666,{'accuracy': 0.7865168539325843},{'f1': 0.7574522231825604}
4,No log,0.57827,{'accuracy': 0.7865168539325843},{'f1': 0.7841544223566695}
5,No log,0.677979,{'accuracy': 0.797752808988764},{'f1': 0.7855208461950036}
6,No log,0.595179,{'accuracy': 0.8426966292134831},{'f1': 0.844087747458534}


[34m[1mwandb[0m: Adding directory to artifact (./DogBERT_PSOE_Multi_Class_Classifier_Unweighted/checkpoint-30)... Done. 2.0s
[34m[1mwandb[0m: Adding directory to artifact (./DogBERT_PSOE_Multi_Class_Classifier_Unweighted/checkpoint-60)... Done. 1.9s
[34m[1mwandb[0m: Adding directory to artifact (./DogBERT_PSOE_Multi_Class_Classifier_Unweighted/checkpoint-90)... Done. 1.7s
[34m[1mwandb[0m: Adding directory to artifact (./DogBERT_PSOE_Multi_Class_Classifier_Unweighted/checkpoint-120)... Done. 1.8s
[34m[1mwandb[0m: Adding directory to artifact (./DogBERT_PSOE_Multi_Class_Classifier_Unweighted/checkpoint-150)... Done. 1.7s
[34m[1mwandb[0m: Adding directory to artifact (./DogBERT_PSOE_Multi_Class_Classifier_Unweighted/checkpoint-180)... Done. 1.8s


In [56]:
from numba import cuda
import gc
torch.cuda.empty_cache()
gc.collect()

570

## DogBERT Weighted

In [57]:
# Load Model
model_dir = "/opt/jupyterlab/notebooks/DogBERT/Domain Adaptation/DogBERT v0.0.2"
model = AutoModelForSequenceClassification.from_pretrained(model_dir, num_labels=4, id2label=id2label, label2id=label2id)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /opt/jupyterlab/notebooks/DogBERT/Domain Adaptation/DogBERT v0.0.2 and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [58]:
training_args = TrainingArguments(
    output_dir= "DogBERT_PSOE_Multi_Class_Classifier_Weighted",
    learning_rate=2e-5,
    per_device_train_batch_size=12,
    per_device_eval_batch_size=12,
    num_train_epochs=30,
    weight_decay=0.01,
    save_strategy = "epoch",
    eval_strategy = "epoch",
    load_best_model_at_end=True,
    report_to="wandb",
    fp16 = True,
    run_name="DogBERT_Weighted",
    save_total_limit=3
)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [59]:
trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['eval'],
    tokenizer=tokenizer,
    data_collator=data_collator,
    callbacks=[early_stopping_callback],
    compute_metrics=compute_metrics
)
trainer.train()
trainer.save_model()

  trainer = CustomTrainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1 Weighted
1,No log,1.012914,{'accuracy': 0.7191011235955056},{'f1': 0.6016009400014688}
2,No log,0.761601,{'accuracy': 0.7078651685393258},{'f1': 0.6527719471539696}
3,No log,0.637763,{'accuracy': 0.7865168539325843},{'f1': 0.7907312254368946}
4,No log,0.521367,{'accuracy': 0.8314606741573034},{'f1': 0.8371390600459104}
5,No log,0.587775,{'accuracy': 0.8426966292134831},{'f1': 0.8435283748069018}
6,No log,0.62199,{'accuracy': 0.8426966292134831},{'f1': 0.8338588043141562}
7,No log,0.779723,{'accuracy': 0.8314606741573034},{'f1': 0.8194980155402761}


[34m[1mwandb[0m: Adding directory to artifact (./DogBERT_PSOE_Multi_Class_Classifier_Weighted/checkpoint-30)... Done. 1.8s
[34m[1mwandb[0m: Adding directory to artifact (./DogBERT_PSOE_Multi_Class_Classifier_Weighted/checkpoint-60)... Done. 1.7s
[34m[1mwandb[0m: Adding directory to artifact (./DogBERT_PSOE_Multi_Class_Classifier_Weighted/checkpoint-90)... Done. 1.7s
[34m[1mwandb[0m: Adding directory to artifact (./DogBERT_PSOE_Multi_Class_Classifier_Weighted/checkpoint-120)... Done. 1.9s
[34m[1mwandb[0m: Adding directory to artifact (./DogBERT_PSOE_Multi_Class_Classifier_Weighted/checkpoint-150)... Done. 2.1s
[34m[1mwandb[0m: Adding directory to artifact (./DogBERT_PSOE_Multi_Class_Classifier_Weighted/checkpoint-180)... Done. 1.7s
[34m[1mwandb[0m: Adding directory to artifact (./DogBERT_PSOE_Multi_Class_Classifier_Weighted/checkpoint-210)... Done. 1.7s


In [60]:
from numba import cuda
import gc
torch.cuda.empty_cache()
gc.collect()

570

## PetBERT Unweighted

In [61]:
# Load PetBERT Tokenizer
tokenizer = AutoTokenizer.from_pretrained('SAVSNET/PetBERT')

In [62]:
model = AutoModelForSequenceClassification.from_pretrained('SAVSNET/PetBERT', num_labels=4, id2label=id2label, label2id=label2id)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SAVSNET/PetBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [63]:
def tokenize_and_encode(examples):
    return tokenizer(
        examples["text"], truncation=True, padding="max_length", max_length=512
    )

In [64]:
tokenized_datasets = datasets.map(
    tokenize_and_encode, batched=True)

Map:   0%|          | 0/714 [00:00<?, ? examples/s]

Map:   0%|          | 0/89 [00:00<?, ? examples/s]

Map:   0%|          | 0/91 [00:00<?, ? examples/s]

In [65]:
training_args = TrainingArguments(
    output_dir= "PetBERT_PSOE_Multi_Class_Classifier_UnWeighted",
    learning_rate=2e-5,
    per_device_train_batch_size=12,
    per_device_eval_batch_size=12,
    num_train_epochs=50,
    weight_decay=0.01,
    save_strategy = "epoch",
    eval_strategy = "epoch",
    load_best_model_at_end=True,
    report_to="wandb",
    fp16 = True,
    run_name="PetBERT_UnWeighted",
    save_total_limit=3
)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [66]:
trainer = Trainer(
     model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['eval'],
    tokenizer=tokenizer,
    data_collator=data_collator,
    callbacks=[early_stopping_callback],
    compute_metrics=compute_metrics
)
trainer.train()
trainer.save_model()

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1 Weighted
1,No log,0.854833,{'accuracy': 0.7191011235955056},{'f1': 0.6016009400014688}
2,No log,0.702463,{'accuracy': 0.7078651685393258},{'f1': 0.6308552665332717}
3,No log,0.665367,{'accuracy': 0.7640449438202247},{'f1': 0.7303769628734872}
4,No log,0.855561,{'accuracy': 0.7303370786516854},{'f1': 0.7111063094209161}
5,No log,0.867961,{'accuracy': 0.6966292134831461},{'f1': 0.7044935515749438}
6,No log,1.006251,{'accuracy': 0.7303370786516854},{'f1': 0.7298703489889937}


[34m[1mwandb[0m: Adding directory to artifact (./PetBERT_PSOE_Multi_Class_Classifier_UnWeighted/checkpoint-30)... Done. 1.7s
[34m[1mwandb[0m: Adding directory to artifact (./PetBERT_PSOE_Multi_Class_Classifier_UnWeighted/checkpoint-60)... Done. 1.7s
[34m[1mwandb[0m: Adding directory to artifact (./PetBERT_PSOE_Multi_Class_Classifier_UnWeighted/checkpoint-90)... Done. 1.7s
[34m[1mwandb[0m: Adding directory to artifact (./PetBERT_PSOE_Multi_Class_Classifier_UnWeighted/checkpoint-120)... Done. 1.8s
[34m[1mwandb[0m: Adding directory to artifact (./PetBERT_PSOE_Multi_Class_Classifier_UnWeighted/checkpoint-150)... Done. 1.8s
[34m[1mwandb[0m: Adding directory to artifact (./PetBERT_PSOE_Multi_Class_Classifier_UnWeighted/checkpoint-180)... Done. 1.7s


In [67]:
torch.cuda.empty_cache()
gc.collect()

570

## PetBERT Weighted

In [73]:
model = AutoModelForSequenceClassification.from_pretrained('SAVSNET/PetBERT', num_labels=4, id2label=id2label, label2id=label2id)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SAVSNET/PetBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [74]:
training_args = TrainingArguments(
    output_dir= "PetBERT_PSOE_Multi_Class_Classifier_Weighted",
    learning_rate=2e-5,
    per_device_train_batch_size=12,
    per_device_eval_batch_size=12,
    num_train_epochs=50,
    weight_decay=0.01,
    save_strategy = "epoch",
    eval_strategy = "epoch",
    load_best_model_at_end=True,
    report_to="wandb",
    fp16 = True,
    run_name="PetBERT_Weighted",
    save_total_limit=3
)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [75]:
trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['eval'],
    tokenizer=tokenizer,
    data_collator=data_collator,
    callbacks=[early_stopping_callback],
    compute_metrics=compute_metrics
)
trainer.train()
trainer.save_model()

  trainer = CustomTrainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1 Weighted
1,No log,1.000918,{'accuracy': 0.7191011235955056},{'f1': 0.6016009400014688}
2,No log,0.873487,{'accuracy': 0.6966292134831461},{'f1': 0.630837327466541}
3,No log,0.720505,{'accuracy': 0.6853932584269663},{'f1': 0.6917814800464935}
4,No log,0.782366,{'accuracy': 0.6853932584269663},{'f1': 0.6836908409942118}
5,No log,0.897063,{'accuracy': 0.7303370786516854},{'f1': 0.7102290209917681}
6,No log,0.984602,{'accuracy': 0.7078651685393258},{'f1': 0.7085753198112749}


[34m[1mwandb[0m: Adding directory to artifact (./PetBERT_PSOE_Multi_Class_Classifier_Weighted/checkpoint-30)... Done. 1.9s
[34m[1mwandb[0m: Adding directory to artifact (./PetBERT_PSOE_Multi_Class_Classifier_Weighted/checkpoint-60)... Done. 1.7s
[34m[1mwandb[0m: Adding directory to artifact (./PetBERT_PSOE_Multi_Class_Classifier_Weighted/checkpoint-90)... Done. 1.8s
[34m[1mwandb[0m: Adding directory to artifact (./PetBERT_PSOE_Multi_Class_Classifier_Weighted/checkpoint-120)... Done. 1.9s
[34m[1mwandb[0m: Adding directory to artifact (./PetBERT_PSOE_Multi_Class_Classifier_Weighted/checkpoint-150)... Done. 1.7s
[34m[1mwandb[0m: Adding directory to artifact (./PetBERT_PSOE_Multi_Class_Classifier_Weighted/checkpoint-180)... Done. 1.7s


In [76]:
torch.cuda.empty_cache()
gc.collect()

570

## Inference

In [77]:
def predict_sentiment(text):
  inputs = tokenizer(text, padding="max_length", truncation=True, return_tensors="pt")
  outputs = model(**inputs)
  predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
  predicted_class = torch.argmax(predictions).item()
  confidence_score = predictions.squeeze()[predicted_class].item()
  return predicted_class, confidence_score

In [78]:
tokenizer = AutoTokenizer.from_pretrained("BERT_PSOE_Multi_Class_Classifier_Unweighted")
model = AutoModelForSequenceClassification.from_pretrained("BERT_PSOE_Multi_Class_Classifier_Unweighted")

In [79]:
# Assuming your dataframe is called 'df' and the text column is 'text'
df_test["BERT_unweighted_predicted_pseudomonas_otitis"], df_test["BERT_unweighted_confidence_score"] = zip(*df_test["text"].apply(predict_sentiment))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test["BERT_unweighted_predicted_pseudomonas_otitis"], df_test["BERT_unweighted_confidence_score"] = zip(*df_test["text"].apply(predict_sentiment))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test["BERT_unweighted_predicted_pseudomonas_otitis"], df_test["BERT_unweighted_confidence_score"] = zip(*df_test["text"].apply(predict_sentiment))


In [81]:
tokenizer = AutoTokenizer.from_pretrained("BERT_PSOE_Multi_Class_Classifier_Weighted")
model = AutoModelForSequenceClassification.from_pretrained("BERT_PSOE_Multi_Class_Classifier_Weighted")

In [82]:
# Assuming your dataframe is called 'df' and the text column is 'text'
df_test["BERT_weighted_predicted_pseudomonas_otitis"], df_test["BERT_weighted_confidence_score"] = zip(*df_test["text"].apply(predict_sentiment))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test["BERT_weighted_predicted_pseudomonas_otitis"], df_test["BERT_weighted_confidence_score"] = zip(*df_test["text"].apply(predict_sentiment))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test["BERT_weighted_predicted_pseudomonas_otitis"], df_test["BERT_weighted_confidence_score"] = zip(*df_test["text"].apply(predict_sentiment))


In [83]:
tokenizer = AutoTokenizer.from_pretrained("DogBERT_PSOE_Multi_Class_Classifier_Unweighted")
model = AutoModelForSequenceClassification.from_pretrained("DogBERT_PSOE_Multi_Class_Classifier_Unweighted")

In [84]:
# Assuming your dataframe is called 'df' and the text column is 'text'
df_test["DogBERT_unweighted_predicted_pseudomonas_otitis"], df_test["DogBERT_unweighted_confidence_score"] = zip(*df_test["text"].apply(predict_sentiment))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test["DogBERT_unweighted_predicted_pseudomonas_otitis"], df_test["DogBERT_unweighted_confidence_score"] = zip(*df_test["text"].apply(predict_sentiment))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test["DogBERT_unweighted_predicted_pseudomonas_otitis"], df_test["DogBERT_unweighted_confidence_score"] = zip(*df_test["text"].apply(predict_sentiment))


In [85]:
tokenizer = AutoTokenizer.from_pretrained("DogBERT_PSOE_Multi_Class_Classifier_Weighted")
model = AutoModelForSequenceClassification.from_pretrained("DogBERT_PSOE_Multi_Class_Classifier_Weighted")

In [86]:
# Assuming your dataframe is called 'df' and the text column is 'text'
df_test["DogBERT_weighted_predicted_pseudomonas_otitis"], df_test["DogBERT_weighted_confidence_score"] = zip(*df_test["text"].apply(predict_sentiment))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test["DogBERT_weighted_predicted_pseudomonas_otitis"], df_test["DogBERT_weighted_confidence_score"] = zip(*df_test["text"].apply(predict_sentiment))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test["DogBERT_weighted_predicted_pseudomonas_otitis"], df_test["DogBERT_weighted_confidence_score"] = zip(*df_test["text"].apply(predict_sentiment))


In [87]:
tokenizer = AutoTokenizer.from_pretrained("PetBERT_PSOE_Multi_Class_Classifier_UnWeighted")
model = AutoModelForSequenceClassification.from_pretrained("PetBERT_PSOE_Multi_Class_Classifier_UnWeighted")

In [88]:
# Assuming your dataframe is called 'df' and the text column is 'text'
df_test["PetBERT_unweighted_predicted_pseudomonas_otitis"], df_test["PetBERT_unweighted_confidence_score"] = zip(*df_test["text"].apply(predict_sentiment))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test["PetBERT_unweighted_predicted_pseudomonas_otitis"], df_test["PetBERT_unweighted_confidence_score"] = zip(*df_test["text"].apply(predict_sentiment))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test["PetBERT_unweighted_predicted_pseudomonas_otitis"], df_test["PetBERT_unweighted_confidence_score"] = zip(*df_test["text"].apply(predict_sentiment))


In [89]:
tokenizer = AutoTokenizer.from_pretrained("PetBERT_PSOE_Multi_Class_Classifier_Weighted")
model = AutoModelForSequenceClassification.from_pretrained("PetBERT_PSOE_Multi_Class_Classifier_Weighted")

In [90]:
# Assuming your dataframe is called 'df' and the text column is 'text'
df_test["PetBERT_weighted_predicted_pseudomonas_otitis"], df_test["PetBERT_weighted_confidence_score"] = zip(*df_test["text"].apply(predict_sentiment))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test["PetBERT_weighted_predicted_pseudomonas_otitis"], df_test["PetBERT_weighted_confidence_score"] = zip(*df_test["text"].apply(predict_sentiment))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test["PetBERT_weighted_predicted_pseudomonas_otitis"], df_test["PetBERT_weighted_confidence_score"] = zip(*df_test["text"].apply(predict_sentiment))


In [91]:
print(df_test.head())

     label                                               text  \
0        0  "<<identifier>> otitis externa been really goo...   
357      0  ".  Next appointment in 3 weeks. Continued imp...   
631      0  "ES: recheck leg and check ears. OR doing well...   
520      0  "Aged dog, few conditions of concern: 1) bilat...   
138      0  "O feels ear no better. Purulent discharge wit...   

     BERT_unweighted_predicted_pseudomonas_otitis  \
0                                               1   
357                                             1   
631                                             1   
520                                             1   
138                                             1   

     BERT_unweighted_confidence_score  \
0                            0.695481   
357                          0.706644   
631                          0.705835   
520                          0.690102   
138                          0.714850   

     BERT_weighted_predicted_pseudomonas_ot

In [92]:
def CreateMetricDict(preds, labels):
    """
    Function to create a dictionary of ML metrics from the output of a multilabel model

    Args: list of predictions, list of ground truth labels
    """
    metric_dict = {"accuracy": accuracy.compute(predictions=predictions, references=labels)["accuracy"], 
                   "precision_raw":precision.compute(predictions=predictions, references=labels, average=None)["precision"], 
                   "recall_raw":recall.compute(predictions=predictions, references=labels, average=None)["recall"], 
                   "f1_raw":f1.compute(predictions=predictions, references=labels, average=None)["f1"], 
                   "precision_macro":precision.compute(predictions=predictions, references=labels, average='macro')["precision"], 
                   "recall_macro":recall.compute(predictions=predictions, references=labels, average='macro')["recall"], 
                   "f1_macro":f1.compute(predictions=predictions, references=labels, average='macro')["f1"], 
                   "precision_weighted":precision.compute(predictions=predictions, references=labels, average='weighted')["precision"],
                   "recall_weighted":recall.compute(predictions=predictions, references=labels, average='weighted')["recall"],
                   "f1_weighted":f1.compute(predictions=predictions, references=labels, average='weighted')["f1"]
                  }


    return metric_dict

In [94]:
accuracy = evaluate.load("accuracy")
precision = evaluate.load("precision")
recall = evaluate.load("recall")
f1 = evaluate.load("f1")

predictions = list(df_test['BERT_unweighted_predicted_pseudomonas_otitis'])
labels = list(df_test['label'])

bert_uw_metrics = CreateMetricDict(predictions, labels)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [95]:
print(bert_uw_metrics)

{'accuracy': 0.7032967032967034, 'precision_raw': array([0.       , 0.7032967, 0.       , 0.       ]), 'recall_raw': array([0., 1., 0., 0.]), 'f1_raw': array([0.        , 0.82580645, 0.        , 0.        ]), 'precision_macro': 0.17582417582417584, 'recall_macro': 0.25, 'f1_macro': 0.2064516129032258, 'precision_weighted': 0.49462625286801115, 'recall_weighted': 0.7032967032967034, 'f1_weighted': 0.5807869549805034}


In [96]:
predictions = list(df_test['BERT_weighted_predicted_pseudomonas_otitis'])
labels = list(df_test['label'])

bert_w_metrics = CreateMetricDict(predictions, labels)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [97]:
print(bert_w_metrics)

{'accuracy': 0.7032967032967034, 'precision_raw': array([0.        , 0.71111111, 0.        , 0.        ]), 'recall_raw': array([0., 1., 0., 0.]), 'f1_raw': array([0.        , 0.83116883, 0.        , 0.        ]), 'precision_macro': 0.17777777777777778, 'recall_macro': 0.25, 'f1_macro': 0.2077922077922078, 'precision_weighted': 0.5001221001221001, 'recall_weighted': 0.7032967032967034, 'f1_weighted': 0.5845582988440131}


In [98]:
predictions = list(df_test['DogBERT_unweighted_predicted_pseudomonas_otitis'])
labels = list(df_test['label'])

dogbert_uw_metrics = CreateMetricDict(predictions, labels)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [99]:
print(dogbert_uw_metrics)

{'accuracy': 0.7692307692307693, 'precision_raw': array([0.4       , 0.86956522, 0.        , 1.        ]), 'recall_raw': array([0.53333333, 0.9375    , 0.        , 0.5       ]), 'f1_raw': array([0.45714286, 0.90225564, 0.        , 0.66666667]), 'precision_macro': 0.567391304347826, 'recall_macro': 0.4927083333333333, 'f1_macro': 0.506516290726817, 'precision_weighted': 0.7214524605828952, 'recall_weighted': 0.7692307692307693, 'f1_weighted': 0.7392106640226942}


In [100]:
predictions = list(df_test['DogBERT_weighted_predicted_pseudomonas_otitis'])
labels = list(df_test['label'])

dogbert_w_metrics = CreateMetricDict(predictions, labels)

In [101]:
print(dogbert_w_metrics)

{'accuracy': 0.8131868131868132, 'precision_raw': array([0.58823529, 0.890625  , 0.85714286, 0.33333333]), 'recall_raw': array([0.66666667, 0.890625  , 0.75      , 0.25      ]), 'f1_raw': array([0.625     , 0.890625  , 0.8       , 0.28571429]), 'precision_macro': 0.6673341211484595, 'recall_macro': 0.6393229166666666, 'f1_macro': 0.6503348214285714, 'precision_weighted': 0.8133407209037462, 'recall_weighted': 0.8131868131868132, 'f1_weighted': 0.8122841444270016}


In [103]:
predictions = list(df_test['PetBERT_unweighted_predicted_pseudomonas_otitis'])
labels = list(df_test['label'])

petbert_uw_metrics = CreateMetricDict(predictions, labels)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [104]:
print(petbert_uw_metrics)

{'accuracy': 0.7252747252747253, 'precision_raw': array([0.27272727, 0.7875    , 0.        , 0.        ]), 'recall_raw': array([0.2     , 0.984375, 0.      , 0.      ]), 'f1_raw': array([0.23076923, 0.875     , 0.        , 0.        ]), 'precision_macro': 0.26505681818181814, 'recall_macro': 0.29609375, 'f1_macro': 0.2764423076923077, 'precision_weighted': 0.5988011988011989, 'recall_weighted': 0.7252747252747253, 'f1_weighted': 0.6534234995773457}


In [105]:
predictions = list(df_test['PetBERT_weighted_predicted_pseudomonas_otitis'])
labels = list(df_test['label'])

petbert_w_metrics = CreateMetricDict(predictions, labels)

In [106]:
print(petbert_w_metrics)

{'accuracy': 0.7912087912087912, 'precision_raw': array([0.5       , 0.86956522, 0.8       , 0.4       ]), 'recall_raw': array([0.4   , 0.9375, 0.5   , 0.5   ]), 'f1_raw': array([0.44444444, 0.90225564, 0.61538462, 0.44444444]), 'precision_macro': 0.6423913043478261, 'recall_macro': 0.584375, 'f1_macro': 0.6016322858428121, 'precision_weighted': 0.7818920210224557, 'recall_weighted': 0.7912087912087912, 'f1_weighted': 0.7814492557118351}


In [107]:
dicts = [bert_uw_metrics, bert_w_metrics, dogbert_uw_metrics, dogbert_w_metrics, petbert_uw_metrics, petbert_w_metrics]

metrics_dict = {'model':['BERT (unweighted loss)', 'BERT (weighted loss)','DogBERT (unweighted loss)', 'DogBERT (weighted loss)','PetBERT (unweighted loss)', 'PetBERT (weighted loss)'], 'accuracy': [],
                'precision_raw':[], 'recall_raw':[], 'f1_raw': [], 'precision_macro':[], 'recall_macro':[], 'f1_macro': [], 'precision_weighted':[], 'recall_weighted':[], 'f1_weighted': []}


for model in dicts:
    for metric in model.keys():
        metrics_dict[metric].append(model[metric])

In [108]:
print(metrics_dict)

{'model': ['BERT (unweighted loss)', 'BERT (weighted loss)', 'DogBERT (unweighted loss)', 'DogBERT (weighted loss)', 'PetBERT (unweighted loss)', 'PetBERT (weighted loss)'], 'accuracy': [0.7032967032967034, 0.7032967032967034, 0.7692307692307693, 0.8131868131868132, 0.7252747252747253, 0.7912087912087912], 'precision_raw': [array([0.       , 0.7032967, 0.       , 0.       ]), array([0.        , 0.71111111, 0.        , 0.        ]), array([0.4       , 0.86956522, 0.        , 1.        ]), array([0.58823529, 0.890625  , 0.85714286, 0.33333333]), array([0.27272727, 0.7875    , 0.        , 0.        ]), array([0.5       , 0.86956522, 0.8       , 0.4       ])], 'recall_raw': [array([0., 1., 0., 0.]), array([0., 1., 0., 0.]), array([0.53333333, 0.9375    , 0.        , 0.5       ]), array([0.66666667, 0.890625  , 0.75      , 0.25      ]), array([0.2     , 0.984375, 0.      , 0.      ]), array([0.4   , 0.9375, 0.5   , 0.5   ])], 'f1_raw': [array([0.        , 0.82580645, 0.        , 0.        ]

In [109]:
metrics_df = pd.DataFrame(metrics_dict)
metrics_df.to_csv('Pseudomonas_Otitis_Unbalanced_Evaluation_Metrics_(New).csv', index=False)

In [110]:
print(len(df_test))

91


## Create Latex Table

In [111]:
metrics_df = pd.read_csv('Pseudomonas_Otitis_Unbalanced_Evaluation_Metrics_(New).csv', index_col=False)

In [112]:
print(metrics_df)

                       model  accuracy  \
0     BERT (unweighted loss)  0.703297   
1       BERT (weighted loss)  0.703297   
2  DogBERT (unweighted loss)  0.769231   
3    DogBERT (weighted loss)  0.813187   
4  PetBERT (unweighted loss)  0.725275   
5    PetBERT (weighted loss)  0.791209   

                                   precision_raw  \
0      [0.        0.7032967 0.        0.       ]   
1  [0.         0.71111111 0.         0.        ]   
2  [0.4        0.86956522 0.         1.        ]   
3  [0.58823529 0.890625   0.85714286 0.33333333]   
4  [0.27272727 0.7875     0.         0.        ]   
5  [0.5        0.86956522 0.8        0.4       ]   

                                      recall_raw  \
0                                  [0. 1. 0. 0.]   
1                                  [0. 1. 0. 0.]   
2  [0.53333333 0.9375     0.         0.5       ]   
3  [0.66666667 0.890625   0.75       0.25      ]   
4          [0.2      0.984375 0.       0.      ]   
5                  [0.4    0

In [114]:
metrics_df_weighted = metrics_df[['model', 'accuracy', 'precision_weighted', 'recall_weighted', 'f1_weighted']]

In [115]:
print(metrics_df_weighted)

                       model  accuracy  precision_weighted  recall_weighted  \
0     BERT (unweighted loss)  0.703297            0.494626         0.703297   
1       BERT (weighted loss)  0.703297            0.500122         0.703297   
2  DogBERT (unweighted loss)  0.769231            0.721452         0.769231   
3    DogBERT (weighted loss)  0.813187            0.813341         0.813187   
4  PetBERT (unweighted loss)  0.725275            0.598801         0.725275   
5    PetBERT (weighted loss)  0.791209            0.781892         0.791209   

   f1_weighted  
0     0.580787  
1     0.584558  
2     0.739211  
3     0.812284  
4     0.653423  
5     0.781449  


In [117]:
print(metrics_df_weighted.to_latex(index=False,
                  formatters={"name": str.upper},
                  float_format="{:.2f}".format,))

\begin{tabular}{lrrrr}
\toprule
model & accuracy & precision_weighted & recall_weighted & f1_weighted \\
\midrule
BERT (unweighted loss) & 0.70 & 0.49 & 0.70 & 0.58 \\
BERT (weighted loss) & 0.70 & 0.50 & 0.70 & 0.58 \\
DogBERT (unweighted loss) & 0.77 & 0.72 & 0.77 & 0.74 \\
DogBERT (weighted loss) & 0.81 & 0.81 & 0.81 & 0.81 \\
PetBERT (unweighted loss) & 0.73 & 0.60 & 0.73 & 0.65 \\
PetBERT (weighted loss) & 0.79 & 0.78 & 0.79 & 0.78 \\
\bottomrule
\end{tabular}

