## Prepare Dataset

In [1]:
import torch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Device count: ", torch.cuda.device_count())
for i in range(torch.cuda.device_count()):
    print("Device", i, "name: ", torch.cuda.get_device_name(i))
print("Device: ", device)

if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Total Memory:    ', round(torch.cuda.get_device_properties(0).total_memory/1024**3,1), 'GB')
    print('Allocated Memory:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
    print('Cached Memory:   ', round(torch.cuda.memory_reserved(0)/1024**3,1), 'GB')

Device count:  1
Device 0 name:  NVIDIA GeForce RTX 4070 SUPER
Device:  cuda
NVIDIA GeForce RTX 4070 SUPER
Total Memory:     12.0 GB
Allocated Memory: 0.0 GB
Cached Memory:    0.0 GB


In [2]:
import pandas as pd
import json


# Dental dataset
#from datasets import load_dataset
#ds = load_dataset("Lines/Open-Domain-Oral-Disease-QA-Dataset")

path = "e:/git/cs224u/70-fixed-batch-inference.xlsx"

# Load dataset
df = pd.read_excel(path, sheet_name="review")
print('Original dataset length', len(df))

# Filter dataset
df = df.drop(df[df['reference_output_69'].str.find('PARSE_FAILED') != -1].index)
print('Filtered dataset length', len(df))

df['output'] = df.apply(lambda x: json.loads(x['reference_output_69']), axis=1)
df['input'] = df['raw_input'].copy()
print("Max input length", df['input'].str.len().max())

c = {}
for index, row in df.iterrows():
    x = row['output']
    for key, value in x.items():
        c[key] = c.get(key, 0) + int(len(value) > 0)

categories = sorted(list(c.keys()))

# Validation of the dataset
for index, row in df.iterrows():
    a=[0] * len(categories)
    try:
        x = json.loads(row['reference_output_69'])
        if type(x) is dict:
            # Check dental features for ex.
            if 'dental_features' in x and len(x['dental_features'])>0:
                a.append(1)
            else:
                a.append(0)
        else:
            print(x['answer'])
    except ValueError:  # includes simplejson.decoder.JSONDecodeError
        print('Decoding JSON has failed')

# print(df['output'].loc[df.index[2]])
for i in categories:
    print(c[i], '\t', i)
df['output_vector'] = df.apply(lambda x: [int(len(x['output'].get(i, [0])) > 0) for i in categories], axis=1)
df['output_vector']

Original dataset length 5492
Filtered dataset length 5451
Max input length 2078
2141 	 alignment
395 	 as_previous
1197 	 bite
181 	 crowding
1375 	 dental_features
472 	 finishing
485 	 leveling
272 	 midline
95 	 non_clinical_reason_for_new_order
1174 	 occlusion
1833 	 other_instructions
484 	 overcorrection_aligners
689 	 passive_aligners
1630 	 polite_expressions
55 	 request_for_clin_check
199 	 skip_active_treatment
1272 	 spaces
2648 	 teeth_movements
124 	 tracking
394 	 treatment_length


0       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, ...
1       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
2       [1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, ...
3       [1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...
4       [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
                              ...                        
5487    [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
5488    [1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, ...
5489    [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, ...
5490    [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
5491    [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
Name: output_vector, Length: 5451, dtype: object

## Wrap the dataset into the class

In [3]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoModelForSequenceClassification, AutoTokenizer, AdamW
from sklearn.model_selection import train_test_split


# Prepare the dataset
class CustomDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=512):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        
        # Tokenize the text
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            #return_attention_mask=True,
            return_attention_mask=False,
            return_tensors='pt'
        )

        # Take into account that the 
        return {
            'input_ids': encoding['input_ids'].flatten().to(device),
            #'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.float, device=device)  # Float for multi-label
        }

# Prepare data for training
#X_train, X_val, y_train, y_val = train_test_split(df['input'], df['output_vector'], test_size=0.2)

In [4]:
# Validate one of the examples
X_train, X_val, y_train, y_val = train_test_split(df['input'].tolist(), df['output_vector'].tolist(), test_size=0.2, shuffle=False)
k = 0
print(X_val[0], y_val[0])
for i in range(len(categories)):
    print(categories[i], y_val[0][i])

[NumberingSystem]unknown

[FormInstructionsUpperArch:]please follow my clinical preferences. Thank you!

[FormInstructionsLowerArch:]please follow my clinical preferences. Thank you!

 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0]
alignment 0
as_previous 0
bite 0
crowding 0
dental_features 0
finishing 0
leveling 0
midline 0
non_clinical_reason_for_new_order 0
occlusion 0
other_instructions 1
overcorrection_aligners 0
passive_aligners 0
polite_expressions 1
request_for_clin_check 0
skip_active_treatment 0
spaces 0
teeth_movements 0
tracking 0
treatment_length 0


In [5]:
from tqdm import trange, tqdm

# Load the tokenizer and model
model_name = 'bert-base-uncased'  # Change as needed

device_name = device.type + ":" + str(torch.cuda.current_device())

tokenizer = AutoTokenizer.from_pretrained(model_name)
print("Max sentence", tokenizer.model_max_length)

model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=len(categories), device_map = device_name)

# Freeze all existing parameters
for param in model.parameters():
    param.requires_grad = False

# Unfreeze the classification head
for param in model.classifier.parameters():
    param.requires_grad = True

# Last layer unblock
for param in model.bert.encoder.layer[-1].parameters():
    param.requires_grad = True

# Print out which layers are trainable for verification
for name, param in model.named_parameters():
    if param.requires_grad:
        print(f"Trainable layer: {name}")

print(model)



Max sentence 512


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Trainable layer: bert.encoder.layer.11.attention.self.query.weight
Trainable layer: bert.encoder.layer.11.attention.self.query.bias
Trainable layer: bert.encoder.layer.11.attention.self.key.weight
Trainable layer: bert.encoder.layer.11.attention.self.key.bias
Trainable layer: bert.encoder.layer.11.attention.self.value.weight
Trainable layer: bert.encoder.layer.11.attention.self.value.bias
Trainable layer: bert.encoder.layer.11.attention.output.dense.weight
Trainable layer: bert.encoder.layer.11.attention.output.dense.bias
Trainable layer: bert.encoder.layer.11.attention.output.LayerNorm.weight
Trainable layer: bert.encoder.layer.11.attention.output.LayerNorm.bias
Trainable layer: bert.encoder.layer.11.intermediate.dense.weight
Trainable layer: bert.encoder.layer.11.intermediate.dense.bias
Trainable layer: bert.encoder.layer.11.output.dense.weight
Trainable layer: bert.encoder.layer.11.output.dense.bias
Trainable layer: bert.encoder.layer.11.output.LayerNorm.weight
Trainable layer: bert

In [6]:
from sklearn.metrics import accuracy_score, f1_score
import torch.nn as nn
import numpy as np


name = 'berta_multiclass_model_2_last_retrained'

# Prepare data for training
X_train, X_val, y_train, y_val = train_test_split(df['input'].tolist(), df['output_vector'].tolist(), test_size=0.2, shuffle=False)

# Create datasets
train_dataset = CustomDataset(X_train, y_train, tokenizer)
val_dataset = CustomDataset(X_val, y_val, tokenizer)

# Create DataLoader
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False)


def binary_cross_entropy(t, p):
    t = np.float64(t)
    p = np.float64(p)
    # binary cross-entropy loss
    return -np.sum(t * np.log(p) + (1 - t) * np.log(1 - p))/len(t)


# Evaluation function
def evaluate_model(model, val_loader):
    model.eval()
    predictions_f, true_labels = [], []

    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids']
            #attention_mask = batch['attention_mask']
            labels = batch['labels']

            #outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            outputs = model(input_ids=input_ids)
            preds = torch.sigmoid(outputs.logits).cpu().numpy()
            predictions_f.extend(preds)
            true_labels.extend(labels.cpu().numpy())

    predictions = [(p > 0.5).astype(int) for p in predictions_f]
    accuracy = accuracy_score(true_labels, predictions)
    f1score = f1_score(true_labels, predictions, average='macro')
    f1score_micro = f1_score(true_labels, predictions, average='micro')
    loss = binary_cross_entropy(true_labels, predictions_f)
    print(f'Validation Accuracy: {accuracy:.4f}, F1 Score: {f1score:.4f}, F1 Score micro: {f1score_micro:.4f}, Loss: {loss}')

    return (accuracy, f1score, f1score_micro, loss, predictions_f, true_labels)


# Training function
def train_model(model, train_loader, val_loader, epochs=10):
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)

    losses = []
    accuracies = []
    f1scores = []
    f1scores_micro = []
    val_losses = []
    predictions = []
    true_labels = []

    current_f1score_micro = 0

    for epoch in range(epochs):
        model.train() # set dropout to train mode
        for param in model.parameters():
            param.requires_grad = False

        # Unfreeze the classification head
        for param in model.classifier.parameters():
            param.requires_grad = True

        # Last layer unblock
        for param in model.bert.encoder.layer[-1].parameters():
            param.requires_grad = True

        for batch in tqdm(train_loader):
            optimizer.zero_grad()
            input_ids = batch['input_ids']
            #attention_mask = batch['attention_mask']
            labels = batch['labels']
            
            #outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            outputs = model(input_ids=input_ids, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

        print(f'Epoch {epoch + 1}/{epochs} completed. Train loss: {loss.item()}')
        losses.append(loss.item())
    
        (accuracy, f1score, f1score_micro, val_loss, predictions_f, true_labels) = evaluate_model(model, val_loader)
        accuracies.append(accuracy)
        f1scores.append(f1score)
        f1scores_micro.append(f1score_micro)
        val_losses.append(val_loss)

        if f1score_micro > current_f1score_micro:
            print("Saving model with higher f1score_micro", f1score_micro)
            current_f1score_micro = f1score_micro
            model.save_pretrained(name)
            tokenizer.save_pretrained(name)

    return (losses, accuracies, f1scores, f1scores_micro, val_losses, predictions_f, true_labels)

In [8]:
%env CUDA_LAUNCH_BLOCKING=0

import os
print(os.environ["CUDA_LAUNCH_BLOCKING"])

# Train the model
(losses, accuracies, f1scores, f1scores_micro, val_losses, predictions_f, true_labels) = train_model(model, train_loader, val_loader, epochs=50)

env: CUDA_LAUNCH_BLOCKING=0
0


  0%|          | 0/2180 [00:00<?, ?it/s]We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.
  attn_output = torch.nn.functional.scaled_dot_product_attention(
100%|██████████| 2180/2180 [00:43<00:00, 49.90it/s]


Epoch 1/50 completed. Train loss: 0.44349905848503113
Validation Accuracy: 0.0156, F1 Score: 0.0734, F1 Score micro: 0.2573, Loss: 7.0583991845452525
Saving model with higher f1score_micro 0.2572992700729927


100%|██████████| 2180/2180 [00:43<00:00, 50.00it/s]


Epoch 2/50 completed. Train loss: 0.28661951422691345
Validation Accuracy: 0.0137, F1 Score: 0.0927, F1 Score micro: 0.2962, Loss: 6.881657009764314
Saving model with higher f1score_micro 0.2962313759859772


100%|██████████| 2180/2180 [00:43<00:00, 49.98it/s]


Epoch 3/50 completed. Train loss: 0.3782748281955719
Validation Accuracy: 0.0174, F1 Score: 0.1262, F1 Score micro: 0.3378, Loss: 6.586743935988435
Saving model with higher f1score_micro 0.3377600708277999


100%|██████████| 2180/2180 [00:43<00:00, 49.98it/s]


Epoch 4/50 completed. Train loss: 0.3548486828804016
Validation Accuracy: 0.0357, F1 Score: 0.2014, F1 Score micro: 0.4353, Loss: 6.2018649275299005
Saving model with higher f1score_micro 0.4352720450281426


100%|██████████| 2180/2180 [00:43<00:00, 50.08it/s]


Epoch 5/50 completed. Train loss: 0.1940208524465561
Validation Accuracy: 0.0541, F1 Score: 0.1965, F1 Score micro: 0.4588, Loss: 5.882495622034449
Saving model with higher f1score_micro 0.45877829987184965


100%|██████████| 2180/2180 [00:43<00:00, 49.97it/s]


Epoch 6/50 completed. Train loss: 0.2880956828594208
Validation Accuracy: 0.0632, F1 Score: 0.2373, F1 Score micro: 0.5059, Loss: 5.67616326779751
Saving model with higher f1score_micro 0.5058967059780398


100%|██████████| 2180/2180 [00:43<00:00, 49.90it/s]


Epoch 7/50 completed. Train loss: 0.18343524634838104
Validation Accuracy: 0.0834, F1 Score: 0.2856, F1 Score micro: 0.5487, Loss: 5.516466224084794
Saving model with higher f1score_micro 0.5486968449931413


100%|██████████| 2180/2180 [00:43<00:00, 49.99it/s]


Epoch 8/50 completed. Train loss: 0.441332072019577
Validation Accuracy: 0.1082, F1 Score: 0.2974, F1 Score micro: 0.5718, Loss: 5.305032608227822
Saving model with higher f1score_micro 0.5717597063936642


100%|██████████| 2180/2180 [00:43<00:00, 49.92it/s]


Epoch 9/50 completed. Train loss: 0.2870183289051056
Validation Accuracy: 0.1082, F1 Score: 0.3031, F1 Score micro: 0.5738, Loss: 5.142255236323431
Saving model with higher f1score_micro 0.5737769080234834


100%|██████████| 2180/2180 [00:43<00:00, 49.98it/s]


Epoch 10/50 completed. Train loss: 0.2714517116546631
Validation Accuracy: 0.1366, F1 Score: 0.3238, F1 Score micro: 0.6080, Loss: 4.94655784175343
Saving model with higher f1score_micro 0.6079513030245387


100%|██████████| 2180/2180 [00:43<00:00, 49.96it/s]


Epoch 11/50 completed. Train loss: 0.1991310864686966
Validation Accuracy: 0.1558, F1 Score: 0.3557, F1 Score micro: 0.6451, Loss: 4.711381841520625
Saving model with higher f1score_micro 0.6450543378154356


100%|██████████| 2180/2180 [00:44<00:00, 49.39it/s]


Epoch 12/50 completed. Train loss: 0.18366225063800812
Validation Accuracy: 0.1751, F1 Score: 0.3665, F1 Score micro: 0.6575, Loss: 4.555294427335593
Saving model with higher f1score_micro 0.6575292397660819


100%|██████████| 2180/2180 [00:44<00:00, 49.32it/s]


Epoch 13/50 completed. Train loss: 0.1352473944425583
Validation Accuracy: 0.1778, F1 Score: 0.3625, F1 Score micro: 0.6664, Loss: 4.42982243502763
Saving model with higher f1score_micro 0.66642107202063


100%|██████████| 2180/2180 [00:43<00:00, 49.60it/s]


Epoch 14/50 completed. Train loss: 0.22462764382362366
Validation Accuracy: 0.1989, F1 Score: 0.3681, F1 Score micro: 0.6794, Loss: 4.324008263809738
Saving model with higher f1score_micro 0.6793907138924573


100%|██████████| 2180/2180 [00:44<00:00, 49.04it/s]


Epoch 15/50 completed. Train loss: 0.1434611976146698
Validation Accuracy: 0.2392, F1 Score: 0.4392, F1 Score micro: 0.7102, Loss: 4.132527741789106
Saving model with higher f1score_micro 0.7101626727304531


100%|██████████| 2180/2180 [00:44<00:00, 49.26it/s]


Epoch 16/50 completed. Train loss: 0.19802813231945038
Validation Accuracy: 0.2301, F1 Score: 0.4041, F1 Score micro: 0.7007, Loss: 4.092115258645759


100%|██████████| 2180/2180 [00:44<00:00, 48.79it/s]


Epoch 17/50 completed. Train loss: 0.1991570144891739
Validation Accuracy: 0.2493, F1 Score: 0.4171, F1 Score micro: 0.7229, Loss: 3.9736885906541355
Saving model with higher f1score_micro 0.7229422066549912


100%|██████████| 2180/2180 [00:43<00:00, 49.59it/s]


Epoch 18/50 completed. Train loss: 0.24090638756752014
Validation Accuracy: 0.2576, F1 Score: 0.4327, F1 Score micro: 0.7234, Loss: 3.894116700164188
Saving model with higher f1score_micro 0.7234194122885129


100%|██████████| 2180/2180 [00:45<00:00, 48.35it/s]


Epoch 19/50 completed. Train loss: 0.27905386686325073
Validation Accuracy: 0.2777, F1 Score: 0.4484, F1 Score micro: 0.7370, Loss: 3.8021645829648594
Saving model with higher f1score_micro 0.7369705420951717


100%|██████████| 2180/2180 [00:43<00:00, 49.97it/s]


Epoch 20/50 completed. Train loss: 0.1369701474905014
Validation Accuracy: 0.3006, F1 Score: 0.4904, F1 Score micro: 0.7498, Loss: 3.7162756488467372
Saving model with higher f1score_micro 0.7497838492132112


100%|██████████| 2180/2180 [00:44<00:00, 49.25it/s]


Epoch 21/50 completed. Train loss: 0.11328374594449997
Validation Accuracy: 0.2915, F1 Score: 0.4535, F1 Score micro: 0.7453, Loss: 3.6711538769318275


100%|██████████| 2180/2180 [00:44<00:00, 48.75it/s]


Epoch 22/50 completed. Train loss: 0.1486080139875412
Validation Accuracy: 0.2906, F1 Score: 0.4716, F1 Score micro: 0.7447, Loss: 3.647114092743095


100%|██████████| 2180/2180 [00:43<00:00, 49.69it/s]


Epoch 23/50 completed. Train loss: 0.2647155225276947
Validation Accuracy: 0.3236, F1 Score: 0.5216, F1 Score micro: 0.7683, Loss: 3.51000477575665
Saving model with higher f1score_micro 0.768259385665529


100%|██████████| 2180/2180 [00:43<00:00, 49.73it/s]


Epoch 24/50 completed. Train loss: 0.2238844484090805
Validation Accuracy: 0.3181, F1 Score: 0.5215, F1 Score micro: 0.7627, Loss: 3.4954524424163314


100%|██████████| 2180/2180 [00:44<00:00, 49.43it/s]


Epoch 25/50 completed. Train loss: 0.29856356978416443
Validation Accuracy: 0.3236, F1 Score: 0.5453, F1 Score micro: 0.7716, Loss: 3.446556418639758
Saving model with higher f1score_micro 0.7715996578272027


100%|██████████| 2180/2180 [00:44<00:00, 48.56it/s]


Epoch 26/50 completed. Train loss: 0.08687163144350052
Validation Accuracy: 0.3428, F1 Score: 0.5692, F1 Score micro: 0.7828, Loss: 3.3589624870620414
Saving model with higher f1score_micro 0.7827848101265823


100%|██████████| 2180/2180 [00:44<00:00, 49.08it/s]


Epoch 27/50 completed. Train loss: 0.30611488223075867
Validation Accuracy: 0.3226, F1 Score: 0.5506, F1 Score micro: 0.7733, Loss: 3.433229199422593


100%|██████████| 2180/2180 [00:44<00:00, 48.59it/s]


Epoch 28/50 completed. Train loss: 0.10671136528253555
Validation Accuracy: 0.3465, F1 Score: 0.6051, F1 Score micro: 0.7899, Loss: 3.302927316105889
Saving model with higher f1score_micro 0.7899020096329513


100%|██████████| 2180/2180 [00:44<00:00, 48.61it/s]


Epoch 29/50 completed. Train loss: 0.10951953381299973
Validation Accuracy: 0.3492, F1 Score: 0.5916, F1 Score micro: 0.7892, Loss: 3.2751205294100365


100%|██████████| 2180/2180 [00:44<00:00, 49.26it/s]


Epoch 30/50 completed. Train loss: 0.30289173126220703
Validation Accuracy: 0.3556, F1 Score: 0.6015, F1 Score micro: 0.7961, Loss: 3.25228859337611
Saving model with higher f1score_micro 0.7960875331564987


100%|██████████| 2180/2180 [00:44<00:00, 49.15it/s]


Epoch 31/50 completed. Train loss: 0.23766544461250305
Validation Accuracy: 0.3602, F1 Score: 0.6010, F1 Score micro: 0.7982, Loss: 3.1584652031680327
Saving model with higher f1score_micro 0.7982181158224715


100%|██████████| 2180/2180 [00:44<00:00, 49.09it/s]


Epoch 32/50 completed. Train loss: 0.10843576490879059
Validation Accuracy: 0.3566, F1 Score: 0.6050, F1 Score micro: 0.7957, Loss: 3.1285072817776776


100%|██████████| 2180/2180 [00:44<00:00, 49.14it/s]


Epoch 33/50 completed. Train loss: 0.1036168560385704
Validation Accuracy: 0.3529, F1 Score: 0.5806, F1 Score micro: 0.7934, Loss: 3.1504310898896537


100%|██████████| 2180/2180 [00:44<00:00, 48.78it/s]


Epoch 34/50 completed. Train loss: 0.03925035521388054
Validation Accuracy: 0.3547, F1 Score: 0.6025, F1 Score micro: 0.7923, Loss: 3.21527879677706


100%|██████████| 2180/2180 [00:45<00:00, 48.32it/s]


Epoch 35/50 completed. Train loss: 0.1814589500427246
Validation Accuracy: 0.3786, F1 Score: 0.6475, F1 Score micro: 0.8115, Loss: 3.0297503627424414
Saving model with higher f1score_micro 0.8115140673280208


100%|██████████| 2180/2180 [00:44<00:00, 48.92it/s]


Epoch 36/50 completed. Train loss: 0.1086307093501091
Validation Accuracy: 0.3721, F1 Score: 0.6243, F1 Score micro: 0.8054, Loss: 3.0410865900526303


100%|██████████| 2180/2180 [00:44<00:00, 48.73it/s]


Epoch 37/50 completed. Train loss: 0.020067909732460976
Validation Accuracy: 0.3804, F1 Score: 0.6297, F1 Score micro: 0.8119, Loss: 3.0026361791669838
Saving model with higher f1score_micro 0.8119393247431088


100%|██████████| 2180/2180 [00:44<00:00, 48.85it/s]


Epoch 38/50 completed. Train loss: 0.07436540722846985
Validation Accuracy: 0.3859, F1 Score: 0.6482, F1 Score micro: 0.8143, Loss: 2.9385166579501076
Saving model with higher f1score_micro 0.8142950391644909


100%|██████████| 2180/2180 [00:44<00:00, 48.79it/s]


Epoch 39/50 completed. Train loss: 0.21276669204235077
Validation Accuracy: 0.3914, F1 Score: 0.6456, F1 Score micro: 0.8117, Loss: 2.9576493819000067


100%|██████████| 2180/2180 [00:44<00:00, 48.61it/s]


Epoch 40/50 completed. Train loss: 0.02759835124015808
Validation Accuracy: 0.3868, F1 Score: 0.6508, F1 Score micro: 0.8206, Loss: 2.900178439861701
Saving model with higher f1score_micro 0.8205625606207565


100%|██████████| 2180/2180 [00:45<00:00, 48.44it/s]


Epoch 41/50 completed. Train loss: 0.09444967657327652
Validation Accuracy: 0.3978, F1 Score: 0.6752, F1 Score micro: 0.8203, Loss: 2.8927538135091235


100%|██████████| 2180/2180 [00:44<00:00, 48.65it/s]


Epoch 42/50 completed. Train loss: 0.3200553059577942
Validation Accuracy: 0.4060, F1 Score: 0.6684, F1 Score micro: 0.8230, Loss: 2.8992058095312707
Saving model with higher f1score_micro 0.8230305007339749


100%|██████████| 2180/2180 [00:44<00:00, 48.92it/s]


Epoch 43/50 completed. Train loss: 0.060452599078416824
Validation Accuracy: 0.4161, F1 Score: 0.6901, F1 Score micro: 0.8282, Loss: 2.821103059156832
Saving model with higher f1score_micro 0.8281726295523825


100%|██████████| 2180/2180 [00:45<00:00, 48.37it/s]


Epoch 44/50 completed. Train loss: 0.2036730796098709
Validation Accuracy: 0.4033, F1 Score: 0.6754, F1 Score micro: 0.8230, Loss: 2.882359318066803


100%|██████████| 2180/2180 [00:45<00:00, 48.29it/s]


Epoch 45/50 completed. Train loss: 0.05763217434287071
Validation Accuracy: 0.4024, F1 Score: 0.6828, F1 Score micro: 0.8255, Loss: 2.8593865487284207


100%|██████████| 2180/2180 [00:44<00:00, 48.83it/s]


Epoch 46/50 completed. Train loss: 0.07380010187625885
Validation Accuracy: 0.4198, F1 Score: 0.7034, F1 Score micro: 0.8342, Loss: 2.794037142928125
Saving model with higher f1score_micro 0.834207598156096


100%|██████████| 2180/2180 [00:45<00:00, 47.92it/s]


Epoch 47/50 completed. Train loss: 0.2654193937778473
Validation Accuracy: 0.4271, F1 Score: 0.7355, F1 Score micro: 0.8365, Loss: 2.7928697663504036
Saving model with higher f1score_micro 0.8365079365079365


100%|██████████| 2180/2180 [00:46<00:00, 46.77it/s]


Epoch 48/50 completed. Train loss: 0.07183494418859482
Validation Accuracy: 0.4189, F1 Score: 0.7147, F1 Score micro: 0.8346, Loss: 2.7572241632074315


100%|██████████| 2180/2180 [00:46<00:00, 47.25it/s]


Epoch 49/50 completed. Train loss: 0.12899073958396912
Validation Accuracy: 0.4308, F1 Score: 0.7181, F1 Score micro: 0.8342, Loss: 2.770382837916212


100%|██████████| 2180/2180 [00:44<00:00, 49.44it/s]


Epoch 50/50 completed. Train loss: 0.06673504412174225
Validation Accuracy: 0.4207, F1 Score: 0.7114, F1 Score micro: 0.8363, Loss: 2.7622447827548293


In [9]:
# Save the model
#name = 'berta_multiclass_model_2_last_retrained'
#model.save_pretrained(name)
#tokenizer.save_pretrained(name)

In [14]:
import pandas as pd
import plotly.express as px

df_metrics = pd.DataFrame({'f1score': f1scores, 'f1score-micro': f1scores_micro,'val_losses': val_losses, 'val_accuracies': accuracies, 'loss': losses})
df_metrics["epoch"] = range(0, len(df_metrics))

fig = px.line(df_metrics, x="epoch", y="loss")
fig.show()

In [15]:
fig = px.line(df_metrics, x="epoch", y="f1score")
fig.show()

In [16]:
fig = px.line(df_metrics, x="epoch", y="val_losses")
fig.show()

In [17]:
# (losses, accuracies, f1scores, f1scores_micro, val_losses, predictions_f, true_labels)
with open(name + "/losses.txt", "w") as f:
    for i in losses:
        f.write(str(i)+"\n")
with open(name + "/accuracies.txt", "w") as f:
    for i in accuracies:
        f.write(str(i)+"\n")
with open(name + "/f1scores.txt", "w") as f:
    for i in f1scores:
        f.write(str(i)+"\n")
with open(name + "/f1scores_micro.txt", "w") as f:
    for i in f1scores_micro:
        f.write(str(i)+"\n")
with open(name + "/val_losses.txt", "w") as f:
    for i in val_losses:
        f.write(str(i)+"\n")