In [1]:
#install packages
!pip install transformers
#import packages
import pandas as pd
import matplotlib.pyplot as plt
import torch
from tqdm.notebook import tqdm

from transformers import BertTokenizer
from torch.utils.data import TensorDataset

from transformers import BertForSequenceClassification

from sklearn.metrics import f1_score

from sklearn.model_selection import train_test_split

import numpy as np
import random


#Read data from git:
#https://raw.githubusercontent.com/FANMISUA/TweetAENormalization/main/ADENormalization/Data/CADEC/3.csv
# URL of the CSV file
csv_url = "https://raw.githubusercontent.com/FANMISUA/TweetAENormalization/main/ADENormalization/Data/CADEC/3.csv"

# Read the CSV file into a pandas DataFrame
column_names = ["TT", "llt_code", "ade", "soc_code"]
cadec_all = pd.read_csv(csv_url,names=column_names, header=None)

# Display the first few rows of the DataFrame
print(cadec_all.shape)

# Remove duplicate rows based on the 'ade' column
cadec_unique = cadec_all.drop_duplicates(subset='ade')

# Display the resulting DataFrame
print(cadec_unique.shape)
# Count occurrences of each 'soc_code'
soc_code_counts = cadec_unique['soc_code'].value_counts()
# Sort the counts from high to low and print the result
print(soc_code_counts)

#get top 3 of the SMM4H list
#['10018065','10037175','10029205','10022891','10028395','10017947']
top3SMM4H = [10018065,10037175,10029205]
top6SMM4H = [10018065,10037175,10029205,10022891,10028395,10017947]

# Filter DataFrame
filtered_data3 = cadec_unique[cadec_unique['soc_code'].isin(top3SMM4H)]
filtered_data6 = cadec_unique[cadec_unique['soc_code'].isin(top6SMM4H)]

# Select only the Term and SOC columns
CADECtop3inSMM4H = filtered_data3[['ade', 'soc_code']]
CADECtop6inSMM4H = filtered_data6[['ade', 'soc_code']]

print(CADECtop3inSMM4H)
data = CADECtop3inSMM4H
# Convert to DataFrame
df = pd.DataFrame(CADECtop3inSMM4H)

import pandas as pd

# Get unique labels and their counts
label_counts = df['soc_code'].value_counts()

# Sort labels by counts
sorted_labels = label_counts.index.tolist()

# Create label dictionary with labels assigned from 1 to the number of unique labels
label_dict = {label: i for i, label in enumerate(sorted_labels, 0)}

# Print label dictionary
print(label_dict)

label_df = pd.DataFrame.from_dict(label_dict, orient='index', columns=['Label'])

# Reset index to make the index a regular column
label_df.reset_index(inplace=True)

# Rename columns
label_df.columns = ['SOC Code', 'Label']

# Sort DataFrame by label
label_df = label_df.sort_values(by='Label')

# Print the DataFrame
print(label_df)

df['label'] = df.soc_code.replace(label_dict)

def custom_train_test_split(X, y, test_size=0.15, random_state=None):
    classes, counts = np.unique(y, return_counts=True)
    min_class_count = min(counts)

    # Find classes with only one or two instances
    single_or_double_instance_classes = classes[np.logical_or(counts == 1, counts == 2)]

    # Remove instances of single-instance or two-instance classes
    X_filtered = X[~np.isin(y, single_or_double_instance_classes)]
    y_filtered = y[~np.isin(y, single_or_double_instance_classes)]

    if len(y_filtered) < 2:
        raise ValueError("No classes have more than two instances after filtering.")

    # Perform stratified split on the filtered dataset
    X_train, X_val, y_train, y_val = train_test_split(X_filtered, y_filtered, test_size=test_size, random_state=random_state, stratify=y_filtered)

    # Randomly assign instances of single-instance classes to training or testing sets
    for class_label in single_or_double_instance_classes:
        class_indices = np.where(y == class_label)[0]
        np.random.shuffle(class_indices)

        if len(class_indices) <= 2:
            # Only one instance, randomly assign to training or testing set
            if np.random.rand() < test_size:
                X_val = np.concatenate((X_val, X[class_indices]))
                y_val = np.concatenate((y_val, y[class_indices]))
            else:
                X_train = np.concatenate((X_train, X[class_indices]))
                y_train = np.concatenate((y_train, y[class_indices]))

    return X_train, X_val, y_train, y_val


#evaluation
from sklearn.metrics import f1_score

def f1_score_func(preds, labels):
    preds_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return f1_score(labels_flat, preds_flat, average='weighted')

def accuracy_per_class(preds, labels):
    label_dict_inverse = {v: k for k, v in label_dict.items()}

    preds_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()

    for label in np.unique(labels_flat):
        y_preds = preds_flat[labels_flat==label]
        y_true = labels_flat[labels_flat==label]
        print(f'Class: {label_dict_inverse[label]}')
        print(f'Accuracy: {len(y_preds[y_preds==label])}/{len(y_true)}\n')


(5962, 4)
(3348, 4)
soc_code
10028395    962
10018065    654
10037175    401
10017947    300
10029205    286
10040785    184
10007541     92
10038738     91
10022891     82
10015919     67
10038604     59
10038359     50
10022117     35
10047065     25
10013993     16
10019805     15
10041244      7
10027433      6
10021881      5
10021428      4
10014698      3
10005329      3
10029104      1
Name: count, dtype: int64
                            ade  soc_code
926            voracious hunger  10018065
927            loss of appetite  10018065
929            lack of appetite  10018065
931                    anorexia  10018065
932                    anorexic  10018065
...                         ...       ...
5326  short term memory lacking  10037175
5328      couldn't eat or drink  10037175
5329              Could not eat  10037175
5331           can't eat normal  10037175
5332   Disturbed sleep patterns  10037175

[1341 rows x 2 columns]
{10018065: 0, 10037175: 1, 10029205: 2}
   SOC C

In [None]:
# Define the random seeds and other parameters
seed_values = list(range(2, 42, 2))
batch_size = 8
epochs = 10

# Placeholder for accuracies
accuracies = []
all_accuracies = {label: [] for label in range(len(label_dict))}


# Loop over each seed value
for seed_val in seed_values:
  # Set seeds
  random.seed(seed_val)
  np.random.seed(seed_val)
  torch.manual_seed(seed_val)
  torch.cuda.manual_seed_all(seed_val)

  X_train, X_val, y_train, y_val = custom_train_test_split(df.index.values, df.label.values, test_size=0.2, random_state=42)
  df['data_type'] = ['not_set']*df.shape[0]
  df.loc[X_train, 'data_type'] = 'train'
  df.loc[X_val, 'data_type'] = 'val'
  print(df.groupby(['soc_code', 'label', 'data_type']).count())

  result = df.groupby(['soc_code', 'label', 'data_type']).size().reset_index(name='count')
  result_sorted = result.sort_values(by='label')

  # Group by soc_code, label, and data_type
  grouped = df.groupby(['soc_code', 'label', 'data_type']).size().reset_index(name='count')
  # Filter data for training and validation
  train_counts = grouped[grouped['data_type'] == 'train']
  val_counts = grouped[grouped['data_type'] == 'val']

  tokenizer = BertTokenizer.from_pretrained('distilbert-base-uncased',
                                            do_lower_case=True)

  encoded_data_train = tokenizer.batch_encode_plus(
      df[df.data_type=='train'].ade.values,
      add_special_tokens=True,
      return_attention_mask=True,
      pad_to_max_length=True,
      max_length=256,
      return_tensors='pt'
  )

  encoded_data_val = tokenizer.batch_encode_plus(
      df[df.data_type=='val'].ade.values,
      add_special_tokens=True,
      return_attention_mask=True,
      pad_to_max_length=True,
      max_length=256,
      return_tensors='pt'
  )

  input_ids_train = encoded_data_train['input_ids']
  attention_masks_train = encoded_data_train['attention_mask']
  labels_train = torch.tensor(df[df.data_type=='train'].label.values)

  input_ids_val = encoded_data_val['input_ids']
  attention_masks_val = encoded_data_val['attention_mask']
  labels_val = torch.tensor(df[df.data_type=='val'].label.values)

  dataset_train = TensorDataset(input_ids_train, attention_masks_train, labels_train)
  dataset_val = TensorDataset(input_ids_val, attention_masks_val, labels_val)

  model = BertForSequenceClassification.from_pretrained("distilbert-base-uncased",
                                                        num_labels=len(label_dict),
                                                        output_attentions=False,
                                                        output_hidden_states=False)

  #data loader
  from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

  dataloader_train = DataLoader(dataset_train,
                                sampler=RandomSampler(dataset_train),
                                batch_size=batch_size)

  dataloader_validation = DataLoader(dataset_val,
                                    sampler=SequentialSampler(dataset_val),
                                    batch_size=batch_size)

  from transformers import AdamW, get_linear_schedule_with_warmup

  #training parameters
  optimizer = AdamW(model.parameters(),
                    lr=1e-5,
                    eps=1e-8)


  scheduler = get_linear_schedule_with_warmup(optimizer,
                                              num_warmup_steps=0,
                                              num_training_steps=len(dataloader_train)*epochs)

  # Assuming device is cuda
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  # Move model to GPU
  model.to(device)
  print(f"Device used: {device}")


  #training
  import random

  def evaluate(dataloader_val):

      model.eval()

      loss_val_total = 0
      predictions, true_vals = [], []

      for batch in dataloader_val:

          batch = tuple(b.to(device) for b in batch)

          inputs = {'input_ids':      batch[0],
                    'attention_mask': batch[1],
                    'labels':         batch[2],
                  }

          with torch.no_grad():
              outputs = model(**inputs)

          loss = outputs[0]
          logits = outputs[1]
          loss_val_total += loss.item()

          logits = logits.detach().cpu().numpy()
          label_ids = inputs['labels'].cpu().numpy()
          predictions.append(logits)
          true_vals.append(label_ids)

      loss_val_avg = loss_val_total/len(dataloader_val)

      predictions = np.concatenate(predictions, axis=0)
      true_vals = np.concatenate(true_vals, axis=0)

      return loss_val_avg, predictions, true_vals

  for epoch in tqdm(range(1, epochs+1)):

      model.train()

      loss_train_total = 0

      progress_bar = tqdm(dataloader_train, desc='Epoch {:1d}'.format(epoch), leave=False, disable=False)
      for batch in progress_bar:

          model.zero_grad()

          batch = tuple(b.to(device) for b in batch)

          inputs = {'input_ids':      batch[0],
                    'attention_mask': batch[1],
                    'labels':         batch[2],
                  }

          outputs = model(**inputs)
          # outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)


          loss = outputs[0]
          loss_train_total += loss.item()
          loss.backward()

          torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

          optimizer.step()
          scheduler.step()

          progress_bar.set_postfix({'training_loss': '{:.3f}'.format(loss.item()/len(batch))})


      torch.save(model.state_dict(), f'./finetuned_BERT_epoch_{epoch}.model')

      tqdm.write(f'\nEpoch {epoch}')

      loss_train_avg = loss_train_total/len(dataloader_train)
      tqdm.write(f'Training loss: {loss_train_avg}')

      val_loss, predictions, true_vals = evaluate(dataloader_validation)
      val_f1 = f1_score_func(predictions, true_vals)
      tqdm.write(f'Validation loss: {val_loss}')
      tqdm.write(f'F1 Score (Weighted): {val_f1}')

  _, predictions, true_vals = evaluate(dataloader_validation)
  accuracy_per_class(predictions, true_vals)

  #plot evaluation
  import matplotlib.pyplot as plt

  def accuracy_per_class(predictions, true_vals):
      pred_flat = np.argmax(predictions, axis=1).flatten()
      labels_flat = true_vals.flatten()
      accuracy_dict = {}
      count_dict = {}
      for label in np.unique(labels_flat):
          y_preds = pred_flat[labels_flat == label]
          y_true = labels_flat[labels_flat == label]
          accuracy_dict[label] = np.sum(y_preds == y_true) / len(y_true)
          count_dict[label] = len(y_true)
      return accuracy_dict, count_dict

  accuracy_dict, count_dict = accuracy_per_class(predictions, true_vals)
  for label, accuracy in accuracy_dict.items():
      all_accuracies[label].append(accuracy)
  print(seed_val, accuracy_dict, count_dict )

print(all_accuracies)
# Compute average and standard deviation of accuracy
avg_accuracy = {label: np.mean(accs) for label, accs in all_accuracies.items()}
std_accuracy = {label: np.std(accs) for label, accs in all_accuracies.items()}


                          ade
soc_code label data_type     
10018065 0     train      523
               val        131
10029205 2     train      229
               val         57
10037175 1     train      320
               val         81


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DistilBertTokenizer'. 
The class this function is called from is 'BertTokenizer'.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
You are using a model of type distilbert to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.


model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerN

Device used: cuda


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 1
Training loss: 0.9714340856715814
Validation loss: 0.755262293798082
F1 Score (Weighted): 0.6895799902793499


Epoch 2:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 2
Training loss: 0.6370780093233976
Validation loss: 0.8982541366096806
F1 Score (Weighted): 0.6428760596542382


Epoch 3:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 3
Training loss: 0.43042959206139864
Validation loss: 0.7289040507200886
F1 Score (Weighted): 0.754511704900004


Epoch 4:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 4
Training loss: 0.3585911004442666
Validation loss: 0.7871228314059622
F1 Score (Weighted): 0.7334743802785535


Epoch 5:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 5
Training loss: 0.2924796019110884
Validation loss: 0.7241425821420682
F1 Score (Weighted): 0.7910749646822794


Epoch 6:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 6
Training loss: 0.22086416616160146
Validation loss: 0.6754249315366477
F1 Score (Weighted): 0.8124627951548768


Epoch 7:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 7
Training loss: 0.209224241080263
Validation loss: 0.8630954835577594
F1 Score (Weighted): 0.7915511066731606


Epoch 8:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 8
Training loss: 0.18594470948811426
Validation loss: 0.6720881018931429
F1 Score (Weighted): 0.8294681709460822


Epoch 9:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 9
Training loss: 0.13322112166649425
Validation loss: 0.6800881259978804
F1 Score (Weighted): 0.8430558867968486


Epoch 10:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 10
Training loss: 0.12876234911364823
Validation loss: 0.6992673961897654
F1 Score (Weighted): 0.8485802263601969
Class: 10018065
Accuracy: 113/131

Class: 10037175
Accuracy: 67/81

Class: 10029205
Accuracy: 48/57

2 {0: 0.8625954198473282, 1: 0.8271604938271605, 2: 0.8421052631578947} {0: 131, 1: 81, 2: 57}
                          ade
soc_code label data_type     
10018065 0     train      523
               val        131
10029205 2     train      229
               val         57
10037175 1     train      320
               val         81


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DistilBertTokenizer'. 
The class this function is called from is 'BertTokenizer'.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
You are using a model of type distilbert to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.

Device used: cuda




  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 1
Training loss: 1.0035084468215258
Validation loss: 0.8680112681844655
F1 Score (Weighted): 0.5940412182895907


Epoch 2:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 2
Training loss: 0.6682338310933825
Validation loss: 0.692833892398459
F1 Score (Weighted): 0.729575703974327


Epoch 3:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 3
Training loss: 0.4619898766056815
Validation loss: 0.5882424083264435
F1 Score (Weighted): 0.78276418296428


Epoch 4:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 4
Training loss: 0.3262887308443549
Validation loss: 0.6745936306508478
F1 Score (Weighted): 0.7982353677581436


Epoch 5:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 5
Training loss: 0.25628195657853536
Validation loss: 0.6575269734432154
F1 Score (Weighted): 0.8389649911650884


Epoch 6:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 6
Training loss: 0.21181020269921022
Validation loss: 0.7080622565562782
F1 Score (Weighted): 0.8429285155150902


Epoch 7:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 7
Training loss: 0.15774335704375503
Validation loss: 0.7308886748539996
F1 Score (Weighted): 0.8368270349488209


Epoch 8:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 8
Training loss: 0.179762010766652
Validation loss: 0.7382152295116718
F1 Score (Weighted): 0.8555644066796483


Epoch 9:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 9
Training loss: 0.1371089575096353
Validation loss: 0.7607674071743318
F1 Score (Weighted): 0.8474388065466132


Epoch 10:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 10
Training loss: 0.09763964615178419
Validation loss: 0.7446182005600456
F1 Score (Weighted): 0.8591307717701769


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DistilBertTokenizer'. 
The class this function is called from is 'BertTokenizer'.


4 {0: 0.8625954198473282, 1: 0.8641975308641975, 2: 0.8421052631578947} {0: 131, 1: 81, 2: 57}
                          ade
soc_code label data_type     
10018065 0     train      523
               val        131
10029205 2     train      229
               val         57
10037175 1     train      320
               val         81


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
You are using a model of type distilbert to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight',

Device used: cuda




  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 1
Training loss: 1.0378468450325637
Validation loss: 0.865270742598702
F1 Score (Weighted): 0.5257806137430124


Epoch 2:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 2
Training loss: 0.6889495947467748
Validation loss: 0.6386905881192755
F1 Score (Weighted): 0.7422969353828133


Epoch 3:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 3
Training loss: 0.4545439895409257
Validation loss: 0.6032662622575813
F1 Score (Weighted): 0.7808871228781088


Epoch 4:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 4
Training loss: 0.32346089337187917
Validation loss: 0.6288390241782454
F1 Score (Weighted): 0.8083423073142402


Epoch 5:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 5
Training loss: 0.262483134914412
Validation loss: 0.7830602194059312
F1 Score (Weighted): 0.7702554447240887


Epoch 6:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 6
Training loss: 0.26446922646432336
Validation loss: 0.8684907286318347
F1 Score (Weighted): 0.7729835062871362


Epoch 7:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 7
Training loss: 0.21091924263372905
Validation loss: 0.7363278174855035
F1 Score (Weighted): 0.8002479863184848


Epoch 8:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 8
Training loss: 0.1585968270352277
Validation loss: 0.7861718080684488
F1 Score (Weighted): 0.8141970529417467


Epoch 9:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 9
Training loss: 0.14121246929349962
Validation loss: 0.8401169811738382
F1 Score (Weighted): 0.8278165893631958


Epoch 10:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 10
Training loss: 0.11050575026554237
Validation loss: 0.8367074013581527
F1 Score (Weighted): 0.8036438579300181


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DistilBertTokenizer'. 
The class this function is called from is 'BertTokenizer'.


6 {0: 0.816793893129771, 1: 0.7777777777777778, 2: 0.8070175438596491} {0: 131, 1: 81, 2: 57}
                          ade
soc_code label data_type     
10018065 0     train      523
               val        131
10029205 2     train      229
               val         57
10037175 1     train      320
               val         81


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
You are using a model of type distilbert to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight',

Device used: cuda




  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 1
Training loss: 0.9623320809940794
Validation loss: 0.8743000142276287
F1 Score (Weighted): 0.45445281760384654


Epoch 2:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 2
Training loss: 0.6785407373820668
Validation loss: 0.8670505583834123
F1 Score (Weighted): 0.6474260281514467


Epoch 3:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 3
Training loss: 0.45084532426753593
Validation loss: 0.6590928912491483
F1 Score (Weighted): 0.7596063158445


Epoch 4:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 4
Training loss: 0.4097901091062978
Validation loss: 0.5629931719261495
F1 Score (Weighted): 0.8152020516896412


Epoch 5:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 5
Training loss: 0.2543540375251601
Validation loss: 0.7535361493389834
F1 Score (Weighted): 0.7868931430343832


Epoch 6:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 6
Training loss: 0.23207987265994967
Validation loss: 0.7131875148522394
F1 Score (Weighted): 0.8297336404590487


Epoch 7:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 7
Training loss: 0.21633465477932634
Validation loss: 0.7323113995679544
F1 Score (Weighted): 0.8124725690713965


Epoch 8:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 8
Training loss: 0.17259365548344968
Validation loss: 0.6436441631350831
F1 Score (Weighted): 0.8403517179374089


Epoch 9:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 9
Training loss: 0.1200195800012617
Validation loss: 0.736442605348523
F1 Score (Weighted): 0.8265699868179694


Epoch 10:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 10
Training loss: 0.11993182502224792
Validation loss: 0.6661354459288037
F1 Score (Weighted): 0.8550114428626306


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DistilBertTokenizer'. 
The class this function is called from is 'BertTokenizer'.


8 {0: 0.8931297709923665, 1: 0.8024691358024691, 2: 0.8421052631578947} {0: 131, 1: 81, 2: 57}
                          ade
soc_code label data_type     
10018065 0     train      523
               val        131
10029205 2     train      229
               val         57
10037175 1     train      320
               val         81


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
You are using a model of type distilbert to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight',

Device used: cuda




  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 1
Training loss: 0.985861920821133
Validation loss: 1.0177643299102783
F1 Score (Weighted): 0.46540946696185925


Epoch 2:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 2
Training loss: 0.7344330993494881
Validation loss: 1.1286745705806158
F1 Score (Weighted): 0.5462968170240224


Epoch 3:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 3
Training loss: 0.4905857169694865
Validation loss: 0.5983991041122114
F1 Score (Weighted): 0.7755138102099914


Epoch 4:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 4
Training loss: 0.3323311954458703
Validation loss: 0.6470282551942065
F1 Score (Weighted): 0.7970413779460305


Epoch 5:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 5
Training loss: 0.27076837256439584
Validation loss: 0.7838060459437068
F1 Score (Weighted): 0.8042725749954389


Epoch 6:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 6
Training loss: 0.23239745085350058
Validation loss: 0.7622788681525409
F1 Score (Weighted): 0.8113806206574262


Epoch 7:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 7
Training loss: 0.19436353711070797
Validation loss: 0.7718388175194645
F1 Score (Weighted): 0.8160474721753077


Epoch 8:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 8
Training loss: 0.18081957758439066
Validation loss: 0.7981477006242665
F1 Score (Weighted): 0.829621459609322


Epoch 9:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 9
Training loss: 0.15691268331121042
Validation loss: 0.7856798019629958
F1 Score (Weighted): 0.8380726869278841


Epoch 10:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 10
Training loss: 0.11392886135831419
Validation loss: 0.7544933236820404
F1 Score (Weighted): 0.8329730451686237


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DistilBertTokenizer'. 
The class this function is called from is 'BertTokenizer'.


10 {0: 0.8396946564885496, 1: 0.8148148148148148, 2: 0.8421052631578947} {0: 131, 1: 81, 2: 57}
                          ade
soc_code label data_type     
10018065 0     train      523
               val        131
10029205 2     train      229
               val         57
10037175 1     train      320
               val         81


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
You are using a model of type distilbert to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight',

Device used: cuda




  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 1
Training loss: 1.029228893233769
Validation loss: 0.8417672208126854
F1 Score (Weighted): 0.6243561708229293


Epoch 2:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 2
Training loss: 0.6745786414996012
Validation loss: 0.5861965922967476
F1 Score (Weighted): 0.7568417514888593


Epoch 3:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 3
Training loss: 0.460285626379634
Validation loss: 0.7051215525990462
F1 Score (Weighted): 0.758526829562873


Epoch 4:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 4
Training loss: 0.34749607014845113
Validation loss: 0.5559597028391984
F1 Score (Weighted): 0.821243842744148


Epoch 5:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 5
Training loss: 0.3025554029667166
Validation loss: 0.6484999036868376
F1 Score (Weighted): 0.8031133828996282


Epoch 6:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 6
Training loss: 0.2588342767375619
Validation loss: 0.6355433311340782
F1 Score (Weighted): 0.8275065815031203


Epoch 7:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 7
Training loss: 0.1599565075637439
Validation loss: 0.8049938334871138
F1 Score (Weighted): 0.8118656368011115


Epoch 8:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 8
Training loss: 0.1393257450414891
Validation loss: 0.695944248825339
F1 Score (Weighted): 0.8367603980529219


Epoch 9:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 9
Training loss: 0.13848571358300718
Validation loss: 0.7624379694290624
F1 Score (Weighted): 0.8311452526538083


Epoch 10:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 10
Training loss: 0.11197227883454997
Validation loss: 0.7782400177795139
F1 Score (Weighted): 0.8283008443105699


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DistilBertTokenizer'. 
The class this function is called from is 'BertTokenizer'.


12 {0: 0.8625954198473282, 1: 0.7407407407407407, 2: 0.8771929824561403} {0: 131, 1: 81, 2: 57}
                          ade
soc_code label data_type     
10018065 0     train      523
               val        131
10029205 2     train      229
               val         57
10037175 1     train      320
               val         81


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
You are using a model of type distilbert to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight',

Device used: cuda




  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 1
Training loss: 0.9350381397934102
Validation loss: 0.8671439800630597
F1 Score (Weighted): 0.5607420191144763


Epoch 2:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 2
Training loss: 0.605515714456786
Validation loss: 0.5943361699362012
F1 Score (Weighted): 0.739773585600673


Epoch 3:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 3
Training loss: 0.4597612711698262
Validation loss: 0.786997933707693
F1 Score (Weighted): 0.7124109482793427


Epoch 4:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 4
Training loss: 0.3453565050166712
Validation loss: 0.5657852489350583
F1 Score (Weighted): 0.8215595143903852


Epoch 5:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 5
Training loss: 0.26162116365409943
Validation loss: 0.8566385001108489
F1 Score (Weighted): 0.7440537790785308


Epoch 6:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 6
Training loss: 0.22811322551178956
Validation loss: 0.5909133857280454
F1 Score (Weighted): 0.8446276532541319


Epoch 7:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 7
Training loss: 0.20880620365913735
Validation loss: 0.662097051010385
F1 Score (Weighted): 0.8366345260223048


Epoch 8:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 8
Training loss: 0.16083416079932045
Validation loss: 0.6502979999315679
F1 Score (Weighted): 0.8511309004380513


Epoch 9:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 9
Training loss: 0.14086132795539505
Validation loss: 0.658444476332721
F1 Score (Weighted): 0.8548520314102818


Epoch 10:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 10
Training loss: 0.11755872668555951
Validation loss: 0.6636043965135126
F1 Score (Weighted): 0.8475422779273519


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DistilBertTokenizer'. 
The class this function is called from is 'BertTokenizer'.


14 {0: 0.8625954198473282, 1: 0.8148148148148148, 2: 0.8596491228070176} {0: 131, 1: 81, 2: 57}
                          ade
soc_code label data_type     
10018065 0     train      523
               val        131
10029205 2     train      229
               val         57
10037175 1     train      320
               val         81


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
You are using a model of type distilbert to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight',

Device used: cuda




  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 1
Training loss: 1.0078125729489682
Validation loss: 0.8130088588770699
F1 Score (Weighted): 0.6802813444287604


Epoch 2:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 2
Training loss: 0.6101915561663571
Validation loss: 0.6993879255126504
F1 Score (Weighted): 0.7178237853478482


Epoch 3:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 3
Training loss: 0.40730226256731733
Validation loss: 0.7246123419608921
F1 Score (Weighted): 0.7690800433767052


Epoch 4:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 4
Training loss: 0.28426973062998323
Validation loss: 0.6521873515071895
F1 Score (Weighted): 0.801337017869973


Epoch 5:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 5
Training loss: 0.22978348134836155
Validation loss: 0.7145819001872202
F1 Score (Weighted): 0.8357152267428872


Epoch 6:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 6
Training loss: 0.20081587068027637
Validation loss: 0.8294646312886684
F1 Score (Weighted): 0.8069893621762488


Epoch 7:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 7
Training loss: 0.15625131292852448
Validation loss: 0.8165276690372539
F1 Score (Weighted): 0.8263717882455403


Epoch 8:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 8
Training loss: 0.15297794402159973
Validation loss: 0.8853836931726512
F1 Score (Weighted): 0.8263012742998911


Epoch 9:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 9
Training loss: 0.14447277785911322
Validation loss: 0.7774783343929277
F1 Score (Weighted): 0.8584414673493723


Epoch 10:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 10
Training loss: 0.126242581162421
Validation loss: 0.7775223790257073
F1 Score (Weighted): 0.862203838014805


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DistilBertTokenizer'. 
The class this function is called from is 'BertTokenizer'.


16 {0: 0.8931297709923665, 1: 0.8148148148148148, 2: 0.8596491228070176} {0: 131, 1: 81, 2: 57}
                          ade
soc_code label data_type     
10018065 0     train      523
               val        131
10029205 2     train      229
               val         57
10037175 1     train      320
               val         81


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
You are using a model of type distilbert to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight',

Device used: cuda




  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 1
Training loss: 1.011656389974836
Validation loss: 0.8284923890057732
F1 Score (Weighted): 0.6252750257345725


Epoch 2:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 2
Training loss: 0.6809788347402616
Validation loss: 0.7616753561750931
F1 Score (Weighted): 0.6655362541895646


Epoch 3:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 3
Training loss: 0.44247604192081674
Validation loss: 0.648552839342943
F1 Score (Weighted): 0.7751849243282896


Epoch 4:   0%|          | 0/134 [00:00<?, ?it/s]


Epoch 4
Training loss: 0.33486557202830686
Validation loss: 0.6071913326843915
F1 Score (Weighted): 0.8197848268212026


Epoch 5:   0%|          | 0/134 [00:00<?, ?it/s]