# Data Loading & Preprocessing

In [None]:
from huggingface_hub import notebook_login

# Log in to the Hugging Face Hub from inside the notebook.
# Presumably needed by the TrainingArguments below that set push_to_hub=True.
notebook_login()

In [None]:
import os
from transformers import AutoTokenizer
from datasets import Dataset
import pandas as pd

In [None]:
# Checkpoint and tokenizer intended for the BERT experiments.
# NOTE(review): the following cell rebinds both names to "albert-base-v2";
# when the notebook is run top to bottom this assignment is overwritten
# before the data is tokenized and before the "BERT" models are loaded.
model_checkpoint = "bert-base-cased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

In [None]:
# Checkpoint and tokenizer for the ALBERT experiments.
# NOTE(review): this overrides the "bert-base-cased" values set in the cell
# above. Run only ONE of the two cells before tokenizing the data, otherwise
# the BERT section silently loads and trains albert-base-v2.
model_checkpoint = "albert-base-v2"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

In [None]:
def load_data(text_dir, ann_dir, num_files=None):
    """Build a line-level dataset from raw texts and their stand-off annotations.

    Files from ``text_dir`` and ``ann_dir`` are paired by position after
    sorting both directory listings, so the two directories are assumed to
    contain matching files in the same order (NOTE(review): not verified here).

    Every annotation line that starts with ``T`` is read as
    ``<id> TAB <label> <start> <end>[;<start> <end>...] TAB ...``.  A fragment
    is kept only when it lies completely inside a single text line; its
    offsets are then stored relative to the beginning of that line.

    Args:
        text_dir: Directory containing the text files.
        ann_dir: Directory containing the annotation files.
        num_files: Optional cap on the number of (sorted) files to read;
            ``None`` reads every file.

    Returns:
        A ``datasets.Dataset`` with one row per non-empty text line and the
        columns ``id``, ``text``, ``labels`` (label type -> list of
        ``(start, end)`` character offsets) and ``file_names``.
    """
    data = []
    text_files = sorted(os.listdir(text_dir))[:num_files]
    ann_files = sorted(os.listdir(ann_dir))[:num_files]

    for txt_file, ann_file in zip(text_files, ann_files):
        # NOTE(review): files are opened with the platform default encoding;
        # annotation offsets only line up if that matches the corpus encoding.
        with open(os.path.join(text_dir, txt_file), 'r') as f:
            lines = f.read().split('\n')

        # Character offset at which each line starts in the whole document
        # (+1 accounts for the '\n' removed by split). Computed once per file
        # instead of once per annotation and line.
        line_starts = []
        offset = 0
        for text_line in lines:
            line_starts.append(offset)
            offset += len(text_line) + 1

        line_labels = [{} for _ in lines]

        with open(os.path.join(ann_dir, ann_file), 'r') as f:
            for ann_line in f:
                if not ann_line.startswith('T'):
                    continue
                parts = ann_line.split('\t')
                if len(parts) < 2:
                    # Malformed line without a "<label> <offsets>" field.
                    continue
                label_type, _, span_spec = parts[1].partition(' ')
                # Discontinuous annotations list several fragments separated by ';'.
                for fragment in span_spec.split(';'):
                    offsets = fragment.split()
                    for i in range(0, len(offsets), 2):
                        start, end = map(int, offsets[i:i + 2])
                        for line_index, line_start in enumerate(line_starts):
                            line_end = line_start + len(lines[line_index])
                            if line_start <= start < line_end and line_start < end <= line_end:
                                line_labels[line_index].setdefault(label_type, []).append(
                                    (start - line_start, end - line_start)
                                )
                                # Line ranges are disjoint, so no other line can match.
                                break

        for line_index, text_line in enumerate(lines):
            if text_line:
                data.append({
                    'id': f'{txt_file.split(".")[0]}_{line_index}',
                    'text': text_line,
                    'labels': line_labels[line_index],
                    'file_names': txt_file
                })

    df = pd.DataFrame(data)
    dataset = Dataset.from_pandas(df)

    return dataset

In [None]:
# Build the corpus paths with os.path.join so they resolve on every OS
# (the literal "cadec\\text" form only works on Windows).
text_dir = os.path.join("cadec", "text")
ann_dir = os.path.join("cadec", "original")

# One row per non-empty line of every CADEC post.
dataset = load_data(text_dir, ann_dir)

In [None]:
# Tag vocabulary: 'O' plus a B-/I- pair for each of the five entity types.
# The position of a tag in this list is used as its integer class id
# (e.g. `label_names[tag]` when printing encoded examples).
label_names = ['O', 'B-Drug', 'I-Drug', 'B-ADR', 'I-ADR', 'B-Disease', 'I-Disease', 'B-Symptom', 'I-Symptom','B-Finding', 'I-Finding']

In [None]:
from datasets import ClassLabel, Sequence

# Feature used to convert tag strings to integer ids (see encode_example).
# It is built from `label_names` so the tag vocabulary is declared in exactly
# one place; the number of classes is inferred from the list of names.
ner_feature = Sequence(feature=ClassLabel(names=label_names))

In [None]:
def encode_example(example):
    """Tokenize one text line and align its character spans to token-level tags.

    Uses the module-level ``tokenizer`` (must be a fast tokenizer, because
    ``char_to_token`` / ``word_ids`` are used) and ``ner_feature`` to turn tag
    strings into integer ids.

    Args:
        example: A row with ``text`` (str) and ``labels`` (mapping of label
            type to a list of ``(start, end)`` character offsets, or ``None``
            when the line has no span of that type).

    Returns:
        A dict with ``input_ids``, ``attention_mask``, ``tokens`` and
        ``labels``; ``labels`` holds one class id per token, with ``-100`` on
        the special tokens added by the tokenizer.
    """
    # No padding here: a single example has nothing to be padded against;
    # batches are padded later by the data collator.
    encoding = tokenizer(example['text'], truncation=True, max_length=512)
    tokens = tokenizer.convert_ids_to_tokens(encoding['input_ids'])

    labels = ['O'] * len(tokens)

    for label_type, positions in example['labels'].items():
        if positions is None:
            continue
        for start, end in positions:
            # Convert character positions to token positions (end is exclusive).
            start_token = encoding.char_to_token(start)
            end_token = encoding.char_to_token(end - 1)
            if start_token is None or end_token is None:
                # Span boundary does not map to a token (e.g. it was truncated).
                continue
            labels[start_token] = 'B-' + label_type
            for i in range(start_token + 1, end_token + 1):
                labels[i] = 'I-' + label_type

    # Tokens inserted by the tokenizer have no source word (word id None).
    # Masking on that instead of on the literal strings '[CLS]'/'[SEP]'/'[PAD]'
    # keeps the function correct for tokenizers with different special tokens.
    word_ids = encoding.word_ids()
    labels = [
        -100 if word_id is None else ner_feature.feature.str2int(label)
        for word_id, label in zip(word_ids, labels)
    ]

    return {'input_ids': encoding['input_ids'], 'attention_mask': encoding['attention_mask'], 'tokens': tokens, 'labels': labels}

dataset = dataset.map(encode_example)

In [None]:
# Print the token/tag alignment of a single encoded row as a sanity check.
i = 145
example = dataset[i]
print(example['file_names'])
tokens = example['tokens']
tags = example['labels']

for token, tag in zip(tokens, tags):
    # -100 marks ignored positions and has no entry in label_names.
    tag = tag if tag == -100 else label_names[tag]
    print(f"Token: {token}, Tag: {tag}")

In [None]:
from transformers import DataCollatorForTokenClassification

# Collator handed to every Trainer below; presumably it pads input_ids and
# labels per batch (confirm against the transformers documentation).
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

In [None]:
dataset

In [None]:
from datasets import DatasetDict

# Hold out 20% of the rows as the test split, then carve 10% of the remainder
# off as validation. Fixed seeds keep the splits reproducible.
dataset = dataset.train_test_split(test_size=0.2, seed=1)
train_validation_dataset = dataset['train'].train_test_split(test_size=0.1, seed=1)

# NOTE: from here on `dataset` is a DatasetDict keyed by split name, no longer
# a single Dataset, so rows must be accessed as dataset[split][i].
dataset = DatasetDict({
    'train': train_validation_dataset['train'],
    'validation': train_validation_dataset['test'],
    'test': dataset['test']
})

def select_fields(example):
    """Return only the model inputs of a row: ids, labels and attention mask."""
    wanted = ('input_ids', 'labels', 'attention_mask')
    return {key: example[key] for key in wanted}

# Training view of the data: the bookkeeping columns are dropped, while
# `dataset` keeps them for inspection.
processed_dataset = dataset.map(select_fields,remove_columns=['tokens', 'file_names','id','text'])

In [None]:
dataset

In [None]:
processed_dataset

# Explore the data

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict

# Document-level co-occurrence of entity types: cell (i, j) counts the
# annotation files in which both labels[i] and labels[j] occur at least once
# (the diagonal is therefore the number of files containing that label).
labels = ['ADR', 'Drug', 'Disease', 'Symptom', 'Finding']

co_occurrence_matrix = np.zeros((len(labels), len(labels)))

ann_root = 'cadec/original'
for filename in os.listdir(ann_root):
    if not filename.endswith('.ann'):
        continue
    with open(os.path.join(ann_root, filename), 'r') as file:
        label_dict = defaultdict(int)
        for line in file:
            # Only text-bound annotations ("T..." lines) carry an entity label;
            # skipping the rest also avoids an IndexError on lines without a tab.
            if not line.startswith('T'):
                continue
            label = line.split('\t')[1].split()[0]
            if label in labels:
                label_dict[label] += 1

    for i in range(len(labels)):
        for j in range(i, len(labels)):
            if label_dict[labels[i]] > 0 and label_dict[labels[j]] > 0:
                co_occurrence_matrix[i, j] += 1
                if i != j:
                    # Keep the matrix symmetric.
                    co_occurrence_matrix[j, i] += 1

plt.figure(figsize=(7, 7))
plt.imshow(co_occurrence_matrix, cmap='hot', interpolation='nearest')
plt.colorbar(label='co-occurrence frequency', orientation='horizontal')

plt.xticks(np.arange(len(labels)), labels, rotation=0)
plt.yticks(np.arange(len(labels)), labels)

plt.show()

In [None]:
import os
from collections import defaultdict

# Count how many annotated entities of each type the raw .ann files contain.
labels = ['ADR', 'Drug', 'Disease', 'Symptom', 'Finding']
label_counts = defaultdict(int)
ann_root = 'cadec/original'
for filename in os.listdir(ann_root):
    if not filename.endswith('.ann'):
        continue
    with open(os.path.join(ann_root, filename), 'r') as file:
        for line in file:
            # Only "T..." lines describe text-bound entities.
            if not line.startswith('T'):
                continue
            label = line.split('\t')[1].split()[0]
            if label in labels:
                label_counts[label] += 1

for label, count in label_counts.items():
    print(f'{label}: {count}')

In [None]:
from collections import Counter

# Token-level tag frequencies over all three splits (ignored -100 positions
# are left out).
labels = [
    tag
    for split in ['train', 'validation', 'test']
    for example in dataset[split]
    for tag in example['labels']
    if tag != -100
]
label_counts = Counter(labels)

total_freqs = {label_names[label]: count for label, count in label_counts.items()}
total = sum(total_freqs.values())
total_ratio = {label: round(freq / total, 3) for label, freq in total_freqs.items()}
for label, freq in total_freqs.items():
    print(f"Label: {label}, Frequency: {freq}, Ratio: {total_ratio[label]}")

In [None]:
import matplotlib.pyplot as plt
from collections import defaultdict

# Grouped bar chart of the token-level tag counts per split (log-scaled y axis).
splits = ['train', 'validation', 'test']
label_counts = {split: defaultdict(int) for split in splits}
for split in splits:
    split_counts = label_counts[split]
    for example in dataset[split]:
        for tag in example['labels']:
            if tag == -100:
                continue  # ignored position, not a real tag
            split_counts[label_names[tag]] += 1

fig, ax = plt.subplots(figsize=(12, 6))
width = 0.2
x = range(len(label_names))
for i, split in enumerate(splits):
    counts = [label_counts[split][label] for label in label_names]
    # Shift each split's bars by one bar width so the groups sit side by side.
    bar_positions = [xi + i * width for xi in x]
    ax.bar(bar_positions, counts, width, label=split)

ax.set_title('Label distribution in each split')
ax.set_ylabel('Counts (log scale)')
ax.set_yscale('log')
# Centre the tick under the middle bar of each group of three.
ax.set_xticks([xi + width for xi in x])
ax.set_xticklabels(label_names)
ax.legend()

plt.show()

# Define Model

In [None]:
# Show row 140 of every split with its tags decoded back to strings.
for split in ['train', 'validation', 'test']:
    sample = dataset[split][140]
    tokens = sample['tokens']
    labels = ['-100' if label == -100 else label_names[label] for label in sample['labels']]
    print(f'Split: {split}')
    print(sample['file_names'])
    for token, label in zip(tokens, labels):
        print(f'Token: {token:<15} Label: {label}')
    print()

In [None]:
# Mappings stored in the model config. Ids are kept as ints so that label2id
# maps tag -> int (with string ids it would map tag -> "0", "1", ...).
id2label = {i: label for i, label in enumerate(label_names)}
label2id = {label: i for i, label in id2label.items()}

In [None]:
from transformers import AutoModelForTokenClassification

# Load the pretrained encoder named by `model_checkpoint` with a token
# classification head labelled through id2label / label2id.
# NOTE(review): `model_checkpoint` is whatever the most recently executed
# tokenizer cell set it to.
model = AutoModelForTokenClassification.from_pretrained(
    model_checkpoint,
    id2label=id2label,
    label2id=label2id,
)

In [None]:
# Metrics
import evaluate

metric = evaluate.load("seqeval")
import numpy as np

def compute_metrics(eval_preds):
    """Return the full seqeval result for a Trainer evaluation step.

    ``eval_preds`` is a ``(logits, labels)`` pair. Positions whose gold label
    is -100 are dropped from both sequences before the remaining ids are
    decoded to tag strings through ``label_names``.
    """
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)

    true_labels = []
    true_predictions = []
    for predicted_row, gold_row in zip(predictions, labels):
        true_labels.append([label_names[l] for l in gold_row if l != -100])
        true_predictions.append(
            [label_names[p] for p, l in zip(predicted_row, gold_row) if l != -100]
        )

    return metric.compute(predictions=true_predictions, references=true_labels)

In [None]:
# Sanity check: this should equal len(label_names).
model.config.num_labels

# BERT

## Hyperparameter Tuning

### Baseline Hyperparameters

In [None]:
from transformers import TrainingArguments

# Baseline configuration: 3 epochs at lr 2e-5 with light weight decay,
# evaluating and checkpointing once per epoch under "cadec_baseline".
args = TrainingArguments(
    output_dir="cadec_baseline", 
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    num_train_epochs=3,
    weight_decay=0.01,
    push_to_hub=True,
)

In [None]:
from transformers import Trainer

# Baseline run: train on the train split and report the full seqeval result
# (compute_metrics) on the validation split.
trainer_0 = Trainer(
    model=model,
    args=args,
    train_dataset=processed_dataset["train"],
    eval_dataset=processed_dataset["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)
trainer_0.train()

In [None]:
# Raw outputs of the baseline model on the validation split; the third value
# returned by predict() is not needed here.
predictions, labels, _ = trainer_0.predict(processed_dataset["validation"])

# Most likely class id per token (not used by the visible cells that follow).
predicted_indices = np.argmax(predictions, axis=-1)

In [None]:
import numpy as np

def get_preds_and_labels(eval_preds):
    """Decode a ``(logits, labels)`` pair into tag-string sequences.

    Positions whose gold label is -100 are removed from both outputs.

    Returns:
        ``(true_labels, true_predictions)``: two lists of per-example tag lists.
    """
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)

    true_labels = []
    for gold_row in labels:
        true_labels.append([label_names[l] for l in gold_row if l != -100])

    true_predictions = []
    for predicted_row, gold_row in zip(predictions, labels):
        kept = [label_names[p] for p, l in zip(predicted_row, gold_row) if l != -100]
        true_predictions.append(kept)

    return true_labels, true_predictions

In [None]:
# Decode the baseline predictions and print the first example, gold tags
# first and predicted tags second.
true_labels,true_predictions = get_preds_and_labels((predictions, labels))
print(true_labels[0])
print(true_predictions[0])

In [None]:
# seqeval result for the baseline predictions decoded above.
all_metrics = metric.compute(predictions=true_predictions, references=true_labels)

In [None]:
all_metrics

### Other Combinations

In [None]:
def compute_metrics_tune(eval_preds):
    """Return only the overall seqeval scores for a Trainer evaluation step.

    Same decoding as ``compute_metrics`` (positions labelled -100 are
    dropped), but the result is reduced to overall precision, recall, F1 and
    accuracy.
    """
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)

    true_labels = []
    true_predictions = []
    for predicted_row, gold_row in zip(predictions, labels):
        true_labels.append([label_names[l] for l in gold_row if l != -100])
        true_predictions.append(
            [label_names[p] for p, l in zip(predicted_row, gold_row) if l != -100]
        )

    all_metrics = metric.compute(predictions=true_predictions, references=true_labels)
    summary = {}
    for short_name in ("precision", "recall", "f1", "accuracy"):
        summary[short_name] = all_metrics["overall_" + short_name]
    return summary

In [None]:
from transformers import TrainingArguments
def optuna_hp_space(trial):
    """Sample one hyperparameter configuration from ``trial``.

    Returns a dict keyed by the TrainingArguments field names to override:
    learning rate (log-uniform), train batch size, weight decay, warm-up
    steps and gradient accumulation steps.
    """
    learning_rate = trial.suggest_float("learning_rate", 1e-6, 1e-4, log=True)
    batch_size = trial.suggest_categorical("per_device_train_batch_size", [16, 32, 64])
    weight_decay = trial.suggest_float("weight_decay", 0.0, 0.3)
    warmup_steps = trial.suggest_int("warmup_steps", 0, 500)
    accumulation_steps = trial.suggest_int("gradient_accumulation_steps", 1, 10)
    return {
        "learning_rate": learning_rate,
        "per_device_train_batch_size": batch_size,
        "weight_decay": weight_decay,
        "warmup_steps": warmup_steps,
        "gradient_accumulation_steps": accumulation_steps,
    }

def model_init(trial):
    """Return a freshly loaded model for a hyperparameter-search trial.

    ``trial`` is accepted for the Trainer ``model_init`` signature but is not used.
    """
    fresh_model = AutoModelForTokenClassification.from_pretrained(
        model_checkpoint,
        id2label=id2label,
        label2id=label2id,
    )
    return fresh_model

# Base arguments for the hyperparameter search. Each trial trains for only
# 0.3 of an epoch to keep the search cheap; the learning rate, batch size,
# weight decay, warm-up and accumulation values given here are presumably
# replaced per trial by optuna_hp_space.
# NOTE(review): './results' is also the output_dir of the BERT "Model 1-3"
# runs below, so their checkpoints share one folder.
args_tuner = TrainingArguments(
    output_dir='./results',         
    evaluation_strategy="epoch",
    optim="adamw_torch",
    num_train_epochs=0.3,             
    per_device_train_batch_size=16, 
    per_device_eval_batch_size=64,  
    warmup_steps=500,               
    weight_decay=0.01,              
    logging_dir='./logs',          
    logging_steps=10,
    gradient_accumulation_steps=1,  
)

In [None]:
from transformers import Trainer
# Trainer used only for the hyperparameter search: no model is passed, so
# each trial builds a fresh one through model_init.
trainer_tuner = Trainer(
    model=None,
    args=args_tuner,
    train_dataset=processed_dataset["train"],
    eval_dataset=processed_dataset["validation"],
    compute_metrics=compute_metrics_tune,
    tokenizer=tokenizer,
    model_init=model_init,
    data_collator=data_collator,
)

In [None]:
# Run 20 Optuna trials over optuna_hp_space.
best_trial = trainer_tuner.hyperparameter_search(
    direction="maximize",
    backend="optuna",
    hp_space=optuna_hp_space,
    # Without an explicit objective the Trainer adds up every metric returned
    # by compute_metrics_tune (precision + recall + f1 + accuracy), which is
    # why trial values above 1 were reported. Optimise the entity-level F1.
    compute_objective=lambda metrics: metrics["eval_f1"],
    n_trials=20
)

In [None]:
# Objective value and sampled hyperparameters of the best trial.
print(f"Best Trial Score: {best_trial.objective}")
print(f"Best Hyperparameters: {best_trial.hyperparameters}")

#### Model 1

In [None]:
# Reload the pretrained checkpoint so Model 1 does not start from the
# weights of the baseline run.
model = AutoModelForTokenClassification.from_pretrained(
    model_checkpoint,
    id2label=id2label,
    label2id=label2id,
)

In [None]:
# Candidate configuration 1 (values presumably copied from a search trial),
# trained for the full 3 epochs and evaluated every 100 steps.
args_test = []
args_test.append(TrainingArguments(
    output_dir="./results", 
    optim="adamw_torch",
    save_strategy="epoch",
    per_device_train_batch_size=32,  
    per_device_eval_batch_size=64,   
    learning_rate=5.97795988468052e-05,
    num_train_epochs=3,
    weight_decay=0.29365893220385486,
    warmup_steps=176,
    gradient_accumulation_steps=6,
    logging_dir='./logs',      
    logging_steps=10,
    push_to_hub=False,
    evaluation_strategy="steps",
    eval_steps=100, 
))

In [None]:
from transformers import Trainer
# Train candidate 1 on the train split, validating with the overall scores
# from compute_metrics_tune.
trainer_test = []
trainer_test.append(Trainer(
    model=model,
    args=args_test[0],
    train_dataset=processed_dataset["train"],
    eval_dataset=processed_dataset["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics_tune,
    tokenizer=tokenizer,
))
trainer_test[0].train()


In [None]:
trainer_test[0].state.log_history

In [None]:
# Validation-set outputs of candidate 1.
predictions, labels, _ = trainer_test[0].predict(processed_dataset["validation"])
predicted_indices = np.argmax(predictions, axis=-1)

In [None]:
# Decode to tag strings, dropping the positions labelled -100.
true_labels,true_predictions = get_preds_and_labels((predictions, labels))

In [None]:
# Collect the seqeval result of each candidate; index 0 is Model 1.
all_metrics = []
all_metrics.append(metric.compute(predictions=true_predictions, references=true_labels))

In [None]:
all_metrics[0]

#### Model 2

In [None]:
# Candidate configuration 2, appended at index 1; evaluated once per epoch.
args_test.append(TrainingArguments(
    output_dir="./results", 
    optim="adamw_torch",
    save_strategy="epoch",
    per_device_train_batch_size=16, 
    per_device_eval_batch_size=64,   
    learning_rate=5.753308827948429e-05,
    num_train_epochs=3,
    weight_decay=0.2990812548264342,
    warmup_steps=213,
    gradient_accumulation_steps=8,
    logging_dir='./logs',        
    logging_steps=10,
    push_to_hub=False,
    evaluation_strategy="epoch",
))

In [None]:
# Reload the pretrained checkpoint first: `model` still holds the weights
# fine-tuned by Model 1, and continuing from them would make the two
# hyperparameter settings incomparable.
model = AutoModelForTokenClassification.from_pretrained(
    model_checkpoint,
    id2label=id2label,
    label2id=label2id,
)

# Train candidate 2 on the train split, validating once per epoch.
trainer_test.append(Trainer(
    model=model,
    args=args_test[1],
    train_dataset=processed_dataset["train"],
    eval_dataset=processed_dataset["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics_tune,
    tokenizer=tokenizer,
))
trainer_test[1].train()

In [None]:
# Validation-set seqeval result of candidate 2, stored at index 1.
predictions, labels, _ = trainer_test[1].predict(processed_dataset["validation"])
predicted_indices = np.argmax(predictions, axis=-1)
true_labels,true_predictions = get_preds_and_labels((predictions, labels))
all_metrics.append(metric.compute(predictions=true_predictions, references=true_labels))

In [None]:
all_metrics[1]

#### Model 3

In [None]:
from transformers import TrainingArguments
from transformers import Trainer
# Candidate configuration 3.
# NOTE: `args_test` is rebound from the list used for Models 1-2 to a single
# TrainingArguments object, so args_test[0] / args_test[1] are gone after this.
args_test = TrainingArguments(
    output_dir="./results", 
    optim="adamw_torch",
    save_strategy="epoch",
    per_device_train_batch_size=32, 
    per_device_eval_batch_size=64,  
    learning_rate=9.367839518177488e-05,
    num_train_epochs=3,
    weight_decay=0.26268932261420513,
    warmup_steps=61,
    gradient_accumulation_steps=7,
    logging_dir='./logs',         
    logging_steps=10,
    push_to_hub=False,
    evaluation_strategy="epoch",
)

In [None]:
# Reload the pretrained checkpoint first: `model` still holds the weights
# fine-tuned by the previous candidate run, which would bias this comparison.
model = AutoModelForTokenClassification.from_pretrained(
    model_checkpoint,
    id2label=id2label,
    label2id=label2id,
)

# Train candidate 3 (note: `trainer_test` is now a single Trainer, not a list).
trainer_test = Trainer(
    model=model,
    args=args_test,
    train_dataset=processed_dataset["train"],
    eval_dataset=processed_dataset["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics_tune,
    tokenizer=tokenizer,
)
trainer_test.train()

In [None]:
# Validation-set seqeval result of candidate 3.
# NOTE: `all_metrics` is rebound to this single result, discarding the list
# that held the results of Models 1 and 2.
predictions, labels, _ = trainer_test.predict(processed_dataset["validation"])
predicted_indices = np.argmax(predictions, axis=-1)
true_labels,true_predictions = get_preds_and_labels((predictions, labels))
all_metrics=metric.compute(predictions=true_predictions, references=true_labels)

In [None]:
all_metrics

## Complete Training

In [None]:
from transformers import TrainingArguments

# Configuration of the final run: the baseline hyperparameters (lr 2e-5,
# 3 epochs, weight decay 0.01) with logging every 10 steps and evaluation
# every 100 steps.
args = TrainingArguments(
    output_dir="cadec_baseline", 
    logging_dir='./logs',
    save_strategy="epoch",
    learning_rate=2e-5,
    num_train_epochs=3,
    weight_decay=0.01,
    push_to_hub=True,
    logging_steps=10,
    evaluation_strategy="steps",
    eval_steps=100, 
)

In [None]:
from datasets import concatenate_datasets

# Final training uses train + validation; the test split is only evaluated on.
train_val_dataset = concatenate_datasets([processed_dataset["train"], processed_dataset["validation"]])

# Start from the pretrained checkpoint: `model` otherwise still carries the
# weights left behind by the tuning runs above, so the final model would not
# correspond to the hyperparameters in `args`.
model = AutoModelForTokenClassification.from_pretrained(
    model_checkpoint,
    id2label=id2label,
    label2id=label2id,
)

trainer_1 = Trainer(
    model=model,
    args=args,
    train_dataset=train_val_dataset,
    eval_dataset=processed_dataset["test"],
    data_collator=data_collator,
    compute_metrics=compute_metrics_tune,
    tokenizer=tokenizer,
)
trainer_1.train()

In [None]:
trainer_1.state.log_history

### Reports

In [None]:
from transformers import AutoModelForTokenClassification

# Reload a saved checkpoint of the final run for reporting.
# NOTE(review): the step number 2280 is hard-coded; it must match a
# checkpoint folder that actually exists under cadec_baseline/.
model = AutoModelForTokenClassification.from_pretrained(
    'cadec_baseline/checkpoint-2280',
    id2label=id2label,
    label2id=label2id,
)

In [None]:
from datasets import concatenate_datasets
from transformers import Trainer

train_val_dataset = concatenate_datasets([processed_dataset["train"], processed_dataset["validation"]])

# Wrap the reloaded checkpoint in a Trainer so predict() can be used below;
# train() is not called in this cell.
trainer_1 = Trainer(
    model=model,
    args=args,
    train_dataset=train_val_dataset,  
    eval_dataset=processed_dataset["test"],  
    data_collator=data_collator,
    compute_metrics=compute_metrics_tune,
    tokenizer=tokenizer,
)

In [None]:
# Test-set predictions of the final model, decoded and scored with seqeval.
predictions, labels, _ = trainer_1.predict(processed_dataset["test"])
predicted_indices = np.argmax(predictions, axis=-1)
true_labels,true_predictions = get_preds_and_labels((predictions, labels))
all_metrics=metric.compute(predictions=true_predictions, references=true_labels)

In [None]:
all_metrics

In [None]:
import pandas as pd
from sklearn.metrics import confusion_matrix

# Token-level confusion matrix over the test predictions, printed as
# percentages of all evaluated tokens.
flat_true_labels = []
for sentence_tags in true_labels:
    flat_true_labels.extend(sentence_tags)
flat_true_predictions = []
for sentence_tags in true_predictions:
    flat_true_predictions.extend(sentence_tags)

label_names = ['O', 'B-Drug', 'I-Drug', 'B-ADR', 'I-ADR', 'B-Disease', 'I-Disease', 'B-Symptom', 'I-Symptom', 'B-Finding', 'I-Finding']

# Rows are gold tags, columns are predicted tags, both in label_names order.
cm = confusion_matrix(flat_true_labels, flat_true_predictions, labels=label_names)

short_labels = list(label_names)
df_cm = pd.DataFrame(cm, index=short_labels, columns=short_labels)

total_tokens = df_cm.sum().sum()
df_cm_percentage = (df_cm / total_tokens * 100).round(3)

pd.set_option('display.expand_frame_repr', False)
# Blank out the decimals of exact ".000" values to make the table easier to scan.
df_cm_percentage_str = df_cm_percentage.to_string().replace('.000', '    ')

print(df_cm_percentage_str)

In [None]:
# Same percentage table as LaTeX source, three decimals per cell.
df_cm_percentage_latex = df_cm_percentage.to_latex(float_format="%.3f")

print(df_cm_percentage_latex)

In [None]:
print(df_cm)

In [None]:
from seqeval.metrics import precision_score, recall_score, f1_score, classification_report

# Overall scores and per-entity report for the test predictions.
y_true = true_labels
y_pred = true_predictions

for title, scorer in (
    ("Precision: ", precision_score),
    ("Recall: ", recall_score),
    ("F1-Score: ", f1_score),
):
    print(title, scorer(y_true, y_pred))
print("\nClassification Report: ")
print(classification_report(y_true, y_pred))

In [None]:
label_names

# ALBERT

## Data Preparation

In [None]:
# Switch checkpoint and tokenizer to ALBERT for this section.
# NOTE(review): `dataset` / `processed_dataset` are not re-encoded here; they
# keep the input_ids produced when encode_example was mapped earlier, so that
# step must have run with this same tokenizer for the ids to be valid.
model_checkpoint = "albert-base-v2"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

In [None]:
# Print the token/tag alignment of one encoded row as a sanity check.
# `dataset` is a DatasetDict at this point, so a split has to be selected
# before indexing a row (dataset[145] raises a KeyError).
i = 145
example = dataset['train'][i]
print(example['file_names'])
tokens = example['tokens']
tags = example['labels']

for token, tag in zip(tokens, tags):
    # -100 marks ignored positions and has no entry in label_names.
    if tag != -100:
        tag = label_names[tag]
    print(f"Token: {token}, Tag: {tag}")

In [None]:
dataset

In [None]:
processed_dataset

In [None]:
from transformers import AutoModelForTokenClassification

# Pretrained ALBERT encoder with a new token classification head.
model = AutoModelForTokenClassification.from_pretrained(
    model_checkpoint,
    id2label=id2label,
    label2id=label2id,
)

## Hyperparameter Tuning

### Baseline Hyperparameters

In [None]:
from transformers import TrainingArguments

# ALBERT baseline: same hyperparameters as the BERT baseline, written to the
# "albert" output directory.
# NOTE: this rebinds `args`, which the ALBERT "Complete Training" cell reuses.
args = TrainingArguments(
    output_dir="albert", 
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    num_train_epochs=3,
    weight_decay=0.01,
    push_to_hub=True,
)


In [None]:
from transformers import Trainer

# ALBERT baseline run, validated with the full seqeval result.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=processed_dataset["train"],
    eval_dataset=processed_dataset["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)
trainer.train()

### Other Combinations

In [None]:
from transformers import Trainer
# Search Trainer for ALBERT. It reuses args_tuner and model_init from the
# BERT section; model_init reads the current `model_checkpoint`.
trainer_tuner = Trainer(
    model=None,
    args=args_tuner,
    train_dataset=processed_dataset["train"],
    eval_dataset=processed_dataset["validation"],
    compute_metrics=compute_metrics_tune,
    tokenizer=tokenizer,
    model_init=model_init,
    data_collator=data_collator,
)

In [None]:
# Run 20 Optuna trials over optuna_hp_space for ALBERT.
best_trial = trainer_tuner.hyperparameter_search(
    direction="maximize",
    backend="optuna",
    hp_space=optuna_hp_space,
    # Without an explicit objective the Trainer adds up every metric returned
    # by compute_metrics_tune (the logged trial values of about 2.07-2.31
    # match precision + recall + f1 + accuracy). Optimise the entity-level F1.
    compute_objective=lambda metrics: metrics["eval_f1"],
    n_trials=20
)

#### Model 1

In [None]:
# [I 2024-01-03 11:24:10,550]
# Trial 10 finished with value: 2.066261419601351 and parameters: {'learning_rate': 9.55234940591104e-05, 'per_device_train_batch_size': 16, 'weight_decay': 0.08938621127315785, 'warmup_steps': 223, 'gradient_accumulation_steps': 1}. Best is trial 10 with value: 2.066261419601351.
# Some weights of AlbertForTokenClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
# You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
# {'eval_loss': 0.39240533113479614, 'eval_precision': 0.5277108433734939, 'eval_recall': 0.292, 'eval_f1': 0.37596566523605146, 'eval_accuracy': 0.8705849109918056, 'eval_runtime': 2.3539, 'eval_samples_per_second': 258.3, 'eval_steps_per_second': 4.248, 'epoch': 0.3}
# {'train_runtime': 12.1387, 'train_samples_per_second': 135.163, 'train_steps_per_second': 8.485, 'train_loss': 0.7450381980358975, 'epoch': 0.3}

In [None]:
# Reload the pretrained checkpoint so Model 1 does not start from the
# baseline run's weights.
model = AutoModelForTokenClassification.from_pretrained(
    model_checkpoint,
    id2label=id2label,
    label2id=label2id,
)

In [None]:
# ALBERT candidate 1: the parameters logged for trial 10 in the cell above,
# trained for the full 3 epochs.
args_test = []
args_test.append(TrainingArguments(
    output_dir = "./albert_test",
    optim="adamw_torch",
    per_device_train_batch_size=16,  
    learning_rate=9.55234940591104e-05,
    num_train_epochs=3,
    weight_decay=0.08938621127315785,
    warmup_steps=223,
    gradient_accumulation_steps=1,
    push_to_hub=False,
    evaluation_strategy="epoch",
))

In [None]:
from transformers import Trainer
# Train ALBERT candidate 1 on the train split, validating once per epoch.
trainer_test = []
trainer_test.append(Trainer(
    model=model,
    args=args_test[0],
    train_dataset=processed_dataset["train"],
    eval_dataset=processed_dataset["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics_tune,
    tokenizer=tokenizer,
))
trainer_test[0].train()

#### Model 2

In [None]:
# [I 2024-01-03 11:27:29,424] Trial 17 finished with value: 2.3091917346772814 and parameters: {'learning_rate': 5.643324435863421e-05, 'per_device_train_batch_size': 16, 'weight_decay': 0.09606971752344114, 'warmup_steps': 183, 'gradient_accumulation_steps': 2}. Best is trial 17 with value: 2.3091917346772814.
# Some weights of AlbertForTokenClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
# You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
# {'eval_loss': 0.3853124678134918, 'eval_precision': 0.42016806722689076, 'eval_recall': 0.5333333333333333, 'eval_f1': 0.47003525264394835, 'eval_accuracy': 0.8856550814731091, 'eval_runtime': 2.3471, 'eval_samples_per_second': 259.041, 'eval_steps_per_second': 4.261, 'epoch': 0.3}
# {'train_runtime': 11.9747, 'train_samples_per_second': 137.014, 'train_steps_per_second': 4.343, 'train_loss': 1.2237023264169693, 'epoch': 0.3}

In [None]:
# ALBERT candidate 2: the parameters logged for trial 17 in the cell above.
args_test = []
args_test.append(TrainingArguments(
    output_dir = "./albert_test",
    optim="adamw_torch",
    per_device_train_batch_size=16, 
    learning_rate=5.643324435863421e-05,
    num_train_epochs=3,
    weight_decay=0.09606971752344114,
    warmup_steps=183,
    gradient_accumulation_steps=2,
    push_to_hub=False,
    evaluation_strategy="epoch",
))

In [None]:
from transformers import Trainer

# Reload the pretrained checkpoint first: `model` still holds the weights
# fine-tuned by Model 1, and continuing from them would make the two
# hyperparameter settings incomparable.
model = AutoModelForTokenClassification.from_pretrained(
    model_checkpoint,
    id2label=id2label,
    label2id=label2id,
)

# Train ALBERT candidate 2 on the train split, validating once per epoch.
trainer_test = []
trainer_test.append(Trainer(
    model=model,
    args=args_test[0],
    train_dataset=processed_dataset["train"],
    eval_dataset=processed_dataset["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics_tune,
    tokenizer=tokenizer,
))
trainer_test[0].train()

#### Model 3

In [None]:
# [I 2024-01-03 11:29:46,281] Trial 19 finished with value: 2.26373217447632 and parameters: {'learning_rate': 3.527893281297765e-05, 'per_device_train_batch_size': 16, 'weight_decay': 0.03473836154590933, 'warmup_steps': 458, 'gradient_accumulation_steps': 1}. Best is trial 17 with value: 2.3091917346772814.
# {'eval_loss': 0.37651291489601135, 'eval_precision': 0.4409937888198758, 'eval_recall': 0.47333333333333333, 'eval_f1': 0.4565916398713826, 'eval_accuracy': 0.8928134124517284, 'eval_runtime': 2.34, 'eval_samples_per_second': 259.825, 'eval_steps_per_second': 4.273, 'epoch': 0.3}
# {'train_runtime': 12.0941, 'train_samples_per_second': 135.661, 'train_steps_per_second': 8.517, 'train_loss': 1.201701590158407, 'epoch': 0.3}

In [None]:
# ALBERT candidate 3: the parameters logged for trial 19 in the cell above.
args_test = []
args_test.append(TrainingArguments(
    output_dir = "./albert_test",
    optim="adamw_torch",
    per_device_train_batch_size=16, 
    learning_rate=3.527893281297765e-05,
    num_train_epochs=3,
    weight_decay=0.03473836154590933,
    warmup_steps=458,
    gradient_accumulation_steps=1,
    push_to_hub=False,
    evaluation_strategy="epoch",
))

In [None]:
from transformers import Trainer

# Reload the pretrained checkpoint first: `model` still holds the weights
# fine-tuned by the previous candidate run, which would bias this comparison.
model = AutoModelForTokenClassification.from_pretrained(
    model_checkpoint,
    id2label=id2label,
    label2id=label2id,
)

# Train ALBERT candidate 3 on the train split, validating once per epoch.
trainer_test = []
trainer_test.append(Trainer(
    model=model,
    args=args_test[0],
    train_dataset=processed_dataset["train"],
    eval_dataset=processed_dataset["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics_tune,
    tokenizer=tokenizer,
))
trainer_test[0].train()

## Complete Training

In [None]:
from datasets import concatenate_datasets

# Final training uses train + validation; the test split is only evaluated on.
train_val_dataset = concatenate_datasets([processed_dataset["train"], processed_dataset["validation"]])

# Start from the pretrained checkpoint: `model` otherwise still carries the
# weights left behind by the candidate runs above, so the final model would
# not correspond to the hyperparameters in `args`.
model = AutoModelForTokenClassification.from_pretrained(
    model_checkpoint,
    id2label=id2label,
    label2id=label2id,
)

trainer_1 = Trainer(
    model=model,
    args=args,
    train_dataset=train_val_dataset,
    eval_dataset=processed_dataset["test"],
    data_collator=data_collator,
    compute_metrics=compute_metrics_tune,
    tokenizer=tokenizer,
)
trainer_1.train()

### Reports

In [None]:
# Test-set predictions of the final ALBERT model, decoded and scored with seqeval.
predictions, labels, _ = trainer_1.predict(processed_dataset["test"])
predicted_indices = np.argmax(predictions, axis=-1)
true_labels,true_predictions = get_preds_and_labels((predictions, labels))
all_metrics=metric.compute(predictions=true_predictions, references=true_labels)

In [None]:
all_metrics

In [None]:
import pandas as pd
from sklearn.metrics import confusion_matrix

# Token-level confusion matrix over the ALBERT test predictions, printed as
# percentages of all evaluated tokens.
flat_true_labels = []
for sentence_tags in true_labels:
    flat_true_labels.extend(sentence_tags)
flat_true_predictions = []
for sentence_tags in true_predictions:
    flat_true_predictions.extend(sentence_tags)

# Rows are gold tags, columns are predicted tags, both in label_names order.
cm = confusion_matrix(flat_true_labels, flat_true_predictions, labels=label_names)
short_labels = list(label_names)

df_cm = pd.DataFrame(cm, index=short_labels, columns=short_labels)
total_tokens = df_cm.sum().sum()
df_cm_percentage = (df_cm / total_tokens * 100).round(3)
pd.set_option('display.expand_frame_repr', False)
# Blank out the decimals of exact ".000" values to make the table easier to scan.
df_cm_percentage_str = df_cm_percentage.to_string().replace('.000', '    ')

print(df_cm_percentage_str)

In [None]:
# Same percentage table as LaTeX source, three decimals per cell.
df_cm_percentage_latex = df_cm_percentage.to_latex(float_format="%.3f")

print(df_cm_percentage_latex)

In [None]:
from seqeval.metrics import precision_score, recall_score, f1_score, classification_report
# Overall scores and per-entity report for the ALBERT test predictions.
y_true = true_labels
y_pred = true_predictions

for title, scorer in (
    ("Precision: ", precision_score),
    ("Recall: ", recall_score),
    ("F1-Score: ", f1_score),
):
    print(title, scorer(y_true, y_pred))
print("\nClassification Report: ")
print(classification_report(y_true, y_pred))