In [None]:
import os
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
from datasets import load_from_disk
from sklearn.metrics import accuracy_score, f1_score


def compute_metrics(pred):
    """Score one evaluation pass with accuracy and weighted F1.

    Args:
        pred: Object exposing ``label_ids`` (reference labels) and
            ``predictions``; the predicted class is the argmax over the
            last axis of ``predictions``.

    Returns:
        A dict with the keys ``"accuracy"`` and ``"f1"``.
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)
    return {
        "accuracy": accuracy_score(y_true, y_pred),
        # "weighted" averages the per-class F1 scores by class support.
        "f1": f1_score(y_true, y_pred, average="weighted"),
    }


def train_model(features_path: str, model_path: str, logs_path: str, num_labels: int = 4) -> None:
    """Fine-tune the BigBird health-fact classifier and evaluate it on the test split.

    The dataset stored at ``features_path`` is loaded with ``load_from_disk``.
    The ``nbroad/bigbird-base-health-fact`` checkpoint is trained on the
    ``"train"`` split and evaluated on ``"validation"`` after every epoch.
    The final model is saved to ``model_path`` and then evaluated on the
    ``"test"`` split; the resulting metrics are printed and written to
    ``<model_path>/test_results.txt``.

    Args:
        features_path: Directory holding a saved dataset with ``"train"``,
            ``"validation"`` and ``"test"`` splits. No tokenization happens
            here, so the splits are presumably already tokenized -- TODO
            confirm against the feature-building step.
        model_path: Directory that receives the fine-tuned model and the
            ``test_results.txt`` file. Created if it does not exist.
        logs_path: Trainer output directory (checkpoints); logging output goes
            to its ``logs`` subdirectory.
        num_labels: Number of target classes for the classification head.

    Returns:
        None. All results are written to disk or printed.
    """
    print(f"Loading features from {features_path}...")
    dataset = load_from_disk(features_path)
    tokenized_train = dataset["train"]
    tokenized_validation = dataset["validation"]
    tokenized_test = dataset["test"]

    print("Initializing BigBird model...")
    model = AutoModelForSequenceClassification.from_pretrained(
        "nbroad/bigbird-base-health-fact",
        num_labels=num_labels
    )

    print("Setting up training arguments...")
    # NOTE(review): newer transformers releases rename `evaluation_strategy`
    # to `eval_strategy`; kept as-is here -- confirm against the installed version.
    training_args = TrainingArguments(
        output_dir=logs_path,
        evaluation_strategy="epoch",
        learning_rate=2e-5,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=3,
        weight_decay=0.01,
        save_total_limit=2,  # keep only the two most recent checkpoints
        save_strategy="epoch",
        logging_dir=os.path.join(logs_path, "logs"),
        logging_steps=50,
        push_to_hub=False
    )

    print("Initializing Trainer...")
    # The deprecated `tokenizer` keyword is intentionally not passed: None is
    # already its default, and passing it explicitly triggers a deprecation
    # warning on recent transformers releases.
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_validation,
        compute_metrics=compute_metrics
    )

    print("Starting training...")
    trainer.train()

    print(f"Saving fine-tuned model to {model_path}...")
    trainer.save_model(model_path)

    print("Evaluating on the test set...")
    test_results = trainer.evaluate(eval_dataset=tokenized_test)
    print(f"Test Results: {test_results}")

    # Do not rely on the caller having created the directory beforehand.
    os.makedirs(model_path, exist_ok=True)
    results_file = os.path.join(model_path, "test_results.txt")
    with open(results_file, "w", encoding="utf-8") as f:
        f.write(str(test_results))
    print(f"Test results saved to {results_file}.")


if __name__ == "__main__":
    # Input features directory, fine-tuned model directory, and Trainer
    # output/log directory, all relative to the current working directory.
    FEATURES_PATH = "./data/features"
    MODEL_PATH = "./models/fine_tuned_bigbird_health_fact"
    LOGS_PATH = "./models/logs"

    # Create both output directories up front; existing ones are left as-is.
    os.makedirs(MODEL_PATH, exist_ok=True)
    os.makedirs(LOGS_PATH, exist_ok=True)

    train_model(
        features_path=FEATURES_PATH,
        model_path=MODEL_PATH,
        logs_path=LOGS_PATH,
    )


Loading features from ./data/features...
Initializing BigBird model...
Setting up training arguments...
Initializing Trainer...


  trainer = Trainer(


Starting training...


Attention type 'block_sparse' is not possible if sequence_length: 512 <= num global tokens: 2 * config.block_size + min. num sliding tokens: 3 * config.block_size + config.num_random_blocks * config.block_size + additional buffer: config.num_random_blocks * config.block_size = 704 with config.block_size = 64, config.num_random_blocks = 3. Changing attention type to 'original_full'...
                                                  
  7%|▋         | 257/3687 [41:15<39:16,  1.46it/s]

{'loss': 0.8063, 'grad_norm': 11.074920654296875, 'learning_rate': 1.972877678329265e-05, 'epoch': 0.04}


                                                  
  7%|▋         | 257/3687 [41:47<39:16,  1.46it/s]

{'loss': 0.7109, 'grad_norm': 20.083398818969727, 'learning_rate': 1.94575535665853e-05, 'epoch': 0.08}


                                                  
  7%|▋         | 257/3687 [42:19<39:16,  1.46it/s]

{'loss': 0.7065, 'grad_norm': 14.968084335327148, 'learning_rate': 1.918633034987795e-05, 'epoch': 0.12}




KeyboardInterrupt: 

In [26]:
def compute_metrics(pred):
    """Compute accuracy and weighted F1, printing them before returning.

    This redefinition shadows the earlier ``compute_metrics`` in the notebook;
    it returns the same dict and additionally prints the two scores.

    Args:
        pred: Object exposing ``label_ids`` (reference labels) and
            ``predictions``; the predicted class is the argmax over the
            last axis of ``predictions``.

    Returns:
        A dict with the keys ``"accuracy"`` and ``"f1"``.
    """
    references, guesses = pred.label_ids, pred.predictions.argmax(-1)
    acc = accuracy_score(references, guesses)
    f1 = f1_score(references, guesses, average="weighted")
    print(f"Metrics computed: Accuracy={acc}, F1={f1}")
    return dict(accuracy=acc, f1=f1)

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset


# Tokenizer that belongs to the same hub checkpoint as the model loaded below.
tokenizer = AutoTokenizer.from_pretrained("nbroad/bigbird-base-health-fact")


def tokenize_function(examples):
    """Tokenize the ``"text"`` field of ``examples`` to a fixed length of 512.

    Inputs longer than 512 tokens are truncated and shorter ones are padded
    up to that length.

    Args:
        examples: Mapping with a ``"text"`` entry to feed to the tokenizer.

    Returns:
        Whatever the module-level ``tokenizer`` returns for that text.
    """
    encode_options = {"truncation": True, "padding": "max_length", "max_length": 512}
    return tokenizer(examples["text"], **encode_options)


# Imported here so this cell does not depend on an earlier cell having
# brought `load_from_disk` into the kernel namespace.
from datasets import load_from_disk

# Evaluate the hub checkpoint on the saved test split.
# NOTE(review): this path differs from the "./data/features" used by the
# training cell -- confirm which working directory is intended.
features_path = "./../data/features"
dataset = load_from_disk(features_path)
tokenized_test = dataset["test"]

# NOTE(review): the model is loaded from the hub checkpoint, not from the
# directory written by `train_model` -- confirm this baseline evaluation is
# what is wanted.
num_labels = 4
model = AutoModelForSequenceClassification.from_pretrained(
    "nbroad/bigbird-base-health-fact",
    num_labels=num_labels
)

# Evaluation-only configuration. The deprecated `evaluation_strategy` option
# is omitted on purpose: it only schedules evaluation during training and has
# no effect on a direct `trainer.evaluate()` call.
training_args = TrainingArguments(
    output_dir="./results",
    per_device_eval_batch_size=16,
    logging_dir="./logs",
    logging_steps=10
)

trainer = Trainer(
    model=model,
    args=training_args,
    eval_dataset=tokenized_test,
    compute_metrics=compute_metrics
)

print("Evaluating on the test set")
test_results = trainer.evaluate(eval_dataset=tokenized_test)
print(f"Test Results: {test_results}")


Attention type 'block_sparse' is not possible if sequence_length: 512 <= num global tokens: 2 * config.block_size + min. num sliding tokens: 3 * config.block_size + config.num_random_blocks * config.block_size + additional buffer: config.num_random_blocks * config.block_size = 704 with config.block_size = 64, config.num_random_blocks = 3. Changing attention type to 'original_full'...


Evaluating on the test set...


100%|██████████| 78/78 [00:28<00:00,  2.69it/s]

Metrics computed: Accuracy=0.6736842105263158, F1=0.6465685614126215
Test Results: {'eval_loss': 1.2507350444793701, 'eval_model_preparation_time': 0.0013, 'eval_accuracy': 0.6736842105263158, 'eval_f1': 0.6465685614126215, 'eval_runtime': 29.766, 'eval_samples_per_second': 41.49, 'eval_steps_per_second': 2.62}





In [28]:
# Show the metrics dict returned by `trainer.evaluate` as this cell's output.
test_results

{'eval_loss': 1.2507350444793701,
 'eval_model_preparation_time': 0.0013,
 'eval_accuracy': 0.6736842105263158,
 'eval_f1': 0.6465685614126215,
 'eval_runtime': 29.766,
 'eval_samples_per_second': 41.49,
 'eval_steps_per_second': 2.62}