In [11]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# Fine-tuned checkpoint under test. The tokenizer is loaded from the base
# ModernBERT repository rather than from the checkpoint itself.
# NOTE(review): presumably the fine-tuned repo ships no tokenizer files — confirm.
model_name = "mccoole/ModernBERT-large-ade-corpus-v2-classification"
tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-large")
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Bundle model and tokenizer into a ready-to-call text classifier.
classifier = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
)

# Hand-written probes: two sentences describing a reaction, two describing none.
examples = [
    "The patient developed a rash after taking penicillin.",
    "The patient was given penicillin with no adverse effects.",
    "After starting the new medication, she experienced severe headaches and dizziness.",
    "The treatment was well-tolerated with no complications."
]

# Classify each probe on its own and report the top label with its score.
# NOTE(review): the recorded run predicts LABEL_0 for every probe at ~0.7
# confidence — worth checking that the classification head loaded correctly.
for sentence in examples:
    top = classifier(sentence)[0]
    print(f"Text: {sentence}")
    print(f"Prediction: {top['label']}, Confidence: {top['score']:.4f}\n")

Device set to use cpu


Text: The patient developed a rash after taking penicillin.
Prediction: LABEL_0, Confidence: 0.7138

Text: The patient was given penicillin with no adverse effects.
Prediction: LABEL_0, Confidence: 0.7581

Text: After starting the new medication, she experienced severe headaches and dizziness.
Prediction: LABEL_0, Confidence: 0.6555

Text: The treatment was well-tolerated with no complications.
Prediction: LABEL_0, Confidence: 0.7070



In [None]:
from datasets import load_dataset
from transformers import Trainer, TrainingArguments
import numpy as np
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

# Fetch the ADE classification corpus and carve a seeded 20% hold-out from it.
# NOTE(review): the hold-out comes from the "train" split; whether the
# checkpoint saw these rows during fine-tuning cannot be told from here — confirm.
dataset = load_dataset("ade_corpus_v2", "Ade_corpus_v2_classification")
train_rows = dataset["train"]
test_dataset = train_rows.train_test_split(test_size=0.2, seed=42)["test"]

# Tokenize the dataset
def tokenize_function(examples, max_length=256):
    """Tokenize a batch of rows to a fixed, bounded sequence length.

    Args:
        examples: Batch mapping handed over by ``Dataset.map(..., batched=True)``;
            only its ``"text"`` entry is read.
        max_length: Length (in tokens) that every sequence is padded or
            truncated to.

    Returns:
        The output of the module-level ``tokenizer`` for the batch.
    """
    # Without an explicit max_length, padding="max_length" pads every row to
    # the tokenizer's model_max_length (reported as 8192 for ModernBERT), which
    # makes CPU evaluation needlessly slow and memory-hungry.
    # NOTE(review): 256 is assumed to be ample for single-sentence ADE inputs —
    # confirm against the corpus, since longer rows are truncated.
    return tokenizer(
        examples["text"],
        padding="max_length",
        truncation=True,
        max_length=max_length,
    )

# Tokenize in batches, drop the raw string column, then switch the dataset's
# output format to torch so it can be fed to the model.
tokenized_test = (
    test_dataset
    .map(tokenize_function, batched=True)
    .remove_columns(["text"])
)
tokenized_test.set_format("torch")

# Define compute_metrics function
def compute_metrics(pred):
    """Compute accuracy and binary precision/recall/F1 for an evaluation run.

    Args:
        pred: Object exposing ``label_ids`` (reference labels) and
            ``predictions`` (per-class scores; the arg-max over the last axis
            is taken as the predicted class).

    Returns:
        dict with the keys ``accuracy``, ``f1``, ``precision`` and ``recall``.
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    # zero_division=0 states explicitly what to report when a metric is
    # undefined (e.g. the model never predicts the positive class) instead of
    # relying on the default, which emits a warning for that case.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels,
        preds,
        average='binary',
        zero_division=0,
    )
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

# Evaluation-only configuration: training is switched off, eval batches hold
# 16 rows per device, and no external experiment tracker is used.
training_args = TrainingArguments(
    output_dir="./results",
    per_device_eval_batch_size=16,
    do_train=False,
    do_predict=True,
    report_to="none",
)

# The Trainer is used purely as an evaluation harness around the loaded model.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    eval_dataset=tokenized_test,
)

# Score the hold-out set and show the resulting metrics dictionary.
metrics = trainer.evaluate()
print(metrics)