<a href="https://colab.research.google.com/github/HatemMoushir/FIX-protocol-Oracle/blob/main/AG_News.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ✅ Notebooks: AG News & IMDb training with DistilBERT

# --------- Part 1: AG News Training ---------

!pip install -q datasets transformers evaluate pandas
#!pip install -q evaluate

from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
import numpy as np
import evaluate

# Fetch the AG News dataset by its hub identifier.
ag_dataset = load_dataset(path="ag_news")

# Tokenizer that goes with the DistilBERT (uncased) checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    "distilbert-base-uncased",
)

def tokenize(example):
    """Tokenize the ``"text"`` field of a batch of examples.

    Inputs longer than the tokenizer's maximum length are truncated. No
    padding is applied here on purpose: the ``Trainer`` in this notebook is
    constructed with the tokenizer, so its default data collator pads each
    batch to that batch's longest sequence. Padding every example to the
    maximum length up front would only add compute and memory for padding
    tokens.

    Args:
        example: The batch handed over by ``Dataset.map(..., batched=True)``;
            only its ``"text"`` entry is read.

    Returns:
        The tokenizer's output for the batch.
    """
    return tokenizer(example["text"], truncation=True)

# Apply `tokenize` across the dataset, one batch of examples per call.
ag_encoded = ag_dataset.map(
    tokenize,
    batched=True,
)

# DistilBERT with a sequence-classification head configured for 4 labels.
ag_model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=4,
)

# Accuracy metric used to score evaluation predictions.
accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Score one round of evaluation with the accuracy metric.

    Args:
        eval_pred: Pair that unpacks as ``(logits, labels)``.

    Returns:
        The result of ``accuracy.compute`` comparing, for each example, the
        index of the largest logit along the last axis against ``labels``.
    """
    scores, references = eval_pred
    # The predicted class is the position of the highest score on the last axis.
    predicted_classes = np.asarray(scores).argmax(axis=-1)
    return accuracy.compute(
        predictions=predicted_classes,
        references=references,
    )

# Training configuration for the AG News fine-tuning run.
ag_args = TrainingArguments(
    # Checkpoints and trainer state are written under this directory.
    output_dir="ag_news_model",
    # `eval_strategy` is the current name of this option; the older
    # `evaluation_strategy` spelling is deprecated and is rejected by recent
    # transformers releases, which is what an unpinned install pulls in.
    eval_strategy="epoch",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    logging_steps=100,
    # Kept identical to `eval_strategy`: `load_best_model_at_end` needs a
    # checkpoint saved at every evaluation so the best one can be restored.
    save_strategy="epoch",
    load_best_model_at_end=True,
)

# Assemble the trainer on a subset of the data to keep the run short:
# the first 10,000 training examples and the first 1,000 test examples.
ag_trainer = Trainer(
    model=ag_model,
    args=ag_args,
    train_dataset=ag_encoded["train"].select(range(10000)),
    eval_dataset=ag_encoded["test"].select(range(1000)),
    # `processing_class` is the current name of the parameter that receives
    # the tokenizer; the older `tokenizer=` keyword is deprecated in recent
    # transformers releases. Passing the tokenizer also lets the trainer pad
    # batches and save the tokenizer alongside the model checkpoints.
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
)

# Fine-tune the AG News model.
ag_trainer.train()

ModuleNotFoundError: No module named 'evaluate'