# Notebook cell: In [None]

# pip install transformers datasets evaluate


import inspect
import os

import evaluate
import numpy as np
import torch
from datasets import load_dataset, load_from_disk
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments


# Switch off the Weights & Biases integration via its environment flag.
os.environ["WANDB_DISABLED"] = "true"


# Load the "emotion" dataset.
# - Preferred: a local copy in ./emotion_dataset, so the script can run without
#   internet access. Create it beforehand with
#   `dataset.save_to_disk("emotion_dataset")`.
# - Fallback: download from the Hugging Face Hub (requires internet).
if os.path.exists("emotion_dataset"):
    # `load_from_disk` (imported from `datasets`) reads a directory written by
    # `save_to_disk`; `DatasetDict` itself is not imported in this file.
    dataset = load_from_disk("emotion_dataset")
else:
    dataset = load_dataset("emotion")

# Tokenizer matching the "bert-base-uncased" checkpoint used for the model.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

def tokenize(example):
    """Tokenize the ``"text"`` field of ``example`` with the module tokenizer.

    Sequences are truncated and padded to a fixed length of 128 tokens.
    Returns whatever the tokenizer call returns for that input.
    """
    encode_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 128,
    }
    return tokenizer(example["text"], **encode_options)

# Tokenize the dataset in batches, then rename the "label" column to "labels".
tokenized = dataset.map(tokenize, batched=True).rename_column("label", "labels")
# Return the three listed columns as PyTorch tensors.
tokenized.set_format(
    type="torch",
    columns=["input_ids", "attention_mask", "labels"],
)

# BERT sequence classifier initialised from "bert-base-uncased" with 6 labels.
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=6,
)

# Accuracy metric object consumed by `compute_metrics`.
accuracy = evaluate.load("accuracy")
def compute_metrics(eval_pred):
    """Compute accuracy for one evaluation pass.

    ``eval_pred`` unpacks into ``(logits, labels)``. The predicted class is
    the arg-max over the last axis of the logits, and the result is whatever
    ``accuracy.compute`` returns for those predictions and labels.
    """
    scores, gold = eval_pred
    return accuracy.compute(
        predictions=np.argmax(scores, axis=-1),
        references=gold,
    )

# Directory that receives checkpoints and the final model/tokenizer files.
output_dir = "./emotion_model"

# Newer transformers releases name the evaluation-schedule argument
# `eval_strategy`; older ones only know `evaluation_strategy`. Use whichever
# keyword the installed TrainingArguments accepts so the script runs on both.
_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir=output_dir,
    do_train=True,
    do_eval=True,
    num_train_epochs=4,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    logging_steps=500,
    save_steps=500,
    save_total_limit=1,  # keep only the most recent checkpoint on disk
    logging_dir="./logs",
    **{_eval_strategy_key: "epoch"},  # evaluate once per epoch
)

# Bundle the model, training arguments, data splits and metric function.
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["validation"],
)

# Fine-tune, then run one evaluation pass on the validation split.
trainer.train()
trainer.evaluate()

# Write the model and then the tokenizer into the same output directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained(output_dir)


# Notebook output: KeyboardInterrupt (execution was interrupted)