# Emotion Classification with BERT
This notebook demonstrates training a BERT model to classify emotions in text using the Hugging Face `transformers` and `datasets` libraries.

## 1. Install Dependencies
If the required libraries are not already installed, uncomment the following line to install them.

In [None]:
# %pip install datasets transformers scikit-learn torch

## 2. Load the Dataset

In [None]:
from datasets import load_dataset

# Load the emotion corpus from the Hugging Face Hub using its namespaced
# repository id; the bare "emotion" id is a legacy alias for the same dataset.
dataset = load_dataset("dair-ai/emotion")
# Print the split names and sizes so the reader can see what was loaded.
print(dataset)

Extract the texts and labels for training and testing.

In [None]:
# Pull the raw text and label columns out of the train and test splits.
train_split, test_split = dataset["train"], dataset["test"]
train_texts, train_labels = train_split["text"], train_split["label"]
test_texts, test_labels = test_split["text"], test_split["label"]

## 3. Load BERT Tokenizer

In [None]:
from transformers import AutoTokenizer

# Checkpoint identifier; the same name is used again when the model is loaded.
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name)

Define a tokenization function to encode the texts.

In [None]:
def tokenize(batch):
    """Encode the ``"text"`` entries of ``batch`` with the notebook's tokenizer.

    The tokenizer is asked to truncate and to pad to ``max_length`` with a
    maximum length of 128.

    Args:
        batch: Mapping with a ``"text"`` entry holding the texts to encode.

    Returns:
        Whatever the module-level ``tokenizer`` returns for those texts.
    """
    texts = batch["text"]
    encoding_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 128,
    }
    return tokenizer(texts, **encoding_options)
# Columns requested in torch format for the model.
tensor_columns = ["input_ids", "attention_mask", "label"]

# Tokenize every split in batches, then request torch output for the columns above.
encoded_dataset = dataset.map(tokenize, batched=True)
encoded_dataset.set_format(type="torch", columns=tensor_columns)
encoded_dataset

## 4. Load BERT Model for Sequence Classification

In [None]:
from transformers import AutoModelForSequenceClassification, set_seed

# Seed the random number generators *before* building the model: the
# classification head on top of the pretrained encoder is newly initialised,
# so without a fixed seed each fresh run would start from different weights.
set_seed(42)

label_feature = dataset["train"].features["label"]
num_labels = label_feature.num_classes

# Store the human-readable class names in the model config so the saved
# checkpoint reports emotion names rather than generic LABEL_<n> ids.
id2label = dict(enumerate(label_feature.names))
label2id = {name: idx for idx, name in id2label.items()}

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
)

## 5. Training Setup

In [None]:
from transformers import Trainer, TrainingArguments
import os

# Hyperparameters and bookkeeping for fine-tuning.
# NOTE(review): `do_eval=True` is kept from the original, but no evaluation
# strategy is configured, so evaluation is presumably not scheduled during
# training -- confirm against the installed transformers version.
training_args = TrainingArguments(
    output_dir="./results",
    do_eval=True,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=2e-5,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_steps=500,
    save_total_limit=2,
    # Turn off experiment-tracking integrations through the supported argument
    # instead of the deprecated WANDB_DISABLED environment variable.
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_dataset["train"],
    # Evaluate on the validation split so the test split stays untouched
    # until the final evaluation section of the notebook.
    eval_dataset=encoded_dataset["validation"],
    # NOTE(review): recent transformers releases deprecate `tokenizer=` in
    # favour of `processing_class=` -- confirm against the installed version.
    tokenizer=tokenizer,
)

## 6. Train the Model

In [None]:
# Run fine-tuning and show the returned training summary as the cell output.
train_output = trainer.train()
train_output

## 7. Evaluate the Model

In [None]:
from sklearn.metrics import classification_report

# Run inference on the held-out test split.
preds = trainer.predict(encoded_dataset["test"])

# Highest-scoring class per example.
y_pred = preds.predictions.argmax(axis=-1)
# Use the labels returned alongside the predictions: they are NumPy arrays in
# the same order as `preds.predictions`, whereas indexing the torch-formatted
# dataset hands scikit-learn a tensor/column object instead.
y_true = preds.label_ids

label_names = dataset["train"].features["label"].names
# Passing `labels` explicitly keeps `target_names` aligned with the class ids
# even if some class does not occur in the evaluated examples.
print(
    classification_report(
        y_true,
        y_pred,
        labels=list(range(len(label_names))),
        target_names=label_names,
    )
)

## 8. Save the Model and Tokenizer
This allows you to reload the model later or use it in a separate application.

In [None]:
# Save the model and the tokenizer into the same directory so both can be
# reloaded from one path.
save_dir = "./sentiment-bert"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)