<a href="https://colab.research.google.com/github/Kgo890/AI_Fake_News_Detector/blob/main/emotion_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# Run nbconvert with the ClearOutputPreprocessor enabled so that stored cell
# outputs are removed from emotion_model.ipynb; --inplace rewrites that file
# instead of producing a copy.
# NOTE(review): assumes emotion_model.ipynb sits in the current working
# directory of the kernel — confirm before relying on this cell.
!jupyter nbconvert --ClearOutputPreprocessor.enabled=True --inplace emotion_model.ipynb

In [None]:
!pip install -q transformers datasets neattext

In [None]:
import os
# Both Weights & Biases switches are set before `transformers` is imported below.
# NOTE(review): presumably this keeps the Trainer from starting a W&B run or
# prompting for a login — confirm against the installed transformers version.
os.environ['WANDB_DISABLED'] = "true"
os.environ["WANDB_SILENT"] = "true"
# Hugging Face: tokenizer/model loading and the training loop.
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset
import torch
import numpy as np
# Evaluation report and plotting utilities.
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load every split of the "dair-ai/emotion" dataset.
dataset = load_dataset("dair-ai/emotion")

# Human-readable class names, read from the "label" feature of the train split.
# They are reused later for the report, the confusion matrix and inference.
train_features = dataset["train"].features
label_names = train_features["label"].names

In [None]:
# Checkpoint that supplies both the tokenizer and the classification model.
MODEL = "bhadresh-savani/roberta-base-emotion"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
# Size the classification head from the dataset's own label list rather than a
# hard-coded 6, so the head and the labels cannot silently drift apart.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL,
    num_labels=len(label_names),
)

In [None]:
def tokenize(example):
    """Tokenize the "text" field of a dataset example (or batch of examples).

    The module-level ``tokenizer`` is called with truncation enabled and
    padding set to "max_length" with a maximum length of 128.

    Args:
        example: Mapping with a "text" entry to encode.

    Returns:
        Whatever the tokenizer returns for that text.
    """
    encoding_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 128,
    }
    return tokenizer(example["text"], **encoding_options)

In [None]:
# Apply `tokenize` to every split, passing batches of examples per call.
tokenized_dataset = dataset.map(tokenize, batched=True)

# Expose only these columns, as torch tensors, when the dataset is indexed.
model_input_columns = ["input_ids", "attention_mask", "label"]
tokenized_dataset.set_format("torch", columns=model_input_columns)

In [None]:
# Fine-tuning configuration, grouped by purpose.
training_args = TrainingArguments(
    # Checkpoint location and retention.
    output_dir="./roberta-emotion-pytorch",
    save_total_limit=1,
    # Evaluate and checkpoint once per epoch; both strategies are set to the
    # same value alongside load_best_model_at_end.
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    # "accuracy" is the key returned by compute_metrics.
    metric_for_best_model="accuracy",
    # Optimisation hyper-parameters.
    num_train_epochs=5,
    learning_rate=2e-5,
    weight_decay=0.01,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    # Logging cadence, in optimisation steps.
    logging_steps=50,
)

In [None]:
def compute_metrics(pred):
    """Compute classification accuracy for a set of predictions.

    Args:
        pred: Object exposing ``predictions`` (per-class scores, one row per
            example) and ``label_ids`` (the reference class ids).

    Returns:
        dict: ``{"accuracy": value}`` where ``value`` is the fraction of rows
        whose highest-scoring column equals the reference label.
    """
    # The predicted class is the column with the largest score in each row.
    predicted_ids = np.argmax(pred.predictions, axis=1)
    is_correct = predicted_ids == pred.label_ids
    return {"accuracy": np.mean(is_correct)}

In [None]:
# Wire the model, configuration, data splits and metric function together.
train_split = tokenized_dataset["train"]
validation_split = tokenized_dataset["validation"]

trainer = Trainer(
    args=training_args,
    model=model,
    train_dataset=train_split,
    eval_dataset=validation_split,
    compute_metrics=compute_metrics,
)

In [None]:
# Run fine-tuning with the configuration held by `trainer`.
# The call is left as the cell's last expression so its return value is displayed.
trainer.train()

In [None]:
# Evaluate the trainer's model and print the metrics it reports.
# NOTE(review): with no arguments this presumably uses the eval_dataset given
# to the Trainer (the validation split) — confirm.
eval_results = trainer.evaluate()
print("Evaluation results:", eval_results)

In [None]:
# Score the held-out test split, then keep the reference labels and the
# predicted class (highest-scoring column per row) for the report cells below.
test_split = tokenized_dataset["test"]
predictions_output = trainer.predict(test_split)
y_true = predictions_output.label_ids
y_pred = np.argmax(predictions_output.predictions, axis=1)


In [None]:
# Per-class metrics on the test split, with rows named after `label_names`.
print("\nClassification Report:\n")
report_text = classification_report(
    y_true,
    y_pred,
    target_names=label_names,
)
print(report_text)

In [None]:
# Confusion matrix of the test predictions, drawn as an annotated heatmap
# with true labels on the y-axis and predicted labels on the x-axis.
cm = confusion_matrix(y_true, y_pred)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=label_names,
    yticklabels=label_names,
    ax=ax,
)
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
ax.set_title("Confusion Matrix")
plt.show()

In [None]:
# Write the trainer's model and the tokenizer into the same directory.
save_dir = "roberta-emotion-model"
trainer.save_model(save_dir)
tokenizer.save_pretrained(save_dir)

In [None]:
def predict_emotion(text):
    """Classify the emotion expressed by a single piece of text.

    Uses the module-level ``model``, ``tokenizer`` and ``label_names``. The
    model is moved to the GPU when one is available and switched to
    evaluation mode; both changes persist after the call.

    Args:
        text: The string to classify. If a batch is passed, only its first
            element is scored.

    Returns:
        tuple: ``(label, probability)`` where ``label`` is the entry of
        ``label_names`` with the highest softmax probability and
        ``probability`` is that probability as a Python float.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    # Make inference mode explicit instead of relying on whatever mode the
    # model was left in by earlier cells.
    model.eval()

    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
    # No backward pass follows, so skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)

    # Work on the first row only, so the argmax index and the probability
    # lookup always refer to the same example.
    probs = outputs.logits.softmax(dim=-1)[0]
    predicted_class = probs.argmax().item()
    return label_names[predicted_class], float(probs[predicted_class])

# Try the helper on one example sentence; the returned (label, probability)
# pair is displayed as the cell output.
predict_emotion("I feel so empty and hopeless today.")