In [None]:
!pip install transformers datasets sentencepiece scikit-learn
!pip install pandas datasets --quiet

In [None]:
#------------------------------------------ Emotion Classification (BERT) FINE-TUNING ------------------------------------------------

from google.colab import files
import pandas as pd
import torch
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset

# Upload dataset
uploaded = files.upload()  # upload your emotion_dataset.csv
if not uploaded:
    # Without this guard the lookup below fails with a bare StopIteration.
    raise ValueError("No file was uploaded; please upload the emotion dataset CSV.")

# Only the first uploaded file is used; it is read back from disk by its name.
df = pd.read_csv(next(iter(uploaded)))  # first uploaded file
df = df[["text", "label"]].dropna().reset_index(drop=True)
if df.empty:
    raise ValueError("The dataset has no rows with both 'text' and 'label' set.")

# A label column that contained missing values is parsed as float64 and stays
# float after dropna(). Float labels would not be treated as class ids by the
# classification head, so validate them and cast them to integers explicitly.
label_ids = pd.to_numeric(df["label"])  # raises ValueError on non-numeric labels
if ((label_ids % 1) != 0).any() or (label_ids < 0).any():
    raise ValueError("The 'label' column must contain non-negative integer class ids.")
df = df.assign(label=label_ids.astype("int64"))

# The classifier needs one output per class id in [0, max_id]. This equals
# nunique() when the ids are contiguous from 0, and stays valid when a class
# happens to be absent from the uploaded file.
num_labels = int(df["label"].max()) + 1

# Tokenization
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

def preprocess(example):
    """Tokenize the ``text`` field of ``example`` and carry its label along.

    The text is truncated and padded to exactly 128 tokens by the module-level
    ``tokenizer``. The value stored under ``example["label"]`` is copied
    unchanged into the returned encoding under the key ``"label"``.

    Args:
        example: Mapping with a ``"text"`` entry and a ``"label"`` entry.

    Returns:
        The tokenizer output with an additional ``"label"`` entry.
    """
    features = tokenizer(
        example["text"],
        max_length=128,
        padding="max_length",
        truncation=True,
    )
    features.update(label=example["label"])
    return features

# Build the training set. preserve_index=False keeps the pandas index (which is
# left non-contiguous by dropna()) from becoming an extra dataset column.
dataset = Dataset.from_pandas(df, preserve_index=False)

# preprocess() accepts a batch as well as a single row (the tokenizer takes a
# list of texts and the labels are copied through as a list), so map in batches.
# Every text is padded to the same fixed length, so the resulting features are
# the same as with row-by-row mapping, only computed faster.
tokenized_dataset = dataset.map(preprocess, batched=True)

# Pretrained encoder plus a classification head with one output per class id.
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=num_labels)

training_args = TrainingArguments(
    output_dir="./bert-emotion-custom",
    per_device_train_batch_size=8,
    num_train_epochs=3,
    save_strategy="no",  # no intermediate checkpoints; the final model is saved below
    logging_dir='./logs',
    save_total_limit=1
)

# Fine-tune on the whole tokenized dataset; no evaluation split is configured.
trainer = Trainer(model=model, args=training_args, train_dataset=tokenized_dataset)
trainer.train()

# Persist model and tokenizer side by side so both can be reloaded from one path.
model.save_pretrained("./bert-emotion-custom")
tokenizer.save_pretrained("./bert-emotion-custom")


In [None]:
#-------------------------------------------- TEST MODEL ------------------------------------------

from transformers import BertTokenizer, BertForSequenceClassification
import torch
import torch.nn.functional as F

# Reload the fine-tuned tokenizer and classifier from the training output directory
model_path = "./bert-emotion-custom"
tokenizer = BertTokenizer.from_pretrained(model_path)
model = BertForSequenceClassification.from_pretrained(model_path)

# Class id -> emotion name; the position in the list is the class id.
# NOTE(review): this must match the label ids used in the training CSV — confirm.
label2emotion = dict(enumerate(["joy", "anger", "neutral", "excitement", "sadness"]))

def predict_emotion(text, max_length=128):
    """Classify ``text`` and return the predicted emotion with its probability.

    Uses the module-level ``tokenizer``, ``model`` and ``label2emotion``.

    Args:
        text: The sentence to classify.
        max_length: Maximum number of tokens kept after truncation. Defaults to
            128, the length the training data was truncated to, so that
            inference sees inputs of the same maximum size as training.

    Returns:
        A ``(emotion, confidence)`` tuple: the ``label2emotion`` entry for the
        highest-scoring class and that class's softmax probability as a float.

    Raises:
        KeyError: If the predicted class id has no entry in ``label2emotion``.
    """
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=max_length,
    )
    # Inference only: skip gradient tracking.
    with torch.no_grad():
        logits = model(**inputs).logits
    probs = F.softmax(logits, dim=1)
    predicted_label = torch.argmax(probs, dim=1).item()
    # A single text was passed in, so row 0 holds its class probabilities.
    return label2emotion[predicted_label], probs[0][predicted_label].item()

# Smoke test: classify one sample sentence and report the label with its probability
text = "I love this game!"
emotion, confidence = predict_emotion(text)
message = f"Predicted Emotion: {emotion} (Confidence: {confidence:.2f})"
print(message)
