In [None]:
# ===========================================
# 🧠 04_Evaluation_and_Prediction_BERT_local.py
# Author: Reckless_Babu
# Description: Load trained BERT (multi-label) model
#              and predict emotions locally in VS Code
# ===========================================

# ================================
# 📚 STEP 1: Import Libraries
# ================================
import torch
from transformers import BertTokenizer, BertForSequenceClassification
import pandas as pd
import numpy as np
from sklearn.metrics import f1_score, classification_report
import os

# ================================
# ⚙️ STEP 2: Setup Device & Paths
# ================================
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"‚úÖ Using device: {device}")

# Locations of the trained model folder and the evaluation CSV
# (both are relative paths; edit them to match your layout).
model_path = r"bert_multilabel_model"
csv_path = r"data/processed/go_emotions_dataset.csv"

# ================================
# 🧠 STEP 3: Load Model + Tokenizer
# ================================
# Load the tokenizer first, then the classifier, both from the same folder.
print("üì¶ Loading model and tokenizer...")
tokenizer = BertTokenizer.from_pretrained(model_path)
# `.to(device)` returns the module itself, so it can be chained onto the load.
model = BertForSequenceClassification.from_pretrained(model_path).to(device)
# Switch to evaluation mode for inference.
model.eval()
print("‚úÖ Model loaded successfully!")

# ================================
# 🧩 STEP 4: Prediction Function
# ================================
def predict_emotions(texts, model, tokenizer, label_cols, threshold=0.5, batch_size=32):
    """Predict multi-label indicators for one or more texts.

    The texts are processed in mini-batches so that arbitrarily large inputs
    (for example a whole dataset) never have to fit into a single forward pass.

    Args:
        texts: A single string or a sequence of strings to classify.
        model: Torch module called as ``model(**encoded)`` whose output exposes
            ``.logits`` with one column per label.
        tokenizer: Callable invoked with ``padding``, ``truncation``,
            ``max_length`` and ``return_tensors="pt"``; must return a mapping
            of tensors.
        label_cols: Label names. Only their count is used here, to shape the
            result when ``texts`` is empty.
        threshold: Sigmoid probability at or above which a label is marked 1.
        batch_size: Maximum number of texts per forward pass.

    Returns:
        numpy.ndarray of ints (0/1) with one row per text and one column per
        model output.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # A bare string would otherwise be sliced character by character below.
    if isinstance(texts, str):
        texts = [texts]
    else:
        texts = list(texts)

    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Nothing to tokenize: return an empty, correctly shaped result.
    if not texts:
        return np.zeros((0, len(label_cols)), dtype=int)

    # Send inputs to wherever the model's weights live instead of relying on
    # a module-level global.
    target_device = next(model.parameters()).device

    prob_chunks = []
    with torch.no_grad():
        for start in range(0, len(texts), batch_size):
            batch = texts[start:start + batch_size]
            encoded = tokenizer(
                batch,
                padding=True,
                truncation=True,
                max_length=128,
                return_tensors="pt",
            )
            encoded = {k: v.to(target_device) for k, v in encoded.items()}
            logits = model(**encoded).logits
            # Independent sigmoid per label (multi-label, not softmax).
            prob_chunks.append(torch.sigmoid(logits).cpu().numpy())

    probs = np.concatenate(prob_chunks, axis=0)
    return (probs >= threshold).astype(int)

# ================================
# 🧪 STEP 5: Evaluate on Dataset
# ================================
# Evaluation is optional: it only runs when the CSV is present on disk.
if not os.path.exists(csv_path):
    print("‚ö†Ô∏è CSV not found ‚Äî skipping evaluation.")
else:
    print("üìä Evaluating on dataset...")
    df = pd.read_csv(csv_path)
    text_col = 'text'
    # Every column from the fourth onward is treated as an emotion label.
    label_cols = df.columns[3:].tolist()

    # Inputs as plain strings, targets as a float indicator matrix.
    text_series = df[text_col].astype(str)
    X_texts = text_series.tolist()
    label_frame = df[label_cols].astype(float)
    y_true = label_frame.to_numpy()

    y_pred = predict_emotions(X_texts, model, tokenizer, label_cols)

    # Micro-averaged F1 first, then the per-label breakdown.
    micro_f1 = f1_score(y_true, y_pred, average='micro', zero_division=0)
    print(f"üéØ Micro F1-score: {micro_f1:.4f}")
    print("\nüìã Classification Report:\n")
    report = classification_report(
        y_true,
        y_pred,
        target_names=label_cols,
        zero_division=0,
    )
    print(report)

# ================================
# 💬 STEP 6: Real-Time Prediction
# ================================
# Emotion names used to translate prediction positions into labels.
# Position i in this list names column i of the prediction vector.
label_cols = [
    "admiration", "amusement", "anger", "annoyance",
    "approval", "caring", "confusion", "curiosity",
    "desire", "disappointment", "disapproval", "disgust",
    "embarrassment", "excitement", "fear", "gratitude",
    "grief", "joy", "love", "nervousness",
    "optimism", "pride", "realization", "relief",
    "remorse", "sadness", "surprise", "neutral",
]

def get_emotion(text):
    """Return the emotion names predicted for a single text.

    Runs ``predict_emotions`` on a one-item batch using the module-level
    ``model``, ``tokenizer`` and ``label_cols``, then maps every position
    flagged with 1 to its name in ``label_cols``. When no position is
    flagged, ``["neutral"]`` is returned instead of an empty list.
    """
    flags = predict_emotions([text], model, tokenizer, label_cols)[0]

    names = []
    for position, flag in enumerate(flags):
        if flag == 1:
            names.append(label_cols[position])

    if not names:
        return ["neutral"]
    return names

# Interactive session: read one line at a time and print its predicted
# emotions until the user submits a blank line.
print("\nüí¨ Enter text below (or press Enter to exit):")
while True:
    try:
        text = input("\nüó£Ô∏è Enter text: ")
    except (EOFError, KeyboardInterrupt):
        # stdin was closed/exhausted (Ctrl-D, piped input) or the user hit
        # Ctrl-C: end the session the same way a blank line does instead of
        # crashing with a traceback. The bare print() finishes the prompt line.
        print()
        break
    if not text.strip():
        break
    emotions = get_emotion(text)
    print(f"üé≠ Predicted Emotions: {emotions}")
