In [None]:
from datasets import load_dataset
import pandas as pd
import os

csv_path = "/kaggle/working/emotion_dataset.csv"

# Bind `dataset` to the full DatasetDict on every run, whichever branch below
# executes. Previously it was a DatasetDict, a single train split, or not
# defined at all depending on the state of the CSV cache, which made any later
# use of `dataset` depend on hidden state.
dataset = load_dataset("dair-ai/emotion")

# Callable that turns an integer class id into its emotion name.
label_map = dataset["train"].features["label"].int2str

if os.path.exists(csv_path):
    # Reuse the cached training frame written by a previous run.
    df = pd.read_csv(csv_path)

    # Add emotion column if missing (e.g. the CSV was written without it).
    if "emotion" not in df.columns:
        df["emotion"] = df["label"].map(label_map)
else:
    # First run: materialise the train split, attach readable labels, cache it.
    df = dataset["train"].to_pandas()
    df["emotion"] = df["label"].map(label_map)
    df.to_csv(csv_path, index=False)

df.head()


### Main pipeline: emotion detection and empathetic response generation

In [None]:
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM
from collections import defaultdict
import torch
import numpy as np
import pandas as pd
import os

# Choose the inference device once so every model loaded in this cell agrees.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# `pipeline` takes a GPU ordinal, with -1 meaning CPU.
PIPELINE_DEVICE = 0 if DEVICE == "cuda" else -1

# 1. Load Emotion Classifiers (Ensemble)
# top_k=None asks each classifier for a score per label rather than only the
# best one; the classifiers now run on the same device as the generators.
EMOTION_CLASSIFIER_IDS = (
    "j-hartmann/emotion-english-distilroberta-base",
    "bhadresh-savani/bert-base-go-emotion",
)
ensemble_models = [
    pipeline("text-classification", model=model_id, top_k=None, device=PIPELINE_DEVICE)
    for model_id in EMOTION_CLASSIFIER_IDS
]


# 2. Load LLaMA (TinyLlama chat checkpoint; a local path works here too)
LLAMA_MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
llama_tokenizer = AutoTokenizer.from_pretrained(LLAMA_MODEL_ID)
llama_model = AutoModelForCausalLM.from_pretrained(LLAMA_MODEL_ID)
llama_model.to(DEVICE)

# 3. Load Flan-T5 as fallback
FLAN_MODEL_ID = "google/flan-t5-base"
flan_tokenizer = AutoTokenizer.from_pretrained(FLAN_MODEL_ID)
flan_model = AutoModelForSeq2SeqLM.from_pretrained(FLAN_MODEL_ID)
flan_model.to(DEVICE)

# 4. Rule-based fallback
# Canned replies keyed by emotion label, used when no generated reply is usable.
template_responses = dict(
    joy="That's wonderful to hear! 😊",
    sadness="I'm really sorry you're feeling this way. You're not alone — I'm here for you. 💙",
    anger="I understand you're upset. It's okay to feel this way — want to talk about it?",
    fear="It's okay to feel overwhelmed sometimes. Take a breath — you’re doing your best. 🧘",
    surprise="Wow, that does sound unexpected. Thanks for sharing it with me!",
    love="That’s so heartwarming. It’s beautiful to feel this way. ❤️",
    neutral="Thanks for sharing that. I'm listening. 🤝",
    excitement="That's amazing! 🎉 So happy for you!",
)

def generate_with_flan(emotion, user_input):
    """Ask the Flan-T5 fallback model for an empathetic reply.

    Args:
        emotion: Emotion label interpolated into the instruction prompt.
        user_input: The user's message, quoted verbatim inside the prompt.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens removed.
    """
    instruction = f"The user is feeling {emotion}. They said: '{user_input}'. Reply empathetically."
    encoded = flan_tokenizer(instruction, return_tensors="pt", truncation=True)
    # Tensors must live on the same device as the model before generation.
    encoded = encoded.to(flan_model.device)
    generated_ids = flan_model.generate(**encoded, max_new_tokens=80)
    first_sequence = generated_ids[0]
    return flan_tokenizer.decode(first_sequence, skip_special_tokens=True)

In [None]:
def validate_response_with_emotion(response, emotion):
    """Check whether a reply mentions any keyword associated with an emotion.

    Matching is a case-insensitive substring test, so a keyword also matches
    inside longer words.

    Args:
        response: Reply text to inspect.
        emotion: Emotion label used to look up the keyword list.

    Returns:
        True if at least one keyword for `emotion` occurs in `response`;
        False otherwise, including when the emotion has no keyword list.
    """
    keywords_by_emotion = {
        "joy": ["happy", "joy", "wonderful", "delight", "glad", "excited"],
        "sadness": ["sorry", "sad", "lonely", "down", "blue", "hurt"],
        "anger": ["angry", "upset", "mad", "furious", "frustrated"],
        "fear": ["scared", "afraid", "anxious", "nervous", "worried"],
        "love": ["love", "caring", "affection", "heartwarming"],
        "surprise": ["surprised", "unexpected", "shocked"],
        "excitement": ["excited", "thrilled", "pumped", "enthusiastic"],
        "neutral": ["okay", "understood", "noted", "alright"],
    }

    keywords = keywords_by_emotion.get(emotion, [])
    if not keywords:
        # Unknown emotion: nothing to match against, and the reply is not touched.
        return False

    lowered = response.lower()
    for keyword in keywords:
        if keyword in lowered:
            return True
    return False


In [None]:
def _detect_top_emotion(user_input):
    """Return the lower-cased label with the highest mean score across the ensemble."""
    scores_by_label = defaultdict(list)
    for classifier in ensemble_models:
        # [0] unwraps the per-input list; what remains is one dict per label.
        for prediction in classifier(user_input)[0]:
            scores_by_label[prediction["label"].lower()].append(prediction["score"])

    # A label emitted by only one classifier is averaged over that classifier alone.
    mean_scores = {label: np.mean(scores) for label, scores in scores_by_label.items()}
    return max(mean_scores, key=mean_scores.get)


def _extract_first_reply(generated_text):
    """Keep only the first reply from the model's continuation of the few-shot prompt."""
    reply = generated_text.strip()
    # The few-shot examples are one line each, so anything after a newline or a
    # new "User is feeling" header is the model inventing further examples.
    for stop_marker in ("\n", "User is feeling"):
        reply = reply.split(stop_marker, 1)[0]
    reply = reply.strip()

    # The prompt examples wrap replies in quotes; drop a matching outer pair.
    if len(reply) >= 2 and reply[0] == reply[-1] and reply[0] in "'\"":
        reply = reply[1:-1].strip()
    return reply


def _generate_llama_reply(top_emotion, user_input):
    """Sample a reply from the LLaMA model and return the cleaned first reply."""
    llama_prompt = (
        "You are an empathetic chatbot. Below are examples of how you respond to emotions:\n"
        "User is feeling sadness. Message: 'I feel so down.' → Reply: 'I'm really sorry you're feeling this way. You're not alone.'\n"
        "User is feeling joy. Message: 'I got a promotion!' → Reply: 'That's amazing! 🎉 So happy for you!'\n"
        "User is feeling fear. Message: 'I'm scared about tomorrow.' → Reply: 'It's okay to feel overwhelmed sometimes. I'm here for you.'\n"
        f"User is feeling {top_emotion}. Message: '{user_input}' → Reply:"
    )

    llama_ids = llama_tokenizer(llama_prompt, return_tensors="pt", truncation=True).to(llama_model.device)
    llama_out = llama_model.generate(
        **llama_ids, max_new_tokens=120, do_sample=True, temperature=0.7, top_p=0.9
    )

    # The returned sequence starts with the prompt tokens. Decode only what was
    # generated after them, so the few-shot examples in the prompt can never be
    # mistaken for the model's answer.
    prompt_length = llama_ids["input_ids"].shape[1]
    continuation = llama_tokenizer.decode(llama_out[0][prompt_length:], skip_special_tokens=True)
    return _extract_first_reply(continuation)


def run_full_emotion_pipeline(user_input):
    """Detect the emotion in a message and produce an empathetic reply.

    The reply comes from the LLaMA model when its output looks usable,
    otherwise from Flan-T5, otherwise from the rule-based templates.

    Args:
        user_input: The user's message text.

    Returns:
        dict with keys:
            "emotion": the top averaged emotion label (lower-cased),
            "response": the reply text that was selected,
            "source": a marker string naming which generator produced it.
    """
    # ---- Step 1: Emotion Detection ----
    top_emotion = _detect_top_emotion(user_input)

    # ---- Step 2 + 3: LLaMA Empathetic Response, post-processed ----
    cleaned_llama = _generate_llama_reply(top_emotion, user_input)

    # ---- Step 4: Rule-based or Flan Fallback ----
    # LLaMA output counts as poor when it is nearly empty, echoes an
    # instruction, or neither names the emotion nor contains a related keyword.
    lowered = cleaned_llama.lower()
    llama_is_poor = (
        len(cleaned_llama) < 5
        or "reply kindly" in lowered
        or (top_emotion not in lowered and not validate_response_with_emotion(cleaned_llama, top_emotion))
    )

    if not llama_is_poor:
        return {
            "emotion": top_emotion,
            "response": cleaned_llama,
            "source": "🦙 (LLaMA-generated)",
        }

    try:
        # Try Flan fallback
        flan_response = generate_with_flan(top_emotion, user_input)
    except Exception:
        # Best-effort: any generation failure drops through to the templates.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
        flan_response = ""

    if flan_response.strip():
        final_response = flan_response
        llama_source = "🤖 (Used Flan-T5 fallback)"
    else:
        # Flan failed or produced nothing usable: fall back to rule-based reply.
        final_response = template_responses.get(top_emotion, "I'm here for you.")
        llama_source = "📜 (Used Rule-based fallback)"

    return {
        "emotion": top_emotion,
        "response": final_response,
        "source": llama_source,
    }

In [None]:
def apply_chatbot(row):
    """Run the emotion pipeline on one row and return its new columns.

    Args:
        row: A row-like object whose 'text' entry holds the message.

    Returns:
        pd.Series indexed by 'detected_emotion' and 'generated_response'.
    """
    outcome = run_full_emotion_pipeline(row['text'])
    # Output column name -> key in the pipeline's result dict.
    column_sources = {
        'detected_emotion': 'emotion',
        'generated_response': 'response',
    }
    return pd.Series({column: outcome[key] for column, key in column_sources.items()})

# Run the chatbot on the first 100 rows of `df`, attach its outputs as two new
# columns, and save the augmented frame.
df_results = df.iloc[:100].copy()
chatbot_columns = df_results.apply(apply_chatbot, axis=1)
df_results[['detected_emotion', 'generated_response']] = chatbot_columns
df_results.to_csv("/kaggle/working/chatbot_results_improved_100.csv", index=False)

In [None]:
# Write the results frame to a CSV file, without the index column.
results_csv_path = "/kaggle/working/chatbot_results_100.csv"
df_results.to_csv(results_csv_path, index=False)

In [None]:
from tqdm.notebook import tqdm

# Load the test split directly so this cell does not depend on whatever an
# earlier cell happened to bind to `dataset`.
df_test = load_dataset("dair-ai/emotion", split="test").to_pandas()
df_test = df_test.head(100).copy()

# Registers `progress_apply` on pandas objects so the run shows a progress bar.
tqdm.pandas()

# Apply chatbot. `apply_chatbot` is the function defined in an earlier cell;
# it returns a Series with 'detected_emotion' and 'generated_response'.
df_test[['detected_emotion', 'generated_response']] = df_test.progress_apply(apply_chatbot, axis=1)

In [None]:
# Write the test-split results frame to a CSV file, without the index column.
test_results_csv_path = "/kaggle/working/chatbot_test_results_100.csv"
df_test.to_csv(test_results_csv_path, index=False)