In [2]:
import pandas as pd

In [3]:
# Read data.csv into a DataFrame; the bare `df` on the last line makes the
# notebook render it (pandas elides the middle rows of a long frame).
df = pd.read_csv("data.csv")
df

Unnamed: 0,conversation_id,emotion_label,variant,conversation
0,1,happy,emotional,### Human: I finally got the job I was intervi...
1,1,happy,neutral,### Human: I received a job offer. ### Assista...
2,2,happy,emotional,### Human: My best friend just told me she's g...
3,2,happy,neutral,### Human: My best friend is getting married. ...
4,3,happy,emotional,### Human: This is the best birthday I've ever...
...,...,...,...,...
715,431,neutral,neutral,### Human: How do you properly dispose of old ...
716,432,neutral,neutral,### Human: I need to schedule my annual physic...
717,433,neutral,neutral,### Human: What is the atomic number of carbon...
718,434,neutral,neutral,### Human: The new software will be installed ...


In [11]:
# Rows whose emotion_label is exactly 'happy'.
df.loc[df["emotion_label"].eq('happy')]

Unnamed: 0,conversation_id,emotion_label,variant,conversation
0,1,happy,emotional,### Human: I finally got the job I was intervi...
1,1,happy,neutral,### Human: I received a job offer. ### Assista...
2,2,happy,emotional,### Human: My best friend just told me she's g...
3,2,happy,neutral,### Human: My best friend is getting married. ...
4,3,happy,emotional,### Human: This is the best birthday I've ever...
...,...,...,...,...
615,373,happy,neutral,### Human: The company announced an extra day ...
616,374,happy,emotional,### Human: The band I loved in high school is ...
617,374,happy,neutral,### Human: A band I like is having a reunion t...
618,375,happy,emotional,### Human: I just watched my son graduate from...


In [12]:
# Rows whose emotion_label is exactly 'sad'.
df.loc[df["emotion_label"].eq('sad')]

Unnamed: 0,conversation_id,emotion_label,variant,conversation
50,26,sad,emotional,"### Human: I heard back from the university, a..."
51,26,sad,neutral,### Human: My university application was rejec...
52,27,sad,emotional,### Human: My dog passed away this morning. Th...
53,27,sad,neutral,### Human: My dog passed away this morning. ##...
54,28,sad,emotional,### Human: I didn't get the promotion I was ho...
...,...,...,...,...
655,393,sad,neutral,### Human: I am being sued over a car accident...
656,394,sad,emotional,### Human: I had to sell my grandfather's watc...
657,394,sad,neutral,### Human: I sold my grandfather's watch to pa...
658,395,sad,emotional,### Human: My restaurant got a terrible review...


In [13]:
# Rows whose emotion_label is exactly 'angry'.
df.loc[df["emotion_label"].eq('angry')]

Unnamed: 0,conversation_id,emotion_label,variant,conversation
100,51,angry,emotional,### Human: The airline lost my luggage and the...
101,51,angry,neutral,### Human: The airline has lost my luggage. ##...
102,52,angry,emotional,### Human: My roommate used my things again wi...
103,52,angry,neutral,### Human: My roommate used my things without ...
104,53,angry,emotional,### Human: I just got a parking ticket even th...
...,...,...,...,...
695,413,angry,neutral,### Human: I disagree with the referee's calls...
696,414,angry,emotional,### Human: I lent my friend a sentimental obje...
697,414,angry,neutral,### Human: A friend lost a sentimental object ...
698,415,angry,emotional,### Human: The person in front of me at the co...


In [14]:
# Rows whose emotion_label is exactly 'neutral'.
df.loc[df["emotion_label"].eq('neutral')]

Unnamed: 0,conversation_id,emotion_label,variant,conversation
150,76,neutral,neutral,### Human: What is the capital of Mongolia? ##...
151,77,neutral,neutral,### Human: Please set a timer for 15 minutes. ...
152,78,neutral,neutral,"### Human: I need to buy milk, eggs, and bread..."
153,79,neutral,neutral,### Human: The meeting is scheduled for 3 PM t...
154,80,neutral,neutral,### Human: How do you spell 'conscientious'? #...
...,...,...,...,...
715,431,neutral,neutral,### Human: How do you properly dispose of old ...
716,432,neutral,neutral,### Human: I need to schedule my annual physic...
717,433,neutral,neutral,### Human: What is the atomic number of carbon...
718,434,neutral,neutral,### Human: The new software will be installed ...


In [1]:
import pandas as pd
import numpy as np
import re
from io import StringIO
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# --- 1. SETUP: Load Data and Define Emotion Words ---

# Load the conversation dataset from data.csv (path is relative to the working directory).
# Columns used by the analysis below: conversation_id, emotion_label, variant, conversation.

df = pd.read_csv("data.csv")

# Explicit emotion words to look for, all lowercase.
# This list can be expanded for more comprehensive checks.
EMOTION_WORD_LIST = [
    'happy', 'sad', 'angry', 'excited', 'thrilled', 'furious', 'heartbroken', 
    'joyful', 'miserable', 'elated', 'frustrated', 'upset'
]

# Case-insensitive regex used to find emotion words in assistant responses.
# The \b anchors match whole words only (e.g., "sad" but not "saddle").
# Each word goes through re.escape so that an entry containing a regex
# metacharacter (e.g. '.', '+', '(') is matched literally instead of
# changing or breaking the pattern when the list is extended.
LEAKAGE_REGEX = re.compile(
    r'\b(' + '|'.join(re.escape(word) for word in EMOTION_WORD_LIST) + r')\b',
    re.IGNORECASE,
)


# --- 2. HELPER FUNCTIONS ---

_STOP_WORDS_CACHE = None  # filled on first use by _english_stop_words()


def _english_stop_words():
    """Return NLTK's English stop words as a frozenset, loading the corpus only once."""
    global _STOP_WORDS_CACHE
    if _STOP_WORDS_CACHE is None:
        _STOP_WORDS_CACHE = frozenset(stopwords.words('english'))
    return _STOP_WORDS_CACHE


def preprocess_text(text):
    """Lowercase, tokenize, and remove stopwords/punctuation.

    Args:
        text: The string to normalise.

    Returns:
        A list of lowercase tokens, in order of appearance, with English
        stop words and single-character tokens removed.
    """
    # The stop-word set is cached: this function is also used as the
    # TfidfVectorizer tokenizer, so it runs once per document.
    stop_words = _english_stop_words()
    # Keep only ASCII letters and whitespace. Digits and punctuation are
    # deleted (not replaced by a space), so "I've" becomes "ive".
    text = re.sub(r'[^a-zA-Z\s]', '', text.lower())
    tokens = word_tokenize(text)
    return [word for word in tokens if word not in stop_words and len(word) > 1]

def calculate_jaccard_similarity(text1, text2):
    """Jaccard similarity of the two texts' preprocessed token sets.

    Returns |A & B| / |A | B|, where A and B are the sets of tokens that
    preprocess_text yields for text1 and text2. Two empty sets count as
    identical (1.0); exactly one empty set gives 0.0.
    """
    tokens_a = set(preprocess_text(text1))
    tokens_b = set(preprocess_text(text2))

    # Degenerate cases first so the division below never sees an empty union.
    if not (tokens_a or tokens_b):
        return 1.0  # neither text has any tokens
    if not (tokens_a and tokens_b):
        return 0.0  # only one text has tokens

    shared = tokens_a & tokens_b
    combined = tokens_a | tokens_b
    return len(shared) / len(combined)

def count_emotion_words(text):
    """Number of preprocessed tokens in `text` that are in EMOTION_WORD_LIST.

    Every occurrence is counted, so a word that appears twice adds two.
    """
    hits = 0
    for token in preprocess_text(text):
        if token in EMOTION_WORD_LIST:
            hits += 1
    return hits

def parse_assistant_response(conversation):
    """Extracts only the Assistant's part of the conversation.

    Every assistant turn is collected, and each one is cut off at the next
    '### Human:' marker, so text written by the human in a later turn never
    ends up in the result. Several assistant turns are joined with a single
    space.

    Args:
        conversation: Conversation string that uses the '### Human:' and
            '### Assistant:' role markers.

    Returns:
        The stripped assistant text, or "" when there is no
        '### Assistant:' marker (or no text after it).
    """
    assistant_turns = []
    # Element 0 of the split is whatever precedes the first assistant marker.
    for segment in conversation.split('### Assistant:')[1:]:
        reply = segment.split('### Human:', 1)[0].strip()
        if reply:
            assistant_turns.append(reply)
    return ' '.join(assistant_turns)


# --- 3. MAIN ANALYSIS ---

# Accumulators: one result record per emotional/neutral pair, plus the
# counters behind the overall leakage rate.
results = []
total_assistant_responses = 0
total_leakage_instances = 0

# Prepare for Cosine Similarity
# The vectorizer is fitted on all conversations so that every pair is
# compared in the same vocabulary with the same IDF weights.
corpus = df['conversation'].tolist()
# token_pattern=None: a custom tokenizer is supplied, so the default pattern
# would be ignored anyway; passing None avoids scikit-learn's
# "token_pattern will not be used" UserWarning.
vectorizer = TfidfVectorizer(tokenizer=preprocess_text, token_pattern=None)
tfidf_matrix = vectorizer.fit_transform(corpus)
# Create a mapping from original index label to TF-IDF matrix row position
index_to_matrix_row = {original_index: i for i, original_index in enumerate(df.index)}

# Group by conversation_id to process emotional-neutral pairs
grouped = df.groupby('conversation_id')

for name, group in grouped:
    # We are only interested in pairs
    if len(group) != 2:
        continue

    # A pair must hold one 'emotional' and one 'neutral' variant. Groups that
    # lack either one (presumably the conversations labelled 'neutral', which
    # appear as single rows in the data preview) are skipped.
    emotional_row = group[group['variant'] == 'emotional']
    neutral_row = group[group['variant'] == 'neutral']

    # Ensure both variants exist for the pair
    if emotional_row.empty or neutral_row.empty:
        continue

    # Extract the single row from the filtered DataFrame
    emotional_row = emotional_row.iloc[0]
    neutral_row = neutral_row.iloc[0]

    # Get conversation texts
    emotional_text = emotional_row['conversation']
    neutral_text = neutral_row['conversation']

    # --- Check 1: Lexical Overlap for Factual Alignment ---
    # NOTE(review): both texts still contain the '### Human:' / '### Assistant:'
    # role markers, so the marker words can end up in both token sets and
    # raise the overlap scores; confirm whether that is intended.
    jaccard_score = calculate_jaccard_similarity(emotional_text, neutral_text)

    # Cosine Similarity Calculation
    # .name on a row Series is its index label in df.
    idx1 = index_to_matrix_row[emotional_row.name]
    idx2 = index_to_matrix_row[neutral_row.name]
    cosine_sim = cosine_similarity(tfidf_matrix[idx1], tfidf_matrix[idx2])[0][0]

    # --- Check 2: Frequency of Emotion Words ---
    emotional_word_count = count_emotion_words(emotional_text)
    neutral_word_count = count_emotion_words(neutral_text)

    # --- Check 3 & 4: Assistant Emotion Leakage ---
    # We check leakage in the assistant response for the emotional variant.
    # A response counts once towards the leakage total no matter how many
    # emotion words it contains.
    assistant_response = parse_assistant_response(emotional_text)
    leaked_words = []
    if assistant_response:
        total_assistant_responses += 1
        leaked_words = LEAKAGE_REGEX.findall(assistant_response)
        if leaked_words:
            total_leakage_instances += 1

    # Store results for this pair
    results.append({
        'conversation_id': name,
        'emotion_label': emotional_row['emotion_label'],
        'jaccard_similarity': round(jaccard_score, 4),
        'cosine_similarity_tfidf': round(cosine_sim, 4),
        'emotion_words_in_emotional': emotional_word_count,
        'emotion_words_in_neutral': neutral_word_count,
        'assistant_emotion_leakage': len(leaked_words) > 0,
        'leaked_words': leaked_words
    })


# --- 4. REPORTING ---

# Collect the per-pair records into one table
results_df = pd.DataFrame(results)

# Percentage of scanned assistant responses that contained at least one
# emotion word; 0 when nothing was scanned.
if total_assistant_responses > 0:
    leakage_rate = (total_leakage_instances / total_assistant_responses) * 100
else:
    leakage_rate = 0

# Per-pair table followed by a separator line
print(
    "--- Lexical and Content Check Results ---",
    results_df.to_string(),
    "\n" + "="*50 + "\n",
    sep="\n",
)

# Overall leakage summary
print(
    "--- Assistant Emotion Leakage Report ---",
    f"Total Assistant Responses Scanned: {total_assistant_responses}",
    f"Responses with Leaked Emotion Words: {total_leakage_instances}",
    f"Leakage Rate: {leakage_rate:.2f}%",
    sep="\n",
)



--- Lexical and Content Check Results ---
     conversation_id emotion_label  jaccard_similarity  cosine_similarity_tfidf  emotion_words_in_emotional  emotion_words_in_neutral  assistant_emotion_leakage  leaked_words
0                  1         happy              0.5294                   0.5851                           1                         0                      False            []
1                  2         happy              0.6667                   0.7573                           1                         0                      False            []
2                  3         happy              0.6000                   0.6643                           0                         0                      False            []
3                  4         happy              0.5833                   0.7124                           0                         0                      False            []
4                  5         happy              0.8000                   0.8468    