In [17]:
from transformers import pipeline
import pandas as pd
import re
import json
# Load the emotion detection model with the tokenizer.
# No task name is passed, so the pipeline type is presumably resolved from the
# model's own configuration (TODO confirm against the transformers docs).
# top_k=None appears to request a score for every label instead of only the
# best one; the six-label result printed by the example cell is consistent
# with that.
emotion_detector = pipeline(model="bhadresh-savani/distilbert-base-uncased-emotion", top_k=None) 

In [18]:
# Function to preprocess text data
def preprocess_text_safe(text):
    """Normalise a single utterance before it is sent to the classifier.

    Steps, in order: lowercase, drop digits, drop every character that is
    neither a word character nor whitespace, collapse whitespace runs to a
    single space, and trim both ends.

    Args:
        text: The value to clean. Null-like scalars (``None``, ``NaN``)
            produce an empty string; other non-string scalars are converted
            with ``str()`` before cleaning.

    Returns:
        The cleaned string, possibly empty.
    """
    if pd.isnull(text):
        return ""
    if not isinstance(text, str):
        # Numbers and other scalars have no .lower(); coerce instead of
        # raising AttributeError.
        text = str(text)
    text = text.lower()  # Convert to lowercase
    text = re.sub(r'\d+', '', text)  # Remove digits
    # Remove special characters (underscores survive because \w matches them)
    text = re.sub(r'[^\w\s]', '', text)
    # Collapse whitespace LAST: removing digits or punctuation that sat
    # between two spaces would otherwise leave a double space behind.
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

In [14]:
# Load the JSON file (the path points at a .json file and pd.read_json is used)
file_path = 'data/dummy.json'  
data = pd.read_json(file_path)

# Clean the first utterance as a quick check of the preprocessing step.
# `text` is reused as the sample input of the emotion-detection example cell.
text = preprocess_text_safe(data["utterance"][0])
print(text)


the blakes got divorced


In [19]:
# Function to detect emotions with text truncation
def detect_emotion(text):
    """Classify ``text`` with the module-level ``emotion_detector`` pipeline.

    The text is forwarded unchanged, with ``truncation=True``. The call is
    best-effort: any exception raised by the pipeline is reported on stdout
    and ``None`` is returned in place of a result.

    Args:
        text: The text to classify.

    Returns:
        Whatever ``emotion_detector`` returns, or ``None`` if it raised.
    """
    result = None
    try:
        result = emotion_detector(text, truncation=True)
    except Exception as e:
        print(f"Error during emotion detection: {e}")
    return result

# Example usage: run the detector on the preprocessed sample utterance.
sample_text = text
print("Sample text:", sample_text)

detected_emotions = detect_emotion(sample_text)
print("Detected emotions:", detected_emotions)

# detect_emotion returns None when the pipeline fails; guard the indexing so
# the demo prints an empty list instead of raising TypeError.
if detected_emotions:
    # The result is nested one level deep: element 0 holds the label/score
    # entries for the single input text.
    new_list = [{entry['label']: entry['score']} for entry in detected_emotions[0]]
else:
    new_list = []
print(new_list)

Sample text: the blakes got divorced
Detected emotions: [[{'label': 'sadness', 'score': 0.9458025097846985}, {'label': 'anger', 'score': 0.04597806558012962}, {'label': 'joy', 'score': 0.0034245906863361597}, {'label': 'fear', 'score': 0.0022369963116943836}, {'label': 'love', 'score': 0.0019880265463143587}, {'label': 'surprise', 'score': 0.0005698736640624702}]]
[{'sadness': 0.9458025097846985}, {'anger': 0.04597806558012962}, {'joy': 0.0034245906863361597}, {'fear': 0.0022369963116943836}, {'love': 0.0019880265463143587}, {'surprise': 0.0005698736640624702}]


In [29]:
def calculate_emotion(filename, verbose=True):
    """Compute an emotion-score mapping for every utterance in a JSON file.

    The file must contain a JSON object with an ``"utterance"`` entry holding
    the texts to score. Each text is cleaned with ``preprocess_text_safe``
    and classified with ``detect_emotion``.

    Args:
        filename: Path of the JSON file to read.
        verbose: When True (the default), print each per-utterance mapping
            as it is produced. Pass False to avoid flooding the output on
            large files.

    Returns:
        A list with one ``{label: score}`` dict per utterance, in file order.
        An utterance whose detection failed contributes an empty dict, so the
        list always stays index-aligned with the utterances. Returns ``None``
        (after printing an error) when the file is missing, is not valid
        JSON, lacks the ``"utterance"`` entry, or another error occurs.
    """
    emotion_level_list = []
    try:
        with open(filename, 'r', encoding='utf-8') as file:
            data = json.load(file)
        if "utterance" not in data:
            print(f'Error: No "utterance" entry in file - {filename}')
            return None
        for utterance in data["utterance"]:
            clean_text = preprocess_text_safe(utterance)
            detected_emotions = detect_emotion(clean_text)
            if detected_emotions:
                # Element 0 holds the label/score entries for this one text.
                emotion_level = {
                    entry['label']: entry['score']
                    for entry in detected_emotions[0]
                }
            else:
                # detect_emotion returns None on failure. Keep a placeholder
                # rather than letting one bad utterance discard every result.
                emotion_level = {}
            if verbose:
                print(emotion_level)
            emotion_level_list.append(emotion_level)
        return emotion_level_list
    except FileNotFoundError:
        print(f'Error: File not found - {filename}')
    except json.JSONDecodeError:
        print(f'Error: Invalid JSON format in file - {filename}')
    except Exception as e:
        print(f'Error: {str(e)}')
    return None



Calculate the emotion level for each utterance, then extract the keys (emotion labels) to help determine the mood for the response.

In [30]:
# Score every utterance in the dataset file. list_e receives the return value
# of calculate_emotion (a list of per-utterance {label: score} dicts on success).
filename = "data/dummy.json"
list_e = calculate_emotion(filename)

{'sadness': 0.9458025097846985, 'anger': 0.04597806558012962, 'joy': 0.0034245906863361597, 'fear': 0.0022369963116943836, 'love': 0.0019880265463143587, 'surprise': 0.0005698736640624702}
{'anger': 0.8305054306983948, 'joy': 0.07060625404119492, 'fear': 0.05335665121674538, 'sadness': 0.03337106853723526, 'surprise': 0.007018758915364742, 'love': 0.005141896661370993}
{'anger': 0.9490878582000732, 'fear': 0.03036593832075596, 'joy': 0.012298641726374626, 'sadness': 0.006780564319342375, 'surprise': 0.0009627295657992363, 'love': 0.000504308205563575}
{'surprise': 0.9292944669723511, 'fear': 0.03784411773085594, 'anger': 0.01936550810933113, 'joy': 0.008322379551827908, 'sadness': 0.003525329055264592, 'love': 0.0016482097562402487}
{'joy': 0.9982621073722839, 'love': 0.000590162118896842, 'surprise': 0.0005803056410513818, 'sadness': 0.00022092119615990669, 'anger': 0.0002050536422757432, 'fear': 0.0001416382729075849}
{'surprise': 0.9865759015083313, 'joy': 0.006061989348381758, 'fea

In [31]:
# Number of emotion mappings returned (one is appended per utterance processed)
print(len(list_e))

2405


In [32]:
def write_data(filename, e_lst):
    """Store ``e_lst`` under the ``"emotion_level"`` key of a JSON file.

    The file is read, the key is added (or overwritten) on the loaded object,
    and the file is rewritten in place with two-space indentation.

    Args:
        filename: Path of an existing JSON file whose top level is an object.
        e_lst: JSON-serialisable value to store under ``"emotion_level"``.

    Raises:
        FileNotFoundError: If ``filename`` does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
        TypeError: If the updated content cannot be serialised to JSON. The
            file is left untouched in that case.
    """
    with open(filename, 'r', encoding='utf-8') as json_file:
        existing_data = json.load(json_file)

    existing_data["emotion_level"] = e_lst

    # Serialise BEFORE opening for writing: mode 'w' truncates the file at
    # once, so a serialisation error afterwards would destroy the existing
    # data.
    serialized = json.dumps(existing_data, indent=2)

    with open(filename, 'w', encoding='utf-8') as json_file:
        json_file.write(serialized)


In [33]:
# Store the emotion levels back into the same file they were computed from;
# write_data rewrites that file in place.
write_data("data/dummy.json", list_e)