### Emotional annotation using VAD (Valence, Arousal, Dominance)



In [7]:
import nltk
import re
import numpy as np
from nltk.tokenize import word_tokenize
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf
from tensorflow.keras import backend as K

# Tokenizer data used by nltk.word_tokenize. NLTK 3.9 and later look up the
# "punkt_tab" resource instead of the pickled "punkt" models, so both are
# fetched to keep the notebook runnable on older and newer NLTK releases.
nltk.download("punkt")
nltk.download("punkt_tab")
from nltk.corpus import stopwords

# English stop-word list, kept as a set for fast membership tests.
nltk.download("stopwords")
stop_words = set(stopwords.words("english"))

# Preprocessing function
def preprocess(text):
    """Normalize raw text into a space-separated string of content tokens.

    Steps: lowercase and tokenize with NLTK, drop English stop words, turn
    every run of characters other than letters, digits and ``. ! ?`` into a
    single space, delete the remaining punctuation, and finally collapse
    whitespace.

    Whitespace is collapsed *after* punctuation is deleted; doing it before
    (as the previous version did) left double or trailing spaces wherever a
    stand-alone punctuation token had been, e.g. ``"sad !"`` -> ``"sad "``.

    Args:
        text: Raw input string.

    Returns:
        The cleaned string, with single spaces between tokens and no leading
        or trailing whitespace (empty string if nothing survives).
    """
    tokens = nltk.word_tokenize(text.lower())
    kept = [token for token in tokens if token not in stop_words]
    text = " ".join(kept)
    # Keep . ! ? for now so tokens such as "u.s." are fused ("us") rather
    # than split when the punctuation is deleted in the next step.
    text = re.sub(r"[^a-zA-Z0-9.!?]+", " ", text)
    text = re.sub(r"[^\w\s]", "", text)
    return re.sub(r"\s+", " ", text).strip()



[nltk_data] Downloading package punkt to /Users/stef_tse/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/stef_tse/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [8]:
# Classify V, A, D
def classify_variable(value, var_type):
    """Map a Valence/Arousal/Dominance score to its descriptive category.

    The [-1, 1] scale is cut into five buckets with inclusive upper edges at
    -0.6, -0.2, 0.2 and 0.6 (so -0.6 belongs to the lowest bucket and 0.2 to
    "neutral"). Scores outside [-1, 1] are clamped into the nearest end
    bucket instead of falling through and yielding ``None``.

    Args:
        value: Numeric score.
        var_type: ``"V"`` (valence), ``"A"`` (arousal) or ``"D"`` (dominance).

    Returns:
        The category label as a string, or ``None`` when ``var_type`` is not
        one of the three known dimensions or ``value`` is NaN.
    """
    # Labels for each dimension, ordered from the lowest bucket to the highest.
    labels_by_type = {
        "V": ("highly negative", "moderately negative", "neutral",
              "moderately positive", "highly positive"),
        "A": ("inactive", "low arousal", "neutral",
              "moderate arousal", "high arousal"),
        "D": ("poor dominance", "low dominance", "neutral",
              "moderate dominance", "high dominance"),
    }
    labels = labels_by_type.get(var_type)
    # NaN is the only value that is not equal to itself.
    if labels is None or value != value:
        return None

    # zip stops after the four edges; anything above 0.6 gets the last label.
    for upper_edge, label in zip((-0.6, -0.2, 0.2, 0.6), labels):
        if value <= upper_edge:
            return label
    return labels[-1]


In [9]:
# File name of the saved Keras model that is loaded for V/A/D prediction.
model_path = "best_model_with_glove.h5"

# MSE metric
def mse(y_true, y_pred):
    """Mean of the squared differences between ``y_pred`` and ``y_true``.

    Computed with Keras backend ops and averaged over every element.
    """
    squared_error = K.square(y_pred - y_true)
    return K.mean(squared_error)

# RMSE metric (if required for model loading)
def rmse(y_true, y_pred):
    """Square root of the mean squared difference between ``y_pred`` and ``y_true``.

    Computed with Keras backend ops and averaged over every element.
    """
    residual = y_pred - y_true
    mean_squared = K.mean(K.square(residual))
    return K.sqrt(mean_squared)

# Load the model once and prepare tokenizer
def load_model_and_tokenizer(model_path, tokenizer_path=None):
    """Load the saved Keras model and the tokenizer used to encode its input.

    Args:
        model_path: Path handed to ``tf.keras.models.load_model``.
        tokenizer_path: Optional path to a JSON file produced by
            ``Tokenizer.to_json()``. When given, the tokenizer (including its
            vocabulary) is restored from that file. When omitted, a new
            tokenizer with ``num_words=20000`` and ``oov_token="<OOV>"`` is
            created, exactly as before.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # mse/rmse are registered for the duration of the load so a model saved
    # with them as custom objects can be deserialized by name.
    with tf.keras.utils.custom_object_scope({'mse': mse, 'rmse': rmse}):
        model = tf.keras.models.load_model(model_path)

    if tokenizer_path is None:
        # NOTE(review): this tokenizer has no vocabulary. Word indices will
        # only line up with the model's embedding if the tokenizer fitted at
        # training time is restored via tokenizer_path -- confirm that such a
        # file exists for this model.
        tokenizer = Tokenizer(num_words=20000, oov_token="<OOV>")
    else:
        with open(tokenizer_path, encoding="utf-8") as handle:
            tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(handle.read())
    return model, tokenizer

# Function to predict and describe
def predict_and_describe(model, tokenizer, input_text, max_length=170):
    """Predict Valence, Arousal and Dominance for a text and describe them.

    Args:
        model: Callable model; it is invoked on the padded batch and the
            first row of its output is read as ``[V, A, D]``.
        tokenizer: Keras-style tokenizer. If it already has a vocabulary
            (non-empty ``word_index``) it is used as-is and never refitted.
            If it is unfitted, a private copy is fitted on the input text so
            the caller's tokenizer is left untouched.
        input_text: Raw text to analyse.
        max_length: Maximum number of whitespace-separated words kept from
            ``input_text`` and the length sequences are padded to.

    Returns:
        A multi-line string listing each score (two decimals) with its
        category from ``classify_variable``.
    """
    import copy  # local import: only needed for the unfitted-tokenizer fallback

    # Cap the raw text at max_length words before any preprocessing.
    words = input_text.split()
    if len(words) > max_length:
        input_text = " ".join(words[:max_length])

    preprocessed_text = preprocess(input_text)

    if tokenizer.word_index:
        # Vocabulary already present: refitting here would renumber words
        # and break the mapping the tokenizer was fitted with.
        encoder = tokenizer
    else:
        # Fallback that reproduces the first-call behaviour of the previous
        # version. Fitting a copy keeps the result for a given text
        # independent of earlier calls.
        # NOTE(review): indices fitted on the input alone are unlikely to
        # match the vocabulary the model was trained with -- pass the
        # training-time tokenizer whenever it is available.
        encoder = copy.deepcopy(tokenizer)
        encoder.fit_on_texts([preprocessed_text])
    sequences = encoder.texts_to_sequences([preprocessed_text])

    padded_input = pad_sequences(sequences, maxlen=max_length)

    # Single-item batch: take the first output row and flatten it to 1-D.
    predictions = model(tf.constant(padded_input))[0]
    predictions = predictions.numpy().flatten()

    V, A, D = predictions[0], predictions[1], predictions[2]

    V_category = classify_variable(V, "V")
    A_category = classify_variable(A, "A")
    D_category = classify_variable(D, "D")

    description = f"Text Analysis:\n- Valence (V): {V:.2f} ({V_category})\n- Arousal (A): {A:.2f} ({A_category})\n- Dominance (D): {D:.2f} ({D_category})"
    return description



In [10]:
# Main Execution
if __name__ == "__main__":
    # Load the trained network and its tokenizer once, up front.
    model_path = "best_model_with_glove.h5"
    model, tokenizer = load_model_and_tokenizer(model_path)

    # Example sentence to annotate.
    input_sentence = "i am deeply sad"

    # Run the V/A/D prediction and show the formatted summary.
    result = predict_and_describe(model, tokenizer, input_sentence)
    print(result)
    

Text Analysis:
- Valence (V): -0.71 (highly negative)
- Arousal (A): 0.63 (high arousal)
- Dominance (D): -0.72 (poor dominance)
