### Emotional annotation using Valence, Arousal, Dominance (VAD) scores (02)

In [14]:
import nltk
import re
import pickle
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer

from nltk.corpus import stopwords

# Fetch the NLTK resources the preprocessing step relies on: tokenizer data
# first, then the stop-word corpus. Each call logs its status to the cell output.
nltk.download("punkt")
nltk.download("stopwords")

# Keep the English stop words in a set so membership tests in preprocess() are fast.
stop_words = set(stopwords.words("english"))

# Locations of the serialized TF-IDF vectorizer and regression model
tfidf_save_path = "tfidf_vectorizer.pkl"
regressor_chain_save_path = "regressor_chain_model.pkl"

# Preprocessing function
def preprocess(text):
    """Normalize raw text into a clean, space-separated string of lowercase tokens.

    Steps, in order:
      1. Lowercase the text and tokenize it with ``nltk.word_tokenize``.
      2. Drop every token found in the module-level ``stop_words`` set.
      3. Replace each run of characters other than ASCII letters, digits and
         ``.``, ``!``, ``?`` with a single space.
      4. Delete the remaining punctuation outright (so "3.5" becomes "35").
      5. Collapse whitespace runs and strip both ends.

    Whitespace is normalized last on purpose: deleting punctuation in step 4
    can leave doubled or trailing spaces behind (e.g. "amazing !" becomes
    "amazing "), so collapsing before that step would not yield a clean string.

    Args:
        text: The input string to clean.

    Returns:
        The cleaned string, with single spaces between tokens and no leading
        or trailing whitespace. May be empty if nothing survives the filters.
    """
    tokens = nltk.word_tokenize(text.lower())  # Lowercase and tokenize
    kept_tokens = [w for w in tokens if w not in stop_words]  # Remove stop words
    text = " ".join(kept_tokens)
    text = re.sub(r"[^a-zA-Z0-9.!?]+", r" ", text)  # Remove unwanted characters
    text = re.sub(r"[^\w\s]", "", text)  # Remove punctuation
    text = re.sub(r"\s+", r" ", text).strip()  # Remove extra spaces (must run last)
    return text



[nltk_data] Downloading package punkt to /Users/stef_tse/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/stef_tse/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [15]:
# Shared upper bounds (inclusive) of the first four bins; anything above the
# last bound falls into the fifth bin.
_VAD_UPPER_BOUNDS = (-0.6, -0.2, 0.2, 0.6)

# Category labels per variable, ordered from the lowest bin to the highest.
_VAD_LABELS = {
    "V": (  # Valence
        "highly negative",
        "moderately negative",
        "neutral",
        "moderately positive",
        "highly positive",
    ),
    "A": (  # Arousal
        "inactive",
        "low arousal",
        "neutral",
        "moderate arousal",
        "stress",
    ),
    "D": (  # Dominance
        "poor dominance",
        "low dominance",
        "neutral",
        "moderate dominance",
        "high dominance",
    ),
}


# Function to classify V, A, D values
def classify_variable(value, var_type):
    """Map a numeric V, A or D score to its descriptive category.

    The nominal score range [-1, 1] is split into five bins whose upper
    bounds are inclusive: [-1, -0.6], (-0.6, -0.2], (-0.2, 0.2], (0.2, 0.6]
    and (0.6, 1].

    Scores outside [-1, 1] are clamped to the nearest outer bin instead of
    falling through every branch: a regression model is not bounded to the
    nominal range, and a prediction such as 1.03 should still be labelled
    rather than reported as ``None``.

    Args:
        value: The numeric score to classify.
        var_type: "V" (valence), "A" (arousal) or "D" (dominance).

    Returns:
        The category label as a string, or ``None`` when ``var_type`` is not
        one of the three known codes or ``value`` is NaN.
    """
    labels = _VAD_LABELS.get(var_type)
    # `value != value` is true only for NaN, which cannot be binned.
    if labels is None or value != value:
        return None

    for upper_bound, label in zip(_VAD_UPPER_BOUNDS, labels):
        if value <= upper_bound:
            return label
    # Above the last bound: the top bin (also covers values greater than 1.0).
    return labels[-1]



In [19]:
import joblib

def load_pretrained_model(tfidf_path, regressor_chain_path):
    """Load the saved vectorizer and model from disk with joblib.

    Args:
        tfidf_path: Path of the file holding the saved vectorizer.
        regressor_chain_path: Path of the file holding the saved model.

    Returns:
        A ``(vectorizer, model)`` tuple, in that order.

    Note:
        joblib files are pickle-based, so only load files from a trusted source.
    """
    # Tuple items are evaluated left to right, so the vectorizer is read first.
    return joblib.load(tfidf_path), joblib.load(regressor_chain_path)

# Predict and describe V, A, D values
def predict_and_describe(vectorizer, model, input_text):
    """Predict V, A, D scores for a text and render them as a short report.

    The text is cleaned with ``preprocess``, turned into features with
    ``vectorizer.transform``, and passed to ``model.predict``. The first three
    values of the flattened prediction are read as valence, arousal and
    dominance, and each is labelled with ``classify_variable``.

    Args:
        vectorizer: Object exposing ``transform`` on a list of strings.
        model: Object exposing ``predict`` on the transformed features.
        input_text: The raw text to analyze.

    Returns:
        A multi-line string starting with "Text Analysis:" followed by one
        line per variable showing the score (two decimals) and its category.
    """
    features = vectorizer.transform([preprocess(input_text)])
    scores = model.predict(features).flatten()

    # Pull out the three scores up front, in V, A, D order.
    valence, arousal, dominance = scores[0], scores[1], scores[2]

    report_lines = ["Text Analysis:"]
    for label, code, score in (
        ("Valence", "V", valence),
        ("Arousal", "A", arousal),
        ("Dominance", "D", dominance),
    ):
        category = classify_variable(score, code)
        report_lines.append(f"- {label} ({code}): {score:.2f} ({category})")

    return "\n".join(report_lines)



In [20]:

# Main Execution
if __name__ == "__main__":
    # Load the pre-trained vectorizer and model. The paths come from the
    # constants defined in the setup cell rather than being re-typed here,
    # so the file names live in exactly one place.
    vectorizer, model = load_pretrained_model(tfidf_save_path, regressor_chain_save_path)

    # Example input sentences
    input_sentence_1 = "Amazing!"
    input_sentence_2 = "I feel so tired and stressed."

    # Predict and describe for different sentences
    result_1 = predict_and_describe(vectorizer, model, input_sentence_1)
    result_2 = predict_and_describe(vectorizer, model, input_sentence_2)

    print(result_1)
    print(result_2)


Text Analysis:
- Valence (V): 0.26 (moderately positive)
- Arousal (A): 0.08 (neutral)
- Dominance (D): 0.10 (neutral)
Text Analysis:
- Valence (V): -0.05 (neutral)
- Arousal (A): -0.11 (neutral)
- Dominance (D): 0.02 (neutral)
