In [None]:
import os
import re
import nltk
import pickle
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline

nltk.download('stopwords')
nltk.download('punkt')

def processText(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    tokens = nltk.word_tokenize(text)
    stop_words = set(nltk.corpus.stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]
    return ' '.join(tokens)

models = 'models'
parent = os.path.abspath(os.path.join(os.getcwd(), '..'))
path = os.path.join(parent, models, 'improved_sentiment_model.pkl')

print(f"Model file path: {path}")

if not os.path.exists(path):
    print("Error: Model file does not exist.")
else:
    try:
        with open(path, 'rb') as sentiment_model_file:
            sentiment_model = pickle.load(sentiment_model_file)
        print("Model loaded successfully!")

    except EOFError:
        print("Error: Ran out of input while loading the model. The model file might be empty or corrupted.")
    except Exception as e:
        print(f"Error loading model: {e}")

def get_sentiment_vector(sentiment_model, text_input):
    try:
        processed_text = processText(text_input)
        
        if sentiment_model is None:
            print("Error: Sentiment model is not loaded. Please ensure the model is properly loaded.")
            return None

        sentiment_scores = sentiment_model.predict_proba([processed_text])  

        print(f"Type of sentiment_scores: {type(sentiment_scores)}")

        sentiment_vector = []
        for label_scores in sentiment_scores[0]:  
            dense_scores = label_scores.toarray() if hasattr(label_scores, 'toarray') else label_scores
            sentiment_vector.extend(dense_scores.flatten())

        sentiment_vector = np.array(sentiment_vector)
        
        sentiment_vector = sentiment_vector / sentiment_vector.sum() * 100
        
        return sentiment_vector

    except Exception as e:
        print(f"Error while predicting sentiment: {e}")
        return None

input_text = input("Enter text for sentiment analysis: ")
if 'sentiment_model' in locals(): 
    sentiment_vector = get_sentiment_vector(sentiment_model, input_text)

    if sentiment_vector is not None:
        print("Sentiment Vector:", sentiment_vector)
    else:
        print("Failed to generate sentiment vector.")
else:
    print("Sentiment model is not loaded.")

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\TTill\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\TTill\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Model file path: c:\Users\TTill\Semiotics_Thesis-1\models\improved_sentiment_model.pkl
Model loaded successfully!
Error while predicting sentiment: unsupported operand type(s) for *: 'csr_matrix' and 'csr_matrix'
Failed to generate sentiment vector.


In [8]:
import pickle
import pandas as pd
import re
import numpy as np

def load_model(filepath):
    with open(filepath, 'rb') as model_file:
        return pickle.load(model_file)

def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    stopwords = {'a', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'for', 'from', 'has',
                 'he', 'in', 'is', 'it', 'its', 'of', 'on', 'that', 'the', 'to', 'was',
                 'were', 'will', 'with'}
    tokens = [word for word in text.split() if word not in stopwords]
    return ' '.join(tokens)

def predict_emotions_with_confidence(text, model, emotion_labels):
    
    processed_text = preprocess_text(text)
    
    base_estimators = model.named_steps['classifier'].estimators_

    X_processed = model.named_steps['tfidf'].transform([processed_text])
    X_processed = model.named_steps['scaler'].transform(X_processed)
    
    emotion_confidence_vector = np.zeros(len(emotion_labels))
    
    for idx, (emotion, estimator) in enumerate(zip(emotion_labels, base_estimators)):
        try:
            proba = estimator.predict_proba(X_processed)[0][1]
            if proba > 0.1:  
                emotion_confidence_vector[idx] = proba
        except (AttributeError, IndexError):
            prediction = estimator.predict(X_processed)[0]
            if prediction == 1:
                emotion_confidence_vector[idx] = 1.0
    
    return emotion_confidence_vector

def main():
    print("Loading the trained model...")
    model_path = 'best_sentiment_model.pkl'
    model = load_model(model_path)

    print("Loading emotion labels...")
    emotion_labels = pd.read_csv('goemotions.csv').columns[7:]  

    print("\nType 'exit' to quit the program.")
    while True:
        user_input = input("\nEnter a text to analyze emotions: ")
        if user_input.lower() == 'exit':
            print("Exiting...")
            break

        emotion_scores_vector = predict_emotions_with_confidence(user_input, model, emotion_labels)
        
        if np.any(emotion_scores_vector):
            print("\nPredicted emotion vector (confidence scores):")
            print(emotion_scores_vector)
        else:
            print("No significant emotions detected.")

if __name__ == "__main__":
    main()
    


Loading the trained model...


UnpicklingError: invalid load key, '\x0e'.

In [1]:
import joblib
import re
import nltk
import numpy as np

# Load the pre-trained model
pipeline = joblib.load('sentiment_model.pkl')  # Make sure to change the path to where you saved the model

# Function to preprocess input text (same as during training)
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    tokens = nltk.word_tokenize(text)
    stop_words = set(nltk.corpus.stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]
    return ' '.join(tokens)

# Function to predict emotions
def predict_emotions(text, model):
    processed_text = preprocess_text(text)
    # Use the pipeline to predict the emotions for the processed text
    emotion_scores = model.predict([processed_text])
    return emotion_scores

# Main code to take user input and make predictions
if __name__ == "__main__":
    # Example user input
    user_input = input("Enter a sentence to analyze emotions: ")
    
    # Predict emotion scores (probabilities for each emotion)
    emotion_scores_vector = predict_emotions(user_input, pipeline)

    # If there are any significant emotions detected, print them
    if np.any(emotion_scores_vector):
        print("\nPredicted emotion vector (sentiment scores):")
        print(emotion_scores_vector)
    else:
        print("No significant emotions detected.")



Predicted emotion vector (sentiment scores):
[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0]]


In [10]:
import joblib
import re
import nltk
import numpy as np

# Load the pre-trained model
pipeline = joblib.load('best_sentiment_model.pkl')  # Make sure to change the path to where you saved the model

# Function to preprocess input text (same as during training)
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    tokens = nltk.word_tokenize(text)
    stop_words = set(nltk.corpus.stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]
    return ' '.join(tokens)

# Function to predict emotions with probabilities (float vector)
def predict_emotions(text, model):
    processed_text = preprocess_text(text)
    # Use the pipeline to predict the emotion probabilities for the processed text
    emotion_probs = model.predict_proba([processed_text])[0]  # Get probabilities for the first (and only) sample
    return emotion_probs

# Main code to take user input and make predictions
if __name__ == "__main__":
    # Example user input
    user_input = input("Enter a sentence to analyze emotions: ")
    
    # Predict emotion probabilities (float vector for each emotion)
    emotion_scores_vector = predict_emotions(user_input, pipeline)

    # If there are any significant emotions detected, print them
    if np.any(emotion_scores_vector):
        print("\nPredicted emotion vector (sentiment scores):")
        print(emotion_scores_vector)
    else:
        print("No significant emotions detected.")



Predicted emotion vector (sentiment scores):
[[0.53445696 0.46554304]]


In [11]:
import joblib
import re
import nltk
import numpy as np

# Load the pre-trained model
pipeline = joblib.load('best_sentiment_model.pkl')  # Make sure to change the path to where you saved the model

# Function to preprocess input text (same as during training)
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    tokens = nltk.word_tokenize(text)
    stop_words = set(nltk.corpus.stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]
    return ' '.join(tokens)

# Function to predict emotions with probabilities (float vector of 28 values)
def predict_emotions(text, model):
    processed_text = preprocess_text(text)
    # Use the pipeline to predict the emotion probabilities for the processed text
    emotion_probs = model.predict_proba([processed_text])[0]  # Get probabilities for the first (and only) sample
    return emotion_probs

# Main code to take user input and make predictions
if __name__ == "__main__":
    # Example user input
    user_input = input("Enter a sentence to analyze emotions: ")
    
    # Predict emotion probabilities (float vector of 28 sentiments)
    emotion_scores_vector = predict_emotions(user_input, pipeline)

    # If there are any significant emotions detected, print them
    if np.any(emotion_scores_vector):
        print("\nPredicted emotion vector (sentiment scores):")
        print(emotion_scores_vector)
    else:
        print("No significant emotions detected.")



Predicted emotion vector (sentiment scores):
[[0.53445696 0.46554304]]


In [12]:
import joblib
import re
import nltk
import numpy as np

# Load the pre-trained model
pipeline = joblib.load('best_sentiment_model.pkl')  # Update with the correct path to your model

# Emotion labels (based on GoEmotions dataset columns from row 9 onwards)
emotion_labels = [
    'admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion',
    'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment',
    'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism',
    'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral'
]

# Function to preprocess input text (same as during training)
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    tokens = nltk.word_tokenize(text)
    stop_words = set(nltk.corpus.stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]
    return ' '.join(tokens)

# Function to predict emotions with probabilities (float vector of 28 values)
def predict_emotions(text, model):
    processed_text = preprocess_text(text)
    # Use the pipeline to predict the emotion probabilities for the processed text
    emotion_probs = model.predict_proba([processed_text])[0]  # Get probabilities for the first (and only) sample
    return emotion_probs

# Main code to take user input and make predictions
if __name__ == "__main__":
    # Example user input
    user_input = input("Enter a sentence to analyze emotions: ")
    
    # Predict emotion probabilities (float vector of 28 sentiments)
    emotion_scores_vector = predict_emotions(user_input, pipeline)

    # If there are any significant emotions detected, print them
    if np.any(emotion_scores_vector):
        print("\nPredicted emotion vector (sentiment scores):")
        for label, score in zip(emotion_labels, emotion_scores_vector):
            print(f"{label}: {score:.4f}")
    else:
        print("No significant emotions detected.")



Predicted emotion vector (sentiment scores):


TypeError: unsupported format string passed to numpy.ndarray.__format__

In [13]:
import joblib
import re
import nltk
import numpy as np

# Load the pre-trained model
pipeline = joblib.load('best_sentiment_model.pkl')  # Update with the correct path to your model

# Emotion labels (based on GoEmotions dataset columns from row 9 onwards)
emotion_labels = [
    'admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion',
    'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment',
    'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism',
    'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral'
]

# Function to preprocess input text (same as during training)
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    tokens = nltk.word_tokenize(text)
    stop_words = set(nltk.corpus.stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]
    return ' '.join(tokens)

# Function to predict emotions with probabilities (float vector of 28 values)
def predict_emotions(text, model):
    processed_text = preprocess_text(text)
    # Use the pipeline to predict the emotion probabilities for the processed text
    emotion_probs = model.predict_proba([processed_text])[0]  # Get probabilities for the first (and only) sample
    return emotion_probs

# Main code to take user input and make predictions
if __name__ == "__main__":
    # Example user input
    user_input = input("Enter a sentence to analyze emotions: ")
    
    # Predict emotion probabilities (float vector of 28 sentiments)
    emotion_scores_vector = predict_emotions(user_input, pipeline)

    # If there are any significant emotions detected, print them
    if np.any(emotion_scores_vector):
        print("\nPredicted emotion vector (sentiment scores):")
        for label, score in zip(emotion_labels, emotion_scores_vector):
            # Ensure score is treated as a float for formatting
            print(f"{label}: {score:.4f}")
    else:
        print("No significant emotions detected.")



Predicted emotion vector (sentiment scores):


TypeError: unsupported format string passed to numpy.ndarray.__format__

In [3]:
import joblib
import re
import nltk
import numpy as np

# Load the pre-trained model
pipeline = joblib.load('sentiment_model.pkl')  # Update with the correct path to your model

# Emotion labels (based on GoEmotions dataset columns from row 9 onwards)
emotion_labels = [
    'admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion',
    'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment',
    'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism',
    'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral'
]

# Function to preprocess input text (same as during training)
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    tokens = nltk.word_tokenize(text)
    stop_words = set(nltk.corpus.stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]
    return ' '.join(tokens)

# Function to predict emotions with probabilities (float vector of 28 values)
def predict_emotions(text, model):
    processed_text = preprocess_text(text)
    # Use the pipeline to predict the emotion probabilities for the processed text
    emotion_probs = model.predict_proba([processed_text])[0]  # Get probabilities for the first (and only) sample
    return emotion_probs

# Main code to take user input and make predictions
if __name__ == "__main__":
    # Example user input
    user_input = input("Enter a sentence to analyze emotions: ")
    
    # Predict emotion probabilities (float vector of 28 sentiments)
    emotion_scores_vector = predict_emotions(user_input, pipeline)

    # Flatten the vector to ensure it's a 1D array
    emotion_scores_vector = emotion_scores_vector.flatten()  # Ensures it's a 1D array

    # If there are any significant emotions detected, print them
    if np.any(emotion_scores_vector):
        print("\nPredicted emotion vector (sentiment scores):")
        for label, score in zip(emotion_labels, emotion_scores_vector):
            # Ensure score is treated as a float for formatting
            print(f"{label}: {score:.4f}")
    else:
        print("No significant emotions detected.")



Predicted emotion vector (sentiment scores):
admiration: 0.5157
amusement: 0.4843


In [15]:
import joblib
import re
import nltk
import numpy as np

# Load the pre-trained model
pipeline = joblib.load('best_sentiment_model.pkl')  # Update with the correct path to your model

# Emotion labels (based on GoEmotions dataset columns from row 9 onwards)
emotion_labels = [
    'admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion',
    'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment',
    'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism',
    'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral'
]

# Function to preprocess input text (same as during training)
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    tokens = nltk.word_tokenize(text)
    stop_words = set(nltk.corpus.stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]
    return ' '.join(tokens)

# Function to predict emotions with probabilities (float vector of 28 values)
def predict_emotions(text, model):
    processed_text = preprocess_text(text)
    # Use the pipeline to predict the emotion probabilities for the processed text
    emotion_probs = model.predict_proba([processed_text])[0]  # Get probabilities for the first (and only) sample
    return emotion_probs

# Main code to take user input and make predictions
if __name__ == "__main__":
    # Example user input
    user_input = input("Enter a sentence to analyze emotions: ")
    
    # Predict emotion probabilities (float vector of 28 sentiments)
    emotion_scores_vector = predict_emotions(user_input, pipeline)

    # Flatten the vector to ensure it's a 1D array
    emotion_scores_vector = emotion_scores_vector.flatten()  # Ensures it's a 1D array

    # Print out the predicted emotion vector for all 28 emotions
    print("\nPredicted emotion vector (sentiment scores):")
    for label, score in zip(emotion_labels, emotion_scores_vector):
        # Ensure score is treated as a float for formatting
        print(f"{label}: {score:.4f}")



Predicted emotion vector (sentiment scores):
admiration: 0.5345
amusement: 0.4655


In [16]:
import joblib
import re
import nltk
import numpy as np

# Load the pre-trained model
pipeline = joblib.load('best_sentiment_model.pkl')  # Update with the correct path to your model

# Emotion labels (based on GoEmotions dataset columns from row 9 onwards)
emotion_labels = [
    'admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion',
    'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment',
    'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism',
    'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral'
]

# Function to preprocess input text (same as during training)
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    tokens = nltk.word_tokenize(text)
    stop_words = set(nltk.corpus.stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]
    return ' '.join(tokens)

# Function to predict emotions with probabilities (float vector of 28 values)
def predict_emotions(text, model):
    processed_text = preprocess_text(text)
    # Use the pipeline to predict the emotion probabilities for the processed text
    emotion_probs = model.predict_proba([processed_text])[0]  # Get probabilities for the first (and only) sample
    return emotion_probs

# Main code to take user input and make predictions
if __name__ == "__main__":
    # Example user input
    user_input = input("Enter a sentence to analyze emotions: ")

    # Predict emotion probabilities (float vector of 28 sentiments)
    emotion_scores_vector = predict_emotions(user_input, pipeline)

    # Debugging: Print the shape and content of the predicted probabilities vector
    print("\nPredicted probabilities vector shape:", emotion_scores_vector.shape)
    print("Predicted probabilities vector:", emotion_scores_vector)

    # Flatten the vector to ensure it's a 1D array
    emotion_scores_vector = emotion_scores_vector.flatten()  # Ensures it's a 1D array

    # Print out the predicted emotion vector for all 28 emotions
    print("\nPredicted emotion vector (sentiment scores):")
    for label, score in zip(emotion_labels, emotion_scores_vector):
        # Ensure score is treated as a float for formatting
        print(f"{label}: {score:.4f}")



Predicted probabilities vector shape: (1, 2)
Predicted probabilities vector: [[0.53445696 0.46554304]]

Predicted emotion vector (sentiment scores):
admiration: 0.5345
amusement: 0.4655


In [18]:
import joblib
import re
import nltk
import numpy as np

# Load the pre-trained model
pipeline = joblib.load('best_sentiment_model.pkl')  # Update with the correct path to your model

# Emotion labels (based on GoEmotions dataset columns from row 9 onwards)
emotion_labels = [
    'admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion',
    'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment',
    'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism',
    'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral'
]

# Function to preprocess input text (same as during training)
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    tokens = nltk.word_tokenize(text)
    stop_words = set(nltk.corpus.stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]
    return ' '.join(tokens)

# Function to predict emotions with probabilities (hotcode vector)
def predict_emotions(text, model):
    processed_text = preprocess_text(text)
    # Use the pipeline to predict the emotion scores for the processed text
    emotion_scores = model.predict([processed_text])  # This should give you a hotcode vector (binary or float)
    return emotion_scores

# Main code to take user input and make predictions
if __name__ == "__main__":
    # Example user input
    user_input = input("Enter a sentence to analyze emotions: ")
    
    # Predict emotion scores (vector of 28 emotions)
    emotion_scores_vector = predict_emotions(user_input, pipeline)

    # If there are any significant emotions detected, print them
    print("\nPredicted emotion vector (hotcode scores):")
    
    # Display each emotion with its corresponding score
    for label, score in zip(emotion_labels, emotion_scores_vector[0]):  # assuming model.predict() returns a 2D array
        print(f"{label}: {score:.4f}")



Predicted emotion vector (hotcode scores):
admiration: 0.0000
amusement: 0.0000
anger: 0.0000
annoyance: 0.0000
approval: 0.0000
caring: 0.0000
confusion: 0.0000
curiosity: 0.0000
desire: 0.0000
disappointment: 0.0000
disapproval: 0.0000
disgust: 0.0000
embarrassment: 0.0000
excitement: 0.0000
fear: 0.0000
gratitude: 0.0000
grief: 0.0000
joy: 0.0000
love: 1.0000
nervousness: 0.0000
optimism: 0.0000
pride: 0.0000
realization: 0.0000
relief: 0.0000
remorse: 0.0000
sadness: 0.0000
surprise: 0.0000
neutral: 0.0000


In [19]:
import joblib
import re
import nltk
import numpy as np

# Load the pre-trained model
pipeline = joblib.load('best_sentiment_model.pkl')  # Make sure to change the path to where you saved the model

# Function to preprocess input text (same as during training)
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    tokens = nltk.word_tokenize(text)
    stop_words = set(nltk.corpus.stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]
    return ' '.join(tokens)

# Function to predict emotions
def predict_emotions(text, model):
    processed_text = preprocess_text(text)
    # Use the pipeline to predict the emotions for the processed text
    emotion_scores = model.predict([processed_text])
    return emotion_scores

# Main code to take user input and make predictions
if __name__ == "__main__":
    # Example user input
    user_input = input("Enter a sentence to analyze emotions: ")
    
    # Predict emotion scores (probabilities for each emotion)
    emotion_scores_vector = predict_emotions(user_input, pipeline)

    # Set a threshold to filter out emotions with very small probabilities
    threshold = 0.1

    # If there are any significant emotions detected, print them
    print("\nPredicted emotion vector (filtered by threshold):")
    for label, score in zip(emotion_labels, emotion_scores_vector[0]):
        if score >= threshold:  # Display only scores above the threshold
            print(f"{label}: {score:.4f}")
    else:
        print("No significant emotions detected.")



Predicted emotion vector (filtered by threshold):
love: 1.0000
No significant emotions detected.


In [20]:
import joblib
import re
import nltk
import numpy as np

# Load the pre-trained model
pipeline = joblib.load('best_sentiment_model.pkl')  # Update with the correct path to your model

# Emotion labels (based on GoEmotions dataset columns from row 9 onwards)
emotion_labels = [
    'admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion',
    'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment',
    'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism',
    'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral'
]

# Function to preprocess input text (same as during training)
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    tokens = nltk.word_tokenize(text)
    stop_words = set(nltk.corpus.stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]
    return ' '.join(tokens)

# Function to predict emotions with probabilities (hotcode vector)
def predict_emotions(text, model):
    processed_text = preprocess_text(text)
    # Use the pipeline to predict the emotion scores for the processed text
    emotion_scores = model.predict([processed_text])  # This should give you a hotcode vector (binary or float)
    return emotion_scores

# Main code to take user input and make predictions
if __name__ == "__main__":
    # Example user input
    user_input = input("Enter a sentence to analyze emotions: ")
    
    # Predict emotion scores (vector of 28 emotions)
    emotion_scores_vector = predict_emotions(user_input, pipeline)

    # If there are any significant emotions detected, print them
    print("\nPredicted emotion vector (hotcode scores):")
    
    # Print each emotion with its corresponding score
    for label, score in zip(emotion_labels, emotion_scores_vector[0]):  # assuming model.predict() returns a 2D array
        print(f"{label}: {score:.4f}")

    # Calculate and print the model's confidence score
    confidence_score = np.max(emotion_scores_vector)  # Confidence is the highest score
    most_confident_emotion = emotion_labels[np.argmax(emotion_scores_vector)]  # Emotion corresponding to the highest score

    print(f"\nModel's confidence score: {confidence_score:.4f} (for emotion: {most_confident_emotion})")



Predicted emotion vector (hotcode scores):
admiration: 0.0000
amusement: 0.0000
anger: 0.0000
annoyance: 0.0000
approval: 0.0000
caring: 0.0000
confusion: 0.0000
curiosity: 0.0000
desire: 0.0000
disappointment: 0.0000
disapproval: 0.0000
disgust: 0.0000
embarrassment: 0.0000
excitement: 0.0000
fear: 0.0000
gratitude: 0.0000
grief: 0.0000
joy: 0.0000
love: 1.0000
nervousness: 0.0000
optimism: 0.0000
pride: 0.0000
realization: 0.0000
relief: 0.0000
remorse: 0.0000
sadness: 0.0000
surprise: 0.0000
neutral: 0.0000

Model's confidence score: 1.0000 (for emotion: love)
