In [20]:
import sys
print(sys.executable)

import re

# import streamlit as st

import pickle

from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

import pandas as pd

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import joblib


# Path of the pickled tokenizer that load_tokenizer() reads.
TOKENIZER_PATH = "models/tokenizer.pickle"

# Registry of the available models. main() looks an entry up by its "name"
# and loads the file at "path"; paths ending in ".joblib" are loaded with
# joblib, every other path is passed to Keras' load_model.
MODELS = [
    {
        "name": "Recurrent Neural Network",
        "path": "models/rnn.h5"
    },
    {
        "name": "Recurrent Neural Network with GloVe embeddings",
        "path": "models/rnn_glove.h5"
    },
    {
        "name": "Convolutional Neural Network",
        "path": "models/cnn_model.keras"
    },
    {
        "name": "Convolutional Neural Network with GloVe embeddings",
        "path": "models/cnn_model_glove.keras"
    },
    {
        "name": "Logistic Regression with Bag-of-Words",
        "path": "models/logReg_bow.joblib"
    },
    {
        "name": "Logistic Regression with TF-IDF",
        "path": "models/logReg_tfidf.joblib"
    },
    {
        "name": "Naive Bayes with TF-IDF",
        "path": "models/naive_tfidf.joblib"
    },
    # NOTE(review): this entry points at the same file as the TF-IDF entry
    # above. It looks like a copy-paste slip (a Bag-of-Words file was
    # presumably intended) -- TODO confirm the correct filename.
    {
        "name": "Naive Bayes with Bag-of-Words",
        "path": "models/naive_tfidf.joblib"
    },
    {
        "name": "DenseNet",
        "path": "models/denseNet_model.h5"
    },
]

# Maps an emotion label to the emoji shown next to it.
#
# The insertion order of the keys matters: predict_sentiment() turns the keys
# into a list and indexes that list with the model's output class index, so
# the i-th key is treated as the label of output unit i.
# NOTE(review): this dict has 23 entries, while the recorded output of the
# GloVe CNN in this notebook contains 24 probabilities -- the label list and
# its order should be checked against the labels used for training.
emotion_to_emoji = {
    'admiration': '🤩',
    'amusement': '😄',
    'anger': '😡',
    'annoyance': '😑',
    'approval': '👍',
    'caring': '🥰',
    'confusion': '😕',
    'curiosity': '🤔',
    'desire': '😏',
    'disappointment': '😞',
    'disapproval': '👎',
    'disgust': '🤢',
    'embarrassment': '😳',
    'excitement': '😃',
    'fear': '😨',
    'gratitude': '🙏',
    'joy': '😀',
    'love': '❤️',
    'neutral': '😐',
    'optimism': '😊',
    'realization': '😲',
    'sadness': '😢',
    'surprise': '😮'
}

def load_tokenizer():
    """Return the object unpickled from ``TOKENIZER_PATH``.

    The file is read in binary mode and deserialized with ``pickle``, so it
    must come from a trusted source: unpickling can execute arbitrary code.
    """
    with open(TOKENIZER_PATH, "rb") as handle:
        return pickle.load(handle)

def remove_special_characters(sentence, remove_digits=False):
    """Strip punctuation and other special characters from ``sentence``.

    Args:
        sentence: Text to clean.
        remove_digits: When True, keep only ASCII letters and whitespace
            (digits are removed as well). When False, keep word characters
            (letters and digits), whitespace and hyphens; underscores are
            removed.

    Returns:
        The cleaned text. Whitespace is left in place so words stay separated.
    """
    print(f'Removing special characters from sentence: {sentence}')
    # The previous default pattern, r'/[^\w-]|_/', was written as a
    # JavaScript regex literal. In Python the slashes are ordinary characters,
    # so it only matched text containing '/' and left punctuation untouched.
    if remove_digits:
        pattern = r'[^a-zA-Z\s]'
    else:
        pattern = r'[^\w\s-]|_'
    clean_text = re.sub(pattern, '', sentence)
    print(f'Cleaned sentence: {clean_text}')
    return clean_text

def _ensure_nltk_resource(resource_path, package):
    """Download the NLTK ``package`` only if ``resource_path`` is not installed."""
    try:
        nltk.data.find(resource_path)
    except LookupError:
        nltk.download(package)


def preprocess_input(text, maxlen=18):
    """Turn raw text into the padded token sequence fed to the Keras models.

    Steps, in order: remove everything except ASCII letters and whitespace,
    lemmatize each word, drop English stop words, lowercase, encode with the
    tokenizer from ``load_tokenizer()``, and pad to ``maxlen``.

    Args:
        text: The raw input sentence.
        maxlen: Length passed to ``pad_sequences``.

    Returns:
        The result of ``pad_sequences`` for the single encoded sentence.
    """
    # Only hit the network when a resource is actually missing, instead of
    # calling nltk.download() on every prediction.
    _ensure_nltk_resource("corpora/stopwords", "stopwords")
    _ensure_nltk_resource("corpora/wordnet", "wordnet")
    # A set gives constant-time membership tests for the stop-word filter.
    stop_words = set(stopwords.words("english"))
    lemmatizer = WordNetLemmatizer()

    # Remove special characters (and digits)
    text = remove_special_characters(text, remove_digits=True)

    # Apply lemmatization
    lemmas = [lemmatizer.lemmatize(word) for word in text.split()]

    # Remove stopwords.
    # NOTE(review): filtering happens before lowercasing, so capitalized stop
    # words such as "I" or "The" are kept. The order is left unchanged because
    # it presumably mirrors the preprocessing used at training time -- confirm
    # before moving the lowercasing step earlier.
    kept_words = [word for word in lemmas if word not in stop_words]

    # Text to lowercase
    text = " ".join(kept_words).lower()

    # Tokenize the input text
    tokenizer = load_tokenizer()
    sequences = tokenizer.texts_to_sequences([text])

    # Pad the input text
    return pad_sequences(sequences, maxlen=maxlen)

def predict_sentiment(text, model_used):
    """Return the three most probable emotions for an already-encoded input.

    Args:
        text: Model input as produced by ``preprocess_input``.
        model_used: Object whose ``predict(text)`` returns one row of class
            probabilities per input; only the first row is used.

    Returns:
        A list of ``(emoji, emotion, probability)`` tuples ordered from most
        to least probable. The class index is mapped to a label through the
        key order of ``emotion_to_emoji``. An index with no corresponding
        label yields the placeholder ``("❓", "class_<index>", probability)``
        instead of raising ``IndexError``.
    """
    import warnings

    print(f"Predicting sentiment for tokenized text: {text}")
    prediction = model_used.predict(text)[0]
    print(f"Prediction: {prediction}")
    # argsort is ascending: take the last three indices and reverse them so
    # the most probable class comes first.
    top_classes = prediction.argsort()[-3:][::-1]
    print(f"Top classes: {top_classes}")
    emotion_labels = list(emotion_to_emoji.keys())
    print(emotion_labels)

    top_classes_info = []
    for top_class in top_classes:
        if top_class < len(emotion_labels):
            label = emotion_labels[top_class]
            emoji = emotion_to_emoji[label]
        else:
            # The model has more output units than there are known labels.
            warnings.warn(
                f"Model returned class index {top_class}, but only "
                f"{len(emotion_labels)} emotion labels are defined."
            )
            label = f"class_{top_class}"
            emoji = "❓"
        top_classes_info.append((emoji, label, prediction[top_class]))
    print(top_classes_info)
    return top_classes_info


/Users/blanca/.pyenv/versions/3.9.18/envs/myenv/bin/python


In [21]:
from pickle import UnpicklingError

def load_model_joblib(model_path):
    """Load and return the object stored with joblib at ``model_path``."""
    return joblib.load(model_path)

def predict_sentiment_joblib(text, model_used):
    """Return the three most probable emotions using a joblib-loaded model.

    Args:
        text: A single string, or a sequence of inputs. A string is wrapped
            in a list before it is handed to the model.
        model_used: Object exposing ``predict_proba`` and ``classes_``.

    Returns:
        A list of ``(emoji, emotion, probability)`` tuples for the first
        input, ordered from most to least probable. A class with no entry in
        ``emotion_to_emoji`` gets the placeholder emoji "❓" instead of
        raising ``KeyError``.
    """
    if isinstance(text, str):
        text = [text]
    print(f'text: {text}')
    print(f"Predicting sentiment for tokenized text: {text}")
    prediction = model_used.predict_proba(text)[0]
    print(f"Prediction: {prediction}")

    # Class labels, in the same order as the probability columns
    classes = model_used.classes_
    print("Classes:", classes)

    # Indices of the three highest probabilities, most probable first
    top_indices = prediction.argsort()[-3:][::-1]
    print(f"Top classes indices: {top_indices}")

    # Collect the information for the three best classes
    top_classes_info = [
        (emotion_to_emoji.get(classes[i], "❓"), classes[i], prediction[i])
        for i in top_indices
    ]
    print("Top classes information:", top_classes_info)

    return top_classes_info


In [24]:
from joblib import load
def main(text, model_selected="Convolutional Neural Network with GloVe embeddings"):
    """Load a model by name, classify ``text`` and print the top emotions.

    Args:
        text: The raw sentence to classify.
        model_selected: The ``"name"`` of an entry in ``MODELS``. Defaults to
            the GloVe CNN, which was previously hard-coded.

    Raises:
        ValueError: If ``model_selected`` does not match any entry in
            ``MODELS``.
    """
    # Resolve the selected model's path
    model_path = next(
        (model["path"] for model in MODELS if model["name"] == model_selected),
        None,
    )
    if model_path is None:
        available = ", ".join(model["name"] for model in MODELS)
        raise ValueError(
            f"Unknown model {model_selected!r}. Available models: {available}"
        )

    print(f'Loading model from path: {model_path}')
    if model_path.endswith(".joblib"):
        # joblib models receive the raw text directly.
        model_loaded_joblib = load_model_joblib(model_path)
        print(f'Model loaded: {model_loaded_joblib}')
        result = predict_sentiment_joblib(text, model_loaded_joblib)
    else:
        model_loaded = load_model(model_path)
        print(f'Model loaded: {model_loaded}')
        # The convolutional models are fed sequences padded to 19 tokens,
        # every other Keras model to 18.
        maxlen = 19 if "Convolutional" in model_selected else 18
        encoded_text = preprocess_input(text, maxlen=maxlen)
        # Predict the sentiment
        result = predict_sentiment(encoded_text, model_loaded)

    # Display the sentiment analysis results
    for emotion in result:
        print(f"Emoji: {emotion[0]}, Emotion: {emotion[1]}, Probability: {emotion[2]}")
        


In [26]:
# Run the app
text = "I am so happy today"
if __name__ == "__main__":
    # The message promises "at least 18 characters", so an input of exactly
    # 18 characters must be accepted; the previous check used ">" and
    # rejected it.
    if len(text) >= 18:
        main(text)
    else:
        print("Text must be at least 18 characters long")

Loading model from path: models/cnn_model_glove.keras


Model loaded: <keras.src.engine.sequential.Sequential object at 0x131e27220>
Removing special characters from sentence: I am so happy today
Cleaned sentence: I am so happy today
Predicting sentiment for tokenized text: [[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1 119
  286]]


[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/blanca/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/blanca/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Prediction: [1.93475056e-02 9.83530562e-03 3.87222245e-02 8.94250199e-02
 9.16875675e-02 2.40979642e-02 2.35543773e-02 2.14404929e-02
 3.45964858e-18 6.94395741e-03 4.08765748e-02 5.81160821e-02
 3.15549485e-02 1.37034655e-02 1.09884795e-02 9.98274889e-03
 3.63386027e-03 1.29760895e-02 3.71070555e-03 3.84331554e-01
 1.88727807e-02 4.43505235e-02 3.01145017e-02 1.17333299e-02]
Top classes: [19  4  3]
['admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'joy', 'love', 'neutral', 'optimism', 'realization', 'sadness', 'surprise']
[('😊', 'optimism', 0.38433155), ('👍', 'approval', 0.09168757), ('😑', 'annoyance', 0.08942502)]
Emoji: 😊, Emotion: optimism, Probability: 0.3843315541744232
Emoji: 👍, Emotion: approval, Probability: 0.09168756753206253
Emoji: 😑, Emotion: annoyance, Probability: 0.08942501991987228
