In [1]:
import sys
print(sys.executable)

import re

# import streamlit as st

import pickle

from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

import pandas as pd

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import joblib


# Location of the pickled tokenizer read by load_tokenizer().
TOKENIZER_PATH = "models/tokenizer.pickle"

# Registry of selectable models: display name -> serialized model file.
# main() dispatches on the file extension: ".joblib" entries are loaded with
# joblib, every other entry with Keras' load_model.
MODELS = [
    {
        "name": "Recurrent Neural Network",
        "path": "models/rnn.h5"
    },
    {
        "name": "Recurrent Neural Network with GloVe embeddings",
        "path": "models/rnn_glove.h5"
    },
    {
        "name": "Convolutional Neural Network",
        "path": "models/cnn_model.keras"
    },
    {
        "name": "Convolutional Neural Network with GloVe embeddings",
        "path": "models/cnn_model_glove.keras"
    },
    {
        "name": "Logistic Regression with Bag-of-Words",
        "path": "models/logReg_bow.joblib"
    },
    {
        "name": "Logistic Regression with TF-IDF",
        "path": "models/logReg_tfidf.joblib"
    },
    {
        "name": "Naive Bayes with TF-IDF",
        "path": "models/naive_tfidf.joblib"
    },
    {
        "name": "Naive Bayes with Bag-of-Words",
        # This entry used to reuse the TF-IDF file, so selecting it silently
        # loaded the wrong model. The file name below follows the
        # logReg_bow / logReg_tfidf naming convention.
        # TODO confirm the artifact exists under this name.
        "path": "models/naive_bow.joblib"
    },
]

# Emoji displayed next to each emotion label.
# The insertion order matters: predict_sentiment() turns the network's output
# index i into the i-th key of this mapping, so entries must not be reordered.
emotion_to_emoji = dict([
    ("admiration", "🤩"),
    ("amusement", "😄"),
    ("anger", "😡"),
    ("annoyance", "😑"),
    ("approval", "👍"),
    ("caring", "🥰"),
    ("confusion", "😕"),
    ("curiosity", "🤔"),
    ("desire", "😏"),
    ("disappointment", "😞"),
    ("disapproval", "👎"),
    ("disgust", "🤢"),
    ("embarrassment", "😳"),
    ("excitement", "😃"),
    ("fear", "😨"),
    ("gratitude", "🙏"),
    ("joy", "😀"),
    ("love", "❤️"),
    ("neutral", "😐"),
    ("optimism", "😊"),
    ("realization", "😲"),
    ("sadness", "😢"),
    ("surprise", "😮"),
])

def load_tokenizer():
    """Deserialize and return the tokenizer stored at ``TOKENIZER_PATH``.

    The file is read with ``pickle``, which can execute arbitrary code while
    loading, so the path must only ever point at a trusted artifact.
    """
    with open(TOKENIZER_PATH, "rb") as handle:
        return pickle.load(handle)

def remove_special_characters(sentence, remove_digits=False):
    """Strip punctuation/special characters from ``sentence``.

    Args:
        sentence: Text to clean.
        remove_digits: When True, keep only ASCII letters and whitespace.
            When False (default), keep word characters (letters and digits),
            whitespace and hyphens; underscores are removed.

    Returns:
        The cleaned string. Whitespace is never removed, so word boundaries
        survive for later ``split()`` calls.
    """
    print(f'Removing special characters from sentence: {sentence}')
    if remove_digits:
        pattern = r'[^a-zA-Z\s]'
    else:
        # The previous default pattern was written with JavaScript-style
        # "/.../" delimiters, which Python treats as literal slashes, so it
        # matched almost nothing. Whitespace is kept here, like in the
        # remove_digits branch, so that words are not glued together.
        pattern = r'[^\w\s-]|_'
    clean_text = re.sub(pattern, '', sentence)
    print(f'Cleaned sentence: {clean_text}')
    return clean_text

def preprocess_input(text, maxlen=18):
    """Turn raw text into a padded token-id sequence for the Keras models.

    Steps, in order: strip special characters and digits, lemmatize each
    word, drop English stopwords, lowercase, map words to ids with the
    pickled tokenizer, and pad/truncate to ``maxlen``.

    Args:
        text: Raw input sentence.
        maxlen: Length passed to ``pad_sequences``. Defaults to 18, the value
            that was previously hard-coded. It must match the input length of
            the model that will consume the result; the traceback captured in
            this notebook shows a saved model whose input layer is 19 wide.

    Returns:
        The output of ``pad_sequences`` for the single input sentence.
    """
    # Make sure the NLTK resources used below are available.
    nltk.download("stopwords")
    nltk.download("wordnet")
    # A set gives constant-time membership checks in the filter below.
    stop_words = set(stopwords.words("english"))
    lemmatizer = WordNetLemmatizer()

    # Remove special characters
    text = remove_special_characters(text, remove_digits=True)

    # Apply lemmatization
    text = " ".join(lemmatizer.lemmatize(word) for word in text.split())

    # Remove stopwords
    # NOTE(review): this runs before lowercasing, so capitalised stopwords
    # (e.g. a sentence-initial "I" or "The") are not filtered if the stopword
    # list is all lowercase. The order is kept as-is because it presumably
    # mirrors the preprocessing used at training time — confirm before changing.
    text = " ".join(word for word in text.split() if word not in stop_words)

    # Text to lowercase
    text = text.lower()

    # Tokenize the input text
    tokenizer = load_tokenizer()
    sequences = tokenizer.texts_to_sequences([text])

    # Pad the input text
    return pad_sequences(sequences, maxlen=maxlen)

def predict_sentiment(text, model_used):
    """Return the three highest-scoring emotions for preprocessed input.

    Args:
        text: Model-ready input, passed straight to ``model_used.predict``.
        model_used: Object exposing ``predict``; the first row of its output
            is used as the per-class scores.

    Returns:
        A list of ``(emoji, emotion, probability)`` tuples, best score first.
        Class index ``i`` is mapped to the ``i``-th key of ``emotion_to_emoji``.
    """
    print(f"Predicting sentiment for tokenized text: {text}")
    prediction = model_used.predict(text)[0]
    print(f"Prediction: {prediction}")

    # Indices of the three largest scores, highest first.
    top_classes = prediction.argsort()[::-1][:3]
    print(f"Top classes: {top_classes}")

    emotion_labels = list(emotion_to_emoji)
    print(emotion_labels)

    top_classes_info = []
    for class_index in top_classes:
        label = emotion_labels[class_index]
        top_classes_info.append(
            (emotion_to_emoji[label], label, prediction[class_index])
        )
    print(top_classes_info)
    return top_classes_info


/Users/blanca/.pyenv/versions/3.9.18/envs/myenv/bin/python


2024-05-01 18:43:24.862469: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
from pickle import UnpicklingError

def load_model_joblib(model_path):
    """Load and return the object serialized with joblib at ``model_path``."""
    return joblib.load(model_path)

def predict_sentiment_joblib(text, model_used):
    """Return the three most probable emotions from a joblib-loaded model.

    Args:
        text: A single string or a collection of strings. A bare string is
            wrapped in a one-element list before calling ``predict_proba``.
        model_used: Object exposing ``predict_proba`` and ``classes_``.

    Returns:
        A list of ``(emoji, emotion, probability)`` tuples for the first
        input, ordered from most to least probable.
    """
    samples = [text] if isinstance(text, str) else text
    print(f'text: {samples}')
    print(f"Predicting sentiment for tokenized text: {samples}")

    probabilities = model_used.predict_proba(samples)[0]
    print(f"Prediction: {probabilities}")

    # Class labels as reported by the model.
    classes = model_used.classes_
    print("Classes:", classes)

    # Indices of the three highest probabilities, largest first.
    top_indices = probabilities.argsort()[::-1][:3]
    print(f"Top classes indices: {top_indices}")

    # Collect emoji, label and probability for each of the top classes.
    top_classes_info = []
    for idx in top_indices:
        label = classes[idx]
        top_classes_info.append((emotion_to_emoji[label], label, probabilities[idx]))
    print("Top classes information:", top_classes_info)

    return top_classes_info


In [3]:
from joblib import load
def main(text, model_selected="Convolutional Neural Network with GloVe embeddings"):
    """Classify ``text`` with one of the registered models and print the top emotions.

    Args:
        text: Raw input sentence.
        model_selected: ``name`` of an entry in ``MODELS``. Defaults to the
            model that was previously hard-coded in this function.

    Raises:
        ValueError: If ``model_selected`` does not match any entry in ``MODELS``.
    """
    # Resolve the display name to a file path, failing with a readable message
    # rather than an IndexError when the name is unknown.
    model_path = next(
        (model["path"] for model in MODELS if model["name"] == model_selected),
        None,
    )
    if model_path is None:
        available = ", ".join(model["name"] for model in MODELS)
        raise ValueError(
            f"Unknown model {model_selected!r}. Available models: {available}"
        )

    print(f'Loading model from path: {model_path}')
    if model_path.endswith(".joblib"):
        # joblib models receive the raw text; no tokenizing/padding is applied here.
        model_loaded_joblib = load_model_joblib(model_path)
        print(f'Model loaded: {model_loaded_joblib}')
        result = predict_sentiment_joblib(text, model_loaded_joblib)
    else:
        # Keras models need the text converted to a padded id sequence first.
        model_loaded = load_model(model_path)
        print(f'Model loaded: {model_loaded}')
        text = preprocess_input(text)
        # Predict the sentiment
        result = predict_sentiment(text, model_loaded)

    # Display the sentiment analysis results
    for emotion in result:
        print(f"Emoji: {emotion[0]}, Emotion: {emotion[1]}, Probability: {emotion[2]}")
        


In [4]:
# Load the plain RNN checkpoint into the notebook-level name `model_loaded`.
# main() resolves its own path and loads its own model into local variables,
# so it does not read the values assigned here.
model_path='models/rnn.h5'
model_loaded = load_model(model_path)

In [5]:
# Run the app
text = "I am so happy today"
if __name__ == "__main__":
    # ">=" (not ">") so that a text of exactly 18 characters is accepted,
    # which is what the message below promises.
    if len(text) >= 18:
        main(text)
    else:
        print("Text must be at least 18 characters long")

Loading model from path: models/cnn_model_glove.keras


TypeError: Error when deserializing class 'InputLayer' using config={'batch_shape': [None, 19], 'dtype': 'float32', 'sparse': False, 'name': 'input_layer_2'}.

Exception encountered: Unrecognized keyword arguments: ['batch_shape']