In [60]:
import re
import string
import numpy as np
import tensorflow as tf
import pandas as pd
import pickle
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model
from tensorflow.keras.losses import binary_crossentropy
from transformers import DistilBertTokenizerFast
from transformers import TFDistilBertModel
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

In [61]:
import nltk
# Fetch the NLTK data this notebook relies on: the stop-word corpus read by
# stopwords.words("english") and the WordNet data behind WordNetLemmatizer.
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ree\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\ree\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [62]:
# Fixed sequence length: predict_bert pads/truncates the tokenizer output to
# this many tokens, and predict_lstm pads/truncates its index sequences to it.
# NOTE(review): presumably must equal the length used at training time — confirm.
MAX_LEN = 128

In [63]:
# Locations of the saved models and their tokenizers. These are relative
# paths, so they resolve against the kernel's current working directory.
LSTM_MODEL_PATH = "../models/lstm.keras"
LSTM_TOKENIZER_PATH = "../models/token/lstm_tokenizer.pkl"
BERT_MODEL_PATH = "../models/bert.keras"
BERT_TOKENIZER_PATH = "../models/token/bert_tokenizer"

In [64]:
# Emotion label names. predict_bert and predict_lstm zip this list with the
# model's probability vector, so the entry at position i names output index i;
# reordering it silently relabels every prediction.
# NOTE(review): looks like the GoEmotions label set (27 emotions + "neutral") —
# confirm the order against the label encoding used in training.
emo_list = [
    "admiration", "amusement", "anger", "annoyance", "approval", "caring",
    "confusion", "curiosity", "desire", "disappointment", "disapproval",
    "disgust", "embarrassment", "excitement", "fear", "gratitude", "grief",
    "joy", "love", "nervousness", "optimism", "pride", "realization", "relief",
    "remorse", "sadness", "surprise", "neutral"
]

In [65]:
# English stop words, held in a set for fast membership checks in clean(),
# and the lemmatizer clean() applies to every token it keeps.
stop_words = set(stopwords.words("english"))
lemmatizer = WordNetLemmatizer()

In [66]:
# Negation words that are taken out of the stop-word set so clean() keeps them
# (they can flip the meaning of a sentence).
# NOTE(review): clean() strips all punctuation before it checks stop words, so
# no token it sees can contain an apostrophe; the "n't" entry therefore cannot
# affect clean()'s filtering.
negations = {"not", "no", "never", "n't"}

In [67]:
# Rebind stop_words to a new set that no longer contains the negation words,
# so clean() leaves them in the text.
stop_words = stop_words - negations

In [68]:
def clean(text):
    """Normalise raw text into a space-separated string of lemmatized tokens.

    Steps, in order: lowercase; delete every character in the Unicode ranges
    listed below; delete URLs starting with ``http`` or ``www``; delete
    ``<...>`` tags; delete ASCII punctuation; split on whitespace; drop tokens
    found in ``stop_words``; lemmatize the remaining tokens.

    Args:
        text: The input string.

    Returns:
        The surviving tokens joined by single spaces ("" if none survive).

    NOTE(review): the range U+24C2..U+1F251 is very wide and also deletes many
    non-emoji characters (it spans the CJK blocks, for example).
    NOTE(review): punctuation is removed before the stop-word check, so a
    contraction such as "doesn't" becomes "doesnt" and no longer matches an
    apostrophe-bearing stop word.
    NOTE(review): presumably this must stay identical to the preprocessing
    used when the models were trained — confirm before changing either point.
    """
    symbol_ranges = (
        r"["
        "\U0001F600-\U0001F64F"
        "\U0001F300-\U0001F5FF"
        "\U0001F680-\U0001F6FF"
        "\U0001F1E0-\U0001F1FF"
        "\U00002700-\U000027BF"
        "\U000024C2-\U0001F251"
        "]+"
    )
    lowered = text.lower()
    without_symbols = re.sub(symbol_ranges, '', lowered)
    without_urls = re.sub(r"http\S+|www\S+", "", without_symbols)
    without_tags = re.sub(r"<.*?>", "", without_urls)

    punctuation_table = str.maketrans('', '', string.punctuation)
    tokens = without_tags.translate(punctuation_table).split()

    kept = []
    for token in tokens:
        if token in stop_words:
            continue
        kept.append(lemmatizer.lemmatize(token))
    return " ".join(kept)

In [69]:
def predict_bert(text, model, tokenizer, thresholds):
    """Score one text with the DistilBERT-based model and pick its labels.

    Args:
        text: The string to classify.
        model: Object whose ``predict`` accepts a dict with ``input_ids`` and
            ``attention_mask`` and returns a batch of probability vectors.
        tokenizer: Callable tokenizer; invoked with max-length padding and
            truncation to ``MAX_LEN`` and TensorFlow tensors as output.
        thresholds: Value(s) compared elementwise against the probabilities.
            NOTE(review): presumably one threshold per label — confirm.

    Returns:
        A list of ``(label, probability)`` tuples, in ``emo_list`` order, for
        every label whose probability is strictly above its threshold, with
        probabilities rounded to two decimals. If no label qualifies, a
        single ``("other", highest_probability)`` tuple is returned instead.
    """
    encoded = tokenizer(
        [text],
        padding='max_length',
        truncation=True,
        max_length=MAX_LEN,
        return_tensors="tf",
    )
    model_inputs = {
        "input_ids": encoded["input_ids"],
        "attention_mask": encoded["attention_mask"],
    }
    # predict() returns one row per input; only one text was passed in.
    probs = model.predict(model_inputs, verbose=0)[0]
    above_threshold = (probs > thresholds).astype(int)

    results = []
    for emo, prob, flag in zip(emo_list, probs, above_threshold):
        if flag == 1:
            results.append((emo, round(float(prob), 2)))

    if results:
        return results
    return [("other", round(float(max(probs)), 2))]

In [70]:
def predict_lstm(text, model, tokenizer, thresholds):
    """Score one text with the LSTM model and pick its labels.

    Args:
        text: The string to classify.
        model: Object whose ``predict`` accepts the padded index batch and
            returns a batch of probability vectors.
        tokenizer: Object providing ``texts_to_sequences``.
        thresholds: Value(s) compared elementwise against the probabilities.
            NOTE(review): presumably one threshold per label — confirm.

    Returns:
        A list of ``(label, probability)`` tuples, in ``emo_list`` order, for
        every label whose probability is strictly above its threshold, with
        probabilities rounded to two decimals. If no label qualifies, a
        single ``("other", highest_probability)`` tuple is returned instead.
    """
    token_ids = tokenizer.texts_to_sequences([text])
    # Pad and truncate at the end of the sequence so the batch is MAX_LEN wide.
    batch = pad_sequences(token_ids, maxlen=MAX_LEN, padding='post', truncating='post')
    # predict() returns one row per input; only one text was passed in.
    probs = model.predict(batch, verbose=0)[0]
    above_threshold = (probs > thresholds).astype(int)

    selected = [
        (label, round(float(score), 2))
        for label, score, flag in zip(emo_list, probs, above_threshold)
        if flag == 1
    ]
    if not selected:
        return [("other", round(float(max(probs)), 2))]
    return selected

In [71]:
# Load the saved DistilBERT-based Keras model. TFDistilBertModel is supplied
# through custom_objects so Keras can resolve that name while deserializing.
# NOTE(review): presumably the saved model embeds a TFDistilBertModel layer — confirm.
bert_model = tf.keras.models.load_model(BERT_MODEL_PATH, custom_objects={"TFDistilBertModel": TFDistilBertModel})
# Load the matching tokenizer from the local path in BERT_TOKENIZER_PATH.
bert_tokenizer = DistilBertTokenizerFast.from_pretrained(BERT_TOKENIZER_PATH)

In [72]:
def loss_fn(y_true, y_pred):
    """Return the Keras binary cross-entropy between ``y_true`` and ``y_pred``.

    Thin wrapper around ``binary_crossentropy``. It is passed to
    ``load_model`` under the name ``"loss_fn"`` when the LSTM model is loaded.
    NOTE(review): presumably the saved LSTM model references a custom loss of
    this name; whether training used exactly this loss is not visible here.
    """
    return binary_crossentropy(y_true, y_pred)

In [73]:
# Load the saved LSTM model, mapping the name 'loss_fn' to the function defined
# in this notebook so a reference to it in the saved file can be resolved.
lstm_model = load_model(LSTM_MODEL_PATH, custom_objects={'loss_fn': loss_fn})

In [74]:
# Restore the tokenizer object that predict_lstm calls texts_to_sequences on.
# SECURITY: pickle.load can execute arbitrary code while unpickling; only load
# this file if it comes from a trusted source.
with open(LSTM_TOKENIZER_PATH, 'rb') as f:
    lstm_tokenizer = pickle.load(f)

In [75]:
# Decision thresholds for each model. predict_bert / predict_lstm compare them
# elementwise against the probability vector (probs > thresholds).
# NOTE(review): presumably one threshold per label, in emo_list order — confirm
# the array shapes match the models' output size.
thresholds_bert = np.load("../models/thresholds/thresholds_bert.npy")
thresholds_lstm = np.load("../models/thresholds/thresholds_lstm.npy")

In [88]:
if __name__ == "__main__":
    # Interactive demo: read one line of text, normalise it with clean(), then
    # print the labels each model selects together with their probabilities.
    text = input("Enter your text: ")
    cleaned = clean(text)
    print("\nCleaned text:", cleaned)

    if not cleaned:
        # clean() returns "" for blank input and for input made up only of
        # stop words, punctuation, URLs or emoji. The models would still score
        # that empty string, and the result would be printed as if it described
        # the user's text, so report the situation instead.
        print("\nNothing left to classify after cleaning; skipping predictions.")
        # Keep both names defined so later cells can rely on them.
        bert_results = []
        lstm_results = []
    else:
        print("\nDistilBERT Predictions:")
        # NOTE(review): DistilBERT receives the stop-word-stripped, lemmatized
        # text rather than the raw input; presumably this mirrors how the model
        # was trained — confirm.
        bert_results = predict_bert(cleaned, bert_model, bert_tokenizer, thresholds_bert)
        for emo, prob in bert_results:
            print(f"  {emo:<15} : {prob}")

        print("\nLSTM Predictions:")
        lstm_results = predict_lstm(cleaned, lstm_model, lstm_tokenizer, thresholds_lstm)
        for emo, prob in lstm_results:
            print(f"  {emo:<15} : {prob}")

Enter your text:  he doesnt like it but im not so sure



Cleaned text: doesnt like im not sure

DistilBERT Predictions:
  confusion       : 0.63
  disapproval     : 0.24

LSTM Predictions:
  disapproval     : 0.43
  neutral         : 0.47
