## Predictor Setup

In [7]:
import re
import spacy
import tensorflow as tf
from nltk.corpus import stopwords
from tensorflow.keras.models import load_model


In [8]:
# spaCy English pipeline, used by process_and_filter_non_entities
nlp = spacy.load("en_core_web_sm")

# Extra words dropped on top of the NLTK English list
add_stopwords = {
    "movie", "film", "one", "the", "scene",
    "this", "story", "would", "really", "and", "also",
}

# Final stopword set: NLTK English defaults, with the negations and "again"
# taken out (so they stay in the text), plus the extra words above.
stop_words = (
    set(stopwords.words("english")) - {"not", "nor", "no", "again"}
) | add_stopwords

In [9]:
# Preprocessing functions
def remove_html(text):
    """Strip simple HTML tags and expand "n't" contractions.

    Attribute-less tags such as ``<br />``, ``<br/>``, ``<br>``, ``<p>`` and
    ``</p>`` are each replaced by a single space. Text that merely contains
    angle brackets (e.g. ``"I <3 this"`` or ``"a < b"``) is left untouched
    because the tag name must directly follow ``<`` and ``>`` must directly
    follow the name (optionally after whitespace and ``/``).

    Args:
        text: Raw review string.

    Returns:
        The string with tags replaced by spaces and every literal ``n't``
        replaced by ``" not"`` (so ``"isn't"`` becomes ``"is not"``).
    """
    # Superset of the former "<\w+ />" pattern: the space before "/" and the
    # "/" itself are now optional, and closing tags ("</p>") are matched too.
    text = re.sub(r"</?\w+\s*/?>", " ", text)
    # Purely textual expansion, applied before lowercasing: only the
    # lowercase, straight-apostrophe form "n't" is rewritten.
    text = re.sub("n't", " not", text)
    return text

def clean_text(text):
    """Lowercase the text and reduce it to space-separated letter runs.

    The substitutions run in order on the lowercased string:
      1. standalone single word characters become a space,
      2. every character outside ``a-z`` becomes a space,
      3. runs of whitespace collapse to one space.

    The result is not stripped, so it may start or end with a single space.
    """
    substitutions = (
        (r"\b\w{1,1}\b", " "),  # remove single characters
        (r"[^a-z]", " "),       # keep letters only
        (r"\s+", " "),          # collapse whitespace
    )
    cleaned = text.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned

def process_and_filter_non_entities(text):
    """Lemmatize ``text`` with spaCy, dropping PERSON entities and stopwords.

    Tokens that spaCy tags with entity type ``PERSON`` are skipped. Of the
    remaining tokens, the lemma is kept unless its lowercase form is in the
    module-level ``stop_words`` set. Kept lemmas are joined with single
    spaces in their original order.
    """
    kept_lemmas = []
    for token in nlp(text):
        if token.ent_type_ == "PERSON":
            continue
        lemma = token.lemma_
        if lemma.lower() in stop_words:
            continue
        kept_lemmas.append(lemma)
    return " ".join(kept_lemmas)

def preprocess_text(text):
    """Run the full preprocessing pipeline on one review.

    Steps, in order: ``remove_html`` (tag removal and "n't" expansion),
    ``process_and_filter_non_entities`` (spaCy lemmas without PERSON
    entities or stopwords), then ``clean_text`` (lowercase, letters only,
    collapsed whitespace).
    """
    without_html = remove_html(text)
    return clean_text(process_and_filter_non_entities(without_html))

def preprocess_text_inference(text):
    """Lightweight preprocessing used at prediction time.

    Applies ``remove_html`` followed by ``clean_text`` only; unlike
    ``preprocess_text`` it does not run the spaCy lemmatization / stopword
    filtering step.
    """
    return clean_text(remove_html(text))

In [10]:
# Prediction function
def predict_sentiment(model, text, min_words=20):
    """Score one review, print the verdict, and return the probability.

    Args:
        model: Object whose ``predict`` accepts a one-element string tensor
            and returns logits indexable as ``logits[0]`` (presumably a
            Keras model with a TextVectorization front end; confirm
            against the saved model).
        text: Raw review string.
        min_words: Reviews with fewer preprocessed words than this are
            repeated until they exceed it. Defaults to 20.

    Returns:
        The sigmoid of the model's first logit as a Python float. Values
        above 0.5 are reported as positive.

    Raises:
        ValueError: If preprocessing leaves no words (empty input, or input
            made only of punctuation, digits, or single characters).
    """
    # Preprocess the input
    processed_text = preprocess_text_inference(text)

    word_count = len(processed_text.split())
    if word_count == 0:
        # Previously this fell through to `20 // 0` and raised an opaque
        # ZeroDivisionError; there is nothing meaningful to score here.
        raise ValueError(
            "Review contains no usable words after preprocessing; "
            "please enter some text."
        )

    # If text is very short, repeat it to mimic a longer review
    if word_count < min_words:
        repeat_times = (min_words // word_count) + 1  # repeat enough times
        processed_text = " ".join([processed_text] * repeat_times)

    # Convert to a batch-of-one string tensor for the model
    text_tensor = tf.constant([processed_text])

    # Get model logits
    logits = model.predict(text_tensor, verbose=0)

    # Convert the first logit to a probability
    prob = tf.nn.sigmoid(logits[0]).numpy().item()

    # Determine sentiment
    sentiment = "Positive :)" if prob > 0.5 else "Negative :("

    # Print result
    print(f"Review preview: {text[:120]}...")
    print(f"Sentiment: {sentiment} (score={prob:.4f})")

    return prob

In [11]:
# Load the pre-trained model for inference only.
# compile=False skips restoring the optimizer and training configuration,
# which predict() does not need. NOTE(review): this presumably also removes
# the load_own_variables warning seen when optimizer state is restored;
# confirm on a fresh run. Call model.compile(...) before any
# fit()/evaluate().
model_path = "./models/model_NN_final.keras"
model = load_model(model_path, compile=False)

  saveable.load_own_variables(weights_store.get(inner_path))


## Enter your review here!

In [12]:
# Text to classify; edit this string, then run the prediction cell below.
review = "the movie is funny!"

## Enjoy sentiment prediction!

In [13]:
# Prints a preview of the review and its predicted label; the returned
# probability is shown as the cell's output value.
predict_sentiment(model, review)

Review preview: the movie is funny!...
Sentiment: Positive :) (score=0.8704)


0.8703766465187073