# Predictions

## Environment setup

In [1]:
# Connect with Google Drive: mount it at /content/drive so that files stored
# there (BASE_PATH lives under this mount point) can be read like local files.
# This import only works inside a Google Colab runtime.

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Project locations on the mounted Google Drive

BASE_PATH = "/content/drive/MyDrive/TEC/AI/ProyectoBenji"

# Every entry is a sub-folder of BASE_PATH, keyed by the name later cells look up
PATHS = {
    key: f"{BASE_PATH}/{subfolder}"
    for key, subfolder in (
        ("CLEAN_DATA_PATH", "data/clean"),
        ("PROCESSED_DATA_PATH", "data/processed"),
        ("MODELS_PATH", "models"),
    )
}

In [3]:
# Install Gradio (used by the demo UI cell at the end of the notebook).
# %pip installs into the environment of the running kernel, whereas !pip runs
# in a subshell and may target a different Python.
# NOTE(review): consider pinning the version (gradio==X.Y.Z) for reproducibility.
%pip install -q gradio

In [4]:
# Data manipulation
import pandas as pd
import numpy as np
import re

# SciKit Learn
from sklearn.preprocessing import LabelEncoder

# TensorFlow
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Utilities
import pickle
import warnings

# Silence every Python warning for the rest of the session to keep cell output short.
# NOTE(review): this is a blanket filter, so deprecation and runtime warnings
# from any library are hidden as well.
warnings.filterwarnings('ignore')

## Predictions

In [6]:
print("=" * 80)
print("LOADING LSTM SENTIMENT CLASSIFIER")
print("=" * 80)

# Single quotes inside the f-string: reusing the outer double quotes for the
# dictionary key is a SyntaxError on Python versions older than 3.12.
model_path = f"{PATHS['MODELS_PATH']}/finetuned_model_best.keras"

# Fine-tuned Keras model used for inference
model = tf.keras.models.load_model(model_path)

# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load artifacts that were produced by this project.

# Vocabulary: token -> integer index
with open(f'{PATHS["PROCESSED_DATA_PATH"]}/word_to_index.pkl', 'rb') as f:
    word_to_index = pickle.load(f)

# Label encoder whose classes_ give the name of each model output
with open(f'{PATHS["PROCESSED_DATA_PATH"]}/label_encoder.pkl', 'rb') as f:
    label_encoder = pickle.load(f)

# Length every tokenized review is padded/truncated to before prediction.
# NOTE(review): presumably has to match the sequence length used in training - confirm.
MAX_SEQUENCE_LENGTH = 200

print("Model loaded successfully!")
print(f"Vocabulary size: {len(word_to_index):,}")
print(f"Classes: {label_encoder.classes_}")

LOADING LSTM SENTIMENT CLASSIFIER
Model loaded successfully!
Vocabulary size: 20,000
Classes: ['negative' 'neutral' 'positive']


In [7]:
def predict_sentiment(text):
    """
    Predict the sentiment of a text review.

    The review is lower-cased, stripped of emojis and punctuation, split on
    whitespace, mapped to vocabulary indices (unknown words map to 1), padded
    to MAX_SEQUENCE_LENGTH and passed to the loaded model.

    Args:
        text (str): Review text. ``None`` is treated as an empty review and
            non-string values are converted with ``str()``.

    Returns:
        dict: Prediction results with
            - 'sentiment' (str): class with the highest probability
            - 'confidence' (float): probability of that class, in percent
            - 'probabilities' (dict): percent probability for every class in
              ``label_encoder.classes_``, keyed by class name
    """
    # Treat a missing review as empty instead of failing on .lower()
    text = "" if text is None else str(text)

    # Clean text (basic cleaning - adjust to match your preprocessing)
    text_clean = text.lower()
    # NOTE(review): the last range (U+24C2-U+1F251) is very wide and also
    # covers non-emoji characters such as CJK ideographs. It is left untouched
    # here because inference presumably has to mirror the cleaning used in
    # training - confirm before narrowing it.
    text_clean = re.sub(r'[\U0001F600-\U0001F64F\U0001F300-\U0001F5FF\U0001F680-\U0001F6FF\U0001F1E0-\U0001F1FF\U00002702-\U000027B0\U000024C2-\U0001F251]+', '', text_clean) # Remove emojis
    text_clean = re.sub(r'[^\w\s]', '', text_clean)  # Remove punctuation
    text_clean = re.sub(r'\s+', ' ', text_clean).strip()  # Remove extra spaces

    # Tokenize
    tokens = text_clean.split()

    # Convert to sequence
    sequence = [word_to_index.get(token, 1) for token in tokens]  # 1 = <OOV>

    # Pad sequence (an empty review becomes an all-zero sequence)
    padded = pad_sequences([sequence], maxlen=MAX_SEQUENCE_LENGTH, padding='post', value=0)

    # Predict: take the single row of class scores for our one-review batch
    prediction = model.predict(padded, verbose=0)[0]

    # Name every score after the encoder's classes instead of hard-coded
    # positions, so labels stay correct whatever order the encoder uses.
    # Values are converted to plain Python types so they serialize cleanly.
    class_names = [str(name) for name in label_encoder.classes_]
    probabilities = {
        name: float(score * 100)
        for name, score in zip(class_names, prediction)
    }

    # Get predicted class and confidence
    predicted_class_idx = int(np.argmax(prediction))
    predicted_sentiment = class_names[predicted_class_idx]

    # Return results
    return {
        'sentiment': predicted_sentiment,
        'confidence': probabilities[predicted_sentiment],
        'probabilities': probabilities
    }

In [8]:
print("\n" + "=" * 80)
print("TESTING WITH EXAMPLES")
print("=" * 80)

# Hand-written reviews used as a quick smoke test of predict_sentiment
test_reviews = [
    "This restaurant is absolutely amazing! The food was delicious and service was perfect.",
    "Terrible experience. The food was cold and the staff was rude. Never coming back!",
    "It was okay, nothing special. Average food and service.",
    "I love this place! Best pizza in town. Highly recommended!",
    "Worst meal ever. Disgusting and overpriced."
]

# Maximum number of characters of each review echoed in the report
preview_limit = 80

for i, review in enumerate(test_reviews, 1):
    result = predict_sentiment(review)

    # Append the ellipsis only when the review really was cut off, so short
    # reviews are not shown as if text were missing.
    if len(review) > preview_limit:
        preview = review[:preview_limit] + "..."
    else:
        preview = review

    print(f"\nReview {i}:")
    print(f"   Text: {preview}")
    print(f"   Sentiment: {result['sentiment'].upper()}")
    print(f"   Confidence: {result['confidence']:.1f}%")
    print("   Probabilities:")
    print(f"      Negative: {result['probabilities']['negative']:.1f}%")
    print(f"      Neutral:  {result['probabilities']['neutral']:.1f}%")
    print(f"      Positive: {result['probabilities']['positive']:.1f}%")


TESTING WITH EXAMPLES

Review 1:
   Text: This restaurant is absolutely amazing! The food was delicious and service was pe...
   Sentiment: POSITIVE
   Confidence: 99.4%
   Probabilities:
      Negative: 0.0%
      Neutral:  0.6%
      Positive: 99.4%

Review 2:
   Text: Terrible experience. The food was cold and the staff was rude. Never coming back...
   Sentiment: NEGATIVE
   Confidence: 100.0%
   Probabilities:
      Negative: 100.0%
      Neutral:  0.0%
      Positive: 0.0%

Review 3:
   Text: It was okay, nothing special. Average food and service....
   Sentiment: NEUTRAL
   Confidence: 79.4%
   Probabilities:
      Negative: 19.7%
      Neutral:  79.4%
      Positive: 0.9%

Review 4:
   Text: I love this place! Best pizza in town. Highly recommended!...
   Sentiment: POSITIVE
   Confidence: 99.9%
   Probabilities:
      Negative: 0.0%
      Neutral:  0.1%
      Positive: 99.9%

Review 5:
   Text: Worst meal ever. Disgusting and overpriced....
   Sentiment: NEGATIVE
   Confidenc

In [None]:
import gradio as gr

def gradio_interface_fn(text):
    """
    Adapter between the Gradio UI and predict_sentiment.

    Args:
        text (str): Review typed by the user.

    Returns:
        dict | None: Class name -> probability in the 0-1 range, or ``None``
        when the input is empty or only whitespace.
    """
    # Nothing to classify: return None (Gradio shows an empty Label) rather
    # than reporting a sentiment for an all-padding sequence.
    if text is None or not text.strip():
        return None

    results = predict_sentiment(text)
    # Gradio expects probabilities as floats between 0 and 1 for the Label component
    probs = {k: v / 100 for k, v in results['probabilities'].items()}
    return probs

# Build the Gradio demo: one free-text input wired to a label output
review_box = gr.Textbox(lines=2, placeholder="Write your review here...")
sentiment_label = gr.Label(num_top_classes=3, label="Sentiment")

iface = gr.Interface(
    fn=gradio_interface_fn,
    inputs=review_box,
    outputs=sentiment_label,
    title="Sentiment Analysis LSTM",
    description="Enter a review to analyze its sentiment.",
)

# share=True publishes a temporary public URL; debug=True keeps this cell
# running so errors and logs stay visible (set debug=False to release it).
iface.launch(share=True, debug=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://d0a48c5601ce02b2fe.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
