In [3]:
import pandas as pd
import numpy as np
import joblib
import re
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import ipywidgets as widgets
from IPython.display import display, clear_output
import matplotlib.pyplot as plt

# Load the fitted classifier and its TF-IDF vectorizer from the models/ directory.
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary code
# embedded in it -- only load artifacts you produced yourself or otherwise trust.
try:
    model = joblib.load('models/best_sentiment_model_logistic_regression.pkl')
    tfidf = joblib.load('models/tfidf_vectorizer.pkl')
    print("✅ Model and vectorizer loaded successfully!")
except Exception as exc:
    # Catch Exception instead of using a bare `except:` so that KeyboardInterrupt
    # and SystemExit still propagate, and surface the underlying cause rather than
    # hiding it behind a generic message.
    print("❌ Could not load model. Make sure you've run the training notebook first.")
    print(f"   Reason: {type(exc).__name__}: {exc}")

✅ Model and vectorizer loaded successfully!


In [5]:
def preprocess_text(text):
    """Normalize raw text into a space-joined string of lemmatized tokens.

    Steps, in order: lowercase; remove HTML-like tags; remove URLs; remove every
    character that is not an ASCII letter or whitespace; collapse whitespace and
    trim; tokenize; drop English stop words and tokens of two characters or
    fewer; lemmatize the remaining tokens.
    """
    cleaned = text.lower()
    # Tags and URLs are removed before the letters-only filter, which would
    # otherwise strip the '<', '>', ':' and '/' characters these patterns rely on.
    substitutions = (
        (r'<[^>]+>', ''),
        (r'http\S+|www\S+', ''),
        (r'[^a-zA-Z\s]', ''),
        (r'\s+', ' '),
    )
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    cleaned = cleaned.strip()

    words = word_tokenize(cleaned)
    english_stops = set(stopwords.words('english'))
    kept = []
    for word in words:
        if word in english_stops or len(word) <= 2:
            continue
        kept.append(word)

    wnl = WordNetLemmatizer()
    return ' '.join(map(wnl.lemmatize, kept))

In [6]:
def predict_sentiment(text):
    """Predict the sentiment of ``text`` using the loaded model and vectorizer.

    Parameters
    ----------
    text : str or None
        Raw input text. ``None``, empty, or whitespace-only input is not scored.

    Returns
    -------
    tuple
        Always a 4-tuple ``(prediction, confidence, prob_negative, prob_positive)``
        so callers can unpack it unconditionally.

        * Scored input: ``prediction`` is the label returned by ``model.predict``,
          ``confidence`` is the largest class probability, and the last two items
          are the probabilities of the ``'negative'`` and ``'positive'`` classes.
        * Blank input: ``("Please enter some text", 0.0, 0.0, 0.0)``.
    """
    # Blank input previously produced a 3-tuple with a string in the third slot,
    # which broke every caller that unpacks four values and formats them as
    # percentages. Keep the message in slot 0 but return numeric placeholders.
    if text is None or not text.strip():
        return "Please enter some text", 0.0, 0.0, 0.0

    processed_text = preprocess_text(text)
    text_tfidf = tfidf.transform([processed_text])
    prediction = model.predict(text_tfidf)[0]
    probabilities = model.predict_proba(text_tfidf)[0]
    confidence = max(probabilities)

    # predict_proba columns follow the order of model.classes_, so look each
    # probability up by its label instead of guessing positions.
    # Assumes the model was trained with the string labels 'negative'/'positive';
    # a label missing from the model is reported as probability 0.0.
    prob_by_label = dict(zip(model.classes_, probabilities))
    prob_negative = prob_by_label.get('negative', 0.0)
    prob_positive = prob_by_label.get('positive', 0.0)
    return prediction, confidence, prob_negative, prob_positive

# Smoke test: score one clearly negative review and show the raw result tuple.
test_result = predict_sentiment(text="This movie was terrible and boring!")
print("Prediction:", test_result)

Prediction: ('negative', np.float64(0.9981920416069534), np.float64(0.9981920416069534), np.float64(0.001807958393046624))


In [8]:
# Multi-line input box, pre-filled with a prompt string.
text_input = widgets.Textarea(
    description='Text:',
    placeholder='Type your review or text here',
    value='Enter your text here...',
    layout=widgets.Layout(height='100px', width='100%'),
)

# Button that triggers the analysis.
predict_button = widgets.Button(
    layout=widgets.Layout(width='200px'),
    button_style='primary',
    description='🔍 Analyze Sentiment',
)

# Container that captures the printed report and the charts.
output_area = widgets.Output()

def _print_sentiment_report(text, prediction, confidence, prob_neg, prob_pos):
    """Print the textual summary for a single prediction."""
    print("🎯 SENTIMENT ANALYSIS RESULTS")
    print("=" * 40)
    # Show at most the first 100 characters of the input to keep the report short.
    print(f"📝 Input Text: {text[:100]}{'...' if len(text) > 100 else ''}")
    print(f"️ Predicted Sentiment: {prediction.upper()}")
    print(f"🎲 Confidence: {confidence:.2%}")
    print("📊 Probability Breakdown:")
    print(f" 😊 Positive: {prob_pos:.2%}")
    print(f" 😞 Negative: {prob_neg:.2%}")


def _plot_sentiment_charts(prediction, confidence, prob_neg, prob_pos):
    """Draw a confidence bar and a negative/positive probability bar chart."""
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

    # Left panel: a single horizontal bar, red for negative and green otherwise.
    confidence_color = ['red' if prediction == 'negative' else 'green']
    ax1.barh(['Confidence'], [confidence], color=confidence_color)
    ax1.set_xlim(0, 1)
    ax1.set_title('Prediction Confidence')
    ax1.set_xlabel('Confidence Score')

    # Right panel: one bar per class, annotated with its percentage.
    sentiments = ['Negative', 'Positive']
    probabilities = [prob_neg, prob_pos]
    bars = ax2.bar(sentiments, probabilities, color=['lightcoral', 'lightgreen'])
    ax2.set_ylim(0, 1)
    ax2.set_title('Sentiment Probabilities')
    ax2.set_ylabel('Probability')
    for bar, prob in zip(bars, probabilities):
        height = bar.get_height()
        ax2.text(bar.get_x() + bar.get_width()/2., height + 0.01,
                 f'{prob:.2%}', ha='center', va='bottom')

    plt.tight_layout()
    plt.show()
    # Release the figure so repeated clicks do not accumulate open figures.
    plt.close(fig)


def on_predict_button_clicked(b):
    """Handle a click on the analyze button.

    Reads the text area, and if it holds real input, prints the sentiment report
    and draws the charts inside ``output_area``; otherwise prints a prompt asking
    for text.

    Parameters
    ----------
    b : object
        The clicked button, passed by the widget framework; unused here.
    """
    with output_area:
        clear_output()
        # Strip first: whitespace-only input is truthy, so without this it would
        # slip past the guard and reach predict_sentiment's blank-input branch,
        # whose return value cannot be unpacked and formatted below.
        text = (text_input.value or '').strip()
        if not text or text == 'Enter your text here...':
            print("⚠️ Please enter some text to analyze!")
            return

        prediction, confidence, prob_neg, prob_pos = predict_sentiment(text)
        _print_sentiment_report(text, prediction, confidence, prob_neg, prob_pos)
        _plot_sentiment_charts(prediction, confidence, prob_neg, prob_pos)

# Register the click handler, then render the title, input, button and output
# area stacked vertically.
predict_button.on_click(on_predict_button_clicked)

app_children = [
    widgets.HTML("<h2>🤖 Interactive Sentiment Analysis Tool</h2>"),
    text_input,
    predict_button,
    output_area,
]
display(widgets.VBox(app_children))

VBox(children=(HTML(value='<h2>🤖 Interactive Sentiment Analysis Tool</h2>'), Textarea(value='Enter your text h…

In [9]:
# Hand-written reviews covering clearly positive, clearly negative and mixed cases.
sample_texts = [
    "This movie was absolutely fantastic! Great acting and storyline.",
    "Terrible film, complete waste of time and money.",
    "The movie was okay, nothing special but not bad either.",
    "I loved the cinematography but the plot was confusing.",
    "Worst movie I've ever seen, don't watch it!",
    "Amazing performances by all actors, highly recommended!",
    "The movie had its moments but overall disappointing.",
    "Brilliant direction and excellent screenplay.",
    "Boring and predictable, fell asleep halfway through.",
    "A masterpiece of modern cinema, truly inspiring!"
]


def _shorten(text, limit):
    """Return ``text`` cut to ``limit`` characters plus '...' when it is longer."""
    return f"{text[:limit]}..." if len(text) > limit else text


print("🧪 BATCH TESTING WITH SAMPLE TEXTS")
print("=" * 50)

# Collect one record per sample, then build the DataFrame once at the end.
results = []
for i, text in enumerate(sample_texts, start=1):
    prediction, confidence, prob_neg, prob_pos = predict_sentiment(text)

    row = {
        'Text': _shorten(text, 50),
        'Prediction': prediction,
        'Confidence': f"{confidence:.2%}",
        'Positive_Prob': f"{prob_pos:.2%}",
        'Negative_Prob': f"{prob_neg:.2%}",
    }
    results.append(row)

    if prediction == 'positive':
        emoji = "😊"
    else:
        emoji = "😞"
    print(f"{i:2d}. {emoji} {prediction.upper()} ({confidence:.2%}) - {_shorten(text, 60)}")

results_df = pd.DataFrame(results)
print("\n📊 DETAILED RESULTS:")
display(results_df)

🧪 BATCH TESTING WITH SAMPLE TEXTS
 1. 😊 POSITIVE (90.48%) - This movie was absolutely fantastic! Great acting and storyl...
 2. 😞 NEGATIVE (99.82%) - Terrible film, complete waste of time and money.
 3. 😞 NEGATIVE (99.55%) - The movie was okay, nothing special but not bad either.
 4. 😊 POSITIVE (56.29%) - I loved the cinematography but the plot was confusing.
 5. 😞 NEGATIVE (97.71%) - Worst movie I've ever seen, don't watch it!
 6. 😊 POSITIVE (98.95%) - Amazing performances by all actors, highly recommended!
 7. 😞 NEGATIVE (91.62%) - The movie had its moments but overall disappointing.
 8. 😊 POSITIVE (97.05%) - Brilliant direction and excellent screenplay.
 9. 😞 NEGATIVE (98.92%) - Boring and predictable, fell asleep halfway through.
10. 😊 POSITIVE (92.83%) - A masterpiece of modern cinema, truly inspiring!
