In [18]:
%%writefile my_code.py
import os
import pickle
import re

import streamlit as st
import torch
import torch.nn as nn
from fastai.text.all import load_learner


# Page config

# Page-level settings come first: this is the first Streamlit call in the script.
st.set_page_config(
    layout="centered",
    page_title="Sentiment Analysis Project",
)

# Heading and one-line description shown at the top of the page.
st.title("Sentiment Analysis on IMDb Reviews")
st.write("Custom LSTM and AWD-LSTM ")


# Fixed thresholds for Neutral/Mixed: scores inside [THRESHOLD_LOW, THRESHOLD_HIGH]
# are reported as "Neutral / Mixed".
THRESHOLD_LOW = 0.35
THRESHOLD_HIGH = 0.65


def get_sentiment_label(prob, low=THRESHOLD_LOW, high=THRESHOLD_HIGH):
    """Map a score to a sentiment label using two cut-offs.

    Args:
        prob: Score to classify; compared directly against ``low`` and ``high``.
        low: Scores strictly below this value are "Negative".
        high: Scores strictly above this value are "Positive".

    Returns:
        "Negative", "Positive", or "Neutral / Mixed" (the band between the
        cut-offs, both bounds inclusive).
    """
    if prob < low:
        return "Negative"
    if prob > high:
        return "Positive"
    return "Neutral / Mixed"

# Strong words override

STRONG_NEGATIVE = ["bad", "terrible", "awful", "worst", "boring"]
STRONG_POSITIVE = ["good", "great", "fantastic", "excellent", "best"]


def _contains_whole_word(text, words):
    """Return True if any entry of *words* occurs in *text* as a whole word."""
    if not words:
        # An empty alternation would match at every word boundary.
        return False
    alternatives = "|".join(re.escape(word) for word in words)
    return re.search(rf"\b(?:{alternatives})\b", text) is not None


def override_strong_words(review, label):
    """Replace a model label when the review contains a strong sentiment word.

    Matching is case-insensitive and on whole words only, so "badge" or
    "goodbye" do not trigger an override. Negative words are checked first and
    therefore win when both kinds are present.

    Args:
        review: Raw review text.
        label: Label to return when no strong word is found.

    Returns:
        "Negative", "Positive", or the unchanged ``label``.
    """
    review_lower = review.lower()
    if _contains_whole_word(review_lower, STRONG_NEGATIVE):
        return "Negative"
    if _contains_whole_word(review_lower, STRONG_POSITIVE):
        return "Positive"
    return label


# Input

# Free-text box for the review; its current contents are bound to `review`.
review = st.text_area(
    label="Enter Movie Review",
    placeholder="Type a movie review here...",
    height=150,
)


# Custom LSTM

# Number of token ids every encoded review is padded or truncated to.
MAX_LEN = 150


class SentimentLSTM(nn.Module):
    """Embedding -> single-layer LSTM -> linear head with a sigmoid output.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_dim: Size of each embedding vector.
        hidden_dim: Size of the LSTM hidden state.
    """

    def __init__(self, vocab_size, embed_dim=128, hidden_dim=128):
        super().__init__()
        # Index 0 is the padding id.
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embed_dim,
            padding_idx=0,
        )
        self.lstm = nn.LSTM(
            input_size=embed_dim,
            hidden_size=hidden_dim,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=1)

    def forward(self, x):
        """Return the sigmoid score(s) for a batch of token-id sequences.

        All size-1 dimensions are squeezed away, so a batch of one yields a
        0-dim tensor.
        """
        embedded = self.embedding(x)
        _, (final_hidden, _) = self.lstm(embedded)
        # final_hidden[-1] is the last layer's final hidden state.
        logits = self.fc(final_hidden[-1])
        return torch.sigmoid(logits).squeeze()

@st.cache_resource
def _load_custom_lstm_cached():
    """Read the vocabulary and weights from disk and build the model.

    Cached by Streamlit so the files are read once; only called when both
    files exist, so a "missing files" result is never stored in the cache.
    """
    with open("word2idx.pkl", "rb") as f:
        # SECURITY: pickle can execute arbitrary code while loading; only use
        # a word2idx.pkl produced by a trusted training run.
        word2idx = pickle.load(f)
    model = SentimentLSTM(len(word2idx))
    model.load_state_dict(torch.load("custom_lstm_sentiment.pth", map_location="cpu"))
    model.eval()
    return model, word2idx


def load_custom_lstm():
    """Return ``(model, word2idx)`` for the custom LSTM, or ``(None, None)``.

    The file-existence check runs on every call instead of inside the cached
    loader, so the model becomes available as soon as the files appear rather
    than staying ``None`` until the cache is cleared.

    Returns:
        A tuple of the model in eval mode (weights mapped to CPU) and the
        unpickled vocabulary, or ``(None, None)`` after showing an error when
        either file is missing.
    """
    required = ("word2idx.pkl", "custom_lstm_sentiment.pth")
    if not all(os.path.exists(path) for path in required):
        st.error("Custom LSTM files missing!")
        return None, None
    return _load_custom_lstm_cached()

def encode_text(text, word2idx):
    """Turn a review into a ``(1, MAX_LEN)`` tensor of token ids.

    The text is lower-cased and split on whitespace. Words missing from
    ``word2idx`` map to id 1; the sequence is cut to ``MAX_LEN`` ids or
    right-padded with id 0 to reach that length.
    """
    ids = [word2idx.get(token, 1) for token in text.lower().split()][:MAX_LEN]
    # After truncation the shortfall is never negative; it is 0 for long inputs.
    ids.extend([0] * (MAX_LEN - len(ids)))
    return torch.tensor(ids).unsqueeze(0)


# AWD-LSTM / ULMFiT

@st.cache_resource
def _load_ulmfit_cached():
    """Load the exported fastai learner from disk (cached by Streamlit).

    Only called when the export file exists, so a "missing file" result is
    never stored in the cache.
    """
    return load_learner("awd_lstm_ulmfit_sentiment.pkl")


def load_ulmfit():
    """Return the exported AWD-LSTM learner, or ``None`` if the file is missing.

    The existence check runs on every call instead of inside the cached
    loader, so the learner becomes available as soon as the file appears
    rather than staying ``None`` until the cache is cleared.
    """
    if not os.path.exists("awd_lstm_ulmfit_sentiment.pkl"):
        return None
    return _load_ulmfit_cached()


# Prediction with ensemble

if st.button("Analyze Sentiment"):
    if not review.strip():
        st.warning("Please enter a review.")
    else:
        custom_model, word2idx = load_custom_lstm()
        ulmfit_model = load_ulmfit()

        # Custom LSTM: the sigmoid output is thresholded directly, so low
        # scores mean "Negative" and high scores mean "Positive".
        if custom_model is not None and word2idx:
            text_tensor = encode_text(review, word2idx)
            with torch.no_grad():
                custom_prob = custom_model(text_tensor).item()
            custom_label = get_sentiment_label(custom_prob)
            custom_label = override_strong_words(review, custom_label)
        else:
            custom_prob = None
            custom_label = "N/A"

        # AWD-LSTM
        if ulmfit_model is not None:
            pred, _, probs = ulmfit_model.predict(review)
            # Probability of the winning class; shown as the confidence.
            max_prob = max(probs).item()
            # The label needs the positive-class probability, not the maximum:
            # for two classes the maximum is always >= 0.5, so a confident
            # negative review would otherwise be labelled "Positive".
            # Assumes index 1 is the positive class (sorted labels such as
            # neg/pos or 0/1) - TODO confirm against ulmfit_model.dls.vocab.
            positive_prob = probs[1].item()
            awd_label = get_sentiment_label(positive_prob)
            awd_label = override_strong_words(review, awd_label)
        else:
            pred = None
            max_prob = None
            awd_label = "N/A"

        # Ensemble: Neutral/Mixed if predictions differ
        if custom_label != "N/A" and awd_label != "N/A":
            if custom_label != awd_label:
                ensemble_label = "Neutral / Mixed"
            else:
                ensemble_label = awd_label
        else:
            # Only one model (or none) is available: use whichever label exists.
            ensemble_label = awd_label if awd_label != "N/A" else custom_label

        # Display results
        st.subheader("Final Prediction (Ensemble)")
        st.write(f"Sentiment: {ensemble_label}")

        st.subheader("Model Comparison")
        # Compare against None so a score of exactly 0.0 is still displayed.
        if custom_prob is not None:
            st.write(f"Custom LSTM: {custom_label} (Confidence: {custom_prob:.2f})")
        else:
            st.write(f"Custom LSTM: {custom_label}")
        if max_prob is not None:
            st.write(f"AWD-LSTM: {awd_label} (Confidence: {max_prob:.2f})")
        else:
            st.write(f"AWD-LSTM: {awd_label}")

# Footer: a markdown horizontal rule followed by a small caption line.
st.markdown("---")
st.caption("Final Project | Sentiment Analysis using AWD-LSTM  and Custom LSTM ")


Overwriting my_code.py
