<a href="https://colab.research.google.com/github/Rakshithts/myprojects/blob/main/sentiment_analysis_using_lstm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install ipywidgets


In [None]:
import numpy as np
import re
import os
import ipywidgets as widgets
from IPython.display import display, clear_output
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.datasets import imdb

# Hyperparameters shared by the data loader, tokenizer, padding and model
max_features = 10000   # vocabulary size cap (passed as num_words)
maxlen = 100           # length every review is padded/truncated to
embedding_dim = 100    # width of each word vector

# Fetch the IMDB reviews as integer sequences, plus the word -> index table
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
word_index = imdb.get_word_index()

# Invert the table so an index can be mapped back to its word
reverse_word_index = dict(zip(word_index.values(), word_index.keys()))

# Decode to raw text
def decode_review(encoded):
    """Turn a sequence of IMDB word ids back into a space-separated string.

    Each id is shifted down by 3 before the lookup in ``reverse_word_index``
    (presumably to undo the index offset applied by ``imdb.load_data``);
    ids with no entry after the shift are rendered as '?'.
    """
    words = (reverse_word_index.get(token_id - 3, '?') for token_id in encoded)
    return ' '.join(words)

# Decode both splits through decode_review so the id -> word lookup lives in
# exactly one place instead of being repeated inline for each split.
train_text = [decode_review(seq) for seq in x_train]
test_text = [decode_review(seq) for seq in x_test]

# Clean text
def clean_text(text):
    """Lower-case *text* and blank out everything but letters and apostrophes.

    Every character that is not an ASCII letter or an apostrophe is replaced
    by a single space (one space per character, so runs are not collapsed).
    """
    return re.sub(r"[^a-zA-Z']", " ", text.lower())

# Normalise both splits with clean_text
train_text = list(map(clean_text, train_text))
test_text = list(map(clean_text, test_text))

# Fit the vocabulary on the training split only, then convert both splits
# to integer sequences and pad/truncate them to maxlen.
tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(train_text)
x_train_seq, x_test_seq = (
    tokenizer.texts_to_sequences(split) for split in (train_text, test_text)
)
x_train_pad, x_test_pad = (
    pad_sequences(seqs, maxlen=maxlen) for seqs in (x_train_seq, x_test_seq)
)

# Parse the GloVe text file into a word -> vector lookup.
# Each line is split on whitespace: first field is the word, the rest are
# the vector components.
embedding_index = {}
with open("/content/glove.6B.100d.txt", encoding='utf-8') as glove_file:
    for row in glove_file:
        fields = row.split()
        embedding_index[fields[0]] = np.asarray(fields[1:], dtype='float32')

# Build the embedding matrix: one row per vocabulary index. Rows stay all-zero
# for indices whose word has no GloVe vector.
embedding_matrix = np.zeros((max_features, embedding_dim))
for token, row in tokenizer.word_index.items():
    # Skip words whose index falls outside the matrix
    if row >= max_features:
        continue
    vector = embedding_index.get(token)
    if vector is None:
        continue
    embedding_matrix[row] = vector

# Build model
# The pretrained matrix is supplied through an initializer instead of the
# legacy `weights=[...]` argument, and the deprecated `input_length` argument
# is dropped: the sequence length is inferred from the data at fit time.
from tensorflow.keras.initializers import Constant

model = Sequential()
model.add(Embedding(
    max_features,
    embedding_dim,
    embeddings_initializer=Constant(embedding_matrix),
    trainable=False,  # keep the GloVe vectors frozen during training
))
model.add(LSTM(64, dropout=0.2, recurrent_dropout=0.2))
# Single sigmoid unit: output is read as a score in [0, 1] for the binary label
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Fit on the padded training reviews, holding out 20% for validation
model.fit(
    x_train_pad,
    y_train,
    epochs=4,
    batch_size=64,
    validation_split=0.2,
)

# Score the held-out test split; the first returned value (loss) is discarded
_, acc = model.evaluate(x_test_pad, y_test)
print(f"Model Test Accuracy: {acc:.3f}")


In [None]:
# Prediction helpers
def preprocess_input(text):
    """Convert one raw review string into a padded batch of size one.

    The text goes through ``clean_text``, is indexed with the fitted
    ``tokenizer`` and is then padded/truncated to ``maxlen``.
    """
    cleaned = clean_text(text)
    return pad_sequences(tokenizer.texts_to_sequences([cleaned]), maxlen=maxlen)

def predict_sentiment(text):
    """Classify a raw review string with the trained model.

    Args:
        text: The review as typed by the user.

    Returns:
        A ``(label, score)`` tuple. ``score`` is the model's sigmoid output
        as a plain float; ``label`` is "Positive 😊" when ``score >= 0.5``
        and "Negative 😠" otherwise.
    """
    padded_input = preprocess_input(text)
    # verbose=0 keeps Keras' per-call progress bar out of the notebook output;
    # float() hands callers a plain Python number instead of a NumPy scalar.
    pred = float(model.predict(padded_input, verbose=0)[0][0])
    sentiment = "Positive 😊" if pred >= 0.5 else "Negative 😠"
    return sentiment, pred

# UI Widgets
# Button that triggers a prediction, and the area its result is printed into
button = widgets.Button(button_style='info', description="Predict Sentiment")
output_box = widgets.Output()

# Full-width text area where the user types the review
input_box = widgets.Textarea(
    description='Review:',
    placeholder='Type your movie review here...',
    value='',
    style=dict(description_width='initial'),
    layout=widgets.Layout(width='100%', height='100px'),
)

# Button click handler
def on_button_clicked(b):
    """Handle a click on the predict button.

    Reads the review from ``input_box``, classifies it with
    ``predict_sentiment`` and prints the result into ``output_box``,
    replacing whatever was shown there before.

    Args:
        b: The button instance passed in by ``on_click``; not used.
    """
    with output_box:
        clear_output()
        user_text = input_box.value
        # A blank review turns into an all-padding sequence, so any score the
        # model produces for it is meaningless; ask for input instead.
        if not user_text.strip():
            print("Please enter a review first.")
            return
        sentiment, score = predict_sentiment(user_text)
        # predict_sentiment returns the score that is thresholded at 0.5 to
        # pick the label. Report how sure the model is of the label it chose,
        # so a strongly negative review does not show up as "Confidence: 0.03".
        conf = score if score >= 0.5 else 1 - score
        print(f"Review: {user_text}")
        print(f"Sentiment: {sentiment} (Confidence: {conf:.2f})")

# Run on_button_clicked whenever the predict button is pressed
button.on_click(on_button_clicked)

# Display interface: text area, then the button, then the result area
display(input_box, button, output_box)
