In [None]:
!pip install tensorflow gradio nltk scikit-learn pandas h5py


Collecting gradio
  Downloading gradio-5.15.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.8-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.7.0 (from gradio)
  Downloading gradio_client-1.7.0-py3-none-any.whl.metadata (7.1 kB)
Collecting huggingface-hub>=0.28.1 (from gradio)
  Downloading huggingface_hub-0.28.1-py3-none-any.whl.metadata (13 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Co

In [None]:
import pickle
import random
import numpy as np
import pandas as pd
import nltk
from nltk.corpus import movie_reviews
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, LSTM, Bidirectional, Input, Lambda
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Layer
import gradio as gr


In [None]:
# Download dataset
nltk.download('movie_reviews')

# Load data: one (raw review text, category) pair per corpus file
documents = [(movie_reviews.raw(fileid), category)
             for category in movie_reviews.categories()
             for fileid in movie_reviews.fileids(category)]

# Shuffle with a dedicated, seeded generator. An unseeded shuffle produces a
# different row order on every run, which makes any fixed-seed train/test
# split performed on this frame non-reproducible. Using a private Random
# instance also leaves the global `random` state untouched.
random.Random(42).shuffle(documents)

# Convert to DataFrame and encode the labels as integers (pos -> 1, neg -> 0)
df = pd.DataFrame(documents, columns=['review', 'sentiment'])
df['sentiment'] = df['sentiment'].map({'pos': 1, 'neg': 0})

print(df.head())  # Display dataset sample


[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Unzipping corpora/movie_reviews.zip.


                                              review  sentiment
0  georges polti once wrote a paper called " the ...          0
1   " well this is not mission : difficult , mr ....          1
2  note : some may consider portions of the follo...          0
3  on the basis of this film alone , i never woul...          1
4  have you ever been in an automobile accident w...          0


In [None]:
# Tokenization
y = df['sentiment'].values

# Pick the train/test rows BEFORE building the vocabulary so the tokenizer is
# fitted on training reviews only. Fitting on every review lets test-set text
# influence which words make it into the vocabulary (data leakage).
train_idx, test_idx = train_test_split(
    np.arange(len(df)), test_size=0.2, random_state=42
)

tokenizer = Tokenizer(num_words=20000)
tokenizer.fit_on_texts(df['review'].iloc[train_idx])

# Encode every review with the train-fitted vocabulary and pad to 200 tokens
sequences = tokenizer.texts_to_sequences(df['review'])
X = pad_sequences(sequences, maxlen=200)

# Save tokenizer so inference code can reuse exactly the same vocabulary
with open('tokenizer.pkl', 'wb') as f:
    pickle.dump(tokenizer, f)

print("Tokenizer saved!")

# Train-test split (80/20) using the row indices selected above
X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]


Tokenizer saved!


In [None]:
# CNN Model
# Embedding -> 1-D convolution -> global max pooling -> small dense head with
# a sigmoid output for binary sentiment.
# The sequence length is declared with an explicit Input layer; the
# `input_length` argument of Embedding is deprecated in current Keras.
cnn_model = Sequential([
    Input(shape=(200,)),
    Embedding(20000, 128),
    Conv1D(128, 5, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(10, activation='relu'),
    Dense(1, activation='sigmoid')
])

cnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train CNN Model
# NOTE: the test split doubles as validation data here, so the reported
# val_accuracy is not an independent hold-out estimate.
cnn_model.fit(X_train, y_train, epochs=5, batch_size=64, validation_data=(X_test, y_test))

# Save CNN Model
cnn_model.save("cnn_sentiment_model.h5")
print("CNN Model saved!")




Epoch 1/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 183ms/step - accuracy: 0.5172 - loss: 0.6924 - val_accuracy: 0.5375 - val_loss: 0.6857
Epoch 2/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 169ms/step - accuracy: 0.9125 - loss: 0.6367 - val_accuracy: 0.6800 - val_loss: 0.6774
Epoch 3/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 247ms/step - accuracy: 0.9997 - loss: 0.5561 - val_accuracy: 0.5775 - val_loss: 0.6629
Epoch 4/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 169ms/step - accuracy: 0.9997 - loss: 0.4125 - val_accuracy: 0.7375 - val_loss: 0.6089
Epoch 5/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 222ms/step - accuracy: 1.0000 - loss: 0.2077 - val_accuracy: 0.7475 - val_loss: 0.5586




CNN Model saved!


In [None]:
from tensorflow.keras.models import Model  # Import Model class

# Define Attention Layer
class AttentionLayer(Layer):
    """Parameter-free dot-product attention over a ``[query, value]`` pair.

    The layer owns no weights: it scores the query against the value,
    normalises the scores with a softmax, and returns the resulting weighted
    combination of the value vectors.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, inputs):
        """Return the attention context for ``inputs = [query, value]``.

        Both tensors are contracted along their third axis (``axes=[2, 2]``),
        so they are expected to be rank-3 with a matching last dimension.
        """
        q, v = inputs
        # Pairwise similarity between every query step and every value step
        similarity = K.batch_dot(q, v, axes=[2, 2])
        # Turn similarities into attention weights (K.softmax default axis)
        attention = K.softmax(similarity)
        # Weighted sum of the value vectors for each query step
        return K.batch_dot(attention, v)

# HAN Model (Functional API)
# Architecture as built below: embedding -> bidirectional LSTM -> dot-product
# self-attention (query and value are both the LSTM output) -> last timestep
# of the attended sequence -> sigmoid output.
inputs = Input(shape=(200,))  # Input layer fixes the sequence length
# `input_length` is deprecated on Embedding in current Keras; the Input layer
# above already declares the length, so it is simply omitted.
embedding = Embedding(20000, 128)(inputs)  # Embedding layer
lstm_out = Bidirectional(LSTM(64, return_sequences=True))(embedding)  # Bidirectional LSTM

# Apply Attention Layer
query = lstm_out
value = lstm_out
context_vector = AttentionLayer()([query, value])

# Use Lambda Layer for Slicing Instead of GetItem
context_vector = Lambda(lambda x: x[:, -1, :])(context_vector)  # Slice last timestep for final context

# Fully Connected Layer
dense_out = Dense(1, activation='sigmoid')(context_vector)  # Output layer

# Compile the Model using Keras Functional API
han_model = Model(inputs, dense_out)  # Define the model

han_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the HAN model
# NOTE: the test split doubles as validation data here, so the reported
# val_accuracy is not an independent hold-out estimate.
han_model.fit(X_train, y_train, epochs=5, batch_size=64, validation_data=(X_test, y_test))

# Save the HAN Model
han_model.save("han_sentiment_model.h5")
print("HAN Model saved!")


Epoch 1/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 726ms/step - accuracy: 0.5079 - loss: 0.6920 - val_accuracy: 0.7100 - val_loss: 0.6723
Epoch 2/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 674ms/step - accuracy: 0.7342 - loss: 0.5791 - val_accuracy: 0.7400 - val_loss: 0.5246
Epoch 3/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 658ms/step - accuracy: 0.8996 - loss: 0.3179 - val_accuracy: 0.7450 - val_loss: 0.5875
Epoch 4/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 649ms/step - accuracy: 0.9594 - loss: 0.1280 - val_accuracy: 0.7675 - val_loss: 0.5197
Epoch 5/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 699ms/step - accuracy: 0.9904 - loss: 0.0524 - val_accuracy: 0.7875 - val_loss: 0.5459




HAN Model saved!


In [None]:
# Load models
# Restore both trained networks from the HDF5 files written during training.
# The HAN file contains the custom AttentionLayer, so its class is supplied
# through `custom_objects`.
cnn_model = load_model("cnn_sentiment_model.h5")
han_model = load_model(
    "han_sentiment_model.h5",
    custom_objects={"AttentionLayer": AttentionLayer},
)

# Load tokenizer
# NOTE: pickle.load executes arbitrary code from the file; only unpickle a
# tokenizer.pkl that this notebook (or another trusted source) produced.
with open('tokenizer.pkl', 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

print("Models and tokenizer loaded successfully!")

# Function to preprocess text
def preprocess_text(text, tokenizer, max_len=200):
    """Encode a single raw string as a padded batch of token ids.

    Args:
        text: The string to encode.
        tokenizer: Fitted tokenizer exposing ``texts_to_sequences``.
        max_len: Length passed to ``pad_sequences`` as ``maxlen``.

    Returns:
        The result of ``pad_sequences`` for a one-element batch containing
        the encoded ``text``.
    """
    token_ids = tokenizer.texts_to_sequences([text])  # batch of one
    padded_batch = pad_sequences(token_ids, maxlen=max_len)
    return padded_batch

# Test prediction
# Score one short sentence with both reloaded models as a quick sanity check.
sample_text = "This movie was fantastic!"
processed_input = preprocess_text(sample_text, tokenizer)

# predict() is given a batch of one; [0][0] pulls out the single probability
cnn_pred = cnn_model.predict(processed_input)[0][0]
han_pred = han_model.predict(processed_input)[0][0]

# Scores of 0.5 and above are reported as positive
cnn_label = 'Positive' if cnn_pred >= 0.5 else 'Negative'
han_label = 'Positive' if han_pred >= 0.5 else 'Negative'

print(f"CNN Prediction: {cnn_pred:.4f} ({cnn_label})")
print(f"HAN Prediction: {han_pred:.4f} ({han_label})")




Models and tokenizer loaded successfully!
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 295ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 501ms/step
CNN Prediction: 0.3317 (Negative)
HAN Prediction: 0.0010 (Negative)


In [None]:
def preprocess_input(text, tokenizer, max_length=200):
    """Encode a single raw string as a padded batch of token ids.

    This is the same operation as ``preprocess_text`` defined earlier in the
    notebook, so it delegates to that helper instead of repeating the
    tokenise-and-pad logic; the two can no longer drift apart.

    Args:
        text: The string to encode.
        tokenizer: Fitted tokenizer exposing ``texts_to_sequences``.
        max_length: Target sequence length (forwarded as ``max_len``).

    Returns:
        The padded one-row batch produced by ``preprocess_text``.
    """
    return preprocess_text(text, tokenizer, max_len=max_length)

def predict_sentiment(text, threshold=0.5):
    """Print the CNN and HAN sentiment scores and labels for one text.

    Args:
        text: Raw review text to classify.
        threshold: Scores greater than or equal to this value are reported
            as positive; lower scores as negative.

    Returns:
        None. Results are written to stdout only.
    """
    # Preprocess the input text (uses the module-level tokenizer)
    preprocessed_text = preprocess_input(text, tokenizer)

    # predict() returns a 2-D array for the one-row batch. Extract the scalar
    # once so the threshold test below compares plain floats rather than
    # relying on the truth value of a single-element array.
    scores = {
        "CNN": float(cnn_model.predict(preprocessed_text)[0][0]),
        "HAN": float(han_model.predict(preprocessed_text)[0][0]),
    }

    # Print raw predictions for CNN and HAN
    for model_name, score in scores.items():
        print(f"{model_name} Model Prediction: {score:.4f}")

    # Compare predictions to threshold
    for model_name, score in scores.items():
        label = "Positive" if score >= threshold else "Negative"
        print(f"{model_name} Model: {label} Sentiment")

# Test the prediction with a sample input
# Smoke test on a short, clearly positive sentence. predict_sentiment prints
# each model's raw score and label to stdout and returns nothing.
test_text = "The movie was amazing!"
predict_sentiment(test_text)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
CNN Model Prediction: 0.4655
HAN Model Prediction: 0.0008
CNN Model: Negative Sentiment
HAN Model: Negative Sentiment


In [None]:
# Gradio Interface
def predict_sentiment(text):
    """Classify ``text`` with both models and return a two-line report.

    NOTE: this definition rebinds the name ``predict_sentiment`` that an
    earlier cell used for a console helper with a different signature.

    Args:
        text: Raw text typed into the web UI.

    Returns:
        A string with one line per model (label and probability), or a short
        explanatory message when the input cannot be scored meaningfully.
    """
    # Guard: an empty or whitespace-only submission has nothing to classify.
    if text is None or not text.strip():
        return "Please enter some text to analyze."

    processed_input = preprocess_text(text, tokenizer)

    # Index 0 is the padding value, so an all-zero row means none of the
    # words are in the tokenizer vocabulary. The models would then score pure
    # padding and still return a confident-looking but meaningless label.
    if not processed_input.any():
        return ("None of the words in the input are in the model vocabulary; "
                "unable to predict sentiment.")

    # predict() is given a batch of one; [0][0] pulls out the single probability
    cnn_pred = cnn_model.predict(processed_input)[0][0]
    han_pred = han_model.predict(processed_input)[0][0]

    cnn_result = "Positive" if cnn_pred >= 0.5 else "Negative"
    han_result = "Positive" if han_pred >= 0.5 else "Negative"

    return f"CNN Model Prediction: {cnn_result} ({cnn_pred:.4f})\nHAN Model Prediction: {han_result} ({han_pred:.4f})"

# Launch Gradio
# Wrap predict_sentiment in a minimal web UI: one text input, one text output.
# share=True asks Gradio to serve the app through a temporary public URL, so
# anyone holding the link can submit text to the models while it is running.
interface = gr.Interface(fn=predict_sentiment, inputs="text", outputs="text")
interface.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://fd1db441677fc090d4.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


