In [7]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
import numpy as np

# Load CSV dataset
df = pd.read_csv('IMDB.csv')
# Prepare the data
reviews = df['review'].values
labels = df['sentiment'].values # Convert to float32 after mapping

# Tokenize and pad sequences
vocab_size = 10000
max_len = 300
embedding_dim = 32

tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(reviews)
tokenizer.fit_on_texts(labels)
rsequences = tokenizer.texts_to_sequences(reviews)
lsequences = tokenizer.texts_to_sequences(labels)
rpadded_sequences = pad_sequences(rsequences, maxlen=max_len)
lpadded_sequences = pad_sequences(lsequences, maxlen=max_len)



# Split data into training and test sets
split = int(0.8 * len(rpadded_sequences))
x_train, x_test = rpadded_sequences[:split], rpadded_sequences[split:]
split = int(0.8 * len(lpadded_sequences))
y_train, y_test = lpadded_sequences[:split], lpadded_sequences[split:]
print(np.ndim(rpadded_sequences),np.ndim(lpadded_sequences))
# Build the model
model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=max_len),
    LSTM(64, return_sequences=True),
    Dropout(0.5),
    LSTM(32),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(x_train, y_train, epochs=5, batch_size=64, validation_split=0.8)

# Evaluate the model
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")


2 2
Epoch 1/5




ValueError: Arguments `target` and `output` must have the same shape. Received: target.shape=(None, 300), output.shape=(None, 1)

In [6]:
model.save('sentiment.h5')



In [7]:
# Prediction function for new text input
from tensorflow.keras.preprocessing.text import Tokenizer
import numpy as np

def predict_sentiment(text):
    tokenizer = Tokenizer(num_words=vocab_size)
    tokenizer.fit_on_texts([text])  # Tokenize the new input text
    sequence = tokenizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(sequence, maxlen=max_length)
    
    prediction = model.predict(padded_sequence)
    sentiment = "Positive" if prediction > 0.5 else "Negative"
    return sentiment

# Example usage of prediction function
sample_text = "This movie was amazing! I really loved it."
print("Predicted Sentiment:", predict_sentiment(sample_text))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 380ms/step
Predicted Sentiment: Negative


In [9]:
# Function to predict sentiment
def predict_sentiment(review):
    # Preprocess the input
    seq = tokenizer.texts_to_sequences([review])
    padded = pad_sequences(seq, maxlen=max_len)
    # Get the prediction
    prediction = model.predict(padded)
    # Return the sentiment
    return "Positive" if prediction[0][0] > 0.4 else "Negative"

# Input loop for user interaction
while True:
    user_input = input("Enter a movie review (or 'exit' to quit): ")
    if user_input.lower() == 'exit':
        break
    sentiment = predict_sentiment(user_input)
    print(f"Sentiment: {sentiment}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
Sentiment: Negative
