# In [1]:  (notebook cell marker, kept as a comment so the file parses as Python)
import re
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Read the review data into a DataFrame.
# 'reviews.csv' is a placeholder: replace it with your CSV file path.
df = pd.read_csv('reviews.csv')

# Header cells may carry stray leading/trailing whitespace; trim each column
# name so later lookups such as df['review'] and df['class'] match.
df.columns = [column_name.strip() for column_name in df.columns]

# Text cleaning function
# Built once at import time instead of on every call.
_NON_LETTER_RE = re.compile(r'[^a-zA-Z\s]')  # anything that is not an ASCII letter or whitespace
_STOP_WORDS = frozenset(
    "i,me,my,myself,we,our,ours,ourselves,you,your,yours,yourself,yourselves".split(",")
)


def clean_text(text):
    """Return *text* normalised for tokenization.

    The text is lower-cased, every character that is not an ASCII letter or
    whitespace is removed (digits, punctuation, accented letters), the small
    set of first/second-person stop words is dropped, and the remaining words
    are joined with single spaces.

    Args:
        text: The raw review. Non-string values (for example ``None`` or the
            float NaN that pandas uses for an empty cell) are treated as an
            empty review instead of raising ``AttributeError``.

    Returns:
        The cleaned string; ``''`` when nothing usable remains.
    """
    if not isinstance(text, str):
        return ''
    letters_only = _NON_LETTER_RE.sub('', text.lower())
    return ' '.join(
        word for word in letters_only.split() if word not in _STOP_WORDS
    )

# Clean the reviews
df['cleaned_review'] = df['review'].apply(clean_text)

# Tokenization settings (reused by the model definition and the prediction loop)
max_words = 5000  # Vocabulary size
max_sequence_length = 100  # Max length of each review

# Target variable (use 'class' column as mentioned)
y = df['class'].values

# Train-test split
# Split row positions *before* fitting the tokenizer so the vocabulary is
# learned from training rows only; fitting on the full dataset would leak
# information about the test reviews into preprocessing. The partition depends
# only on the number of rows and random_state, so it selects the same rows as
# splitting the padded matrix directly.
train_idx, test_idx = train_test_split(
    np.arange(len(df)), test_size=0.2, random_state=42
)

# Tokenization: fit on the training reviews, then encode every review with
# that vocabulary (words unseen during fitting map to the "<OOV>" token).
tokenizer = Tokenizer(num_words=max_words, oov_token="<OOV>")
tokenizer.fit_on_texts(df['cleaned_review'].iloc[train_idx])
sequences = tokenizer.texts_to_sequences(df['cleaned_review'])

# Padding sequences
X = pad_sequences(sequences, maxlen=max_sequence_length, padding='post')

X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

# LSTM Model
# Stack: token embedding -> two LSTM layers -> single sigmoid unit, i.e. one
# probability-like score per review.
# NOTE(review): the Embedding layer does not set mask_zero, so the zeros added
# by pad_sequences are presumably processed by the LSTMs like ordinary
# tokens -- confirm whether masking is wanted.
model = Sequential()
model.add(
    Embedding(input_dim=max_words, output_dim=16, input_length=max_sequence_length)
)
model.add(LSTM(64, return_sequences=True))  # emits the full sequence for the next LSTM
model.add(LSTM(32))
model.add(Dense(1, activation='sigmoid'))

# Compile the model (binary cross-entropy matches the single sigmoid output)
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train the model
# NOTE(review): validation_data is the held-out test split, so the reported
# validation metrics are computed on the same rows as any later test score.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=32,
    verbose=1,
)

# User input for prediction
# Repeatedly read a review from stdin, run it through the same cleaning,
# tokenization and padding as the training data, and print the predicted label.
while True:
    user_input = input("Enter a review (or type 'exit' to quit): ")
    # Ignore surrounding whitespace so " exit " also ends the loop.
    if user_input.strip().lower() == 'exit':
        break
    user_input_clean = clean_text(user_input)
    if not user_input_clean:
        # Nothing survived cleaning (empty line, only digits/punctuation or
        # stop words): the model would score pure padding, so ask again.
        print("Please enter a review containing some words.")
        continue
    user_sequence = tokenizer.texts_to_sequences([user_input_clean])
    user_padded = pad_sequences(user_sequence, maxlen=max_sequence_length, padding='post')
    # verbose=0 keeps the Keras progress bar out of the interactive prompt.
    prediction = model.predict(user_padded, verbose=0)
    # One input row and a single output unit give a (1, 1) array; pull out the
    # scalar instead of relying on the truth value of a whole array.
    score = float(prediction[0][0])
    # NOTE(review): assumes class 1 in the training labels means "positive" -- confirm.
    sentiment = "Positive" if score > 0.5 else "Negative"
    print("Sentiment:", sentiment)

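# Output recorded when the cell above was run:
#   ModuleNotFoundError: No module named 'tensorflow.python'
# NOTE(review): this usually points to a broken or incomplete TensorFlow
# installation in the active environment -- confirm and reinstall if so.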