In [1]:
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
import numpy as np
import joblib


# Fetch the NLTK corpora used by the text-cleaning helpers (stop-word list
# and the WordNet data behind the lemmatizer).
for _corpus in ('stopwords', 'wordnet'):
    nltk.download(_corpus)


# Load the tab-separated review file into a DataFrame.
df = pd.read_csv('file:///Users/family/Downloads/amazon_alexa.tsv', sep='\t')

# Missing reviews become empty strings and every entry is coerced to str so
# the regex-based cleaning below never receives a NaN/float.
_raw_reviews = df['verified_reviews'].fillna('')
df['verified_reviews'] = _raw_reviews.astype(str)


# Lazily-populated cache for the NLTK resources used by preprocess_text, so
# the stop-word corpus is read and the lemmatizer is built only once instead
# of once per cleaned review.
_TEXT_RESOURCES = {}


def _get_text_resources():
    """Return the cached (stop-word set, lemmatizer) pair, creating it on first use."""
    if not _TEXT_RESOURCES:
        _TEXT_RESOURCES['stop_words'] = frozenset(stopwords.words('english'))
        _TEXT_RESOURCES['lemmatizer'] = WordNetLemmatizer()
    return _TEXT_RESOURCES['stop_words'], _TEXT_RESOURCES['lemmatizer']


def preprocess_text(text):
    """Normalise a raw review string for tokenization.

    Steps, in order:
      1. replace every non-word character with a space;
      2. replace every run of digits with a space;
      3. lower-case the text;
      4. drop English stop words;
      5. lemmatize the remaining words with WordNet.

    Args:
        text: The raw review text (must be a ``str``).

    Returns:
        The cleaned words joined by single spaces; an empty string when
        nothing survives the filtering.
    """
    stop_words, lemmatizer = _get_text_resources()

    text = re.sub(r'\W', ' ', text)
    text = re.sub(r'\d+', ' ', text)
    text = text.lower()

    # Filter and lemmatize in one pass; the result is the same as filtering
    # first and lemmatizing afterwards because each word is handled
    # independently.
    return ' '.join(
        lemmatizer.lemmatize(word)
        for word in text.split()
        if word not in stop_words
    )


# Normalise every review before tokenization.
df['cleaned_review'] = df['verified_reviews'].apply(preprocess_text)


X = df['cleaned_review']
y = df['feedback']


# Single source of truth for the vocabulary cap and the padded sequence
# length, so the tokenizer, the padding step and the embedding layer cannot
# drift apart.
VOCAB_SIZE = 5000
MAX_SEQUENCE_LENGTH = 200

# Map words to integer ids (keeping only the VOCAB_SIZE most frequent words)
# and pad/truncate every review to a fixed length.
tokenizer = Tokenizer(num_words=VOCAB_SIZE)
tokenizer.fit_on_texts(X)
X_seq = tokenizer.texts_to_sequences(X)
X_pad = pad_sequences(X_seq, maxlen=MAX_SEQUENCE_LENGTH)


# One-hot encode the labels; the number of columns drives the output layer.
y_cat = to_categorical(y)


# Hold out 20% of the data for the final evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X_pad, y_cat, test_size=0.2, random_state=42
)


# Embedding -> dropout -> LSTM -> softmax classifier.
model = Sequential()
# `input_length` is intentionally not passed: it is deprecated in Keras 3 and
# the sequence length is inferred from the padded input.
model.add(Embedding(input_dim=VOCAB_SIZE, output_dim=128))
model.add(SpatialDropout1D(0.2))
model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(y_cat.shape[1], activation='softmax'))


model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


# 20% of the training portion is used as a validation set during fitting.
history = model.fit(X_train, y_train, epochs=20, batch_size=64, validation_split=0.2)


# Report loss/accuracy on the held-out test split.
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Loss: {loss:.4f}')
print(f'Accuracy: {accuracy:.4f}')


# Persist the trained model and the fitted tokenizer for later inference.
# NOTE(review): Keras 3 treats HDF5 (.h5) as a legacy format; the file name is
# kept unchanged here in case other code loads it — confirm before migrating
# to the native `.keras` format.
model.save('sentiment_model.h5')
joblib.dump(tokenizer, 'tokenizer.pkl')

def preprocess_text_for_prediction(text):
    """Clean a review for inference exactly as the training data was cleaned.

    Delegates to ``preprocess_text`` so that training-time and
    prediction-time normalisation cannot diverge. The previous copy of the
    logic deleted digit runs outright instead of replacing them with a
    space, which glued neighbouring words together (e.g. ``"love2use"``
    became ``"loveuse"`` here but ``"love use"`` during training).

    Args:
        text: The raw review text (must be a ``str``).

    Returns:
        The cleaned, space-separated text produced by ``preprocess_text``.
    """
    return preprocess_text(text)


def predict_sentiment(text):
    """Classify a single review as ``'Positive'`` or ``'Negative'``.

    The text is cleaned, converted to an integer sequence with the
    module-level ``tokenizer``, padded to length 200 and passed to the
    module-level ``model``.

    Args:
        text: The raw review text.

    Returns:
        ``'Positive'`` when the highest-scoring output index is 1,
        otherwise ``'Negative'``.
    """
    encoded = tokenizer.texts_to_sequences(
        [preprocess_text_for_prediction(text)]
    )
    scores = model.predict(pad_sequences(encoded, maxlen=200))

    # argmax without an axis works on the flattened array, which for the
    # single-row batch here is the index of the winning class.
    if np.argmax(scores) == 1:
        return 'Positive'
    return 'Negative'


# Smoke-test the classifier on one clearly negative and one clearly positive
# review, printing the predicted label for each.
for new_review in (
    "I hate this Product, Poor Quality!.",
    "An amazing Product, One of a kind!",
):
    print(predict_sentiment(new_review))


[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/family/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/family/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Epoch 1/20




[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 145ms/step - accuracy: 0.8523 - loss: 0.4548 - val_accuracy: 0.9345 - val_loss: 0.2285
Epoch 2/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 143ms/step - accuracy: 0.9288 - loss: 0.2310 - val_accuracy: 0.9345 - val_loss: 0.2133
Epoch 3/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 143ms/step - accuracy: 0.9204 - loss: 0.2335 - val_accuracy: 0.9365 - val_loss: 0.1800
Epoch 4/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 141ms/step - accuracy: 0.9521 - loss: 0.1349 - val_accuracy: 0.9444 - val_loss: 0.1795
Epoch 5/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 142ms/step - accuracy: 0.9668 - loss: 0.0896 - val_accuracy: 0.9385 - val_loss: 0.1750
Epoch 6/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 142ms/step - accuracy: 0.9788 - loss: 0.0608 - val_accuracy: 0.9425 - val_loss: 0.1815
Epoch 7/20
[1m32/32[0m [32m━━━━━━━━━



Loss: 0.3698
Accuracy: 0.9365
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step
Negative
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Positive
