In [None]:
#test
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from imblearn.under_sampling import RandomUnderSampler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, SpatialDropout1D, Bidirectional
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [2]:
# Load the raw hotel-review table. The path is relative, so it is resolved
# against the notebook's working directory.
reviews_csv_path = "Hotel_Reviews.csv"
df = pd.read_csv(reviews_csv_path)

In [3]:
# Join the positive and negative parts of each review into one text column,
# separated by a single space.
df["Full_Review"] = df["Positive_Review"].add(" ").add(df["Negative_Review"])

In [4]:
# Keep only the model input text and the score the label is derived from.
# .copy() makes the result an independent frame, so adding or dropping
# columns on it later does not raise pandas' SettingWithCopyWarning.
df = df[["Full_Review", "Reviewer_Score"]].copy()

In [5]:
# Binary label: a score strictly above 5 is positive (1); anything else,
# including a missing score, is negative (0).
# The vectorised comparison replaces a per-row Python lambda, and
# assign/drop return a new frame instead of mutating one that may be a
# slice of the raw data.
df = (
    df
    .assign(Sentiment=(df["Reviewer_Score"] > 5).astype("int64"))
    .drop(columns=["Reviewer_Score"])
)

In [6]:
# Remove the literal phrases "No Negative" and "No Positive" wherever they
# occur in the text (presumably the dataset's placeholders for an empty
# review part -- confirm against the data description).
placeholder_removals = {phrase: "" for phrase in ("No Negative", "No Positive")}
df["Full_Review"] = df["Full_Review"].replace(placeholder_removals, regex=True)

In [7]:
# Preview the first five rows of the prepared frame.
df.head(n=5)

Unnamed: 0,Full_Review,Sentiment
0,Only the park outside of the hotel was beauti...,0
1,No real complaints the hotel was great great ...,1
2,Location was good and staff were ok It is cut...,1
3,Great location in nice surroundings the bar a...,0
4,Amazing location and building Romantic settin...,1


In [8]:
# Text-vectorisation limits shared by the tokenizer, padding and embedding.
max_words = 10_000  # vocabulary cap: number of distinct tokens kept
max_length = 200  # every sequence is padded or truncated to this many tokens

In [9]:
# Build the word index from the review texts. Words outside the max_words
# cap, or never seen during fitting, are encoded with the "<OOV>" token.
# NOTE(review): the vocabulary is fit on the whole frame, i.e. before the
# train/test split, so it also sees the held-out reviews.
review_texts = df["Full_Review"]
tokenizer = Tokenizer(oov_token="<OOV>", num_words=max_words)
tokenizer.fit_on_texts(review_texts)

In [10]:
# Encode every review as a list of word indices, then force a fixed width of
# max_length: shorter sequences are padded at the end, longer ones are cut at
# the end.
X_sequences = tokenizer.texts_to_sequences(df["Full_Review"])
X_padded = pad_sequences(
    X_sequences,
    maxlen=max_length,
    truncating="post",
    padding="post",
)

In [11]:
# Target vector as a plain NumPy array, aligned row-for-row with X_padded.
y = df["Sentiment"].to_numpy()

In [12]:
# Hold out 20% of the samples for testing. Stratifying on the label keeps the
# class ratio the same in both partitions; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_padded,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

In [13]:
# Randomly discard majority-class rows from the training split only, until
# the minority/majority ratio reaches 0.7. The test split is left untouched.
undersample = RandomUnderSampler(random_state=42, sampling_strategy=0.7)
X_train_under, y_train_under = undersample.fit_resample(X_train, y_train)



In [14]:
# Weight each class inversely to its frequency in the undersampled training
# labels. The mapping is built from the labels actually present rather than
# hard-coded 0/1 positions, and values are cast to built-in types so the
# dict prints (and serialises) cleanly.
present_classes = np.unique(y_train_under)
class_weights = compute_class_weight("balanced", classes=present_classes, y=y_train_under)
class_weights_dict = {
    int(label): float(weight)
    for label, weight in zip(present_classes, class_weights)
}
print("Computed Class Weights:", class_weights_dict)

Computed Class Weights: {0: np.float64(1.2142740819151534), 1: np.float64(0.8500056999544003)}


In [15]:
embedding_dim = 128  # size of each learned word vector
lstm_units = 64  # hidden units per LSTM direction

# Seed Python, NumPy and TensorFlow so weight initialisation and dropout
# masks are repeatable between runs.
tf.keras.utils.set_random_seed(42)

# Bidirectional LSTM classifier over the padded word-index sequences.
# The input shape is declared with an explicit Input layer; the Embedding
# `input_length` argument is deprecated in Keras 3 and only emits a warning.
# NOTE(review): recurrent_dropout > 0 rules out the fused cuDNN LSTM kernel,
# so training falls back to the slower generic implementation.
lstm_model = Sequential([
    tf.keras.Input(shape=(max_length,), dtype="int32"),
    Embedding(input_dim=max_words, output_dim=embedding_dim),
    SpatialDropout1D(0.2),
    Bidirectional(LSTM(lstm_units, dropout=0.2, recurrent_dropout=0.2)),
    Dense(1, activation="sigmoid"),  # single probability for the positive class
])

lstm_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)



In [None]:
# Train on the undersampled training split, applying the per-class weights to
# the loss.
# NOTE(review): the held-out test split is used as validation data here, so
# the per-epoch val_* numbers and the final test report come from the same
# samples.
fit_options = dict(
    epochs=5,
    batch_size=64,
    verbose=1,
    class_weight=class_weights_dict,
)
history = lstm_model.fit(
    X_train_under,
    y_train_under,
    validation_data=(X_test, y_test),
    **fit_options,
)

Epoch 1/5
[1m933/933[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m526s[0m 557ms/step - accuracy: 0.7863 - loss: 0.4545 - val_accuracy: 0.8220 - val_loss: 0.4000
Epoch 2/5
[1m933/933[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m551s[0m 590ms/step - accuracy: 0.8598 - loss: 0.3242 - val_accuracy: 0.8026 - val_loss: 0.4390
Epoch 3/5
[1m933/933[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m594s[0m 637ms/step - accuracy: 0.8749 - loss: 0.2922 - val_accuracy: 0.8522 - val_loss: 0.3294
Epoch 4/5
[1m164/933[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m7:20[0m 573ms/step - accuracy: 0.8872 - loss: 0.2698

In [None]:
# Threshold the sigmoid outputs at 0.5. ravel() flattens the (n, 1)
# prediction column into a label vector with the same shape as y_test.
y_pred_lstm = (lstm_model.predict(X_test) > 0.5).astype(int).ravel()

print("🔹 LSTM Classification Report:\n", classification_report(y_test, y_pred_lstm))

# Pin the label order so the matrix rows/columns always line up with the
# tick labels, even if one class is never predicted.
class_names = ["Negative", "Positive"]
conf_matrix = confusion_matrix(y_test, y_pred_lstm, labels=[0, 1])

# Draw on an explicit Axes rather than the implicit pyplot state.
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="coolwarm",
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("LSTM Model - Confusion Matrix")
plt.show()