In [159]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.optimizers import Adam

In [160]:
# Read the guest-interaction export into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; the cell only runs
# where that exact file location exists.
csv_path = r'D:\BA prac\Practice\guest_interactions.csv'
data = pd.read_csv(csv_path)

In [161]:
# Show the column index so the column names used in the cells below
# ('review_text', 'sentiment', 'booking_pattern') can be checked against the file
print(data.columns)

Index(['review_id', 'guest_id', 'review_text', 'sentiment', 'booking_pattern',
       'satisfaction_score'],
      dtype='object')


In [162]:
# Step 2: Data preprocessing
# Normalise the review text:
#   * missing reviews become empty strings first -- otherwise astype(str) turns
#     NaN into the literal word "nan", which would be tokenized and trained on;
#   * everything is lower-cased;
#   * every character that is not a-z or whitespace is removed.
# Re-running this cell on already-cleaned text leaves it unchanged.
data['review_text'] = (
    data['review_text']
    .fillna('')
    .astype(str)
    .str.lower()
    .str.replace(r'[^a-z\s]', '', regex=True)
)
# Plain arrays: cleaned review text for the tokenizer, raw sentiment values for
# the label encoder
texts = data['review_text'].values
labels = data['sentiment'].values

In [163]:
# Encode the sentiment labels as integer class ids.
# The distinct labels are sorted so the label -> id mapping is identical on every
# run. Iterating a bare set of strings follows hash order, which changes between
# interpreter sessions, so the class ids (and the meaning of the model's output
# columns) would otherwise differ from one kernel restart to the next.
# key=str keeps the sort from raising if the column mixes strings with NaN.
interaction_types = sorted(set(labels), key=str)
label_dict = {interaction: i for i, interaction in enumerate(interaction_types)}
encoded_labels = np.array([label_dict[label] for label in labels])

In [164]:

# Step 3: Tokenize the text data
# Fit the tokenizer's word index on the cleaned reviews, then turn each review
# into a list of integer word ids. num_words caps the vocabulary at 5000.
vocabulary_cap = 5000
tokenizer = Tokenizer(num_words=vocabulary_cap)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

In [165]:
# Bring every review to the same length so they can be fed to the model together
max_sequence_length = 100
X_text = pad_sequences(
    sequences,
    maxlen=max_sequence_length,
)

In [167]:
# One-hot encode the booking pattern and append the indicator columns to the
# right of the padded token ids.
# NOTE(review): X_combined is reassigned to X_text in the following cell, so the
# matrix built here is not what the model is trained on.
booking_dummies = pd.get_dummies(data['booking_pattern'])
room_types_encoded = booking_dummies.values
X_combined = np.concatenate((X_text, room_types_encoded), axis=1)

In [168]:
# Use only text for simplicity in this iteration: this replaces the
# text + booking-pattern matrix assigned to X_combined earlier, so the
# features passed on from here are the padded token ids alone
X_combined = X_text

In [169]:
# Step 4: Split data into training and testing sets
# 20% of the rows are held out for testing; the fixed random_state makes the
# split repeatable for the same input arrays
X_train, X_test, y_train, y_test = train_test_split(
    X_combined,
    encoded_labels,
    test_size=0.2,
    random_state=42,
)

In [170]:
# Step 5: Build the LSTM model
# Layer stack, in order: embedding -> LSTM returning the full sequence ->
# dropout -> second LSTM -> softmax with one unit per distinct label
layer_stack = (
    Embedding(input_dim=5000, output_dim=256),
    LSTM(128, return_sequences=True),
    Dropout(0.3),
    LSTM(64),
    Dense(len(interaction_types), activation='softmax'),
)
model = Sequential()
for layer in layer_stack:
    model.add(layer)

In [171]:
# Compile the model
# Sparse categorical cross-entropy is used because the targets are integer
# class ids rather than one-hot vectors
optimizer = Adam(learning_rate=0.001)
model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [172]:
# Step 6: Train the model
# validation_split sets aside 20% of the training rows for per-epoch validation
history = model.fit(
    X_train,
    y_train,
    epochs=20,
    batch_size=32,
    validation_split=0.2,
)

Epoch 1/20
1/1 ━━━━━━━━━━━━━━━━━━━━ 16s 16s/step - accuracy: 0.6667 - loss: 1.0912 - val_accuracy: 0.0000e+00 - val_loss: 1.1849
Epoch 2/20
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 170ms/step - accuracy: 0.6667 - loss: 1.0281 - val_accuracy: 0.0000e+00 - val_loss: 1.2943
Epoch 3/20
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 164ms/step - accuracy: 0.6667 - loss: 0.9499 - val_accuracy: 0.0000e+00 - val_loss: 1.4995
Epoch 4/20
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 164ms/step - accuracy: 0.6667 - loss: 0.8545 - val_accuracy: 0.0000e+00 - val_loss: 1.9148
Epoch 5/20
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 164ms/step - accuracy: 0.6667 - loss: 0.7286 - val_accuracy: 0.0000e+00 - val_loss: 2.6988
Epoch 6/20
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 161ms/step - accuracy: 0.6667 - loss: 0.6677 - val_accuracy: 0.0000e+00 - val_loss: 3.5081
Epoch 7/20
[1m1/

In [173]:
# Step 7: Evaluate the model
# evaluate() yields the loss followed by the compiled metric (accuracy)
test_metrics = model.evaluate(X_test, y_test)
loss, accuracy = test_metrics
print(f'Test Accuracy: {accuracy:.2f}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - accuracy: 0.0000e+00 - loss: 4.4787
Test Accuracy: 0.00


In [174]:
# Step 8: Make predictions
def predict_interaction(text):
    """Predict the sentiment label for one piece of review text.

    The text is cleaned the same way the training reviews were (lower-cased,
    every character other than a-z and whitespace removed) before it is
    tokenized. Without this, input such as "don't" reaches the tokenizer in a
    form it was never fitted on ("don't" instead of "dont").

    Args:
        text: The raw review text; non-string values are converted with str().

    Returns:
        The entry of ``interaction_types`` at the index of the highest
        predicted score.
    """
    import re  # local import so this cell does not depend on an edited import cell

    # Mirror the training-time cleaning: lower-case, then drop everything
    # that is not a-z or whitespace
    cleaned_text = re.sub(r'[^a-z\s]', '', str(text).lower())
    sequence = tokenizer.texts_to_sequences([cleaned_text])
    padded_sequence = pad_sequences(sequence, maxlen=max_sequence_length)
    prediction = model.predict(padded_sequence)
    # Only one text is passed in, so the flat argmax is used as the class index
    predicted_label = interaction_types[np.argmax(prediction)]
    return predicted_label

In [175]:
# Example prediction: run one hand-written review through predict_interaction
# and print the label it returns
example_text = "Amazing experience, will definitely come back!"
predicted_interaction = predict_interaction(example_text)
print(f'Predicted Interaction Type: {predicted_interaction}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 958ms/step
Predicted Interaction Type: positive
