In [3]:
# base model, not tuned

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Bidirectional, Embedding
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

df = pd.read_csv("../data/processed/preprocessed_reviews.csv")
print("Data shape:", df.shape)

# label ground truth with 1 if 4 stars or more, 0 if 2 stars or less
# 1 positive, 0 negative
df['label'] = df['score'].apply(lambda x: 1 if x >= 4 else 0)

# tokenize text data
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(df['content'])
sequences = tokenizer.texts_to_sequences(df['content'])

# replace out-of-vocab indices with 0 so we can pad
sequences = [[token if token < 3000 else 0 for token in seq] for seq in sequences]

# pad sequences to the same length
X = pad_sequences(sequences, maxlen=300)
y = df['label'].values

# split into 80:20 train:test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

df.head()

vocab_size = 10000
embedding_dim = 128
lstm_out = 196

# make model using some baseline hyperparameters
model = Sequential()
model.add(Embedding(vocab_size, embedding_dim))
model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
# model summary
model.summary()

history = model.fit(X_train, y_train, 
                    epochs=10, verbose=1, 
                    validation_split=0.1)

Data shape: (10381, 37)


Epoch 1/10
[1m234/234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 574ms/step - accuracy: 0.6720 - loss: 0.6020 - val_accuracy: 0.8351 - val_loss: 0.3727
Epoch 2/10
[1m234/234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 589ms/step - accuracy: 0.8747 - loss: 0.3285 - val_accuracy: 0.8520 - val_loss: 0.3630
Epoch 3/10
[1m234/234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 577ms/step - accuracy: 0.8921 - loss: 0.2759 - val_accuracy: 0.8628 - val_loss: 0.3244
Epoch 4/10
[1m234/234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 583ms/step - accuracy: 0.9125 - loss: 0.2445 - val_accuracy: 0.8700 - val_loss: 0.3478
Epoch 5/10
[1m234/234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m131s[0m 559ms/step - accuracy: 0.9181 - loss: 0.2208 - val_accuracy: 0.8568 - val_loss: 0.3512
Epoch 6/10
[1m234/234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m134s[0m 571ms/step - accuracy: 0.9337 - loss: 0.1823 - val_accuracy: 0.8616 - val_loss: 0.3646
Epoc

In [5]:
import random
from sklearn.metrics import accuracy_score

y_pred = model.predict(X_test)
binary_predictions = (y_pred > 0.5).astype(int)

# convert y_test to a numpy array
y_test_array = np.array(y_test)

# get 10 random indices
random_indices = random.sample(range(len(y_test_array)), 10)

# loop through random indices
for idx in random_indices:
    review_text = df.loc[idx, 'content']

    print(f"Review: {review_text[:200]}...")  # print first 200 chars of review
    print(f"True Label: {y_test_array[idx]}")  # print true sentiment of review (0 for negative, 1 for positive)
    print(f"Predicted Label: {binary_predictions[idx]}")  # print predicted sentiment of review (0 for negative, 1 for positive)
    print("------------------------------------------------------------------")

print("Accuracy:", accuracy_score(y_test, binary_predictions))

[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 92ms/step
Review: Good Clean app, No crash issues. No data loss. Easy to use widget....
True Label: 0
Predicted Label: [0]
------------------------------------------------------------------
Review: Extremely complicated to use. can't understand a thing. good app, but it needs a whole tutorial because it does so much...
True Label: 1
Predicted Label: [1]
------------------------------------------------------------------
Review: What good is a task management app if it cannot give reminders...
True Label: 1
Predicted Label: [1]
------------------------------------------------------------------
Review: Simple but flexible app for managing todo lists and notifications....
True Label: 1
Predicted Label: [1]
------------------------------------------------------------------
Review: I began using this app 18months ago after having a baby and feeling that i was never feeling organised. It has been a game changer in helping me sche

In [17]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import time

# load and shuffle data
unshuffled_df = pd.read_csv("../data/processed/preprocessed_reviews.csv")
unshuffled_df['original_index'] = unshuffled_df.index
df = unshuffled_df.sample(frac=1, random_state=42).reset_index(drop=True)
df['label'] = df['score'].apply(lambda x: 1 if x >= 4 else 0)

# tokenize
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(df['content'])
sequences = tokenizer.texts_to_sequences(df['content'])

# replace out-of-vocab with 0
sequences = [[token if token < 3000 else 0 for token in seq] for seq in sequences]
X = pad_sequences(sequences, maxlen=300)
y = df['label'].values

# 80:20 train:test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

# hyperparameter grid
learning_rates = [0.01, 0.001]
momentums = [0.5, 0.9]
lstm_units_list = [128, 256]
lstm_layers_list = [1, 2]

embedding_dim = 128
vocab_size = 10000

best_model = None
best_params = None
best_acc = 0

epochs = 5

# manual grid search
for lr in learning_rates:
    for momentum in momentums:
        for lstm_units in lstm_units_list:
            for lstm_layers in lstm_layers_list:
                print(f"\nTraining with lr={lr}, momentum={momentum}, lstm_units={lstm_units}, lstm_layers={lstm_layers}")
                start_time = time.time()
    
                # build model
                model = Sequential()
                model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim))
                for _ in range(lstm_layers - 1):
                    model.add(LSTM(lstm_units, dropout=0.2, recurrent_dropout=0.2, return_sequences=True))
                model.add(LSTM(lstm_units, dropout=0.2, recurrent_dropout=0.2))
                model.add(Dense(1, activation='sigmoid'))
    
                optimizer = SGD(learning_rate=lr, momentum=momentum)
                model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    
                # train model
                history = model.fit(X_train, y_train, epochs=epochs, batch_size=64,
                                    validation_split=0.1, verbose=0)
    
                # evaluate model
                val_acc = history.history['val_accuracy'][-1]
                test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)

                # track time
                elapsed = time.time() - start_time
                print(f"Validation Accuracy: {val_acc:.4f}, Test Accuracy: {test_acc:.4f}, Time: {elapsed:.2f} seconds")

                # check if model performance is better than previous best
                if val_acc > best_acc:
                    best_acc = test_acc
                    best_model = model
                    best_params = [lr, momentum, lstm_units]

print("Best test accuracy:",  best_acc)
print("Best params:", best_params)
best_model.save("lstm_best.keras")


Training with lr=0.01, momentum=0.9, lstm_units=128, lstm_layers=1, dropout=0.2
Validation Accuracy: 0.5319, Test Accuracy: 0.5359, Time: 45.11 seconds
Best test accuracy: 0.5358690619468689
Best params: [0.01, 0.9, 128]


In [19]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import time

# load and shuffle data
unshuffled_df = pd.read_csv("../data/processed/preprocessed_reviews.csv")
unshuffled_df['original_index'] = unshuffled_df.index
df = unshuffled_df.sample(frac=1, random_state=42).reset_index(drop=True)
df['label'] = df['score'].apply(lambda x: 1 if x >= 4 else 0)

# tokenize
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(df['content'])
sequences = tokenizer.texts_to_sequences(df['content'])

# replace out-of-vocab with 0
sequences = [[token if token < 3000 else 0 for token in seq] for seq in sequences]
X = pad_sequences(sequences, maxlen=300)
y = df['label'].values

# train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

# chosen hyperparameters
lr = 0.01
momentum = 0.9
lstm_units = 128
lstm_layers = 1
embedding_dim = 128
vocab_size = 10000

epochs = 100

print(f"\nTraining with lr={lr}, momentum={momentum}, lstm_units={lstm_units}, lstm_layers={lstm_layers}")
start_time = time.time()

# build model
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim))
model.add(LSTM(lstm_units, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))
optimizer = SGD(learning_rate=lr, momentum=momentum)
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# train model
history = model.fit(X_train, y_train, epochs=epochs, batch_size=64,
                    validation_split=0.1, verbose=1)

# briefly evaluate model (more detailed evaluation in later cell)
val_acc = history.history['val_accuracy'][-1]
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=1)

# calculate time took to train model
elapsed = time.time() - start_time
print(f"Validation Accuracy: {val_acc:.4f}, Test Accuracy: {test_acc:.4f}, Time: {elapsed:.2f} seconds")


Training with lr=0.01, momentum=0.9, lstm_units=128, lstm_layers=1
Epoch 1/100
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 343ms/step - accuracy: 0.5068 - loss: 0.6916 - val_accuracy: 0.5319 - val_loss: 0.6845
Epoch 2/100
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 346ms/step - accuracy: 0.5529 - loss: 0.6784 - val_accuracy: 0.5909 - val_loss: 0.6610
Epoch 3/100
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 344ms/step - accuracy: 0.5968 - loss: 0.6660 - val_accuracy: 0.6005 - val_loss: 0.6594
Epoch 4/100
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 346ms/step - accuracy: 0.5948 - loss: 0.6624 - val_accuracy: 0.6065 - val_loss: 0.6561
Epoch 5/100
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 353ms/step - accuracy: 0.6072 - loss: 0.6577 - val_accuracy: 0.6101 - val_loss: 0.6510
Epoch 6/100
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 353ms/step - accuracy: 

In [21]:
import random
from sklearn.metrics import accuracy_score

y_pred = model.predict(X_test)
binary_predictions = (y_pred > 0.5).astype(int)

# convert y_test to a numpy array
y_test_array = np.array(y_test)

# get 10 random indices
random_indices = random.sample(range(len(y_test_array)), 10)

# loop through random indices
for idx in random_indices:
    review_text = unshuffled_df.loc[df.loc[idx, 'original_index'], 'content']

    print(f"Review: {review_text[:200]}...")  # print first 200 chars of review
    print(f"True Label: {y_test_array[idx]}")  # print true sentiment of review (0 for negative, 1 for positive)
    print(f"Predicted Label: {binary_predictions[idx]}")  # print predicted sentiment of review (0 for negative, 1 for positive)
    print("------------------------------------------------------------------")

print("Accuracy:", accuracy_score(y_test, binary_predictions))


[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 44ms/step
Review: Nice 👍 one...
True Label: 0
Predicted Label: [0]
------------------------------------------------------------------
Review: This is great! After having played it for three days I was begging my mom for chores to do!...
True Label: 0
Predicted Label: [0]
------------------------------------------------------------------
Review: Difficult to use...
True Label: 1
Predicted Label: [0]
------------------------------------------------------------------
Review: Hum hai al Ghj...
True Label: 0
Predicted Label: [0]
------------------------------------------------------------------
Review: The backup feature does not work. I am stuck having to copy over 1000 items in my lists manually. I understand syncing is a premium feature, but the backup feature is not and I wish I hadn't bothered ...
True Label: 1
Predicted Label: [1]
------------------------------------------------------------------
Review: Excellent...
True

In [36]:
import os
import json
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# get predictions
pred_out = model.predict(X_test)
binary_predictions = (y_pred > 0.5).astype(int)
y_test_array = np.array(y_test)

# compute metrics
acc  = accuracy_score(y_test_array, binary_predictions)
prec = precision_score(y_test_array, binary_predictions)
rec  = recall_score(y_test_array, binary_predictions)
f1   = f1_score(y_test_array, binary_predictions)

clf_rep = classification_report(
    y_test_array, binary_predictions,
    target_names=["Negative", "Positive"],
    digits=4
)

# create evaluation directory if doesn't exist
os.makedirs("../evaluation", exist_ok=True)

# save numeric metrics as JSON
metrics = {
    "model": "lstm",
    "accuracy": acc,
    "precision": prec,
    "recall": rec,
    "f1": f1
}
with open("../evaluation/lstm_metrics.json", "w") as fp:
    json.dump(metrics, fp, indent=2)

# create confusion matrix plo
cm = confusion_matrix(y_test_array, binary_predictions)
plt.figure(figsize=(5, 4))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=["Neg","Pos"],
    yticklabels=["Neg","Pos"]
)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("LSTM Confusion Matrix")
plt.tight_layout()
plt.savefig("../evaluation/lstm_confusion_matrix.png")
plt.close()

print("evaluation metrics & plots saved to /evaluation/")

[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 38ms/step
evaluation metrics & plots saved to /evaluation/


In [37]:
# create models directory
os.makedirs("../models", exist_ok=True)

# save model
model.save('../models/lstm-sentiment.keras')
print("model saved to /models/")

model saved to /models/
