In [None]:
import tensorflow_datasets as tfds
import tensorflow as tf
import numpy as np

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
import tensorflow_datasets as tfds


# Load the Yelp polarity reviews dataset together with its metadata object.
dataset, info = tfds.load(
    name='yelp_polarity_reviews',
    with_info=True,
)




In [None]:
import tensorflow_datasets as tfds

# Request the train and test splits as (text, label) pairs.
train_data, test_data = tfds.load(
    'yelp_polarity_reviews',
    split=['train', 'test'],
    as_supervised=True,
)


def _collect_examples(split):
  """Walk `split` once and return (decoded sentences, labels) as two lists.

  Each text tensor is converted with .numpy() and decoded as UTF-8; each
  label is kept as whatever .numpy() returns for it.
  """
  sentences, labels = [], []
  for text_tensor, label_tensor in split:
    sentences.append(text_tensor.numpy().decode('utf8'))
    labels.append(label_tensor.numpy())
  return sentences, labels


training_sentences, training_labels = _collect_examples(train_data)
testing_sentences, testing_labels = _collect_examples(test_data)

# Array versions of the labels, used as targets by the fit/evaluate cells.
training_labels_final = np.array(training_labels)
testing_labels_final = np.array(testing_labels)

In [None]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

# Text-vectorisation settings shared by the tokenizer, padding and model cells.
vocab_size = 10000   # used as Tokenizer num_words and as the Embedding input size
max_length = 120     # every sequence is padded/truncated to this many tokens
trunc_type='post'    # passed to pad_sequences as `truncating`
oov_tok = "<OOV>"    # placeholder token for out-of-vocabulary words

# NLTK resources needed by preprocess_text (tokenizer, stop words, lemmatizer).
nltk.download('punkt')
# Newer NLTK releases make word_tokenize look up 'punkt_tab' instead of
# 'punkt'; fetching both keeps the notebook working on either version.
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')
# NOTE(review): some NLTK releases also need 'omw-1.4' before the WordNet
# lemmatizer will load; downloading it is harmless where it is not required.
nltk.download('omw-1.4')

# Compiled once: matches every character that is neither an ASCII letter nor whitespace.
_NON_LETTER_RE = re.compile(r'[^a-zA-Z\s]')

# Filled on first use so the stop-word list and lemmatizer are built only once
# instead of once per review.
_preprocess_cache = {}


def _preprocess_resources():
    """Return (stop_words, lemmatizer), creating and caching them on first call."""
    if not _preprocess_cache:
        _preprocess_cache['stop_words'] = frozenset(stopwords.words('english'))
        _preprocess_cache['lemmatizer'] = WordNetLemmatizer()
    return _preprocess_cache['stop_words'], _preprocess_cache['lemmatizer']


def preprocess_text(text):
    """Normalise one review into a space-separated string of lemmatised tokens.

    Steps, in order: lowercase the text, delete every character that is not an
    ASCII letter or whitespace, tokenise with NLTK's word_tokenize, drop
    English stop words, and lemmatise what remains.

    Args:
        text: the raw review as a str.

    Returns:
        The cleaned tokens joined by single spaces (an empty string when no
        token survives).
    """
    stop_words, lemmatizer = _preprocess_resources()

    cleaned = _NON_LETTER_RE.sub('', text.lower())
    tokens = word_tokenize(cleaned)

    # Stop-word filtering happens before lemmatisation, as in the original code.
    kept = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]

    return ' '.join(kept)

# Clean both splits with the same preprocessing function.
training_sentences_preprocessed = [preprocess_text(sentence) for sentence in training_sentences]
testing_sentences_preprocessed = [preprocess_text(sentence) for sentence in testing_sentences]

# The vocabulary is fitted on the training text only; the test text is merely
# mapped through it, with unknown words becoming the OOV token.
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(training_sentences_preprocessed)
word_index = tokenizer.word_index

sequences = tokenizer.texts_to_sequences(training_sentences_preprocessed)
padded = pad_sequences(sequences, maxlen=max_length, truncating=trunc_type)

# Use the same truncation side as the training data. Without `truncating`,
# pad_sequences defaults to 'pre', so long test reviews would keep their last
# max_length tokens while long training reviews keep their first ones.
testing_sequences = tokenizer.texts_to_sequences(testing_sentences_preprocessed)
testing_padded = pad_sequences(testing_sequences, maxlen=max_length, truncating=trunc_type)



In [None]:
import matplotlib.pyplot as plt

def plot_graphs(history, string):
  """Plot one metric's training and validation curves from a fit history.

  Args:
    history: object whose `.history` mapping contains the keys `string`
      and 'val_' + string.
    string: metric name; also used as the y-axis label and legend entry.
  """
  val_key = 'val_'+string
  for key in (string, val_key):
    plt.plot(history.history[key])
  plt.xlabel("Epochs")
  plt.ylabel(string)
  plt.legend([string, val_key])
  plt.show()

In [None]:
# Layer sizes for the baseline model.
embedding_dim = 16
dense_dim = 6

# Baseline: embedding -> flatten -> small ReLU layer -> one sigmoid output unit.
model_flatten = tf.keras.Sequential()
model_flatten.add(tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length))
model_flatten.add(tf.keras.layers.Flatten())
model_flatten.add(tf.keras.layers.Dense(dense_dim, activation='relu'))
model_flatten.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model_flatten.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model_flatten.summary()

In [None]:
# Training schedule for the first pass.
NUM_EPOCHS = 5
BATCH_SIZE = 128

# The padded test split doubles as the validation set.
history_flatten = model_flatten.fit(
    x=padded,
    y=training_labels_final,
    batch_size=BATCH_SIZE,
    epochs=NUM_EPOCHS,
    validation_data=(testing_padded, testing_labels_final),
)

In [None]:
# Accuracy first, then loss, for the baseline run.
for metric_name in ('accuracy', 'loss'):
  plot_graphs(history_flatten, metric_name)

In [None]:
# Second pass on the same model object: explicit (lower) Adam learning rate
# and a smaller batch size.
adjusted_learning_rate = 0.0001
adjusted_batch_size = 64

model_flatten.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=adjusted_learning_rate),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

history_fine_flatten = model_flatten.fit(
    x=padded,
    y=training_labels_final,
    batch_size=adjusted_batch_size,
    epochs=NUM_EPOCHS,
    validation_data=(testing_padded, testing_labels_final),
)

for metric_name in ('accuracy', 'loss'):
  plot_graphs(history_fine_flatten, metric_name)


In [None]:
# Layer sizes for the bidirectional LSTM model.
embedding_dim = 16
lstm_dim = 32
dense_dim = 6

# Embedding -> bidirectional LSTM -> small ReLU layer -> one sigmoid output unit.
model_lstm = tf.keras.Sequential()
model_lstm.add(tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length))
model_lstm.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(lstm_dim)))
model_lstm.add(tf.keras.layers.Dense(dense_dim, activation='relu'))
model_lstm.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model_lstm.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model_lstm.summary()

In [None]:
# Same schedule as the baseline run.
NUM_EPOCHS = 5
BATCH_SIZE = 128

# The padded test split doubles as the validation set.
history_lstm = model_lstm.fit(
    x=padded,
    y=training_labels_final,
    batch_size=BATCH_SIZE,
    epochs=NUM_EPOCHS,
    validation_data=(testing_padded, testing_labels_final),
)

In [None]:
# Accuracy first, then loss, for the LSTM run.
for metric_name in ('accuracy', 'loss'):
  plot_graphs(history_lstm, metric_name)

In [None]:
# Second pass on the same LSTM model object: explicit (lower) Adam learning
# rate and a smaller batch size.
adjusted_learning_rate = 0.0001
adjusted_batch_size = 64

model_lstm.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=adjusted_learning_rate),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

history_fine_lstm = model_lstm.fit(
    x=padded,
    y=training_labels_final,
    batch_size=adjusted_batch_size,
    epochs=NUM_EPOCHS,
    validation_data=(testing_padded, testing_labels_final),
)

for metric_name in ('accuracy', 'loss'):
  plot_graphs(history_fine_lstm, metric_name)


In [None]:
import tensorflow as tf

# Layer sizes for the bidirectional GRU model.
embedding_dim = 16
gru_dim = 32
dense_dim = 6

# Embedding -> bidirectional GRU -> small ReLU layer -> one sigmoid output unit.
model_gru = tf.keras.Sequential()
model_gru.add(tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length))
model_gru.add(tf.keras.layers.Bidirectional(tf.keras.layers.GRU(gru_dim)))
model_gru.add(tf.keras.layers.Dense(dense_dim, activation='relu'))
model_gru.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model_gru.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model_gru.summary()

In [None]:
# Same schedule as the earlier runs.
NUM_EPOCHS = 5
BATCH_SIZE = 128

# The padded test split doubles as the validation set.
history_gru = model_gru.fit(
    x=padded,
    y=training_labels_final,
    batch_size=BATCH_SIZE,
    epochs=NUM_EPOCHS,
    validation_data=(testing_padded, testing_labels_final),
)

In [None]:
# Accuracy first, then loss, for the GRU run.
for metric_name in ('accuracy', 'loss'):
  plot_graphs(history_gru, metric_name)

In [None]:
# Second pass on the same GRU model object: explicit (lower) Adam learning
# rate and a smaller batch size.
adjusted_learning_rate = 0.0001
adjusted_batch_size = 64

model_gru.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=adjusted_learning_rate),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

history_fine_gru = model_gru.fit(
    x=padded,
    y=training_labels_final,
    batch_size=adjusted_batch_size,
    epochs=NUM_EPOCHS,
    validation_data=(testing_padded, testing_labels_final),
)

for metric_name in ('accuracy', 'loss'):
  plot_graphs(history_fine_gru, metric_name)


In [None]:
# Layer sizes for the 1-D convolutional model.
embedding_dim = 16
filters = 128      # number of Conv1D filters (one feature map each)
kernel_size = 5    # width of each convolution window, in tokens
dense_dim = 6

# Embedding -> Conv1D -> global average pooling -> small ReLU layer -> sigmoid.
model_conv = tf.keras.Sequential()
model_conv.add(tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length))
model_conv.add(tf.keras.layers.Conv1D(filters, kernel_size, activation='relu'))
model_conv.add(tf.keras.layers.GlobalAveragePooling1D())  # averages each feature map
model_conv.add(tf.keras.layers.Dense(dense_dim, activation='relu'))
model_conv.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model_conv.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model_conv.summary()

In [None]:
# Same schedule as the earlier runs.
NUM_EPOCHS = 5
BATCH_SIZE = 128  # samples per gradient update

# The padded test split doubles as the validation set.
history_conv = model_conv.fit(
    x=padded,
    y=training_labels_final,
    batch_size=BATCH_SIZE,
    epochs=NUM_EPOCHS,
    validation_data=(testing_padded, testing_labels_final),
)

In [None]:
# Accuracy first, then loss, for the Conv1D run.
for metric_name in ('accuracy', 'loss'):
  plot_graphs(history_conv, metric_name)

In [None]:
# Second pass on the same Conv1D model object: explicit (lower) Adam learning
# rate and a smaller batch size.
adjusted_learning_rate = 0.0001
adjusted_batch_size = 64

model_conv.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=adjusted_learning_rate), metrics=['accuracy'])

# Named like the other fine-tuning histories (history_fine_flatten / _lstm /
# _gru); `history_fine` is kept as an alias so any existing reference to the
# old name still resolves.
history_fine_conv = history_fine = model_conv.fit(padded,
                                                  training_labels_final,
                                                  epochs=NUM_EPOCHS,
                                                  batch_size=adjusted_batch_size,
                                                  validation_data=(testing_padded, testing_labels_final))

plot_graphs(history_fine_conv, 'accuracy')
plot_graphs(history_fine_conv, 'loss')


In [None]:
import tensorflow as tf

# Hyperparameters for the stacked bidirectional LSTM with dropout.
vocab_size = 10000
max_length = 120
embedding_dim = 16
lstm_dim = 32
dense_dim = 6
dropout_rate = 0.2

# Two bidirectional LSTM layers, each followed by dropout. The first returns
# the full sequence so the second LSTM has a sequence to consume.
model_lstm_dropout = tf.keras.Sequential()
model_lstm_dropout.add(tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length))
model_lstm_dropout.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(lstm_dim, return_sequences=True)))
model_lstm_dropout.add(tf.keras.layers.Dropout(dropout_rate))
model_lstm_dropout.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(lstm_dim)))
model_lstm_dropout.add(tf.keras.layers.Dropout(dropout_rate))
model_lstm_dropout.add(tf.keras.layers.Dense(dense_dim, activation='relu'))
model_lstm_dropout.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model_lstm_dropout.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model_lstm_dropout.summary()

# Single-epoch run; relies on padded / testing_padded and the label arrays
# produced by the earlier cells.
history_lstm_dropout = model_lstm_dropout.fit(
    x=padded,
    y=training_labels_final,
    batch_size=128,
    epochs=1,
    validation_data=(testing_padded, testing_labels_final),
)

# Report the metrics recorded for the last (here: only) epoch.
dropout_metrics = history_lstm_dropout.history
print("Accuracy after dropout:", dropout_metrics['accuracy'][-1])
print("Validation accuracy after dropout:", dropout_metrics['val_accuracy'][-1])


In [None]:
def _refit_and_evaluate(model):
  """Fit `model` again with the shared settings, then score it on the test arrays.

  Uses the notebook-level BATCH_SIZE / NUM_EPOCHS and the padded train/test
  arrays with their label arrays.

  Returns:
    (history, loss, accuracy): the object returned by `fit`, followed by the
    two values returned by `evaluate`.
  """
  history = model.fit(
      padded,
      training_labels_final,
      batch_size=BATCH_SIZE,
      epochs=NUM_EPOCHS,
      validation_data=(testing_padded, testing_labels_final),
  )
  loss, accuracy = model.evaluate(testing_padded, testing_labels_final)
  return history, loss, accuracy


# NOTE(review): each model is trained for NUM_EPOCHS more epochs here before
# being scored, on top of whatever training the earlier cells already did, and
# the earlier history_* objects are overwritten. Confirm that is intended.
history_lstm, lstm_loss, lstm_accuracy = _refit_and_evaluate(model_lstm)
print("LSTM Model - Loss:", lstm_loss)
print("LSTM Model - Accuracy:", lstm_accuracy)

history_flatten, flatten_loss, flatten_accuracy = _refit_and_evaluate(model_flatten)
print("Flatten Model - Loss:", flatten_loss)
print("Flatten Model - Accuracy:", flatten_accuracy)

history_gru, gru_loss, gru_accuracy = _refit_and_evaluate(model_gru)
print("GRU Model - Loss:", gru_loss)
print("GRU Model - Accuracy:", gru_accuracy)

history_conv, conv_loss, conv_accuracy = _refit_and_evaluate(model_conv)
print("Conv1D Model - Loss:", conv_loss)
print("Conv1D Model - Accuracy:", conv_accuracy)
