In [1]:
import os
import gensim
import numpy as np
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from collections import Counter
from bs4 import BeautifulSoup
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import contractions



In [2]:
def load_dataset(directory, label):
    data = []
    for filename in os.listdir(directory):
        with open(os.path.join(directory, filename), 'r') as file:
            review = file.read()
            data.append((review, label))
    return data

directory_path = "/Users/mrbinit/Downloads/aclImdb" 

train_pos_dir = os.path.join(directory_path, 'train', 'pos')
train_neg_dir = os.path.join(directory_path, 'train', 'neg')
test_pos_dir = os.path.join(directory_path, 'test', 'pos')
test_neg_dir = os.path.join(directory_path, 'test', 'neg')
val_pos_dir = os.path.join(directory_path, 'val', 'pos')
val_neg_dir = os.path.join(directory_path, 'val', 'neg')

train_data = load_dataset(train_pos_dir, 1) + load_dataset(train_neg_dir, 0) #1 represents positive and 0 represents negative sentiments
test_data = load_dataset(test_pos_dir, 1) + load_dataset(test_neg_dir, 0)

#split the test set into a validation set (15,000 samples) and a test set (10,000 samples)
val_data = test_data[:15000]
test_data = test_data[15000:25000]

#separate the reviews and labels from the train, test, and validation data
train_reviews, train_labels = zip(*train_data)
test_reviews, test_labels = zip(*test_data)
val_reviews, val_labels = zip(*val_data)

#check the lengths of train, test, and validation datasets
train_length = len(train_reviews)
test_length = len(test_reviews)
val_length = len(val_reviews)

print("Train dataset length:", train_length)
print("Test dataset length:", test_length)
print("Validation dataset length:", val_length)


Train dataset length: 25000
Test dataset length: 10000
Validation dataset length: 15000


In [3]:
print("Train Data:", train_reviews[:5])  
print("Test Data:", test_reviews[:5])   
print("Validation Data:", val_reviews[:5])

Train Data: ('For a movie that gets no respect there sure are a lot of memorable quotes listed for this gem. Imagine a movie where Joe Piscopo is actually funny! Maureen Stapleton is a scene stealer. The Moroni character is an absolute scream. Watch for Alan "The Skipper" Hale jr. as a police Sgt.', 'Bizarre horror movie filled with famous faces but stolen by Cristina Raines (later of TV\'s "Flamingo Road") as a pretty but somewhat unstable model with a gummy smile who is slated to pay for her attempted suicides by guarding the Gateway to Hell! The scenes with Raines modeling are very well captured, the mood music is perfect, Deborah Raffin is charming as Cristina\'s pal, but when Raines moves into a creepy Brooklyn Heights brownstone (inhabited by a blind priest on the top floor), things really start cooking. The neighbors, including a fantastically wicked Burgess Meredith and kinky couple Sylvia Miles & Beverly D\'Angelo, are a diabolical lot, and Eli Wallach is great fun as a wily p

In [4]:
#Regular expressions (regex) are sequences of characters that define a search pattern. They are used for string manipulation, searching, and pattern matching within text. 
import re
def has_html_tags(text):
    pattern = re.compile(r'<[^>]+>')  # Regular expression to match HTML tags
    return bool(pattern.search(text))

# Check for HTML tags in each dataset
def check_html_tags(dataset):
    for review, _ in dataset:
        if has_html_tags(review):
            return True
    return False

#check for HTML tags in each dataset
train_has_html = check_html_tags(train_data)
test_has_html = check_html_tags(test_data)
val_has_html = check_html_tags(val_data)
#print output
print("Train dataset contains HTML tags:", train_has_html)
print("Test dataset contains HTML tags:", test_has_html)
print("Validation dataset contains HTML tags:", val_has_html)


Train dataset contains HTML tags: True
Test dataset contains HTML tags: True
Validation dataset contains HTML tags: True


In [5]:
def has_url(text):
    pattern = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
    return bool(pattern.search(text))

#condition to check for url
def check_for_urls(dataset):
    for review, _ in dataset:
        if has_url(review):
            return True
    return False
#check whether there is URL or not
train_has_url = check_for_urls(train_data)
test_has_url = check_for_urls(test_data)
val_has_url = check_for_urls(val_data)

print("Train dataset contains URLs:", train_has_url)
print("Test dataset contains URLs:", test_has_url)
print("Validation dataset contains URLs:", val_has_url)

Train dataset contains URLs: True
Test dataset contains URLs: True
Validation dataset contains URLs: True


In [6]:
def has_special_characters(text):
    pattern = re.compile(r'[^a-zA-Z0-9\s]')
    return bool(pattern.search(text))

def check_for_special_characters(dataset):
    for review, _ in dataset:
        if has_special_characters(review):
            return True
    return False

train_has_special_chars = check_for_special_characters(train_data)
test_has_special_chars = check_for_special_characters(test_data)
val_has_special_chars = check_for_special_characters(val_data)
print("Train dataset contains special characters:", train_has_special_chars)
print("Test dataset contains special characters:", test_has_special_chars)
print("Validation dataset contains special characters:", val_has_special_chars)


Train dataset contains special characters: True
Test dataset contains special characters: True
Validation dataset contains special characters: True


In [7]:
#preprocess function
def preprocess_text(text):
    #remove  HTML tags
    text = BeautifulSoup(text, "html.parser").get_text()

    #remove urls
    text = re.sub(r'http\S+', '', text)

    #expand contractions 
    text = contractions.fix(text)

    #tokenize 
    tokens = word_tokenize(text)

    #remove non-alphabetic tokens and convert to lowercase
    tokens = [word.lower() for word in tokens if word.isalpha()]

    #remove stopwords 
    stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if not word in stop_words]

    #lemmatize
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(word) for word in tokens]
    
    return tokens

In [8]:
# Preprocess all the data
train_reviews_processed = [preprocess_text(review) for review in train_reviews]
test_reviews_processed = [preprocess_text(review) for review in test_reviews]
val_reviews_processed = [preprocess_text(review) for review in val_reviews]

  text = BeautifulSoup(text, "html.parser").get_text()


In [9]:
print("Cleaned Train Data:", train_reviews_processed[:5])  
print("Cleaned Test Data:", test_reviews_processed[:5])   
print("Cleaned Validation Data:", val_reviews_processed[:5])

Cleaned Train Data: [['movie', 'get', 'respect', 'sure', 'lot', 'memorable', 'quote', 'listed', 'gem', 'imagine', 'movie', 'joe', 'piscopo', 'actually', 'funny', 'maureen', 'stapleton', 'scene', 'stealer', 'moroni', 'character', 'absolute', 'scream', 'watch', 'alan', 'skipper', 'hale', 'police', 'sgt'], ['bizarre', 'horror', 'movie', 'filled', 'famous', 'face', 'stolen', 'cristina', 'raines', 'later', 'tv', 'flamingo', 'road', 'pretty', 'somewhat', 'unstable', 'model', 'gummy', 'smile', 'slated', 'pay', 'attempted', 'suicide', 'guarding', 'gateway', 'hell', 'scene', 'raines', 'modeling', 'well', 'captured', 'mood', 'music', 'perfect', 'deborah', 'raffin', 'charming', 'cristina', 'pal', 'raines', 'move', 'creepy', 'brooklyn', 'height', 'brownstone', 'inhabited', 'blind', 'priest', 'top', 'floor', 'thing', 'really', 'start', 'cooking', 'neighbor', 'including', 'fantastically', 'wicked', 'burgess', 'meredith', 'kinky', 'couple', 'sylvia', 'mile', 'beverly', 'diabolical', 'lot', 'eli', 'wa

In [10]:
def find_max_sequence_length(train_reviews_processed):
    max_length = 0
    for tokens in train_reviews_processed:
        sequence_length = len(tokens)
        if sequence_length > max_length:
            max_length = sequence_length
    return max_length

#assuming tokenized_texts is a list of tokenized texts after preprocessing
max_sequence_length = find_max_sequence_length(train_reviews_processed)
print("Maximum sequence length:", max_sequence_length)

Maximum sequence length: 1394


In [11]:
# Train Word2Vec model
word2vec_model = gensim.models.Word2Vec(sentences=train_reviews_processed + test_reviews_processed + val_reviews_processed,
                                        vector_size=100, window=5, min_count=5, workers=4)

# Function to get vector representation of a sentence
def get_sentence_vector(tokens):
    vector = np.zeros((100,))
    count = 0
    for word in tokens:
        if word in word2vec_model.wv:
            vector += word2vec_model.wv[word]
            count += 1
    if count != 0:
        vector /= count
    return vector
    
#get sentence vector for all the dataset 
train_vectors = [get_sentence_vector(tokens) for tokens in train_reviews_processed]
test_vectors = [get_sentence_vector(tokens) for tokens in test_reviews_processed]
val_vectors = [get_sentence_vector(tokens) for tokens in val_reviews_processed]

In [12]:
#convert lists to numpy arrays 
X_train = np.array(train_vectors)
X_test = np.array(test_vectors)
X_val = np.array(val_vectors)


y_train = np.array(train_labels)
y_test = np.array(test_labels)
y_val = np.array(val_labels)

In [14]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Define the CNN model
def build_cnn_model(input_shape):
    model = models.Sequential()
    
    # Convolutional layers
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    
    # Flatten layer
    model.add(layers.Flatten())
    
    # Dense layers
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(10, activation='softmax'))  # Output layer with softmax activation for classification
    
    return model

# Input shape of your data (assuming you're dealing with image data)
input_shape = (28, 28, 1)  # Example input shape (28x28 grayscale images)
model = build_cnn_model(input_shape)

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',  # Since the labels are integers
              metrics=['accuracy'])

# Print model summary
model.summary()


In [25]:

from sklearn.feature_extraction.text import CountVectorizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, Flatten, Dense, Dropout

# Assuming train_reviews_processed, test_reviews_processed, and val_reviews_processed are defined
# and contain lists of tokenized reviews.

# Vectorize text data
vectorizer = CountVectorizer(max_features=5000) # Adjust max_features as needed
train_vectors = vectorizer.fit_transform([' '.join(review) for review in train_reviews_processed])
test_vectors = vectorizer.transform([' '.join(review) for review in test_reviews_processed])
val_vectors = vectorizer.transform([' '.join(review) for review in val_reviews_processed])

# Pad sequences to ensure uniform length for input to CNN
max_len = 500 # Choose an appropriate length
X_train = pad_sequences(train_vectors.toarray(), maxlen=max_len)
X_test = pad_sequences(test_vectors.toarray(), maxlen=max_len)
X_val = pad_sequences(val_vectors.toarray(), maxlen=max_len)

# Define CNN model
model = Sequential()
model.add(Embedding(input_dim=vectorizer.vocabulary_.size+1, output_dim=100, input_length=max_len))
model.add(Conv1D(128, 5, activation='relu'))
model.add(MaxPooling1D(5))
model.add(Conv1D(128, 5, activation='relu'))
model.add(MaxPooling1D(5))
model.add(Conv1D(128, 5, activation='relu'))
model.add(MaxPooling1D(35))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Assuming y_train, y_val, and y_test are defined and contain the corresponding labels
# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=128, validation_data=(X_val, y_val))

# Evaluate the model on test data
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy:", test_accuracy)


AttributeError: 'dict' object has no attribute 'size'

In [16]:
import numpy as np

# Reshape sentence vectors to match the input shape of the model
X_train_reshaped = np.expand_dims(X_train, axis=-1)
X_val_reshaped = np.expand_dims(X_val, axis=-1)
X_test_reshaped = np.expand_dims(X_test, axis=-1)

# Train the model with reshaped input data
history = model.fit(X_train_reshaped, y_train, epochs=10, validation_data=(X_val_reshaped, y_val))

# Evaluate the model on test data
test_loss, test_accuracy = model.evaluate(X_test_reshaped, y_test)
print("Test Accuracy:", test_accuracy)



Epoch 1/10


ValueError: Exception encountered when calling Conv2D.call().

[1mNegative dimension size caused by subtracting 3 from 1 for '{{node sequential_1_1/conv2d_1_1/convolution}} = Conv2D[T=DT_FLOAT, data_format="NHWC", dilations=[1, 1, 1, 1], explicit_paddings=[], padding="VALID", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true](sequential_1_1/ExpandDims, sequential_1_1/conv2d_1_1/convolution/ReadVariableOp)' with input shapes: [?,100,1,1], [3,3,1,32].[0m

Arguments received by Conv2D.call():
  • inputs=tf.Tensor(shape=(None, 100, 1, 1), dtype=float32)

In [28]:
from sklearn.feature_extraction.text import CountVectorizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, Flatten, Dense, Dropout

# Assuming train_reviews_processed, test_reviews_processed, and val_reviews_processed are defined
# and contain lists of tokenized reviews.

# Vectorize text data
vectorizer = CountVectorizer(max_features=5000) # Adjust max_features as needed
train_vectors = vectorizer.fit_transform([' '.join(review) for review in train_reviews_processed])
test_vectors = vectorizer.transform([' '.join(review) for review in test_reviews_processed])
val_vectors = vectorizer.transform([' '.join(review) for review in val_reviews_processed])

# Pad sequences to ensure uniform length for input to CNN
max_len = 500 # Choose an appropriate length
X_train = pad_sequences(train_vectors.toarray(), maxlen=max_len)
X_test = pad_sequences(test_vectors.toarray(), maxlen=max_len)
X_val = pad_sequences(val_vectors.toarray(), maxlen=max_len)

# Define CNN model
model = Sequential()
model.add(Embedding(input_dim=len(vectorizer.vocabulary_)+1, output_dim=100))
model.add(Conv1D(128, 5, activation='relu'))
model.add(MaxPooling1D(5))
model.add(Conv1D(128, 5, activation='relu'))
model.add(MaxPooling1D(5))
model.add(Conv1D(128, 5, activation='relu'))
model.add(MaxPooling1D(3)) # Adjusted pool size to 3
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Assuming y_train, y_val, and y_test are defined and contain the corresponding labels
# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=128, validation_data=(X_val, y_val))

# Evaluate the model on test data
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy:", test_accuracy)


Epoch 1/10
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 170ms/step - accuracy: 0.5163 - loss: 0.6905 - val_accuracy: 0.5224 - val_loss: 0.7635
Epoch 2/10
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 161ms/step - accuracy: 0.6510 - loss: 0.6195 - val_accuracy: 0.6982 - val_loss: 0.6010
Epoch 3/10
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 174ms/step - accuracy: 0.6983 - loss: 0.5649 - val_accuracy: 0.7231 - val_loss: 0.5637
Epoch 4/10
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 167ms/step - accuracy: 0.7048 - loss: 0.5519 - val_accuracy: 0.6615 - val_loss: 0.6375
Epoch 5/10
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 196ms/step - accuracy: 0.7221 - loss: 0.5295 - val_accuracy: 0.7193 - val_loss: 0.5595
Epoch 6/10
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 182ms/step - accuracy: 0.7346 - loss: 0.5159 - val_accuracy: 0.7474 - val_loss: 0.5434
Epoch 7/10

In [41]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from kerastuner.tuners import RandomSearch

# Function to build the model with hyperparameters
def build_model(hp):
    model = Sequential()
    model.add(Embedding(input_dim=len(vectorizer.vocabulary_)+1, output_dim=hp.Int('embedding_dim', min_value=50, max_value=100, step=10)))
    
    # Add convolutional layers
    for i in range(hp.Int('num_conv_layers', 1, 3)):
        model.add(Conv1D(filters=hp.Int(f'conv_{i}_filters', min_value=64, max_value=128, step=16), 
                         kernel_size=hp.Int(f'conv_{i}_kernel_size', min_value=3, max_value=5, step=2), 
                         activation='relu'))
        model.add(MaxPooling1D(pool_size=hp.Int(f'pool_{i}_size', min_value=2, max_value=5, step=1)))

    model.add(Flatten())
    model.add(Dense(units=hp.Int('dense_units', min_value=64, max_value=128, step=16), activation='relu'))
    model.add(Dropout(rate=hp.Float('dropout_rate', min_value=0.1, max_value=0.5, step=0.1)))
    model.add(Dense(1, activation='sigmoid'))
    
    model.compile(optimizer=Adam(hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')), 
                  loss='binary_crossentropy', 
                  metrics=['accuracy'])
    
    return model


In [42]:
# Create tuner
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=5,
    executions_per_trial=3,
    directory='my_dir',
    project_name='helloworld')

# Search space summary
tuner.search_space_summary()

# Run hyperparameter search
tuner.search(X_train, y_train, epochs=5, validation_data=(X_val, y_val))

# Get best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Build model with best hyperparameters
model = tuner.hypermodel.build(best_hps)

# Define early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model with early stopping
history = model.fit(X_train, y_train, epochs=10, batch_size=128, validation_data=(X_val, y_val), callbacks=[early_stopping])

# Evaluate the model on test data
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy:", test_accuracy)

Trial 5 Complete [00h 10m 02s]
val_accuracy: 0.7646222313245138

Best val_accuracy So Far: 0.7705555359522501
Total elapsed time: 00h 51m 45s
Epoch 1/10
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 132ms/step - accuracy: 0.5995 - loss: 0.6431 - val_accuracy: 0.7555 - val_loss: 0.5300
Epoch 2/10
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 119ms/step - accuracy: 0.7244 - loss: 0.5361 - val_accuracy: 0.7716 - val_loss: 0.5084
Epoch 3/10
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 119ms/step - accuracy: 0.7412 - loss: 0.5084 - val_accuracy: 0.7423 - val_loss: 0.5305
Epoch 4/10
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 111ms/step - accuracy: 0.7505 - loss: 0.4995 - val_accuracy: 0.7684 - val_loss: 0.5009
Epoch 5/10
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 117ms/step - accuracy: 0.7664 - loss: 0.4764 - val_accuracy: 0.7523 - val_loss: 0.5235
Epoch 6/10
[1m196/196[0m [32m━

In [58]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping
from kerastuner.tuners import RandomSearch

# Function to build the model with hyperparameters
def build_model(hp):
    model = Sequential()
    model.add(Embedding(input_dim=len(vectorizer.vocabulary_)+1, output_dim=hp.Int('embedding_dim', min_value=100, max_value=300, step=50)))
    
    # Add convolutional layers
    for i in range(hp.Int('num_conv_layers', 2, 4)):
        model.add(Conv1D(filters=hp.Int(f'conv_{i}_filters', min_value=64, max_value=128, step=16), 
                         kernel_size=hp.Int(f'conv_{i}_kernel_size', min_value=3, max_value=7, step=2), 
                         activation=hp.Choice(f'conv_{i}_activation', ['relu', 'tanh', 'sigmoid']),
                         kernel_regularizer=l2(hp.Float(f'conv_{i}_kernel_regularizer', min_value=1e-6, max_value=1e-3, sampling='log'))))
        model.add(MaxPooling1D(pool_size=hp.Int(f'pool_{i}_size', min_value=2, max_value=5, step=1)))

    model.add(Flatten())
    model.add(Dense(units=hp.Int('dense_units', min_value=128, max_value=256, step=32), activation=hp.Choice('dense_activation', ['relu', 'tanh', 'sigmoid'])))
    model.add(Dropout(rate=hp.Float('dropout_rate', min_value=0.3, max_value=0.6, step=0.1)))
    model.add(Dense(1, activation='sigmoid'))
    
    optimizer_choice = hp.Choice('optimizer', ['adam', 'rmsprop'])
    learning_rate = hp.Float('learning_rate', min_value=1e-4, max_value=1e-3, sampling='log')
    if optimizer_choice == 'adam':
        optimizer = Adam(learning_rate=learning_rate)
    else:
        optimizer = RMSprop(learning_rate=learning_rate)
    
    model.compile(optimizer=optimizer, 
                  loss='binary_crossentropy', 
                  metrics=['accuracy'])
    
    return model

# Create tuner
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=10,  # Increase the number of trials for better exploration
    executions_per_trial=3,
    directory='my2_dir',
    project_name='helloworld_v2')

# Search space summary
tuner.search_space_summary()

# Run hyperparameter search
tuner.search(X_train, y_train, epochs=10, validation_data=(X_val, y_val))

# Get best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Build model with best hyperparameters
model = tuner.hypermodel.build(best_hps)

# Define early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model with early stopping
history = model.fit(X_train, y_train, epochs=20, batch_size=128, validation_data=(X_val, y_val), callbacks=[early_stopping])

# Evaluate the model on test data
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy:", test_accuracy)


Trial 1 Complete [00h 19m 47s]
val_accuracy: 0.8333333134651184

Best val_accuracy So Far: 0.8333333134651184
Total elapsed time: 00h 19m 47s

Search: Running Trial #2

Value             |Best Value So Far |Hyperparameter
100               |250               |embedding_dim
2                 |3                 |num_conv_layers
96                |64                |conv_0_filters
5                 |3                 |conv_0_kernel_size
sigmoid           |sigmoid           |conv_0_activation
2.1388e-06        |7.8941e-06        |conv_0_kernel_regularizer
5                 |4                 |pool_0_size
64                |128               |conv_1_filters
5                 |5                 |conv_1_kernel_size
tanh              |sigmoid           |conv_1_activation
3.4284e-05        |1.7368e-05        |conv_1_kernel_regularizer
4                 |3                 |pool_1_size
224               |128               |dense_units
relu              |sigmoid           |dense_activation
0.5    