In [None]:
# Get IMDB dataset
!wget https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
!tar -xf aclImdb_v1.tar.gz

In [1]:
#1 Create the BoW feature vectors
from sklearn.feature_extraction.text import CountVectorizer
import os

# Define the path to the dataset
dataset_path = 'aclImdb/'

# Load the dataset
def load_dataset(split):
    texts = []
    labels = []
    for category in ['pos', 'neg']:
        category_path = os.path.join(dataset_path, split, category)
        for filename in os.listdir(category_path):
            with open(os.path.join(category_path, filename), 'r', encoding='utf-8') as file:
                text = file.read()
                texts.append(text)
                labels.append(1 if category == 'pos' else 0)
    return texts, labels

# Load the training and testing datasets
train_texts, train_labels = load_dataset('train')
test_texts, test_labels = load_dataset('test')

# Initialize the CountVectorizer
vectorizer = CountVectorizer(max_features=5000)  # You can adjust max_features if needed

# Fit and transform the training data
X_train = vectorizer.fit_transform(train_texts)

# Transform the testing data
X_test = vectorizer.transform(test_texts)


In [2]:
#2. Logistic Regression
from sklearn.linear_model import LogisticRegression

# Initialize the Logistic Regression model
lr_model = LogisticRegression(max_iter=1000)

# Train the model
lr_model.fit(X_train, train_labels)

# Evaluate the model
lr_train_accuracy = lr_model.score(X_train, train_labels)
lr_test_accuracy = lr_model.score(X_test, test_labels)

print(f"Logistic Regression - Training Accuracy: {lr_train_accuracy:.4f}")
print(f"Logistic Regression - Testing Accuracy: {lr_test_accuracy:.4f}")


Logistic Regression - Training Accuracy: 0.9639
Logistic Regression - Testing Accuracy: 0.8514


In [3]:
# MLP
from keras.models import Sequential
from keras.layers import Dense
import numpy as np

# Convert sparse matrix to dense numpy array
X_train_dense = X_train.toarray()
X_test_dense = X_test.toarray()

# Initialize the MLP model
mlp_model = Sequential([
    Dense(64, activation='relu', input_dim=X_train.shape[1]),
    Dense(1, activation='sigmoid')
])

# Compile the model
mlp_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Convert labels to numpy array
train_labels = np.array(train_labels)
test_labels = np.array(test_labels)

# Train the model
mlp_model.fit(X_train_dense, train_labels, epochs=5, batch_size=32, validation_data=(X_test_dense, test_labels))

# Evaluate the model
mlp_train_accuracy = mlp_model.evaluate(X_train_dense, train_labels, verbose=0)[1]
mlp_test_accuracy = mlp_model.evaluate(X_test_dense, test_labels, verbose=0)[1]

print(f"MLP - Training Accuracy: {mlp_train_accuracy:.4f}")
print(f"MLP - Testing Accuracy: {mlp_test_accuracy:.4f}")

2023-10-04 13:18:14.715909: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-10-04 13:18:14.770085: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-10-04 13:18:14.770126: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-10-04 13:18:14.770152: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-10-04 13:18:14.779175: I tensorflow/core/platform/cpu_feature_g

Epoch 1/5


2023-10-04 13:18:22.480681: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f6de3b1d800 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-10-04 13:18:22.480720: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Tesla V100-SXM2-32GB, Compute Capability 7.0
2023-10-04 13:18:22.480726: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (1): Tesla V100-SXM2-32GB, Compute Capability 7.0
2023-10-04 13:18:22.486440: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-10-04 13:18:22.655935: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:442] Loaded cuDNN version 8700
2023-10-04 13:18:22.789076: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
MLP - Training Accuracy: 0.9918
MLP - Testing Accuracy: 0.8588


In [4]:
# CNN
from keras.models import Sequential
from keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

# Tokenize and pad the sequences
max_words = 5000
maxlen = 100  # Adjust as needed
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(train_texts)
X_train = tokenizer.texts_to_sequences(train_texts)
X_test = tokenizer.texts_to_sequences(test_texts)

X_train = pad_sequences(X_train, maxlen=maxlen)
X_test = pad_sequences(X_test, maxlen=maxlen)

# Initialize the CNN model with embedding layer
cnn_model = Sequential([
    Embedding(input_dim=max_words, output_dim=50, input_length=maxlen),
    Conv1D(128, 5, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(1, activation='sigmoid')
])

# Compile the model
cnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Convert labels to numpy array
train_labels = np.array(train_labels)
test_labels = np.array(test_labels)

# Train the model
cnn_model.fit(X_train, train_labels, epochs=5, batch_size=32, validation_data=(X_test, test_labels))

# Evaluate the model
cnn_train_accuracy = cnn_model.evaluate(X_train, train_labels, verbose=0)[1]
cnn_test_accuracy = cnn_model.evaluate(X_test, test_labels, verbose=0)[1]

print(f"CNN - Training Accuracy: {cnn_train_accuracy:.4f}")
print(f"CNN - Testing Accuracy: {cnn_test_accuracy:.4f}")

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
CNN - Training Accuracy: 0.9968
CNN - Testing Accuracy: 0.8500


In [7]:
# 3. SGD and Adam optimizers
import optuna
from sklearn.linear_model import LogisticRegression
from keras.optimizers import SGD, Adam

# Define the objective function for Logistic Regression
def objective_lr(trial):
    C = trial.suggest_float('C', 1e-5, 1e5)

    # Initialize the Logistic Regression model
    lr_model = LogisticRegression(max_iter=1000, C=C)

    # Train the model
    lr_model.fit(X_train, train_labels)

    # Evaluate the model
    accuracy = lr_model.score(X_test, test_labels)

    return accuracy

# Define the objective function for CNN
def objective_cnn_adam(trial):
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2)

    # Initialize the CNN model
    cnn_model = Sequential([
        Embedding(input_dim=max_words, output_dim=50, input_length=maxlen),
        Conv1D(128, 5, activation='relu'),
        GlobalMaxPooling1D(),
        Dense(1, activation='sigmoid')
    ])

    # Compile the model with Adam optimizer
    optimizer = Adam(learning_rate=learning_rate)  # Use Adam optimizer here
    cnn_model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    # Convert sparse matrices to dense
    X_train_dense = X_train
    X_test_dense = X_test

    # Train the model
    cnn_model.fit(X_train_dense, train_labels, epochs=10, batch_size=32, validation_data=(X_test_dense, test_labels), verbose=0)

    # Evaluate the model
    _, accuracy = cnn_model.evaluate(X_test_dense, test_labels, verbose=0)

    return accuracy

# Define the objective function for CNN with SGD optimizer
def objective_cnn_sgd(trial):
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2)

    # Initialize the CNN model
    cnn_model = Sequential([
        Embedding(input_dim=max_words, output_dim=50, input_length=maxlen),
        Conv1D(128, 5, activation='relu'),
        GlobalMaxPooling1D(),
        Dense(1, activation='sigmoid')
    ])

    # Compile the model with SGD optimizer
    optimizer = SGD(learning_rate=learning_rate)  # Use SGD optimizer here
    cnn_model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    # Convert sparse matrices to dense
    X_train_dense = X_train
    X_test_dense = X_test

    # Train the model
    cnn_model.fit(X_train_dense, train_labels, epochs=10, batch_size=32, validation_data=(X_test_dense, test_labels), verbose=0)

    # Evaluate the model
    _, accuracy = cnn_model.evaluate(X_test_dense, test_labels, verbose=0)

    return accuracy

# Create a study for Logistic Regression
study_lr = optuna.create_study(direction='maximize')
study_lr.optimize(objective_lr, n_trials=5)

# Get the best hyperparameters for Logistic Regression
best_params_lr = study_lr.best_params
best_accuracy_lr = study_lr.best_value

# Create a study for CNN using adam optimizer
study_cnn_adam = optuna.create_study(direction='maximize')
study_cnn_adam.optimize(objective_cnn_adam, n_trials=50)

# Create a study for CNN using adam optimizer
study_cnn_sgd = optuna.create_study(direction='maximize')
study_cnn_sgd.optimize(objective_cnn_sgd, n_trials=50)

# Get the best hyperparameters for CNN-adam
best_params_cnn_adam = study_cnn_adam.best_params
best_accuracy_cnn_adam = study_cnn_adam.best_value

# Get the best hyperparameters for CNN-sgd
best_params_cnn_sgd = study_cnn_sgd.best_params
best_accuracy_cnn_sgd = study_cnn_sgd.best_value

# Print the results
print(f"Logistic Regression - Best Parameters: {best_params_lr}")
print(f"Logistic Regression - Best Accuracy: {best_accuracy_lr:.4f}")

print(f"CNN(Adam optimizer) - Best Parameters: {best_params_cnn_adam}")
print(f"CNN(Adam optimizer) - Best Accuracy: {best_accuracy_cnn_adam:.4f}")

print(f"CNN(SGD optimizer) - Best Parameters: {best_params_cnn_sgd}")
print(f"CNN(SGD optimizer) - Best Accuracy: {best_accuracy_cnn_sgd:.4f}")


[I 2023-10-04 18:53:25,514] A new study created in memory with name: no-name-3b54b0ab-6121-435d-b22c-87356bb65c4b


[I 2023-10-04 18:53:25,606] Trial 0 finished with value: 0.5116 and parameters: {'C': 97285.13274100378}. Best is trial 0 with value: 0.5116.
[I 2023-10-04 18:53:25,703] Trial 1 finished with value: 0.5116 and parameters: {'C': 44666.142708819956}. Best is trial 0 with value: 0.5116.
[I 2023-10-04 18:53:25,835] Trial 2 finished with value: 0.5116 and parameters: {'C': 35911.804711238874}. Best is trial 0 with value: 0.5116.
[I 2023-10-04 18:53:25,975] Trial 3 finished with value: 0.5116 and parameters: {'C': 17140.220305938765}. Best is trial 0 with value: 0.5116.
[I 2023-10-04 18:53:26,119] Trial 4 finished with value: 0.5116 and parameters: {'C': 48814.04590725136}. Best is trial 0 with value: 0.5116.
[I 2023-10-04 18:53:26,121] A new study created in memory with name: no-name-92d6f70b-c0e9-40f0-9d0d-7d7bed6b97e9
[I 2023-10-04 18:54:55,519] Trial 0 finished with value: 0.8367999792098999 and parameters: {'learning_rate': 0.0004145185953903112}. Best is trial 0 with value: 0.836799979

Logistic Regression - Best Parameters: {'C': 97285.13274100378}
Logistic Regression - Best Accuracy: 0.5116
CNN(Adam optimizer) - Best Parameters: {'learning_rate': 0.0031619903466632814}
CNN(Adam optimizer) - Best Accuracy: 0.8610
CNN(SGD optimizer) - Best Parameters: {'learning_rate': 0.009741775202271735}
CNN(SGD optimizer) - Best Accuracy: 0.6669


# Comparison of Results:

## Logistic Regression

- Best Parameters: {'C': 97285.13274100378}
- Accuracy: 0.5116

This result is slightly better than random guessing. The 'C' parameter denotes the inverse of regularization strength, with smaller values specifying stronger regularization.

## CNN (Adam optimizer)

- Best Parameters: {'learning_rate': 0.0031619903466632814}
- Accuracy: 0.8610

This suggests that Adam provides a much better accuracy for the given problem using the Convolutional Neural Network (CNN) framework.

## CNN (SGD optimizer)

- Best Parameters: {'learning_rate': 0.009741775202271735}
- Accuracy: 0.6669

Although SGD is a widely used optimizer, in this instance, it lagged behind Adam in terms of accuracy.

# Replication and Merits of Adam vs. SGD:

The paper suggests that while Adam tends to converge faster than vanilla SGD and SGD with momentum, it may not generalize as well. However, well-tuned SGD with momentum can surpass Adam in terms of both training and test error. This might align with the results provided, where Adam has higher accuracy than SGD in the CNN models.

The claim of the “marginal value” of adaptive gradient methods (as cited in [222]) did not deter the growing use of Adam, especially in other domains such as GANs and reinforcement learning.

# Hyperparameter Optimization:

The provided results indicate that hyperparameter tuning was performed, given the 'Best Parameters' for each model. The paper remarks on the "tunability" of Adam, suggesting that it is more flexible and less sensitive to hyperparameter choices compared to SGD.

As mentioned on page 24, hyperparameter optimization might render the empirical results less relevant. This is because optimized hyperparameters can overshadow the innate benefits of an algorithm, making it challenging to make general comparisons.
