### IMDB RNN Using TensorFlow and Keras
Sentiment analysis on the IMDB movie review dataset.

# use bash install to ensure environment setup
!pip install tensorflow

In [1]:
# Configure GPU visibility and memory behaviour for TensorFlow.

import os

# Expose only the first GPU. This is set before TensorFlow is imported so the
# restriction is already in place whenever CUDA devices get enumerated.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import tensorflow as tf

print("Built with CUDA: ", tf.test.is_built_with_cuda())

gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized.
        # The error is reported here only; memory growth is deliberately not
        # retried outside this handler, since that would re-raise it uncaught.
        print(e)
else:
    print("No GPU was detected. This code will run on CPU.")


2023-11-09 21:37:39.457476: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from pandas.core.computation.check import NUMEXPR_INSTALLED


Built with CUDA:  True
No GPU was detected. This code will run on CPU.


2023-11-09 21:37:42.811966: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:268] failed call to cuInit: CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE: forward compatibility was attempted on non supported HW
2023-11-09 21:37:42.812012: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:168] retrieving CUDA diagnostic information for host: neutrino
2023-11-09 21:37:42.812027: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:175] hostname: neutrino
2023-11-09 21:37:42.812166: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:199] libcuda reported version is: 470.223.2
2023-11-09 21:37:42.812191: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:203] kernel reported version is: 470.199.2
2023-11-09 21:37:42.812198: E tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:312] kernel version 470.199.2 does not match DSO version 470.223.2 -- cannot find working devices in this configuration


In [2]:
# make necessary imports
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence

**Load and Prepare the IMDB dataset for training**

In [3]:
# Vocabulary size: keep only the `max_features` most frequent words.
max_features = 10000

# Fixed sequence length: shorter reviews are padded, longer ones are truncated.
maxlen = 500

# Load the IMDB dataset, already split into training and test sets.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

# Reverse the word order of every review, then pad/truncate to `maxlen`.
x_train = sequence.pad_sequences([review[::-1] for review in x_train], maxlen=maxlen)
x_test = sequence.pad_sequences([review[::-1] for review in x_test], maxlen=maxlen)

**Create the RNN Architecture using Keras**

In [4]:
# Input: batches of variable-length sequences of integer word indices.
inputs = Input(shape=(None,), dtype="int32")

# Embedding: map each of the `max_features` word indices to a 128-dim vector.
x = tf.keras.layers.Embedding(input_dim=max_features, output_dim=128)(inputs)

# First LSTM (64 units) emits its output at every timestep so that the
# next recurrent layer receives a full sequence.
x = LSTM(units=64, return_sequences=True)(x)

# Second LSTM (64 units) emits only its final output.
x = LSTM(units=64)(x)

# Single sigmoid unit for the binary sentiment prediction.
outputs = Dense(units=1, activation="sigmoid")(x)

# Assemble the functional model and print its layer summary as a sanity check.
model = Model(inputs=inputs, outputs=outputs)
model.summary()


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None)]            0         
                                                                 
 embedding (Embedding)       (None, None, 128)         1280000   
                                                                 
 lstm (LSTM)                 (None, None, 64)          49408     
                                                                 
 lstm_1 (LSTM)               (None, 64)                33024     
                                                                 
 dense (Dense)               (None, 1)                 65        
                                                                 
Total params: 1362497 (5.20 MB)
Trainable params: 1362497 (5.20 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


**Compile the Model**

In [5]:
# Adam optimizer with binary cross-entropy loss, tracking accuracy,
# to match the model's single sigmoid output.
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

**Train the Model**

In [6]:
# Fit on the training set for 10 epochs in mini-batches of 128 samples,
# holding out 20% of the training data for validation.
history = model.fit(
    x_train,
    y_train,
    epochs=10,
    batch_size=128,
    validation_split=0.2,
)

Epoch 1/10
  3/157 [..............................] - ETA: 2:49 - loss: 0.6932 - accuracy: 0.4948

KeyboardInterrupt: 

**Evaluate the Model**

In [None]:
# Score the model on the test set; the result unpacks into loss and accuracy.
test_loss, test_acc = model.evaluate(x=x_test, y=y_test)

# Report both the loss and the accuracy.
print(f"Test Loss: {test_loss}, Test Accuracy: {test_acc}")

Test Loss: 0.5500058531761169, Test Accuracy: 0.8198400139808655


**Make Predictions**

In [None]:
# Run the model over every test-set sample and keep its outputs.
predictions = model.predict(x=x_test)


**Save the model**

In [None]:
# Persist the model to an .h5 file at a path relative to the working directory.
model.save(filepath="imdb_rnn_model.h5")


In [None]:
# Show the model outputs computed by model.predict on the test set
# (one value per sample, produced by the final sigmoid unit).
print(predictions)

[[0.0091227 ]
 [0.99352515]
 [0.9960085 ]
 ...
 [0.01743037]
 [0.29827625]
 [0.29701084]]
