In [70]:
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
from tensorflow.keras import models
from tensorflow.keras import layers

In [60]:
print(tf.__version__)

1.13.1


In [61]:
imdbReviews = keras.datasets.imdb
(train_images, train_labels), (test_images, test_labels) = imdbReviews.load_data(num_words=10000)

In [62]:
train_images.shape

(25000,)

In [63]:
data = np.concatenate((train_images, test_images), axis=0)
targets = np.concatenate((train_labels, test_labels), axis=0)

## Random stuff

In [64]:
print("Categories:", np.unique(targets))
print("Number of unique words:", len(np.unique(np.hstack(data))))

Categories: [0 1]
Number of unique words: 9998


In [65]:
length = [len(i) for i in data]
print("Average Review length:", np.mean(length))

Average Review length: 234.75892


## Vectorizing the data

In [66]:
# Vectorizing all reviews and filling them with 0s so that they all contain 10,000 numbers.

def vectorize(sequences, dimension = 10000):
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1
    return results 

data = vectorize(data)
targets = np.array(targets).astype("float32")

## Splitting the data

In [67]:
# test will contain 10,000 reviews
test_x = data[:10000]
test_y = targets[:10000]

# traing will contain 40,000 reviews
train_x = data[10000:]
train_y = targets[10000:]

In [73]:
# Dropout rate has been kept between 20% - 50%
model = models.Sequential()

# Input - Layer
# RELU supposedly gives a good start and yields a satisfactory result.
model.add(layers.Dense(50, activation = "relu", input_shape=(10000, )))

# Hidden - Layers
model.add(layers.Dropout(0.3, noise_shape=None, seed=None))
model.add(layers.Dense(50, activation = "relu"))
model.add(layers.Dropout(0.2, noise_shape=None, seed=None))
model.add(layers.Dense(50, activation = "relu"))

# Output- Layer
model.add(layers.Dense(1, activation = "sigmoid"))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 50)                500050    
_________________________________________________________________
dropout_2 (Dropout)          (None, 50)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 50)                2550      
_________________________________________________________________
dropout_3 (Dropout)          (None, 50)                0         
_________________________________________________________________
dense_6 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 51        
Total params: 505,201
Trainable params: 505,201
Non-trainable params: 0
_________________________________________________________________


## Compile the model

In [74]:
model.compile(
 optimizer = "adam",
 loss = "binary_crossentropy",
 metrics = ["accuracy"]
)

## Result

In [75]:
results = model.fit(
 train_x, train_y,
 epochs= 2,
 batch_size = 500,
 validation_data = (test_x, test_y)
)

Train on 40000 samples, validate on 10000 samples
Instructions for updating:
Use tf.cast instead.
Epoch 1/2
Epoch 2/2


## Accuracy

In [77]:
print("Test Accuracy:", np.mean(results.history["val_acc"]))

Test Accuracy: 0.8929
