In [0]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models, preprocessing
import tensorflow_datasets as tfds

In [0]:
max_len = 200   #Max length of all sentences (all sentences padded to this value)
n_words = 10000   #Only takes into account the 10000 most freq words in the imdb dataset
dim_embedding = 256  #No. of elements to represent each word

In [0]:
def load_data():
  (x_train,y_train),(x_test,y_test) = datasets.imdb.load_data(num_words=n_words)   

  #Pad the sentences to all be of same length
  x_train = preprocessing.sequence.pad_sequences(x_train,maxlen=max_len,)
  x_test = preprocessing.sequence.pad_sequences(x_test,maxlen=max_len)
  return (x_train, y_train), (x_test, y_test)


In [0]:
def build_model():
  model = models.Sequential()

  #Add embedding layer (To represent the sentences as matrices)
  model.add(layers.Embedding(input_dim = n_words ,output_dim = dim_embedding, input_length = max_len))
  model.add(layers.Dropout(0.3))
  model.add(layers.Conv1D(256 , 3,padding='valid',activation='relu'))
  model.add(layers.GlobalMaxPooling1D())
  model.add(layers.Dense(128,activation='relu'))
  model.add(layers.Dropout(0.5))
  model.add(layers.Dense(1,activation='sigmoid'))
  return model

In [5]:
(x_train,y_train),(x_test, y_test) = load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [6]:
model = build_model()
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 200, 256)          2560000   
_________________________________________________________________
dropout (Dropout)            (None, 200, 256)          0         
_________________________________________________________________
conv1d (Conv1D)              (None, 198, 256)          196864    
_________________________________________________________________
global_max_pooling1d (Global (None, 256)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               32896     
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 1

In [7]:
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

score = model.fit(x_train,y_train,batch_size=500, epochs = 200, validation_data = (x_test, y_test))

score = model.evaluate(x_test,y_test,batch_size = 500)

print(f'Test score:{score[0]}')
print(f'Test acc: {score[1]}')

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78