In [55]:
import tensorflow as tf

In [56]:
# Report which TensorFlow release this kernel is running.
print(tf.version.VERSION)

2.2.0


In [57]:
import tensorflow_datasets as tfds

In [58]:
# Load the IMDB reviews dataset together with its metadata object.
imdb, info = tfds.load(
    name="imdb_reviews",
    with_info=True,
    as_supervised=True,
)

In [59]:
import numpy as np
# Pull the 'train' and 'test' splits out of the loaded dataset mapping.
train_data = imdb['train']
test_data = imdb['test']

In [60]:
# Show the dataset class of each split, one per line.
print(type(train_data), type(test_data), sep="\n")



<class 'tensorflow.python.data.ops.dataset_ops.PrefetchDataset'>
<class 'tensorflow.python.data.ops.dataset_ops.PrefetchDataset'>


In [93]:
# Empty containers for the review texts and their labels, one pair per split.
training_sentences, train_labels = [], []
testing_sentences, testing_labels = [], []

In [62]:
# Inspect the current contents of the training-sentence list.
print(training_sentences)

[]


In [94]:
# Extracting labels and sentences from tensors
def _split_to_lists(dataset):
    """Materialise a dataset of (sentence, label) tensor pairs as two lists.

    Each sentence tensor is converted with ``.numpy()`` and decoded from
    UTF-8 bytes to ``str``; each label tensor is converted with ``.numpy()``.

    Args:
        dataset: an iterable yielding ``(sentence, label)`` pairs.

    Returns:
        A ``(sentences, labels)`` tuple of lists in iteration order.
    """
    sentences, labels = [], []
    for sentence, label in dataset:
        sentences.append(sentence.numpy().decode('utf8'))
        labels.append(label.numpy())
    return sentences, labels


# Rebind the lists rather than appending to pre-existing ones, so that
# re-running this cell does not duplicate every review and label.
training_sentences, train_labels = _split_to_lists(train_data)
testing_sentences, testing_labels = _split_to_lists(test_data)


In [64]:
# Show the container type, then how many items three of the lists hold.
print(
    type(training_sentences),
    len(training_sentences),
    len(train_labels),
    len(testing_sentences),
    sep="\n",
)

<class 'list'>
25000
25000
25000


In [65]:
# Sanity check: print the first review of each split next to its own label.
print(testing_sentences[0], testing_labels[0])
# The training review is paired with the training label at the same index,
# not with a label taken from the test split.
print(training_sentences[0], train_labels[0])

There are films that make careers. For George Romero, it was NIGHT OF THE LIVING DEAD; for Kevin Smith, CLERKS; for Robert Rodriguez, EL MARIACHI. Add to that list Onur Tukel's absolutely amazing DING-A-LING-LESS. Flawless film-making, and as assured and as professional as any of the aforementioned movies. I haven't laughed this hard since I saw THE FULL MONTY. (And, even then, I don't think I laughed quite this hard... So to speak.) Tukel's talent is considerable: DING-A-LING-LESS is so chock full of double entendres that one would have to sit down with a copy of this script and do a line-by-line examination of it to fully appreciate the, uh, breadth and width of it. Every shot is beautifully composed (a clear sign of a sure-handed director), and the performances all around are solid (there's none of the over-the-top scenery chewing one might've expected from a film like this). DING-A-LING-LESS is a film whose time has come. 1
This was an absolutely terrible movie. Don't be lured in b

In [66]:
# Check the container type of the test labels before converting them.
print(type(testing_labels))

<class 'list'>


In [95]:
# Turn both Python label lists into NumPy arrays.
train_labels_final, testing_labels_final = (
    np.array(label_list) for label_list in (train_labels, testing_labels)
)

In [68]:
# Show the shape of each label array, one per line.
print(train_labels_final.shape, testing_labels_final.shape, sep="\n")

(25000,)
(25000,)


In [96]:
# Hyperparameters shared by the tokenizer, the padding step and the model
vocab_size, embedding_dim, max_length = 10_000, 16, 120
# Truncation side for over-long sequences, and the out-of-vocabulary token.
trunc_type, oov_tok = 'post', '<OOV>'

In [97]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [98]:
# Fit the vocabulary on the training reviews only.
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index

# Encode the training reviews as integer sequences of uniform length.
sequences = tokenizer.texts_to_sequences(training_sentences)
padded = pad_sequences(sequences, maxlen=max_length, truncating=trunc_type)

# Encode the test reviews with the same tokenizer and the same truncation
# side as the training data. If `truncating` is left out, pad_sequences uses
# its own default instead of `trunc_type`, so the two splits would keep
# different ends of over-long reviews.
testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(
    testing_sequences, maxlen=max_length, truncating=trunc_type
)

In [79]:
# Show each padded matrix next to the number of labels for the same split.
print(testing_padded.shape)
print(len(testing_labels_final))
print(padded.shape)
print(len(train_labels_final))

# Every padded row needs exactly one label; fail here rather than during fit.
assert testing_padded.shape[0] == len(testing_labels_final)
assert padded.shape[0] == len(train_labels_final)

(25000, 120)
25000
(25000, 120)
25000
(25000, 120)


In [80]:
# Invert the tokenizer vocabulary so integer ids can be looked up as words.
reverse_word_index = {index: word for word, index in word_index.items()}

def decode_review(text):
    """Turn a sequence of token ids back into a space-separated string.

    Ids that are missing from ``reverse_word_index`` are rendered as '?'.
    """
    words = (reverse_word_index.get(token_id, '?') for token_id in text)
    return ' '.join(words)

# Compare the decoded padded sequence with the original text of review 3.
print(decode_review(padded[3]), training_sentences[3], sep="\n")

? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? this is the kind of film for a snowy sunday afternoon when the rest of the world can go ahead with its own business as you <OOV> into a big arm chair and <OOV> for a couple of hours wonderful performances from cher and nicolas cage as always gently row the plot along there are no <OOV> to cross no dangerous waters just a warm and witty <OOV> through new york life at its best a family film in every sense and one that deserves the praise it received
This is the kind of film for a snowy Sunday afternoon when the rest of the world can go ahead with its own business as you descend into a big arm-chair and mellow for a couple of hours. Wonderful performances from Cher and Nicolas Cage (as always) gently row the plot along. There are no rapids to cross, no dangerous waters, just a warm and witty paddle through New York life at its best. A family film in every sense and one that deserves the praise it received.


In [99]:
# Defining the model: embedding -> flatten -> 6-unit ReLU layer -> sigmoid unit
model = tf.keras.Sequential()
model.add(
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length)
)
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(6, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

In [100]:
# Compile the model with binary cross-entropy loss, the Adam optimizer and
# accuracy tracking, then print the layer summary.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_8 (Embedding)      (None, 120, 16)           160000    
_________________________________________________________________
flatten_8 (Flatten)          (None, 1920)              0         
_________________________________________________________________
dense_16 (Dense)             (None, 6)                 11526     
_________________________________________________________________
dense_17 (Dense)             (None, 1)                 7         
Total params: 171,533
Trainable params: 171,533
Non-trainable params: 0
_________________________________________________________________


In [102]:
# Train the model, passing the padded test split as validation data.
num_epochs = 10
model.fit(
    x=padded,
    y=train_labels_final,
    epochs=num_epochs,
    validation_data=(testing_padded, testing_labels_final),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fdef4201908>