In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import pandas as pd 

import tensorflow as tf

import tensorflow_hub as hub
import tensorflow_datasets as tfds

print("Version: ", tf.__version__)
print("Eager mode: ", tf.executing_eagerly())
print("Hub version: ", hub.__version__)
print("GPU is", "available" if tf.config.experimental.list_physical_devices("GPU") else "NOT AVAILABLE")

Version:  2.0.0
Eager mode:  True
Hub version:  0.7.0
GPU is NOT AVAILABLE


In [2]:
# Split the training set into 60% and 40%, so we'll end up with 15,000 examples
# for training, 10,000 examples for validation and 25,000 examples for testing.
train_validation_split = tfds.Split.TRAIN.subsplit([6, 4])

(train_data, validation_data), test_data = tfds.load(
    name="imdb_reviews", 
    split=(train_validation_split, tfds.Split.TEST),
    as_supervised=True)

In [3]:
embedding = "https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim/1"
hub_layer = hub.KerasLayer(embedding, input_shape=[], 
                           dtype=tf.string, trainable=True)

In [4]:
model = tf.keras.Sequential()
model.add(hub_layer)
model.add(tf.keras.layers.Dense(16, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer (KerasLayer)     (None, 20)                400020    
_________________________________________________________________
dense (Dense)                (None, 16)                336       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 17        
Total params: 400,373
Trainable params: 400,373
Non-trainable params: 0
_________________________________________________________________


In [5]:
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [6]:
history = model.fit(train_data.shuffle(10000).batch(512),
                    epochs=20,
                    validation_data=validation_data.batch(512),
                    verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [7]:
results = model.evaluate(test_data.batch(512), verbose=2)

for name, value in zip(model.metrics_names, results):
  print("%s: %.3f" % (name, value))

49/49 - 11s - loss: 0.3191 - accuracy: 0.8632
loss: 0.319
accuracy: 0.863


In [17]:
history.model.weights

[<tf.Variable 'Variable:0' shape=(20001, 20) dtype=float32, numpy=
 array([[-0.60565156, -0.01413231,  0.79470503, ...,  0.7346931 ,
         -0.341707  , -0.43011868],
        [-1.2047577 ,  0.21791467,  1.1384295 , ...,  0.74889004,
         -0.41222748,  0.11590002],
        [-1.1825986 , -0.27781248, -1.8687841 , ..., -1.071256  ,
          1.0310836 ,  0.14684631],
        ...,
        [ 0.13454673, -0.15173355,  0.5368477 , ...,  0.8319293 ,
         -0.61366147, -1.0842572 ],
        [ 0.20673902, -0.1418509 , -0.5856179 , ..., -0.02331898,
          1.6080966 , -0.6525672 ],
        [ 0.03065004, -0.0212786 , -0.06213012, ..., -0.08174069,
          0.03637259,  0.00763808]], dtype=float32)>,
 <tf.Variable 'dense/kernel:0' shape=(20, 16) dtype=float32, numpy=
 array([[-0.17215075, -0.29930273, -0.30928624, -0.4828537 ,  0.28336754,
         -0.08719461,  0.23182857,  0.01301501, -0.16657364, -0.1382503 ,
          0.45111033,  0.1275746 , -0.12049769,  0.32172298,  0.05298095,


In [9]:
predictions = model.predict (test_data.batch(512), verbose=2)

49/49 - 13s


In [10]:
predictions.shape

(25000, 1)

In [12]:
predictions[:10]

array([[0.9310888 ],
       [0.439357  ],
       [0.99846447],
       [0.99758685],
       [0.46627742],
       [0.9876996 ],
       [0.5802702 ],
       [0.9912725 ],
       [0.98471165],
       [0.27497506]], dtype=float32)

In [13]:
def getOutputDF (size):
    test_examples_batch, test_labels_batch = next(iter(test_data.batch(size)))
    LabelDf = pd.DataFrame (test_labels_batch,columns =  ['Sentiment'])
    TextDf = pd.DataFrame (test_examples_batch,columns =  ['Text'])
    DataDf = TextDf.join (LabelDf)
    DataDf ['Predicted'] = predictions[:size]
    return DataDf

In [14]:
getOutputDF (10)

Unnamed: 0,Text,Sentiment,Predicted
0,tf.Tensor(I've watched the movie actually seve...,1,0.931089
1,"tf.Tensor(If you love Japanese monster movies,...",1,0.439357
2,"tf.Tensor(A film for mature, educated audience...",1,0.998464
3,tf.Tensor(I never saw any of The League's work...,1,0.997587
4,"tf.Tensor(To start off, this happens to be my ...",1,0.466277
5,tf.Tensor(`Shadow Magic' recaptures the joy an...,1,0.9877
6,tf.Tensor(I wish I could have voted this movie...,0,0.58027
7,tf.Tensor(Set in 1962 Hong Kong (in turbulent ...,1,0.991273
8,tf.Tensor(Good footage of World War I-era ship...,1,0.984712
9,tf.Tensor(I am from Romania ... and for that i...,0,0.274975


In [18]:
OutputDF = getOutputDF (25000)

In [19]:
OutputDF.to_csv ('Predicted_Sentiment.csv',index=False)

In [21]:
#!mkdir -p saved_model
history.model.save('my_model.h5')