In [None]:
import os
import numpy as np

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds

In [None]:
print('TensorFlow version:: ', tf.__version__)
print('Eager Training', tf.executing_eagerly())
print('TF hub version', hub.__version__)

### **Downloading Imdb Dataset**
> **Divide dataset into training and validation sets**

In [3]:
# tfds uses same datasets tfds.load function to import data
# params [name: 'Name of dataset', split: 'Train/Validation ratio', 'test', as_supervised: True/False ]
#  

data = tfds.load(
    name= 'imdb_reviews',
    split=('train[60%:]', 'train[:60%]', 'test'),
    as_supervised= True
)

[1mDownloading and preparing dataset imdb_reviews/plain_text/1.0.0 (download: 80.23 MiB, generated: Unknown size, total: 80.23 MiB) to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...[0m


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]





0 examples [00:00, ? examples/s]

Shuffling and writing examples to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incompleteDV98OC/imdb_reviews-train.tfrecord


  0%|          | 0/25000 [00:00<?, ? examples/s]

0 examples [00:00, ? examples/s]

Shuffling and writing examples to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incompleteDV98OC/imdb_reviews-test.tfrecord


  0%|          | 0/25000 [00:00<?, ? examples/s]

0 examples [00:00, ? examples/s]

Shuffling and writing examples to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incompleteDV98OC/imdb_reviews-unsupervised.tfrecord


  0%|          | 0/50000 [00:00<?, ? examples/s]



[1mDataset imdb_reviews downloaded and prepared to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0. Subsequent calls will reuse this data.[0m


### **Exploring the dataset**

In [4]:
tr_data, val_data, ts_data = data

# Printing first 10 data texts and their labels
sample_data, sample_labels = next(iter(tr_data.batch(10)))
print(f'Printing out sample Data::\n', sample_data.numpy(), '\n') 
print(f'Printing out sample labels::\n', sample_labels.numpy(), '\n') 


Printing out sample Data::
 [b"This was a superb episode, one of the best of both seasons. Down right horror for a change, with a story that is way way above the average MOH episodes, if there is such a thing. A man's wife is almost burned to death in a tragic car wreck, in which he was driving. His airbag worked, her's didn't. She is burned beyond recognition (great makeup btw), and not given much of a chance to live without a full skin graft. BUT, even in a coma, she keeps dying but brought back by modern technology, and when she does die for a few minutes, her ghost appears as a very vengeful spirit. Carnage of course ensues, and also some extremely gory killings, and also, some extremely sexy scenes. What more could you ask for, you might ask? Well, not much, because this baby has it all, and a very satirical ending, that should leave a smile on most viewers faces. I just loved Rob Schmidt's (Wrong Turn) direction too, he has a great knack for horror. Excellent episode, this is one

### **Model Architecture!!**
> * **Layer 1: Embeddings Layer `from tf.hub`**
> * **Layer 2: Inner Dense Layer to capture data from pretrained model**
> * **Layer 3: Output layer, channels= 1**

In [5]:
# Download the embeddings Model from Tensorflow-hub
embeddings = hub.load('https://tfhub.dev/google/nnlm-en-dim50/2')

# Wrap the downloaded Model into a KerasLayer 
# Input dimensions are [n, 50], where text_dimensions= 50
embd_lyr = hub.KerasLayer(embeddings, input_shape= [], dtype=tf.string, trainable=True)

# Test the example on sample data
embd_lyr(sample_data[:2])

<tf.Tensor: shape=(2, 50), dtype=float32, numpy=
array([[ 9.98500526e-01,  1.42269671e-01, -1.08490080e-01,
         5.50904214e-01, -1.82107440e-03, -1.70186520e-01,
         4.80401814e-02,  8.29843059e-02, -5.55341125e-01,
         3.47942233e-01, -3.75973806e-03,  1.84928983e-01,
         1.17614634e-01,  4.45207134e-02, -2.78748691e-01,
        -4.36327457e-01, -2.43938137e-02,  3.45040441e-01,
        -3.77704911e-02, -9.74594891e-01,  1.92764744e-01,
        -1.12006068e-01,  4.88444090e-01,  1.42627388e-01,
        -2.61446387e-01,  3.25711310e-01, -8.75484943e-01,
         1.75215572e-01,  4.11457866e-01, -5.22618234e-01,
        -3.05680692e-01,  5.29205263e-01,  2.97499716e-01,
        -2.20408395e-01, -2.85263330e-01,  3.97474617e-01,
         2.70368874e-01,  4.52965163e-02,  2.39334926e-01,
        -3.49456549e-01,  4.99933735e-02, -9.78616029e-02,
        -2.74775624e-01,  2.16394931e-01, -5.04736841e-01,
        -3.61713499e-01, -1.76829726e-01, -1.82192802e-01,
       

### **Creating the Model**

In [7]:
from tensorflow.keras import optimizers
import tensorflow.keras as tfk
import tensorflow.keras.layers as lyrs


model = tf.keras.Sequential()
model.add(embd_lyr)
model.add(lyrs.Dense(24, activation='relu'))
model.add(lyrs.Dropout(0.2))
model.add(lyrs.Dense(1, activation='sigmoid'))

model.summary()

model.compile(
    loss=tfk.losses.BinaryCrossentropy(from_logits=True),
    optimizer=tfk.optimizers.Adam(),
    metrics=tf.metrics.BinaryAccuracy()
)


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer (KerasLayer)    (None, 50)                48190600  
                                                                 
 dense_1 (Dense)             (None, 24)                1224      
                                                                 
 dropout (Dropout)           (None, 24)                0         
                                                                 
 dense_2 (Dense)             (None, 1)                 25        
                                                                 
Total params: 48,191,849
Trainable params: 48,191,849
Non-trainable params: 0
_________________________________________________________________


In [None]:
model_hsit = model.fit(
    tr_data.shuffle(10000).batch(512),
    epochs=10,
    validation_data= val_data.batch(512),
    verbose=1
)

loss, acc = model.evaluate(
    ts_data.batch(512),
    verbose=2
)

print(f'loss: {loss}')
print(f'Accuracy: {acc}')



> * **This Model overfit inits first training so we are going to use dropout**
> * **In our second model, we are going to use Pretrained Embeddings Model with Normalization to keep check on over-fitting**



### **Retrying with a different pre-trained model**

**`This Last Cell is not working due to some bug, and I can't fix-it`**

In [17]:
from tensorflow.python.keras import activations
# embeddings2 = hub.load('https://tfhub.dev/google/nnlm-en-dim128-with-normalization/2')
# Using a different Model to overcome this error
embeddings2 = hub.load('https://tfhub.dev/google/nnlm-en-dim128/2')
embd_lyr2 = hub.KerasLayer(embeddings, input_shape= (), dtype=tf.string, trainable=True)

# Creating the 2nd Model using Keras.Sequential
model2 = tfk.Sequential([
        embd_lyr2,
        lyrs.Dense(32, activation= 'relu'),
        lyrs.Dense(1, activation='sigmoid')
], name= 'Classification_Model_with_Normalization')

model.summary()

model2.compile(
    loss=tfk.losses.BinaryCrossentropy(from_logits=True),
    optimizer=tfk.optimizers.Adam(),
    metrics=tf.metrics.BinaryAccuracy()
)

mod2_hist = model.fit(
    tr_data.shuffle(10000).batch(512),
    epochs= 15,
    validation_data= val_data,
    verbose= 1
)

# for name, value in zip(model2.metrics_names, results):
#   print(f'{name}, {value}')


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer (KerasLayer)    (None, 50)                48190600  
                                                                 
 dense_1 (Dense)             (None, 24)                1224      
                                                                 
 dropout (Dropout)           (None, 24)                0         
                                                                 
 dense_2 (Dense)             (None, 1)                 25        
                                                                 
Total params: 48,191,849
Trainable params: 48,191,849
Non-trainable params: 0
_________________________________________________________________
Epoch 1/15




ValueError: ignored