In [94]:
import logging
import argparse 
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras
import tensorflow_datasets as tfds

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
log = logging.getLogger()
%config Completer.use_jedi = False # make autocompletion works in jupyter

args = argparse.Namespace()
args.data_folder = './data-ignored/imdb/'
args.val_fraction = 0.25
args.vocab_size = 2500
args.small_vocab_size = 250
args.epochs = 50
args.batch_size = 32

Path(args.data_folder).mkdir(parents=True, exist_ok=True)

ds, info = tfds.load('imdb_reviews', with_info=True, as_supervised=True, data_dir=args.data_folder)
train_ds_len= tf.data.experimental.cardinality(ds['train']).numpy()
test_ds_len= tf.data.experimental.cardinality(ds['test']).numpy() 
print(train_ds_len)
for d in ds['train'].take(1):
    print(d)
    
# train_dataset = ds['train'].batch(args.batch_size)
train_dataset = ds['train']
val_dataset = ds['test'].take(int(args.val_fraction * (train_ds_len + test_ds_len)))
test_dataset = ds['test'].skip(int(args.val_fraction * (train_ds_len + test_ds_len)))

2022-01-17 15:00:51,717 : INFO : No config specified, defaulting to first: imdb_reviews/plain_text
2022-01-17 15:00:51,739 : INFO : Load dataset info from ./data-ignored/imdb/imdb_reviews/plain_text/1.0.0
2022-01-17 15:00:51,771 : INFO : Reusing dataset imdb_reviews (./data-ignored/imdb/imdb_reviews/plain_text/1.0.0)
2022-01-17 15:00:51,772 : INFO : Constructing tf.data.Dataset imdb_reviews for split None, from ./data-ignored/imdb/imdb_reviews/plain_text/1.0.0


25000
(<tf.Tensor: shape=(), dtype=string, numpy=b"This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor's like Christopher Walken's good name. I could barely sit through it.">, <tf.Tensor: shape=(), dtype=int64, numpy=0>)


2022-01-17 15:00:52.083377: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


In [95]:
import functools

@functools.lru_cache(maxsize=10)
def get_encoder(vocab_size=args.vocab_size):
    """Return a TextVectorization layer adapted on the training texts.

    The result is memoised with ``functools.lru_cache``, so the (slow)
    ``adapt`` pass over ``train_dataset`` runs once per distinct call
    signature and later calls return the same layer object.

    Args:
        vocab_size: Passed to ``TextVectorization`` as ``max_tokens``. The
            default is read from ``args.vocab_size`` once, when this function
            is defined; later changes to ``args.vocab_size`` do not affect it.

    Returns:
        The adapted ``TextVectorization`` layer.
    """
    # Imported locally so this function works even when it is called before
    # the later cell that imports TextVectorization at notebook level.
    from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

    encoder = TextVectorization(max_tokens=vocab_size)
    # Drop the labels: the vocabulary is built from the review texts only.
    encoder.adapt(train_dataset.map(lambda text, label: text))
    return encoder

### Baseline. Bag of words

In [3]:
### Baseline. Bag of words. Preprocessing in dataset creation step.

from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

def baseline_bag_of_words():
    """Train a dense classifier on bag-of-words counts built in the tf.data pipeline.

    Each review is tokenised with the shared encoder from ``get_encoder()``
    and then reduced to a vector of ``args.small_vocab_size`` token counts.
    A small Dense network is trained on those vectors with early stopping and
    learning-rate reduction, both monitoring ``val_loss``.

    Prints the model summary, then the best validation accuracy, the lowest
    validation loss and the best training accuracy seen during training.
    Returns nothing.
    """
    encoder = get_encoder()
    bow_size = args.small_vocab_size

    def build_bag_of_words(tokens, label):
        """Map a 1-D tensor of token ids to a (bow_size,) float vector of counts."""
        # Ids outside the small vocabulary are folded into bucket 1, which is
        # the index TextVectorization uses for out-of-vocabulary tokens.
        clipped = tf.where(tokens >= bow_size, tf.ones_like(tokens), tokens)
        # Stateless histogram instead of a shared tf.Variable updated token by
        # token: same counts, no state shared between elements, far faster.
        counts = tf.math.bincount(
            tf.cast(clipped, tf.int32),
            minlength=bow_size,
            maxlength=bow_size,
            dtype=tf.float32)
        # bincount's output length is computed dynamically; pin the static
        # shape so downstream layers see a fixed feature width.
        return tf.ensure_shape(counts, [bow_size]), label

    def to_bow_dataset(dataset):
        """Tokenise, count, cache, shuffle and batch a (text, label) dataset."""
        return (dataset
                .map(lambda sent, l: (encoder(sent), l))
                .map(build_bag_of_words)
                .cache()
                .shuffle(10000)
                .batch(args.batch_size))

    ds_train = to_bow_dataset(train_dataset)
    ds_val = to_bow_dataset(val_dataset)

    model = keras.models.Sequential()
    # Input width follows the bag-of-words size instead of a hard-coded 250.
    model.add(keras.layers.InputLayer(input_shape=(bow_size,)))
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dense(1, activation='sigmoid'))

    model.compile(optimizer=keras.optimizers.Nadam(learning_rate=1e-3),
              loss='binary_crossentropy',
              metrics=['accuracy'])

    model.summary()

    monitor='val_loss'
    early_stopping = keras.callbacks.EarlyStopping(monitor=monitor, patience=3, mode='auto', restore_best_weights=True, verbose=1)
    reduce_lr_on_plateau = keras.callbacks.ReduceLROnPlateau(monitor=monitor, factor=0.1, patience=2, min_delta=1e-4, mode='auto', verbose=1)

    history = model.fit(ds_train, validation_data=ds_val, epochs=args.epochs, callbacks=[early_stopping, reduce_lr_on_plateau])

    print('Val_accuracy:', max(history.history['val_accuracy']))
    print('Val_loss:', min(history.history['val_loss']))
    print('Accuracy:', max(history.history['accuracy']))

# Manual on/off switch: change True to False to skip this experiment.
if True: 
    baseline_bag_of_words()

# val_accuracy: 0.785; val_loss 0.456; accuracy: 0.803

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                16064     
                                                                 
 dense_1 (Dense)             (None, 1)                 65        
                                                                 
Total params: 16,129
Trainable params: 16,129
Non-trainable params: 0
_________________________________________________________________
Epoch 1/50


2021-12-20 16:15:14.780774: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:380] Filling up shuffle buffer (this may take a while): 1271 of 10000
2021-12-20 16:15:24.780641: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:380] Filling up shuffle buffer (this may take a while): 2539 of 10000
2021-12-20 16:15:34.780499: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:380] Filling up shuffle buffer (this may take a while): 3782 of 10000
2021-12-20 16:15:44.782971: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:380] Filling up shuffle buffer (this may take a while): 5062 of 10000
2021-12-20 16:15:54.783161: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:380] Filling up shuffle buffer (this may take a while): 6395 of 10000
2021-12-20 16:16:04.782261: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:380] Filling up shuffle buffer (this may take a while): 7649 of 10000
2021-12-20 16:16:14.780100: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:380] Filling 



2021-12-20 16:18:31.684886: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:380] Filling up shuffle buffer (this may take a while): 1342 of 10000
2021-12-20 16:18:41.672330: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:380] Filling up shuffle buffer (this may take a while): 2685 of 10000
2021-12-20 16:18:51.678592: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:380] Filling up shuffle buffer (this may take a while): 3988 of 10000
2021-12-20 16:19:01.669707: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:380] Filling up shuffle buffer (this may take a while): 5270 of 10000
2021-12-20 16:19:11.676965: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:380] Filling up shuffle buffer (this may take a while): 6543 of 10000
2021-12-20 16:19:21.671241: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:380] Filling up shuffle buffer (this may take a while): 7828 of 10000
2021-12-20 16:19:31.676103: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:380] Filling 



2021-12-20 16:19:57.737089: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 2/50
Epoch 3/50
Epoch 00003: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 00011: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 00015: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 16/50
Epoch 00016: early stopping
Val_accuracy: 0.7851200103759766
Val_loss: 0.45919713377952576
Accuracy: 0.8082000017166138


In [5]:
import tensorflow as tf

def stackoverflow_answer():
    """Toy example: a custom Keras layer that builds binary bag-of-words vectors.

    Four hard-coded sentences are lower-cased, stripped of punctuation, split
    on whitespace and padded into a dense string tensor. The unique words form
    the vocabulary, and a tiny Dense model is fitted for two epochs on the
    resulting presence/absence vectors. Prints the padded token tensor and the
    shuffled vocabulary; returns nothing.
    """
    class BagOfWords(tf.keras.layers.Layer):
        """Layer mapping rows of word strings to binary word-presence vectors."""
        def __init__(self, vocabulary_size):
            # NOTE: despite its name, `vocabulary_size` holds the vocabulary
            # itself (the caller below passes the tensor of unique words).
            super(BagOfWords, self).__init__()
            self.vocabulary_size = vocabulary_size

        def call(self, inputs):  
            """Return one float vector per input row with 1.0 where a vocabulary word occurs."""
            batch_size = tf.shape(inputs)[0]
            # One output vector per row, collected as the loop runs.
            outputs = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
            for i in range(batch_size):
                string = inputs[i]
                # Count the non-empty entries and keep only that many leading
                # words (assumes the b'' padding is trailing, as produced by
                # the to_tensor() call below).
                string_length = tf.shape(tf.where(tf.math.not_equal(string, b'')))[0]
                string = string[:string_length]
                string_array = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
                for s in string:
                    # Compare the word against every vocabulary entry: 1.0 at
                    # matching positions, 0.0 elsewhere.
                    string_array = string_array.write(string_array.size(), tf.where(tf.equal(s, self.vocabulary_size), 1.0, 0.0))
                # Logical OR over the words of the row -> presence (not count) vector.
                outputs = outputs.write(i, tf.cast(tf.reduce_any(tf.cast(string_array.stack(), dtype=tf.bool), axis=0), dtype=tf.float32))
            return outputs.stack()
        
    labels = [[1], [0], [1], [0]]

    texts  = ['All my cats in a row',
              'When my cat sits down, she looks like a Furby toy!',
              'The cat from the outer space',
              'Sunshine loves to sit like this for some reason.']

    # Punctuation characters removed from the text before splitting.
    DEFAULT_STRIP_REGEX = r'[!"#$%&()\*\+,-\./:;<=>?@\[\\\]^_`{|}~\']'
    tensor_of_strings = tf.constant(texts)
    tensor_of_strings = tf.strings.lower(tensor_of_strings)
    tensor_of_strings = tf.strings.regex_replace(tensor_of_strings, DEFAULT_STRIP_REGEX, "")
    # Whitespace split gives a ragged tensor; to_tensor() pads it to a dense one.
    split_strings = tf.strings.split(tensor_of_strings).to_tensor()
    print(split_strings)
    # Flatten all rows into one vector so unique words can be collected.
    flattened_split_strings = tf.reshape(split_strings, (split_strings.shape[0] * split_strings.shape[1]))
    unique_words, _ = tf.unique(flattened_split_strings)
    # No seed is set here, so the vocabulary order can differ between runs.
    unique_words = tf.random.shuffle(unique_words)
    print(unique_words)

    bag_of_words = BagOfWords(vocabulary_size = unique_words)
    train_dataset = tf.data.Dataset.from_tensor_slices((split_strings, labels))
    model = tf.keras.Sequential()
    model.add(bag_of_words)
    model.add(tf.keras.layers.Dense(64, activation='relu'))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss = tf.keras.losses.BinaryCrossentropy())
    model.fit(train_dataset.batch(2), epochs=2)

# Manual on/off switch: currently disabled, change False to True to run the toy example.
if False:
    stackoverflow_answer()

tf.Tensor(
[[b'all' b'my' b'cats' b'in' b'a' b'row' b'' b'' b'' b'' b'']
 [b'when' b'my' b'cat' b'sits' b'down' b'she' b'looks' b'like' b'a'
  b'furby' b'toy']
 [b'the' b'cat' b'from' b'the' b'outer' b'space' b'' b'' b'' b'' b'']
 [b'sunshine' b'loves' b'to' b'sit' b'like' b'this' b'for' b'some'
  b'reason' b'' b'']], shape=(4, 11), dtype=string)
tf.Tensor(
[b'sunshine' b'for' b'outer' b'looks' b'down' b'all' b'like' b'this'
 b'she' b'sit' b'toy' b'loves' b'row' b'my' b'the' b'sits' b'some' b'when'
 b'in' b'space' b'furby' b'' b'to' b'cats' b'from' b'reason' b'a' b'cat'], shape=(28,), dtype=string)
Epoch 1/2
Epoch 2/2


In [None]:
### Baseline. Bag of words. Preprocessing in a layer.

from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

def baseline_bag_of_words_layer():
    """Train a dense classifier whose bag-of-words features are built inside the model.

    The model takes raw review strings: a TextVectorization encoder limited to
    ``args.small_vocab_size`` tokens, a custom ``BagOfWords`` layer turning
    each padded id sequence into a vector of counts, then two Dense layers.
    Training uses early stopping and learning-rate reduction on ``val_loss``.

    Prints the model summary, then the best validation accuracy, the lowest
    validation loss and the best training accuracy. Returns nothing.
    """
    class BagOfWords(tf.keras.layers.Layer):
        """Count token ids per example: (batch, seq) ints -> (batch, vocabulary_size) floats."""
        def __init__(self, vocabulary_size):
            super(BagOfWords, self).__init__()
            self.vocabulary_size = vocabulary_size

        def call(self, inputs):
            """Return, for every row of `inputs`, how often each id in [0, vocabulary_size) occurs."""
            # Ids at or above the vocabulary size are folded into bucket 1.
            ids = tf.where(inputs >= self.vocabulary_size, tf.ones_like(inputs), inputs)
            # Row-wise histogram. This replaces nested per-token TensorArray
            # loops whose zero-initialisation result (`unstack`) was discarded.
            # Every position of a row is counted, padding included, exactly as
            # the loop version did.
            counts = tf.math.bincount(
                tf.cast(ids, tf.int32),
                minlength=self.vocabulary_size,
                maxlength=self.vocabulary_size,
                dtype=tf.float32,
                axis=-1)
            # Pin the feature width so the following Dense layer can build.
            return tf.ensure_shape(counts, [None, self.vocabulary_size])

    encoder = get_encoder(args.small_vocab_size)
    bag_of_words = BagOfWords(args.small_vocab_size)

    model = keras.models.Sequential()
    model.add(encoder)
    model.add(bag_of_words)
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dense(1, activation='sigmoid'))

    # instead of build() on model: push one real batch through so every
    # layer creates its weights before summary() is called
    for d in train_dataset.batch(args.batch_size).take(1):
        model(d[0])
    model.compile(optimizer=keras.optimizers.Nadam(learning_rate=1e-3),
              loss='binary_crossentropy',
              metrics=['accuracy'])
    model.summary()

    ds_train = train_dataset.shuffle(200000).batch(args.batch_size).prefetch(1)
    ds_val = val_dataset.batch(args.batch_size).prefetch(1)
    monitor='val_loss'
    early_stopping = keras.callbacks.EarlyStopping(monitor=monitor, patience=3, mode='auto', restore_best_weights=True, verbose=1)
    reduce_lr_on_plateau = keras.callbacks.ReduceLROnPlateau(monitor=monitor, factor=0.1, patience=2, min_delta=1e-4, mode='auto', verbose=1)

    history = model.fit(ds_train, validation_data=ds_val, epochs=args.epochs, callbacks=[early_stopping, reduce_lr_on_plateau])

    print('Val_accuracy:', max(history.history['val_accuracy']))
    print('Val_loss:', min(history.history['val_loss']))
    print('Accuracy:', max(history.history['accuracy']))

# Manual on/off switch: change True to False to skip this experiment.
if True: 
    baseline_bag_of_words_layer()
    
# Val_accuracy: 0.7857599854469299
# Val_loss: 0.45941051840782166
# Accuracy: 0.7972400188446045

### RNN with embedding

In [None]:
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

def rnn_with_embedding():
    """Train a bidirectional LSTM classifier on learned token embeddings.

    The model takes raw review strings: the shared encoder from
    ``get_encoder()``, an Embedding layer with masking, a bidirectional LSTM
    and two Dense layers. Training uses early stopping and learning-rate
    reduction on ``val_loss``. Prints the model summary; returns nothing.
    """
    encoder = get_encoder()

    model = keras.models.Sequential()
    model.add(encoder)
    model.add(keras.layers.Embedding(
        input_dim=len(encoder.get_vocabulary()),
        output_dim=64,
        # Use masking to handle the variable sequence lengths
        mask_zero=True))
    model.add(keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dense(1, activation='sigmoid'))

    model.compile(optimizer=keras.optimizers.Nadam(learning_rate=1e-3),
              loss='binary_crossentropy',
              metrics=['accuracy'])
    model.summary()

    # The module-level datasets are unbatched, and the LSTM needs
    # (batch, time, features) input, so batch here. Shuffling and prefetching
    # follow the other experiments in this notebook.
    ds_train = train_dataset.shuffle(200000).batch(args.batch_size).prefetch(1)
    ds_val = val_dataset.batch(args.batch_size).prefetch(1)

    monitor='val_loss'
    early_stopping = keras.callbacks.EarlyStopping(monitor=monitor, patience=3, mode='auto', restore_best_weights=True, verbose=1)
    reduce_lr_on_plateau = keras.callbacks.ReduceLROnPlateau(monitor=monitor, factor=0.1, patience=2, min_delta=1e-4, mode='auto', verbose=1)

    model.fit(ds_train, epochs=args.epochs, validation_data=ds_val, callbacks=[early_stopping, reduce_lr_on_plateau])

# Manual on/off switch: currently disabled, change False to True to train the RNN.
if False:
    rnn_with_embedding()

# Epoch 3/50
# 782/782 [======] - 314s 401ms/step - loss: 0.2752 - accuracy: 0.8867 - val_loss: 0.3107 - val_accuracy: 0.8667 - lr: 0.0010

### TF-IDF

In [93]:
def tfidf():
    """Train a two-layer dense classifier on TF-IDF features of the review text.

    A dedicated TextVectorization layer in ``tf_idf`` output mode is adapted
    on the training texts and used as the first layer of the model, so the
    model takes raw strings. Training uses early stopping and learning-rate
    reduction on ``val_loss``.

    Prints the model summary, then the best validation accuracy, the lowest
    validation loss and the best training accuracy. Returns nothing.
    """
    # Fit the TF-IDF vectorizer on the training texts only (labels dropped).
    vectorizer = TextVectorization(max_tokens=args.vocab_size, output_mode="tf_idf")
    vectorizer.adapt(train_dataset.map(lambda text, label: text))

    model = keras.models.Sequential([
        vectorizer,
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(1, activation='sigmoid'),
    ])

    # Stand-in for model.build(): run one real batch through the model so the
    # layers create their weights before summary() is called.
    for texts, _ in train_dataset.batch(args.batch_size).take(1):
        model(texts)

    model.compile(
        optimizer=keras.optimizers.Nadam(learning_rate=1e-3),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    model.summary()

    ds_train = train_dataset.shuffle(200000).batch(args.batch_size).prefetch(1)
    ds_val = val_dataset.batch(args.batch_size).prefetch(1)

    # Both callbacks watch the same validation metric.
    monitor = 'val_loss'
    callbacks = [
        keras.callbacks.EarlyStopping(
            monitor=monitor, patience=3, mode='auto',
            restore_best_weights=True, verbose=1),
        keras.callbacks.ReduceLROnPlateau(
            monitor=monitor, factor=0.1, patience=2, min_delta=1e-4,
            mode='auto', verbose=1),
    ]

    history = model.fit(
        ds_train,
        validation_data=ds_val,
        epochs=args.epochs,
        callbacks=callbacks,
    )

    metrics = history.history
    print('Val_accuracy:', max(metrics['val_accuracy']))
    print('Val_loss:', min(metrics['val_loss']))
    print('Accuracy:', max(metrics['accuracy']))

# Manual on/off switch: change True to False to skip this experiment.
if True: 
    tfidf()
    
# Val_accuracy: 0.870959997177124
# Val_loss: 0.34430137276649475
# Accuracy: 0.9621599912643433

Model: "sequential_45"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization_6 (TextV  (None, 2500)             1         
 ectorization)                                                   
                                                                 
 dense_79 (Dense)            (None, 64)                160064    
                                                                 
 dense_80 (Dense)            (None, 1)                 65        
                                                                 
Total params: 160,130
Trainable params: 160,129
Non-trainable params: 1
_________________________________________________________________
Epoch 1/50
Epoch 2/50
  1/782 [..............................] - ETA: 1:24 - loss: 0.5382 - accuracy: 0.7812

2022-01-12 18:19:24.176406: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 3/50
 20/782 [..............................] - ETA: 2s - loss: 0.1955 - accuracy: 0.9172  

2022-01-12 18:19:27.218414: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 00003: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 4/50
 20/782 [..............................] - ETA: 2s - loss: 0.1026 - accuracy: 0.9719  

2022-01-12 18:19:30.250760: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 00004: early stopping
Val_accuracy: 0.870959997177124
Val_loss: 0.34430137276649475
Accuracy: 0.9621599912643433


2022-01-12 18:19:33.197737: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


### Word2Vec

In [101]:
import tensorflow_hub as hub
import os

def word2vec():
    """Train a dense classifier on pre-trained TF Hub text embeddings.

    The first layer is the ``Wiki-words-250`` module from TF Hub, which takes
    raw strings; two Dense layers follow. Training uses early stopping and
    learning-rate reduction on ``val_loss``.

    Side effect: sets the ``TFHUB_MODEL_LOAD_FORMAT`` environment variable
    for the whole process.

    Prints the model summary, then the best validation accuracy, the lowest
    validation loss and the best training accuracy. Returns nothing.
    """
    # Per the original author's note: read the model remotely from Google
    # instead of downloading it first.
    os.environ["TFHUB_MODEL_LOAD_FORMAT"] = "UNCOMPRESSED"

    # Scalar string input (input_shape=[]): one review text per example.
    hub_layer = hub.KerasLayer(
        "https://tfhub.dev/google/Wiki-words-250/2",
        input_shape=[],
        dtype=tf.string,
    )

    model = keras.Sequential([
        hub_layer,
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(
        optimizer=keras.optimizers.Nadam(learning_rate=1e-3),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    model.summary()

    ds_train = train_dataset.shuffle(200000).batch(args.batch_size).prefetch(tf.data.AUTOTUNE)
    ds_val = val_dataset.batch(args.batch_size).prefetch(tf.data.AUTOTUNE)

    # Both callbacks watch the same validation metric.
    monitor = 'val_loss'
    callbacks = [
        keras.callbacks.EarlyStopping(
            monitor=monitor, patience=3, mode='auto',
            restore_best_weights=True, verbose=1),
        keras.callbacks.ReduceLROnPlateau(
            monitor=monitor, factor=0.1, patience=2, min_delta=1e-4,
            mode='auto', verbose=1),
    ]

    history = model.fit(
        ds_train,
        validation_data=ds_val,
        epochs=args.epochs,
        callbacks=callbacks,
    )

    metrics = history.history
    print('Val_accuracy:', max(metrics['val_accuracy']))
    print('Val_loss:', min(metrics['val_loss']))
    print('Accuracy:', max(metrics['accuracy']))

# Manual on/off switch: currently disabled, change False to True to run this experiment.
if False: 
    word2vec()
    
# loss: 0.4260 - accuracy: 0.8065 - val_loss: 0.4413 - val_accuracy: 0.7932

Model: "sequential_47"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer_1 (KerasLayer)  (None, 250)               252343750 
                                                                 
 dense_83 (Dense)            (None, 64)                16064     
                                                                 
 dense_84 (Dense)            (None, 1)                 65        
                                                                 
Total params: 252,359,879
Trainable params: 16,129
Non-trainable params: 252,343,750
_________________________________________________________________
Epoch 1/50

2022-01-17 15:46:02.430234: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 2/50

2022-01-17 15:46:07.186301: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 3/50
Epoch 4/50
 22/782 [..............................] - ETA: 1s - loss: 0.4777 - accuracy: 0.7699  

2022-01-17 15:46:10.746094: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.




2022-01-17 15:46:13.682784: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 5/50
Epoch 6/50
 24/782 [..............................] - ETA: 1s - loss: 0.4535 - accuracy: 0.7812  

2022-01-17 15:46:17.070486: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.




2022-01-17 15:46:19.693249: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 7/50

2022-01-17 15:46:22.738540: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 8/50
Epoch 9/50
 24/782 [..............................] - ETA: 1s - loss: 0.4748 - accuracy: 0.7826  

2022-01-17 15:46:25.815949: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.




2022-01-17 15:46:28.396220: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 10/50
Epoch 11/50
 24/782 [..............................] - ETA: 1s - loss: 0.4098 - accuracy: 0.8138  

2022-01-17 15:46:31.593680: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 12/50
 25/782 [..............................] - ETA: 1s - loss: 0.4417 - accuracy: 0.7962  

2022-01-17 15:46:34.176108: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.




2022-01-17 15:46:36.777039: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 13/50

2022-01-17 15:46:39.841457: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 14/50

2022-01-17 15:46:42.925019: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 15/50

2022-01-17 15:46:46.012356: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 16/50

2022-01-17 15:46:49.075938: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 17/50
Epoch 18/50
 24/782 [..............................] - ETA: 1s - loss: 0.4139 - accuracy: 0.8138  

2022-01-17 15:46:52.252189: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.




2022-01-17 15:46:54.875597: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 19/50
Epoch 20/50
 25/782 [..............................] - ETA: 1s - loss: 0.4368 - accuracy: 0.7975  

2022-01-17 15:46:57.901524: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.




2022-01-17 15:47:00.485249: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.



Epoch 00020: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 21/50
Epoch 22/50
 25/782 [..............................] - ETA: 1s - loss: 0.4525 - accuracy: 0.8037  

2022-01-17 15:47:03.547961: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.




2022-01-17 15:47:06.091249: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 23/50

2022-01-17 15:47:09.148011: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 24/50
Epoch 00024: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 25/50
 24/782 [..............................] - ETA: 1s - loss: 0.4225 - accuracy: 0.8151  

2022-01-17 15:47:12.201004: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.




2022-01-17 15:47:14.753705: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 26/50

2022-01-17 15:47:17.917005: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.



Epoch 00026: ReduceLROnPlateau reducing learning rate to 1.0000001111620805e-07.
Epoch 27/50

2022-01-17 15:47:21.097696: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 28/50

2022-01-17 15:47:24.175000: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.



Epoch 00028: ReduceLROnPlateau reducing learning rate to 1.000000082740371e-08.
Epoch 29/50
Epoch 30/50
 24/782 [..............................] - ETA: 1s - loss: 0.4150 - accuracy: 0.8086  

2022-01-17 15:47:27.299396: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.




2022-01-17 15:47:29.837255: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.



Epoch 00030: ReduceLROnPlateau reducing learning rate to 1.000000082740371e-09.
Epoch 31/50
Epoch 32/50
 26/782 [..............................] - ETA: 1s - loss: 0.4648 - accuracy: 0.7849  

2022-01-17 15:47:32.945009: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 00032: ReduceLROnPlateau reducing learning rate to 1.000000082740371e-10.
Epoch 33/50
 25/782 [..............................] - ETA: 1s - loss: 0.4150 - accuracy: 0.8263  

2022-01-17 15:47:35.548774: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 00033: early stopping
Val_accuracy: 0.7936000227928162
Val_loss: 0.44133269786834717
Accuracy: 0.8065599799156189


2022-01-17 15:47:38.099661: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


### Different embeddings: GloVe, BERT, Transformer