In [1]:
import tensorflow as tf
import numpy as np
import os
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
import keras_tuner as kt
from tensorflow.keras import callbacks
from tensorflow.keras.layers import TextVectorization
import matplotlib.pyplot as plt

In [2]:
from preprocessing import load_tfrecord, list_files_in_path

In [3]:
# Load the training / validation splits through the helpers imported from the
# local `preprocessing` module.
# The directories are assembled with os.path.join rather than hard-coded "\\"
# separators: on Windows this yields exactly the same strings as before, while on
# Linux/macOS it produces paths that actually resolve.
train_set = load_tfrecord(list_files_in_path(os.path.join("datasets", "tfrecords", "train")))
valid_set = load_tfrecord(list_files_in_path(os.path.join("datasets", "tfrecords", "validation")))

In [4]:
"""Taking the parameters from the first model (see model_tuning.ipynb)."""
# --- text vectorization ---
num_of_words = 10_000        # vocabulary size (max_tokens / Embedding input_dim)
max_sentence_len = 200       # every sentence is padded / truncated to this many tokens

# --- network width ---
embedding_dim = 16
lstm_1_dim = lstm_2_dim = 32  # both stacked LSTM layers use the same number of units

# --- optimizer ---
learning_rate = 1e-2

In [5]:
# Constructor arguments for the TextVectorization layer, gathered in one mapping
# so they can be unpacked with ** when the layer is created.
text_vectorize_dict = dict(
    max_tokens=num_of_words,                   # cap on the vocabulary size
    output_mode="int",                         # emit integer token ids
    output_sequence_length=max_sentence_len,   # fixed output length per sentence
)

In [6]:
# Create the text-vectorization layer from the settings above.
# The layer is named through the public `name` constructor argument instead of
# overwriting the private `_name` attribute after construction; the resulting
# layer name ("Text_Vectorization_Layer") is unchanged.
vectorize_layer = TextVectorization(name="Text_Vectorization_Layer", **text_vectorize_dict)

In [7]:
# Build the vocabulary from the training split only. Each element of train_set is
# unpacked into two parts and only the first one is kept
# (presumably (text, label) pairs — confirm against preprocessing.load_tfrecord).
vectorize_layer.adapt(train_set.map(lambda text, _label: text))

In [8]:
class PositionalEncoding(layers.Layer):
    """Add a fixed sinusoidal position vector to every step of a sequence.

    A table of shape ``(1, max_steps, max_dims)`` is precomputed once in the
    constructor: even feature indices hold ``sin(p / 10000**(2i / max_dims))``
    and odd feature indices hold the matching ``cos`` value, where ``p`` is the
    position and ``i`` the index of the sin/cos pair. The table is a constant,
    so the layer has no trainable weights.

    Args:
        max_steps: number of positions to precompute.
        max_dims: number of feature dimensions to precompute. An odd value is
            rounded up to the next even number for the table.
        dtype: dtype of the layer and of the stored table.
        **kwargs: forwarded to ``layers.Layer``.
    """
    def __init__(self, max_steps, max_dims, dtype=tf.float32, **kwargs):
        super().__init__(dtype=dtype, **kwargs)
        # Remember the arguments exactly as passed so get_config() can rebuild
        # an identical layer.
        self.max_steps = max_steps
        self.max_dims = max_dims
        if max_dims % 2 == 1: max_dims += 1 # max_dims must be even
        p, i = np.meshgrid(np.arange(max_steps), np.arange(max_dims // 2))
        # Angle for every (position, sin/cos pair); computed once and shared by
        # the sine and cosine halves. Shape after .T: (max_steps, max_dims // 2).
        angles = (p / 10000**(2 * i / max_dims)).T
        pos_emb = np.empty((1, max_steps, max_dims))
        pos_emb[0, :, ::2] = np.sin(angles)
        pos_emb[0, :, 1::2] = np.cos(angles)
        self.positional_embedding = tf.constant(pos_emb.astype(self.dtype))
    def call(self, inputs):
        """Return ``inputs`` plus the position table cropped to the inputs' last two axes."""
        shape = tf.shape(inputs)
        # Crop the precomputed table to the actual number of steps / features;
        # the leading axis of size 1 broadcasts over the remaining axes.
        return inputs + self.positional_embedding[:, :shape[-2], :shape[-1]]
    def get_config(self):
        """Return the constructor arguments so the layer can be re-created from its config."""
        config = super().get_config()
        config.update({"max_steps": self.max_steps, "max_dims": self.max_dims})
        return config

In [9]:
# Individual layers of the network, created once here and assembled into a model
# in a later cell. (`postional_encoding` keeps its original spelling because the
# model-building cell refers to it by that name.)
postional_encoding = PositionalEncoding(max_steps=max_sentence_len, max_dims=embedding_dim)
embedding_layer = layers.Embedding(
    input_dim=num_of_words,
    output_dim=embedding_dim,
    input_length=max_sentence_len,
)
lstm_layer_1 = layers.LSTM(units=lstm_1_dim, return_sequences=True)  # emits the full sequence
lstm_layer_2 = layers.LSTM(units=lstm_2_dim)                         # emits only the last step
output_layer = layers.Dense(units=1, activation="sigmoid")


In [10]:
"""in short our model is the 'encoder' in the transformer model """

# Assemble the network layer by layer:
# raw text -> token ids -> embeddings -> + positional encoding -> 2 stacked LSTMs -> sigmoid.
model = Sequential()
model.add(vectorize_layer)
model.add(embedding_layer)
model.add(postional_encoding)
model.add(lstm_layer_1)
model.add(lstm_layer_2)
model.add(output_layer)

In [11]:
# Print the layer-by-layer overview of the model (layer types, output shapes, parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Text_Vectorization_Layer (T  (None, 200)              0         
 extVectorization)                                               
                                                                 
 embedding (Embedding)       (None, 200, 16)           160000    
                                                                 
 positional_encoding (Positi  (None, 200, 16)          0         
 onalEncoding)                                                   
                                                                 
 lstm (LSTM)                 (None, 200, 32)           6272      
                                                                 
 lstm_1 (LSTM)               (None, 32)                8320      
                                                                 
 dense (Dense)               (None, 1)                 33        

In [12]:
# Configure training: Adam with the learning rate chosen above, binary
# cross-entropy as the loss, and accuracy as the reported metric.
model.compile(
    optimizer=Adam(learning_rate=learning_rate),
    loss="BinaryCrossentropy",
    metrics=["accuracy"],
)

In [13]:
"""let's first test on 10 epochs"""
# Train for 10 epochs, evaluating on valid_set after every epoch.
# `batch_size` is intentionally not passed: train_set is consumed as a
# tf.data.Dataset (it is .map()-ed when adapting the vectorizer), and Keras
# documents that batch_size must not be specified for dataset inputs because the
# dataset yields its own batches — the argument had no effect here.
# NOTE(review): assumes load_tfrecord already returns a batched dataset — confirm
# in preprocessing.py.
model.fit(train_set,
          epochs=10,
          validation_data=valid_set)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1aae6c6d880>

In [15]:
"""for comparison we will try on a model without positional_encoding"""

tf.keras.backend.clear_session()
# Build the baseline from NEW Embedding / LSTM / Dense instances.
# Reusing embedding_layer, lstm_layer_1, lstm_layer_2 and output_layer would carry
# over the weights those objects already learned in the previous training run
# (clear_session() does not reset the weights held by existing layer objects), so
# the baseline would start pre-trained and the comparison would be meaningless.
model = Sequential([
    vectorize_layer,  # shared on purpose: adapted vocabulary, no trainable parameters
    layers.Embedding(input_dim=num_of_words,output_dim=embedding_dim,input_length=max_sentence_len),
    #postional_encoding, taking out this layer
    layers.LSTM(lstm_1_dim,return_sequences=True),
    layers.LSTM(lstm_2_dim),
    layers.Dense(1,activation="sigmoid")
])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Text_Vectorization_Layer (T  (None, 200)              0         
 extVectorization)                                               
                                                                 
 embedding (Embedding)       (None, 200, 16)           160000    
                                                                 
 lstm (LSTM)                 (None, 200, 32)           6272      
                                                                 
 lstm_1 (LSTM)               (None, 32)                8320      
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
Total params: 174,625
Trainable params: 174,625
Non-trainable params: 0
__________________________________________________

In [16]:
# Compile the baseline with the same optimizer settings, loss and metric
# (Adam at `learning_rate`, binary cross-entropy, accuracy).
model.compile(
    optimizer=Adam(learning_rate=learning_rate),
    loss="BinaryCrossentropy",
    metrics=["accuracy"],
)

In [17]:
# Train the baseline for 10 epochs, evaluating on valid_set after every epoch.
# `batch_size` is intentionally not passed: train_set is consumed as a
# tf.data.Dataset, and Keras documents that batch_size must not be specified for
# dataset inputs because the dataset yields its own batches — the argument had no
# effect here.
# NOTE(review): assumes load_tfrecord already returns a batched dataset — confirm
# in preprocessing.py.
model.fit(train_set,
          epochs=10,
          validation_data=valid_set)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1ab43ea6490>

In [None]:
# Closing remark on the two training runs in this notebook (with and without the
# positional-encoding layer).
# NOTE(review): the per-epoch metrics are not part of the saved output, so the
# numbers behind this statement cannot be checked from the notebook itself.
"""it does not seem to be better!"""