In [None]:
import tensorflow as tf
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
from tensorflow.keras.preprocessing import text_dataset_from_directory
from tensorflow.keras.layers import SimpleRNN,LSTM
import numpy as np
import re
import string
import nltk
import numpy as np
from matplotlib import pyplot as plt
import pandas

In [None]:
# Root folders of the training and validation text corpora (placeholders).
train_directory, val_directory = '...', '...'

In [None]:
# Build the training dataset from the files under `train_directory`,
# leaving every loader option at its default.
train_dataset = text_dataset_from_directory(directory=train_directory)

In [None]:
# Build the validation dataset from the files under `val_directory`.
# `shuffle` has to be passed by keyword: the bare positional `shuffle` that
# used to be here was an undefined name (NameError) and, positionally, would
# have been taken as the `labels` argument anyway.
# Shuffling is switched off because evaluation metrics do not depend on the
# order of the samples, and a fixed order keeps validation runs comparable.
val_dataset = text_dataset_from_directory(
        val_directory,
        shuffle=False,
)

In [None]:
# Sanity check: print the first (review, label) element the loader yields.
first_batch = train_dataset.take(1)
for review, label in first_batch:
    print('Review', review)
    print('Label', label)

In [None]:
def preprocess_sentences(input_data):
    '''
    Standardize raw review text before it is tokenized.

    Steps, applied in order (each step works on the result of the previous one):
      1. lowercase the text,
      2. replace HTML-style tags such as "<br />" with a space,
      3. replace every ASCII punctuation character with a space,
      4. collapse runs of spaces into a single space.

    Input: raw reviews (string tensor; used as the `standardize` callable of
           the TextVectorization layer)
    Output: standardized reviews
    '''
    lowered = tf.strings.lower(input_data)

    # A tag is replaced by a space rather than by "" so that two words that are
    # separated only by a tag ("good<br />bad") do not fuse into one token.
    without_tags = tf.strings.regex_replace(lowered, "<[^>]+>", " ")

    # Character class made of all ASCII punctuation; re.escape keeps characters
    # such as "]" or "\" from breaking the class.
    punctuation_class = "[%s]" % re.escape(string.punctuation)
    without_punctuation = tf.strings.regex_replace(
        without_tags, punctuation_class, " ")

    # " +" squeezes a run of any length; a literal two-space pattern would
    # leave leftovers when the run has more than two spaces.
    return tf.strings.regex_replace(without_punctuation, " +", " ")

In [None]:
# Vocabulary cap and fixed number of token ids per review.
VOCAB_SIZE = 2000
SEQUENCE_LENGTH = 100

# Text -> integer-id layer. It standardizes text with preprocess_sentences,
# is limited to VOCAB_SIZE tokens and is asked for integer output with a
# fixed output length of SEQUENCE_LENGTH.
vectorize_layer = TextVectorization(
    output_mode='int',
    output_sequence_length=SEQUENCE_LENGTH,
    max_tokens=VOCAB_SIZE,
    standardize=preprocess_sentences,
)

In [None]:
def _text_only(review, label):
    '''Drop the label and keep only the review text.'''
    return review


# The vocabulary is learned from the review text alone, so labels are removed
# before the layer is adapted to the training data.
training_data = train_dataset.map(_text_only)
vectorize_layer.adapt(training_data)

In [None]:
def vectorizer(review,label):
    '''
    Turn a (review, label) pair into a (one-hot review, label) pair.

    The review is passed through `vectorize_layer` and the resulting token ids
    are one-hot encoded with depth VOCAB_SIZE. The label is returned unchanged.
    '''
    token_ids = vectorize_layer(review)
    one_hot_review = tf.one_hot(token_ids, depth=VOCAB_SIZE)
    return one_hot_review, label

In [None]:
# Run both splits through `vectorizer`, producing the one-hot encoded datasets.
train_datastee, val_datastee = (
    split.map(vectorizer) for split in (train_dataset, val_dataset)
)

In [None]:
# Print one (review, label) element of `train_dataset`.
sample_batch = train_dataset.take(1)
for review, label in sample_batch:
    print('Review', review)
    print('Label', label)

In [None]:
# Display the vocabulary held by the vectorization layer after `adapt`.
# NOTE(review): this prints the whole vocabulary (up to VOCAB_SIZE entries).
vectorize_layer.get_vocabulary()

In [None]:
# Bare expression: the notebook shows the dataset object's repr as cell output.
train_dataset

In [None]:
# `AUTOTUNE` was used here without ever being defined (NameError); bind it to
# tf.data's tuning constant.
AUTOTUNE = tf.data.AUTOTUNE

# Cache and prefetch the *vectorized* datasets. This cell used to start from
# the raw-text datasets, which meant the model was later fed strings instead
# of the one-hot tensors produced by `vectorizer`.
# NOTE(review): cache() now stores one-hot tensors, which can be large; if
# memory becomes a problem, cache the raw text before the vectorizer map.
train_dataset = train_datastee.cache().prefetch(buffer_size=AUTOTUNE)
val_dataset = val_datastee.cache().prefetch(buffer_size=AUTOTUNE)

In [None]:
class RNNCell(tf.keras.layers.Layer):
    '''
    One time step of a simple recurrent cell.

    For an input x and previous hidden state h_prev it computes

        h = tanh(h_prev @ w_hh + x @ w_hx + b_h)
        y = tanh(h @ w_yh + b_y)

    Args:
        units: size of the per-step output y.
        hidden_size: size of the hidden state h.
    '''
    def __init__(self,units=4,hidden_size=7):
        super(RNNCell,self).__init__()

        self.units=units
        self.hidden_size=hidden_size

    def build(self,input_shape):
        '''Create the weights; the input width is read from input_shape[-1].'''
        # hidden -> hidden
        self.w_hh=self.add_weight(
            shape=(self.hidden_size,self.hidden_size),
            initializer='random_normal',
            trainable=True)

        # input -> hidden
        self.w_hx=self.add_weight(
            shape=(input_shape[-1],self.hidden_size),
            initializer='random_normal',
            trainable=True)

        # hidden -> output
        self.w_yh=self.add_weight(
            shape=(self.hidden_size,self.units),
            initializer='random_normal',
            trainable=True)

        self.b_h=self.add_weight(
            shape=(self.hidden_size,),
            initializer='random_normal',
            trainable=True)

        self.b_y=self.add_weight(
            shape=(self.units,),
            initializer='random_normal',
            trainable=True)

    def call(self,inputs,h_prev=None):
        '''
        Run one step.

        Args:
            inputs: input for this time step, shape (batch, input_width).
            h_prev: previous hidden state, shape (batch, hidden_size);
                when None an all-zero state is used.

        Returns:
            (h, y): the new hidden state and the output of this step.
        '''
        # `is None`, not `== None`: comparing a tensor with `==` is not a
        # reliable "was an argument given" test.
        if h_prev is None:
            # tf.shape gives the batch size at run time, so this also works
            # when the static batch dimension is unknown.
            batch_size = tf.shape(inputs)[0]
            h_prev = tf.zeros([batch_size,self.hidden_size],dtype=self.w_hh.dtype)

        h=tf.nn.tanh(tf.matmul(h_prev,self.w_hh)+tf.matmul(inputs,self.w_hx)+self.b_h)
        # The output goes into `y`; it used to overwrite `h`, which both lost
        # the hidden state and left `y` undefined.
        y=tf.nn.tanh(tf.matmul(h,self.w_yh)+self.b_y)

        # Return the tensors themselves: wrapping them in tf.constant is not
        # needed and does not work for tensors that are part of a graph.
        return h,y

In [None]:
class RNN(tf.keras.layers.Layer):
    '''
    Unrolls an RNNCell over the time axis of a (batch, time, features) input.

    Args:
        units: size of the per-step output produced by the cell.
    '''
    def __init__(self,units):
        super(RNN,self).__init__()
        self.rnn=RNNCell(units)

    def call(self,inputs):
        '''
        Apply the cell to every time step.

        Args:
            inputs: tensor indexed as inputs[:, t, :], i.e. (batch, time, features).

        Returns:
            Tensor of shape (batch, time, units) with the cell output of each step.

        Raises:
            ValueError: if the number of time steps is not statically known.
        '''
        time_steps = inputs.shape[-2]
        if time_steps is None:
            raise ValueError('RNN needs a statically known number of time steps')

        # First step starts from the cell's default (zero) hidden state.
        h,y=self.rnn(inputs[:,0,:])
        outputs = [y]

        # Remaining steps start at index 1 and carry the hidden state forward.
        for i in range(1,time_steps):
            h,y=self.rnn(inputs[:,i,:],h)
            outputs.append(y)

        # Stack along a new axis 1 to get (batch, time, units). A reshape of
        # the (time, batch, units) stack would keep the element order and so
        # mix up samples and time steps.
        return tf.stack(outputs,axis=1)

In [None]:
# Smoke test: feed an all-zero tensor of shape (8, 100, 2000) through the
# built-in SimpleRNN layer with 4 units.
layer = SimpleRNN(
    units=4,
    activation='tanh',
    use_bias=True,
    return_sequences=False,
    name='layer1',
)
inputs = tf.zeros(shape=[8, 100, 2000])
outputs = layer(inputs)

In [None]:
# Symbolic model input: SEQUENCE_LENGTH steps, each of width VOCAB_SIZE.
inputs=tf.keras.layers.Input(shape=(SEQUENCE_LENGTH,VOCAB_SIZE))

EMBEDDING_DIM=100
# NOTE(review): `embedding` is created but is not one of the model's layers.
embedding=tf.keras.layers.Embedding(VOCAB_SIZE,EMBEDDING_DIM)
model=tf.keras.models.Sequential([
    inputs,
    # One 20-dimensional output per time step (return_sequences=True).
    tf.keras.layers.LSTM(20,activation='tanh',use_bias=True,return_sequences=True,name='layer1'),
    # Reduce every time step to a single value.
    tf.keras.layers.Dense(1,activation='relu',name='layer2'),
    # Flatten the per-step values into one vector of length SEQUENCE_LENGTH.
    # (The comma before `name` was missing, which was a SyntaxError.)
    tf.keras.layers.Reshape((SEQUENCE_LENGTH,),name='layer3'),
    # Single sigmoid unit for the binary prediction.
    tf.keras.layers.Dense(1,activation='sigmoid',name='layer4'),
])
model.summary()

In [None]:
LR = 1e-4
EPOCH = 50
model.compile(
    # The model's last layer already applies a sigmoid, so the loss receives
    # probabilities, not logits; from_logits=True would apply a second sigmoid.
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias that
    # newer Keras optimizers reject.
    optimizer=tf.keras.optimizers.Adam(learning_rate=LR),
    metrics=['accuracy'])

In [None]:
checkpoint_filepath='...'
# Keep only the best weights seen so far. "Best" is judged on the validation
# loss: training is run with validation data, and picking the checkpoint by
# training loss would favour the most over-fitted epoch.
callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_loss',
    mode='min',
    save_best_only=True
)

In [None]:
# Train for EPOCH epochs with validation after each one. The keyword is
# `callbacks`; the previous misspelling made fit() raise a TypeError.
history=model.fit(train_dataset,validation_data=val_dataset,verbose=1,epochs=EPOCH,callbacks=[callback])

In [None]:

# Training loss and validation loss per epoch, drawn on the same axes.
for curve in ('loss', 'val_loss'):
    plt.plot(history.history[curve])

plt.title('model_loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'val'], loc='upper left')
plt.show()

In [None]:
# Two hand-written reviews, each wrapped in its own one-element list, used
# for a quick manual check of the trained model.
sample_reviews = [
    ["This is a bad movie, i really didn't love it"],
    ["I really loved the movie, it reminds me of my past"],
]
test_data = tf.data.Dataset.from_tensor_slices(sample_reviews)

In [None]:
def vectorizer_test(review):
    '''
    Label-free variant of the vectorizer for inference data.

    The review is passed through `vectorize_layer` and the resulting token ids
    are one-hot encoded with depth VOCAB_SIZE.
    '''
    token_ids = vectorize_layer(review)
    return tf.one_hot(token_ids, depth=VOCAB_SIZE)

In [None]:
# One-hot encode the hand-written reviews with the label-free vectorizer.
test_dataset=test_data.map(vectorizer_test)

In [None]:
# Run the model on the encoded sample reviews; the notebook displays the result.
model.predict(test_dataset)