In [2]:
from datetime import datetime
import pandas as pd
import numpy as np
import os,sys
import json
from math import ceil
import warnings
warnings.filterwarnings("ignore")

In [3]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import (
    Dense,
    SimpleRNN,
    Embedding,
    LeakyReLU,
    ReLU
)
from tensorflow.keras.activations import (
    sigmoid,
    tanh
)
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import (
    EarlyStopping,
    TensorBoard
)
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import Accuracy 

In [4]:
from tensorflow.keras.models import (
    save_model,
    load_model
)

In [17]:
def get_transformed_data():
    """Load the IMDB dataset and pad every review to one common length.

    The padded length is the longest *training* review, rounded up to the
    next multiple of 100. The vocabulary size is derived from the largest
    token id found in either split and is rounded up to a multiple of 100
    that is strictly greater than that id, so it can be used directly as
    the ``input_dim`` of an embedding layer.

    Both computed sizes are printed.

    Returns:
        tuple: ``(x_train_padded, y_train, x_test_padded, y_test,
        max_features, sent_max_len)``.
    """

    (x_train,y_train),(x_test,y_test)=imdb.load_data()

    longest=0
    for sent in x_train:
        longest=max(longest,len(sent))

    sent_max_len=ceil(longest/100)*100

    print(f"sentence maximum length : {sent_max_len}")

    # Scan each split on its own: zipping the two splits together would stop
    # at the shorter one and silently skip the tail of the longer one.
    largest_index=0
    for split in (x_train,x_test):
        for sent in split:
            if len(sent):
                largest_index=max(largest_index,max(sent))

    # Token ids run from 0 to largest_index inclusive, so at least
    # largest_index+1 rows are needed; rounding largest_index itself would be
    # one row short whenever it is already an exact multiple of 100.
    max_features=ceil((largest_index+1)/100)*100

    print(f"vocabulary size : {max_features}")

    x_train_padded=pad_sequences(
        x_train,
        padding="post",
        maxlen=sent_max_len
    )
    x_test_padded=pad_sequences(
        x_test,
        padding="post",
        maxlen=sent_max_len
    )

    return x_train_padded,y_train,x_test_padded,y_test,max_features,sent_max_len

In [6]:
def construct_model(
        max_features,
        sent_max_len
):
    """Build the (uncompiled) binary sentiment classifier.

    Architecture: Embedding -> SimpleRNN -> Dense -> Dense(1). Layer widths
    are all derived from ``n = ceil(sqrt(max_features))``: the embedding has
    ``n`` dimensions, the recurrent layer ``n // 2`` units and the hidden
    dense layer ``n // 4`` units (each at least 1).

    Args:
        max_features: Vocabulary size, used as the embedding ``input_dim``.
        sent_max_len: Length of the padded input sequences.

    Returns:
        The assembled ``Sequential`` model.
    """
    n=int(ceil(np.sqrt(max_features)))

    model=Sequential()
    model.add(
        Embedding(
            input_dim=max_features,
            output_dim=n,
            input_length=sent_max_len
        )
    )
    model.add(
        SimpleRNN(
            # never ask for a zero-width layer when the vocabulary is tiny
            units=max(1,n//2),
            activation="relu"
        )
    )
    model.add(
        Dense(
            units=max(1,n//4),
            activation="relu"
        )
    )
    model.add(
        Dense(
            units=1,
            # The model is trained with binary cross-entropy, which expects a
            # probability in [0, 1]; tanh emits values in [-1, 1] and makes
            # the loss meaningless, so the output must be a sigmoid.
            activation="sigmoid"
        )
    )

    return model


In [7]:
def compile_model(
        model,
        loss,
        optimizer,
        metrics
):
    """Compile ``model`` with the given loss, optimizer and metrics.

    Returns:
        The same ``model`` object, after ``model.compile`` has been called.
    """
    compile_options={
        "loss":loss,
        "optimizer":optimizer,
        "metrics":metrics
    }
    model.compile(**compile_options)
    return model

In [8]:
def get_callbacks():
    """Create the callbacks passed to ``model.fit``.

    Ensures a ``tensorboard_files`` directory exists under the current
    working directory and points a TensorBoard callback at a sub-directory
    of it named after the current timestamp. Also creates an early-stopping
    callback that watches ``val_loss``.

    Returns:
        list: ``[tensorboard, early_stopping]``.
    """
    run_name=datetime.now().strftime("%d_%m_%y__%Hh_%Mm_%Ss")
    runs_root=os.path.join(os.getcwd(),"tensorboard_files")
    os.makedirs(runs_root,exist_ok=True)

    tensorboard=TensorBoard(
        log_dir=os.path.join(runs_root,run_name),
        histogram_freq=1
    )

    # Reference signature kept from the original notes:
    #   tf.keras.callbacks.EarlyStopping(
    #       monitor='val_loss', min_delta=0, patience=0, verbose=0,
    #       mode='auto', baseline=None, restore_best_weights=False,
    #       start_from_epoch=0
    #   )
    early_stopping_options={
        "monitor":"val_loss",
        "patience":3,
        "mode":"auto",
        "restore_best_weights":True
    }
    early_stopping=EarlyStopping(**early_stopping_options)

    return [tensorboard,early_stopping]



In [29]:
def train_model(
        model,
        x_train,
        y_train,
        epochs,
        batch_size,
        validation_split
):
    """Fit ``model`` on the training data using the callbacks from ``get_callbacks``.

    Args:
        model: Object exposing a Keras-style ``fit`` method.
        x_train: Training inputs, passed to ``fit`` as-is.
        y_train: Training targets, passed to ``fit`` as-is.
        epochs: Forwarded to ``fit``.
        batch_size: Forwarded to ``fit``.
        validation_split: Forwarded to ``fit``.

    Returns:
        tuple: ``(model, history)`` where ``history`` is whatever ``fit`` returned.
    """
    fit_options=dict(
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
        callbacks=get_callbacks()
    )
    history=model.fit(x_train,y_train,**fit_options)
    return model,history



In [22]:

def evaluate_model(history):
    """Print the per-epoch metrics stored in ``history.history``.

    Prints, in order, the ``accuracy``, ``loss``, ``val_accuracy`` and
    ``val_loss`` entries, one per line.
    """
    for metric_name in ("accuracy","loss","val_accuracy","val_loss"):
        print(history.history[metric_name])



In [23]:
def save_model_file(
        model_file_path,
        model
):
    """Save ``model`` to ``model_file_path``, creating parent directories first.

    This is a best-effort helper: any exception raised while creating the
    directory or saving is caught and printed rather than propagated.

    Args:
        model_file_path: Destination path. May be a bare file name, in which
            case the file is written relative to the current directory.
        model: The model object handed to ``save_model``.
    """
    try:
        file_dir=os.path.dirname(model_file_path)
        # dirname is "" for a bare file name and os.makedirs("") raises, which
        # previously aborted the save; only create a directory when one is named.
        if file_dir:
            os.makedirs(file_dir,exist_ok=True)

        save_model(
            model=model,
            filepath=model_file_path
        )

    except Exception as e:
        print(f"exception occured : {e}")
        

In [None]:
def run_model(
        epochs=5,
        batch_size=32,
        validation_split=0.2,
        model_file_path="/data/model/model.h5"
):
    """Run the whole pipeline: load data, build, compile, train, report, save.

    Args:
        epochs: Number of training epochs passed to ``train_model``.
        batch_size: Batch size passed to ``train_model``. ``train_model``
            requires it, so it must always be supplied.
        validation_split: Validation fraction passed to ``train_model``.
        model_file_path: Where ``save_model_file`` writes the trained model.
    """
    x_train,y_train,x_test,y_test,max_features,sent_max_len=get_transformed_data()

    model=construct_model(
        max_features=max_features,
        sent_max_len=sent_max_len
    )

    loss=BinaryCrossentropy()
    optimizer=Adam(learning_rate=0.005)
    metrics=["accuracy"]

    model=compile_model(
        model=model,
        loss=loss,
        optimizer=optimizer,
        metrics=metrics
    )

    model,history=train_model(
        model=model,
        x_train=x_train,
        y_train=y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split
    )

    evaluate_model(history=history)

    save_model_file(
        model=model,
        model_file_path=model_file_path
    )

In [27]:

# Load and pad the data; the two size values determine the model's dimensions.
(
    x_train,
    y_train,
    x_test,
    y_test,
    max_features,
    sent_max_len
)=get_transformed_data()

model=construct_model(max_features,sent_max_len)

# Training configuration for the binary classifier.
loss=BinaryCrossentropy()
optimizer=Adam(learning_rate=0.005)
metrics=["accuracy"]

model=compile_model(model,loss,optimizer,metrics)


sentence maximum length : 2500
vocabulary size : 88600


In [None]:
# Train the compiled model for a single epoch, holding out 20% for validation.
model,history=train_model(
    model,
    x_train,
    y_train,
    epochs=1,
    batch_size=32,
    validation_split=0.2
)


Epoch 1/5
[1m 93/625[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m19:17[0m 2s/step - accuracy: 0.4742 - loss: 8.4743

KeyboardInterrupt: 

In [None]:

# Print the recorded metrics, then persist the trained model.
evaluate_model(history)

model_file_path="/data/model/model.h5"

save_model_file(model_file_path,model)