In [3]:
from datetime import datetime
import pandas as pd
import numpy as np
import os,sys
import json
from math import ceil
import warnings
warnings.filterwarnings("ignore")

In [4]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import (
    Dense,
    SimpleRNN,
    Embedding,
    LeakyReLU,
    ReLU
)
from tensorflow.keras.activations import (
    sigmoid,
    tanh
)
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import (
    EarlyStopping,
    TensorBoard
)
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import Accuracy 

In [5]:
from tensorflow.keras.models import (
    save_model,
    load_model
)

In [6]:
def get_transformed_data():

    (x_train,y_train),(x_test,y_test)=imdb.load_data()

    mx=0
    for sent in x_train:
        mx=max(mx,len(sent))

    sent_max_len=ceil(mx/100)*100

    print(f"sentence maximum length : {sent_max_len}")

    mx=0
    for sent1,sent2 in zip(x_train,x_test):
        if sent1:
            mx=max(mx,max(sent1))
        if sent2:
            mx=max(mx,max(sent2))

    max_features=ceil(mx/100)*100

    print(f"vocabulary size : {max_features}")

    x_train_padded=pad_sequences(
        x_train,
        padding="post",
        maxlen=sent_max_len
    )
    x_test_padded=pad_sequences(
        x_test,
        padding="post",
        maxlen=sent_max_len
    )

    return x_train_padded,y_train,x_test_padded,y_test,max_features,sent_max_len

In [7]:
def construct_model(
        max_features,
        sent_max_len
):
    n=int(ceil(np.sqrt(max_features)))

    model=Sequential()
    model.add(
        Embedding(
            input_dim=max_features,
            output_dim=n,
            input_length=sent_max_len
        )
    )
    model.add(
        SimpleRNN(
            units=int(n/2),
            activation="relu"
        )
    )
    model.add(
        Dense(
            units=int(n/4),
            activation="relu"
        )
    )
    model.add(
        Dense(
            units=1,
            activation="tanh"
        )
    )

    return model


In [8]:
def compile_model(
        model,
        loss,
        optimizer,
        metrics
):
    model.compile(
        loss=loss,
        optimizer=optimizer,
        metrics=metrics
    )

    return model

In [9]:
def get_callbacks():
    file_name=datetime.now().strftime("%d_%m_%y__%Hh_%Mm_%Ss")
    log_path=os.path.join(os.getcwd(),"tensorboard_files")
    os.makedirs(log_path,exist_ok=True)
    log_dir=os.path.join(log_path,file_name)

    tensorboard=TensorBoard(
        log_dir=log_dir,
        histogram_freq=1
    )

    '''
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        min_delta=0,
        patience=0,
        verbose=0,
        mode='auto',
        baseline=None,
        restore_best_weights=False,
        start_from_epoch=0
    )
    '''

    early_stopping=EarlyStopping(
        monitor="val_loss",
        patience=3,
        mode="auto",
        restore_best_weights=True   
    )

    return [tensorboard,early_stopping]



In [10]:
def train_model(
        model,
        x_train,
        y_train,
        epochs,
        batch_size,
        validation_split
):
    history=model.fit(
        x_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
        callbacks=get_callbacks()   
    )

    return model,history



In [11]:

def evaluate_model(history):
    print(history.history["accuracy"])
    print(history.history["loss"])
    print(history.history["val_accuracy"])
    print(history.history["val_loss"])



In [12]:
def save_model_file(
        model_file_path,
        model
):
    try:
        file_dir=os.path.dirname(model_file_path)
        os.makedirs(file_dir,exist_ok=True)
        
        save_model(
            model=model,
            filepath=model_file_path
        )

    except Exception as e:
        print(f"exception occured : {e}")
        

In [13]:
def run_model():
    x_train,y_train,x_test,y_test,max_features,sent_max_len=get_transformed_data()

    model=construct_model(
        max_features=max_features,
        sent_max_len=sent_max_len
    )

    loss=BinaryCrossentropy()
    optimizer=Adam(learning_rate=0.005)
    metrics=["accuracy"]

    model=compile_model(
        model=model,
        loss=loss,
        optimizer=optimizer,
        metrics=metrics
    )

    model,history=train_model(
        model=model,
        x_train=x_train,
        y_train=y_train,
        epochs=5,
        validation_split=0.2
    )

    evaluate_model(history=history)

    model_file_path="/data/model/model.h5"

    save_model_file(
        model=model,
        model_file_path=model_file_path
    )

In [14]:

x_train,y_train,x_test,y_test,max_features,sent_max_len=get_transformed_data()

model=construct_model(
    max_features=max_features,
    sent_max_len=sent_max_len
)

loss=BinaryCrossentropy()
optimizer=Adam(learning_rate=0.005)
metrics=["accuracy"]

model=compile_model(
    model=model,
    loss=loss,
    optimizer=optimizer,
    metrics=metrics
)


sentence maximum length : 2500
vocabulary size : 88600


In [16]:
model,history=train_model(
    model=model,
    x_train=x_train,
    y_train=y_train,
    epochs=5,
    batch_size=32,
    validation_split=0.2
)

Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1299s[0m 2s/step - accuracy: 0.4985 - loss: 8.0840 - val_accuracy: 0.5062 - val_loss: 7.9591
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m775s[0m 1s/step - accuracy: 0.4985 - loss: 8.0840 - val_accuracy: 0.5062 - val_loss: 7.9591
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1773s[0m 3s/step - accuracy: 0.4985 - loss: 8.0840 - val_accuracy: 0.5062 - val_loss: 7.9591
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1655s[0m 3s/step - accuracy: 0.4985 - loss: 8.0840 - val_accuracy: 0.5062 - val_loss: 7.9591


In [17]:
model_file_path=os.path.join(os.getcwd(),"data","model","model.h5")

save_model_file(
    model=model,
    model_file_path=model_file_path
)



In [21]:
%load_ext tensorboard

tensorboard_file_path="tensorboard_files/09_08_25__10h_53m_16s"

%tensorboard --logdir {tensorboard_file_path}

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 2476), started 0:00:02 ago. (Use '!kill 2476' to kill it.)

In [24]:
y_pred=model.predict(x_test)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m370s[0m 473ms/step


In [22]:
loss,accuracy=model.evaluate(x_test,y_test)

print(f"loss : {loss}")
print(f"accuracy : {accuracy}")

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m522s[0m 668ms/step - accuracy: 0.5000 - loss: 8.0590
loss : 8.059041023254395
accuracy : 0.5


In [25]:
from sklearn.metrics import accuracy_score

# Convert probabilities to 0 or 1 using a threshold of 0.5
y_pred_classes = (y_pred > 0.5).astype(int)

accuracy = accuracy_score(y_test, y_pred_classes)
print(f"Accuracy: {accuracy}")



Accuracy: 0.5
