# Imports

In [None]:
import tensorflow as tf
import os
import json

In [None]:
# Evaluate the installed TensorFlow version string; as the last expression of
# a notebook cell it is shown as the cell output.
tf.__version__

# 1. Constants

## 1.1. Paths

In [None]:
# Root locations. Both are absolute, machine-specific paths and must be edited
# before running the notebook on another machine.
DATASET_PATH = "/run/media/ishrak/Ishrak/IUT/Thesis/dataset/tfrecords/"
MODEL_DIR = "/run/media/ishrak/Ishrak/IUT/Thesis/model_dir"

# The dataset root holds one sub-directory per split.
TRAIN_DATASET_PATH, TEST_DATASET_PATH = (
    os.path.join(DATASET_PATH, split) for split in ("train", "test")
)

# Checkpoints and logs both live under the model directory.
CKPT_PATH, LOG_PATH = (
    os.path.join(MODEL_DIR, leaf) for leaf in ("ckpt", "logs")
)

# Plain-text log file and TensorBoard directory, both inside LOG_PATH.
LOG_TEXT_PATH, TENSORBOARD_PATH = (
    os.path.join(LOG_PATH, leaf) for leaf in ("log.txt", "tensorboard_logs")
)

## 1.2. Constant values

In [None]:
# Load the training constants from config.json, resolved relative to the
# current working directory. JSON text is UTF-8 by specification, so the
# encoding is set explicitly instead of being inherited from the locale.
with open("config.json", encoding="utf-8") as fp:
    consts = json.load(fp)

# The file is already closed here. Every key below is required: a missing key
# raises KeyError at this point rather than later during training.
BUFFER_SIZE = consts["BUFFER_SIZE"]
BATCH_SIZE = consts["BATCH_SIZE"]
EPOCHS = consts["EPOCHS"]
NUM_LAYERS = consts["NUM_LAYERS"]
D_MODEL = consts["D_MODEL"]
DFF = consts["DFF"]
NUM_HEADS = consts["NUM_HEADS"]
SUMMARY_LENGTH = consts["SUMMARY_LENGTH"]
TEXT_LENGTH = consts["TEXT_LENGTH"]
START_TOKEN = consts["START_TOKEN"]
END_TOKEN = consts["END_TOKEN"]
VOCAB_SIZE = consts["VOCAB_SIZE"]
ENCODER_VOCAB_SIZE = consts["ENCODER_VOCAB_SIZE"]
DECODER_VOCAB_SIZE = consts["DECODER_VOCAB_SIZE"]
VOCAB_DIM = consts["VOCAB_DIM"]
CKPT_TO_KEEP = consts["CKPT_TO_KEEP"]

# Drop the raw dict so only the named constants remain in the namespace.
del consts

# 2. Loading Dataset
The dataset consists of articles scraped from the Prothom Alo news site. It contains the titles, contents and tags of many articles.


In [None]:
# Collect the full path of every entry in the train and test directories.
# os.listdir() returns names in arbitrary order, so they are sorted to give
# the same file order on every run and machine.
# NOTE(review): every directory entry is included, not only TFRecord files;
# confirm the directories contain nothing else.
train_tfrecord_files = [
    os.path.join(TRAIN_DATASET_PATH, file_name)
    for file_name in sorted(os.listdir(TRAIN_DATASET_PATH))
]
test_tfrecord_files = [
    os.path.join(TEST_DATASET_PATH, file_name)
    for file_name in sorted(os.listdir(TEST_DATASET_PATH))
]

In [None]:
from data_manipulation.create_tfrecord_dataset import create_tfrecord_dataset

In [None]:
# Build the training dataset from the files found under TRAIN_DATASET_PATH.
# NOTE(review): how create_tfrecord_dataset uses each argument is not visible
# here; see data_manipulation/create_tfrecord_dataset.py.
train_dataset = create_tfrecord_dataset(
    tfrecord_files=train_tfrecord_files,
    # Lengths of the input (article text) and output (summary) features.
    input_feature_length=TEXT_LENGTH,
    output_feature_length=SUMMARY_LENGTH,
    # Batching and buffering settings.
    batch_size=BATCH_SIZE,
    cache_buffer_size=BUFFER_SIZE,
    prefetch_buffer_size=tf.data.experimental.AUTOTUNE,
)

In [None]:
# Build the evaluation dataset from the files found under TEST_DATASET_PATH,
# using the same settings as the training dataset.
# NOTE(review): how create_tfrecord_dataset uses each argument is not visible
# here; see data_manipulation/create_tfrecord_dataset.py.
test_dataset = create_tfrecord_dataset(
    tfrecord_files=test_tfrecord_files,
    # Lengths of the input (article text) and output (summary) features.
    input_feature_length=TEXT_LENGTH,
    output_feature_length=SUMMARY_LENGTH,
    # Batching and buffering settings.
    batch_size=BATCH_SIZE,
    cache_buffer_size=BUFFER_SIZE,
    prefetch_buffer_size=tf.data.experimental.AUTOTUNE,
)

# 2. Model

## 2.1. Model Architecture

In [None]:
from model.transformer import Transformer
# Instantiate the model from the constants loaded from config.json.
# NOTE(review): the first six arguments are positional, so their order must
# match the Transformer constructor's signature; confirm in
# model/transformer.py.
# NOTE(review): pe_input / pe_target receive the vocabulary sizes. If these
# parameters are maximum positional-encoding lengths, TEXT_LENGTH and
# SUMMARY_LENGTH may be the intended values; confirm before changing.
transformer = Transformer(
    NUM_LAYERS, 
    D_MODEL, 
    NUM_HEADS, 
    DFF,
    ENCODER_VOCAB_SIZE, 
    DECODER_VOCAB_SIZE, 
    pe_input=ENCODER_VOCAB_SIZE, 
    pe_target=DECODER_VOCAB_SIZE,
)

## 2.2. Adam optimizer
The model is trained with the Adam optimizer and a custom learning-rate schedule.

In [None]:
from training.custom_scheduler import CustomSchedule

# Learning-rate schedule built from D_MODEL; its exact shape is defined in
# training/custom_scheduler.py.
learning_rate = CustomSchedule(D_MODEL)

# Adam driven by the schedule above, with explicit moment-decay rates and
# epsilon.
optimizer = tf.keras.optimizers.Adam(
    learning_rate,
    beta_1=0.9,
    beta_2=0.98,
    epsilon=1e-9,
)

## 2.3. Checkpoints

In [None]:
# Track the model and the optimizer together in one checkpoint object.
ckpt = tf.train.Checkpoint(transformer=transformer, optimizer=optimizer)

# Manage checkpoints under CKPT_PATH, keeping at most CKPT_TO_KEEP of them.
ckpt_manager = tf.train.CheckpointManager(
    checkpoint=ckpt,
    directory=CKPT_PATH,
    max_to_keep=CKPT_TO_KEEP,
)

# Resume from the most recent checkpoint when one exists; otherwise training
# starts from freshly initialised weights.
if ckpt_manager.latest_checkpoint:
    ckpt.restore(ckpt_manager.latest_checkpoint)
    print('Latest checkpoint restored!!')

## 2.4. Compiling model

In [None]:
# Attach the optimizer to the model.
# NOTE(review): no loss or metrics are passed here; presumably Transformer
# computes them itself (e.g. in a custom train_step). Confirm in
# model/transformer.py.
transformer.compile(optimizer=optimizer)

## 2.5. Custom Callback

In [None]:
from helpers.logger_callback import LoggerCallback

# Callback given the TensorBoard log directory and the checkpoint manager.
# NOTE(review): presumably it writes TensorBoard summaries and saves
# checkpoints during training; confirm in helpers/logger_callback.py.
logger_callback = LoggerCallback(
    log_dir=TENSORBOARD_PATH,
    ckpt_manager=ckpt_manager,
)

## 2.6. Training Model

In [None]:
# Train for EPOCHS epochs on the training dataset, using the test dataset as
# validation data and routing training events through the logger callback.
transformer.fit(
    x=train_dataset,
    epochs=EPOCHS,
    validation_data=test_dataset,
    callbacks=[logger_callback],
)