## Arsitektur Model

                                +----------------+
                                |   Input Text   |
                                +--------+-------+
                                         |
                                         v
                                +--------+--------+
                                |  SentencePiece  |
                                |   Tokenization  |
                                +--------+--------+
                                         |
                                         v
                                +--------+--------+
                                |  Embedding      |
                                |  Layer          |
                                +--------+--------+
                                         |
                                         v
                                +--------+--------+
                                |  Positional     |
                                |  Encoding       |
                                +--------+--------+
                                         |
                                         v
                       +-----------------+-----------------+
                       |                                     |
                       v                                     v
              +--------+--------+                  +--------+--------+
              | Transformer     |                  | Transformer     |
              | Encoder Block   |                  | Decoder Block   |
              +--------+--------+                  +--------+--------+
                       |                                     |
                       v                                     v
              +--------+--------+                  +--------+--------+
              | Transformer     |                  | Transformer     |
              | Encoder Block   |                  | Decoder Block   |
              +--------+--------+                  +--------+--------+
                       |                                     |
                       v                                     v
              +--------+--------+                  +--------+--------+
              | Transformer     |                  | Transformer     |
              | Encoder Block   |                  | Decoder Block   |
              +--------+--------+                  +--------+--------+
                       |                                     |
                       v                                     v
              +--------+--------+                  +--------+--------+
              | Flatten         |                  | Final Dense    |
              | Layer           |                  | Layer          |
              +--------+--------+                  +--------+--------+
                       |                                     |
                       v                                     v
               +-------+--------+                   +--------+--------+
               |      Output     |                   |    Output       |
               +-----------------+                   +-----------------+


### Import Libraries and Define Helper Functions

In [43]:
import os
import tensorflow as tf
import numpy as np
import sentencepiece as spm
import matplotlib.pyplot as plt

# Function to read and clean text files
def read_text_files(folder_path):
    texts = []
    for filename in os.listdir(folder_path):
        if filename.endswith(".txt"):
            with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
                content = file.read().strip()
                if content:  # Check if the file is not empty
                    cleaned_content = clean_text(content)
                    texts.append(cleaned_content)
    return texts

def clean_text(text):
    # Remove unwanted characters
    unwanted_chars = ['*', '#', '_', ')', '(', '!', '?', '.', ',', '-']
    for char in unwanted_chars:
        text = text.replace(char, '')
    return text


### Read Dataset and Train SentencePiece Tokenizer

In [44]:
# Read and clean dataset
folder_path = './Dataset/nlp_dataset'
texts = read_text_files(folder_path)

# Save the cleaned texts to a temporary file for SentencePiece training
with open("cleaned_texts.txt", "w", encoding="utf-8") as f:
    for text in texts:
        f.write(f"{text}\n")

In [45]:
def count_txt_files(folder_path):
    return sum(1 for filename in os.listdir(folder_path) if filename.endswith(".txt"))

num_files = count_txt_files(folder_path)
print(f"Number of .txt files in folder: {num_files}")

Number of .txt files in folder: 1000


In [46]:
texts = read_text_files(folder_path)
print(f"Number of texts read: {len(texts)}")

Number of texts read: 579


In [47]:
# Tentukan ukuran kosakata yang sesuai dengan jumlah token unik dalam data
# Misalnya, kita gunakan 6000 sebagai ukuran kosakata yang lebih kecil dari 6111
vocab_size = 6000

# Train SentencePiece model
spm.SentencePieceTrainer.train(input='cleaned_texts.txt', model_prefix='m', vocab_size=vocab_size)

# Load the SentencePiece model
sp = spm.SentencePieceProcessor(model_file='m.model')

# Tokenize the dataset
tokenized_texts = [sp.encode(text, out_type=int) for text in texts]

# Prepare the data for TensorFlow
tokenized_texts = [np.array(text) for text in tokenized_texts]

# Convert tokenized texts to tensor format
vectorized_texts = tf.ragged.constant(tokenized_texts, dtype=tf.int64).to_tensor()

# Prepare the dataset for training
batch_size = 8  # Mengurangi ukuran batch lebih lanjut
dataset = tf.data.Dataset.from_tensor_slices((vectorized_texts, vectorized_texts))
dataset = dataset.batch(batch_size, drop_remainder=True)

### Define Positional Encoding and Transformer Block

In [48]:
# Define Positional Encoding
class PositionalEncoding(tf.keras.layers.Layer):
    def __init__(self, position, d_model):
        super(PositionalEncoding, self).__init__()
        self.pos_encoding = self.positional_encoding(position, d_model)
    
    def get_angles(self, pos, i, d_model):
        angle_rates = 1 / np.power(10000, (2 * (i // 2)) / np.float32(d_model))
        return pos * angle_rates
    
    def positional_encoding(self, position, d_model):
        angle_rads = self.get_angles(np.arange(position)[:, np.newaxis],
                                     np.arange(d_model)[np.newaxis, :],
                                     d_model)
        
        angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
        angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])
        
        pos_encoding = angle_rads[np.newaxis, ...]
        return tf.cast(pos_encoding, dtype=tf.float32)
    
    def call(self, x):
        return x + self.pos_encoding[:, :tf.shape(x)[1], :]

# Define Transformer Block
class TransformerBlock(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, dff, rate=0.1):
        super(TransformerBlock, self).__init__()

        self.mha = tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
        self.ffn = tf.keras.Sequential([
            tf.keras.layers.Dense(dff, activation='relu'),
            tf.keras.layers.Dense(d_model)
        ])

        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)
    
    def call(self, x, training, mask):
        attn_output = self.mha(x, x, x, attention_mask=mask)  # (batch_size, input_seq_len, d_model)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(x + attn_output)  # (batch_size, input_seq_len, d_model)

        ffn_output = self.ffn(out1)  # (batch_size, input_seq_len, d_model)
        ffn_output = self.dropout2(ffn_output, training=training)
        out2 = self.layernorm2(out1 + ffn_output)  # (batch_size, input_seq_len, d_model)
        
        return out2


### Define Transformer Model

In [61]:
# Define the full Transformer model
class TransformerModel(tf.keras.Model):
    def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size, target_vocab_size, pe_input, pe_target, rate=0.1):
        super(TransformerModel, self).__init__()

        self.encoder_embedding = tf.keras.layers.Embedding(input_vocab_size, d_model)
        self.decoder_embedding = tf.keras.layers.Embedding(target_vocab_size, d_model)
        
        self.pos_encoding = PositionalEncoding(pe_input, d_model)
        
        self.enc_layers = [TransformerBlock(d_model, num_heads, dff, rate) for _ in range(num_layers)]
        self.dec_layers = [TransformerBlock(d_model, num_heads, dff, rate) for _ in range(num_layers)]
        
        self.dropout = tf.keras.layers.Dropout(rate)
        
        self.final_layer = tf.keras.layers.Dense(target_vocab_size)

    def create_padding_mask(self, seq):
        seq = tf.cast(tf.math.equal(seq, 0), tf.float32)
        return seq[:, tf.newaxis, tf.newaxis, :]  # (batch_size, 1, 1, seq_len)

    def create_look_ahead_mask(self, size):
        mask = 1 - tf.linalg.band_part(tf.ones((size, size)), -1, 0)
        return mask  # (seq_len, seq_len)

    def call(self, inputs, training):
        inp, tar = inputs
        
        enc_padding_mask = self.create_padding_mask(inp)
        look_ahead_mask = self.create_look_ahead_mask(tf.shape(tar)[1])
        dec_padding_mask = self.create_padding_mask(tar)

        inp = self.encoder_embedding(inp)  # (batch_size, input_seq_len, d_model)
        tar = self.decoder_embedding(tar)  # (batch_size, target_seq_len, d_model)

        inp = self.pos_encoding(inp)
        tar = self.pos_encoding(tar)

        for enc_layer in self.enc_layers:
            inp = enc_layer(inp, training, enc_padding_mask)

        for dec_layer in self.dec_layers:
            tar = dec_layer(tar, training, look_ahead_mask)

        final_output = self.final_layer(tar)  # (batch_size, target_seq_len, target_vocab_size)
        
        return final_output


### Compile and Train the Model

In [62]:
# Hyperparameters
num_layers = 4  # Asli: 12, disesuaikan menjadi 8, kemudian 6, dan akhirnya 4
d_model = 128   # Asli: 768, disesuaikan menjadi 256, kemudian 128
num_heads = 4   # Asli: 12, disesuaikan menjadi 8, kemudian 4
dff = 512       # Asli: 3072, disesuaikan menjadi 2048, kemudian 1024, dan akhirnya 512
input_vocab_size = 6000  # Tetap
target_vocab_size = 6000  # Tetap
pe_input = 5000  # Kurangi nilai pe_input
pe_target = 5000  # Kurangi nilai pe_target

# Instantiate and compile the model
transformer = TransformerModel(num_layers, d_model, num_heads, dff, input_vocab_size, target_vocab_size, pe_input, pe_target)

# Define the learning rate schedule
learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.001,
    decay_steps=10000,
    decay_rate=0.96,
    staircase=True
)

optimizer = tf.keras.optimizers.Adam(learning_rate)

transformer.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Mengurangi ukuran batch menjadi 8
batch_size = 8

# Build the model by calling it on a batch of data
input_batch = tf.random.uniform((batch_size, 256), dtype=tf.int64, minval=0, maxval=input_vocab_size)
target_batch = tf.random.uniform((batch_size, 256), dtype=tf.int64, minval=0, maxval=target_vocab_size)

_ = transformer((input_batch, target_batch), training=False)

# Display model summary
transformer.summary()

# Train the model
epochs = 10
history = transformer.fit(dataset, epochs=epochs)

# Save the model
model_save_path = './models/complex_transformer_nlp_model.h5'
transformer.save(model_save_path)


Model: "transformer_model_20"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_40 (Embedding)    multiple                  768000    
                                                                 
 embedding_41 (Embedding)    multiple                  768000    
                                                                 
 positional_encoding_20 (Pos  multiple                 0         
 itionalEncoding)                                                
                                                                 
 transformer_block_264 (Tran  multiple                 396032    
 sformerBlock)                                                   
                                                                 
 transformer_block_265 (Tran  multiple                 396032    
 sformerBlock)                                                   
                                              

OperatorNotAllowedInGraphError: in user code:

    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\keras\engine\training.py", line 1021, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\keras\engine\training.py", line 1010, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\keras\engine\training.py", line 1000, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\keras\engine\training.py", line 859, in train_step
        y_pred = self(x, training=True)
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None

    OperatorNotAllowedInGraphError: Exception encountered when calling layer "transformer_model_20" (type TransformerModel).
    
    in user code:
    
        File "C:\Users\gabri\AppData\Local\Temp\ipykernel_19748\2130157609.py", line 27, in call  *
            inp, tar = inputs
    
        OperatorNotAllowedInGraphError: iterating over `tf.Tensor` is not allowed: AutoGraph did convert this function. This might indicate you are trying to use an unsupported feature.
    
    
    Call arguments received:
      • inputs=tf.Tensor(shape=(8, 4656), dtype=int64)
      • training=True


In [42]:
# Train the model
epochs = 10
history = transformer.fit(dataset, epochs=epochs)

# Save the model
model_save_path = './models/complex_transformer_nlp_model.h5'
transformer.save(model_save_path)

Epoch 1/10


OperatorNotAllowedInGraphError: in user code:

    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\keras\engine\training.py", line 1021, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\keras\engine\training.py", line 1010, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\keras\engine\training.py", line 1000, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\keras\engine\training.py", line 859, in train_step
        y_pred = self(x, training=True)
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None

    OperatorNotAllowedInGraphError: Exception encountered when calling layer "transformer_model_13" (type TransformerModel).
    
    in user code:
    
        File "C:\Users\gabri\AppData\Local\Temp\ipykernel_19748\2130157609.py", line 27, in call  *
            inp, tar = inputs
    
        OperatorNotAllowedInGraphError: iterating over `tf.Tensor` is not allowed: AutoGraph did convert this function. This might indicate you are trying to use an unsupported feature.
    
    
    Call arguments received:
      • inputs=tf.Tensor(shape=(8, 4656), dtype=int64)
      • training=True


In [33]:
import tensorflow as tf

# Memastikan TensorFlow mendeteksi GPU
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


Num GPUs Available:  1
