## Import Library and Utility Function

In [1]:
import os
import json
import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM, Dense, Input, MultiHeadAttention, LayerNormalization, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Fungsi untuk memuat kata-kata dari file
def load_words(filepath):
    with open(filepath, 'r', encoding='utf-8') as file:
        words = file.read().splitlines()
    return words

# Fungsi untuk memuat kata-kata slang
def load_slang_words(filename):
    with open(filename, 'r', encoding='utf-8') as file:
        slang_dict = json.loads(file.read())
    return slang_dict

# Fungsi untuk membaca file teks dari folder
def read_text_files(folder_path):
    texts = []
    for filename in os.listdir(folder_path):
        if filename.endswith(".txt"):
            with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
                content = file.read().strip()
                if content:  # Check if the file is not empty
                    cleaned_content = clean_text(content)
                    texts.append(cleaned_content)
    return texts

# Fungsi untuk membersihkan teks
def clean_text(text):
    unwanted_chars = ['*', '#', '_', ')', '(', '!', '?', '.', ',', '-']
    for char in unwanted_chars:
        text = text.replace(char, '')
    return text


## Load Data

In [2]:
# Memuat data untuk dataset 1 (kosakata)
root_words = load_words('../Dataset/indonesian_word/combined_root_words.txt')
stop_words = load_words('../Dataset/indonesian_word/combined_stop_words.txt')
slang_dict = load_slang_words('../Dataset/indonesian_word/combined_slang_words.txt')

# Gabungkan semua kata untuk pembentukan kosakata
all_words = list(set(root_words + stop_words + list(slang_dict.keys()) + list(slang_dict.values())))

# Memuat dataset 2 (paragraf tentang Python)
dataset2_texts = read_text_files('../Dataset/nlp_dataset')


## Configure GPU

In [3]:
# Configure TensorFlow to use a specific amount of GPU memory
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2048)])  # Set memory limit to 2GB
    except RuntimeError as e:
        print(e)

# Enable mixed precision training
from tensorflow.keras.mixed_precision import Policy, set_global_policy

# Set mixed precision policy to use float16
policy = Policy('mixed_float16')
set_global_policy(policy)


INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 3050 6GB Laptop GPU, compute capability 8.6


## Data Prepeare

In [4]:
# Tokenizer untuk dataset 1
tokenizer = Tokenizer(num_words=len(all_words), oov_token='<OOV>')
tokenizer.fit_on_texts(all_words)

# Tokenisasi dan padding untuk dataset 1
dataset1_sequences = tokenizer.texts_to_sequences(all_words)
dataset1_padded = pad_sequences(dataset1_sequences, padding='post')

# Dataset dan target untuk pelatihan
dataset1_inputs = dataset1_padded[:, :-1]
dataset1_targets = dataset1_padded[:, 1:]

# Create a dataset from the input and target sequences
train_dataset1 = tf.data.Dataset.from_tensor_slices((dataset1_inputs, dataset1_targets))
train_dataset1 = train_dataset1.shuffle(buffer_size=1024).batch(32).prefetch(tf.data.experimental.AUTOTUNE)


## Arsitektur words model

In [5]:
# Hyperparameters
embedding_dim = 256
lstm_units = 512
vocab_size = len(all_words)  # Adjust based on your vocabulary size

# Input Layer
inputs = Input(shape=(None,), name="inputs")

# Embedding Layer
embedding = Embedding(input_dim=vocab_size, output_dim=embedding_dim, name="embedding_layer")(inputs)

# LSTM Layer
lstm_output = LSTM(lstm_units, return_sequences=True, name="lstm_layer")(embedding)

# Output Layer for Vocabulary Understanding
outputs = Dense(vocab_size, activation='softmax', name="output_layer")(lstm_output)

# Model
vocab_model = Model(inputs=inputs, outputs=outputs, name="vocab_model")

# Compile Model with mixed precision optimizer
optimizer = Adam()
vocab_model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Model Summary
vocab_model.summary()


Model: "vocab_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 inputs (InputLayer)         [(None, None)]            0         
                                                                 
 embedding_layer (Embedding)  (None, None, 256)        7588608   
                                                                 
 lstm_layer (LSTM)           (None, None, 512)         1574912   
                                                                 
 output_layer (Dense)        (None, None, 29643)       15206859  
                                                                 
Total params: 24,370,379
Trainable params: 24,370,379
Non-trainable params: 0
_________________________________________________________________


## Train Words model

In [7]:
accumulation_steps = 4  # Simulate batch size of 4 times larger

@tf.function
def train_step_vocabulary(inputs, targets):
    with tf.GradientTape() as tape:
        predictions = vocab_model(inputs, training=True)
        loss = tf.keras.losses.sparse_categorical_crossentropy(targets, predictions)
    scaled_loss = loss / accumulation_steps
    gradients = tape.gradient(scaled_loss, vocab_model.trainable_variables)
    return gradients, loss

# Custom training loop for dataset 1
for epoch in range(10):  # Number of epochs
    print(f'Epoch {epoch + 1}/{10}')
    accum_gradients = [tf.zeros_like(var, dtype=tf.float32) for var in vocab_model.trainable_variables]
    for step, (inputs, targets) in enumerate(train_dataset1):
        gradients, loss = train_step_vocabulary(inputs, targets)
        accum_gradients = [
            accum_grad + (grad if grad is not None else tf.zeros_like(accum_grad))
            for accum_grad, grad in zip(accum_gradients, gradients)
        ]
        
        if (step + 1) % accumulation_steps == 0:
            optimizer.apply_gradients(zip(accum_gradients, vocab_model.trainable_variables))
            accum_gradients = [tf.zeros_like(var, dtype=tf.float32) for var in vocab_model.trainable_variables]
    
    # Apply remaining gradients if the number of steps is not a multiple of accumulation_steps
    if step % accumulation_steps != 0:
        optimizer.apply_gradients(zip(accum_gradients, vocab_model.trainable_variables))
    
    print(f'Loss: {loss.numpy().mean()}')
    
# Save the embedding layer weights for future use
vocab_model.save_weights('../saved_model/base_model_saved/base_model_03/vocab_model_weights.h5')



Epoch 1/10
Loss: 10.296875
Epoch 2/10
Loss: 10.296875
Epoch 3/10
Loss: 10.296875
Epoch 4/10
Loss: 10.296875
Epoch 5/10
Loss: 10.296875
Epoch 6/10
Loss: 10.296875
Epoch 7/10
Loss: 10.296875
Epoch 8/10
Loss: 10.296875
Epoch 9/10
Loss: 10.296875
Epoch 10/10
Loss: 10.296875


## Arsitektur Model Attention

In [8]:
# Load pretrained embedding weights
embedding_layer = Embedding(input_dim=vocab_size, output_dim=embedding_dim, name="embedding_layer")
embedding_layer.build((None,))
embedding_layer.set_weights(vocab_model.get_layer('embedding_layer').get_weights())

# Transformer Layer Function
def transformer_layer(x, d_model, num_heads, dff, rate=0.1):
    attn_output = MultiHeadAttention(num_heads=num_heads, key_dim=d_model)(x, x)
    attn_output = Dropout(rate)(attn_output)
    out1 = LayerNormalization(epsilon=1e-6)(x + attn_output)
    
    ffn_output = tf.keras.Sequential([
        Dense(dff, activation='relu'), 
        Dense(d_model)])(out1)
    
    ffn_output = Dropout(rate)(ffn_output)
    out2 = LayerNormalization(epsilon=1e-6)(out1 + ffn_output)
    
    return out2

# Parameters
embedding_dim = 256
num_transformer_layers = 4
num_heads = 8
dff = 1024

# Model with attention layers
inputs = Input(shape=(None,), name="inputs")
embedding = embedding_layer(inputs)

transformer_output = embedding
for _ in range(num_transformer_layers):
    transformer_output = transformer_layer(transformer_output, embedding_dim, num_heads, dff)

outputs = Dense(vocab_size, activation='softmax', name="output_layer")(transformer_output)

context_model = Model(inputs=inputs, outputs=outputs, name="context_model")

# Compile Model with mixed precision optimizer
optimizer = Adam()
context_model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

context_model.summary()


Model: "context_model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 inputs (InputLayer)            [(None, None)]       0           []                               
                                                                                                  
 embedding_layer (Embedding)    (None, None, 256)    7588608     ['inputs[0][0]']                 
                                                                                                  
 multi_head_attention (MultiHea  (None, None, 256)   2103552     ['embedding_layer[0][0]',        
 dAttention)                                                      'embedding_layer[0][0]']        
                                                                                                  
 dropout (Dropout)              (None, None, 256)    0           ['multi_head_attentio

## Dataset 2 Prep

In [9]:
# Tokenizer untuk dataset 2 (menggunakan tokenizer yang sama)
dataset2_sequences = tokenizer.texts_to_sequences(dataset2_texts)
dataset2_padded = pad_sequences(dataset2_sequences, padding='post')

# Dataset dan target untuk pelatihan
dataset2_inputs = dataset2_padded[:, :-1]
dataset2_targets = dataset2_padded[:, 1:]

# Create a dataset from the input and target sequences
train_dataset2 = tf.data.Dataset.from_tensor_slices((dataset2_inputs, dataset2_targets))
train_dataset2 = train_dataset2.shuffle(buffer_size=1024).batch(32).prefetch(tf.data.experimental.AUTOTUNE)


## Training Model Attention

In [10]:
# Custom training loop for dataset 2 with gradient accumulation
accumulation_steps = 4  # Simulate batch size of 4 times larger

@tf.function
def train_step(inputs, targets):
    with tf.GradientTape() as tape:
        predictions = context_model(inputs, training=True)
        loss = tf.keras.losses.sparse_categorical_crossentropy(targets, predictions)
    gradients = tape.gradient(loss, context_model.trainable_variables)
    return gradients, loss

# Custom training loop for dataset 2
for epoch in range(10):  # Number of epochs
    print(f'Epoch {epoch + 1}/{10}')
    accum_gradients = [tf.zeros_like(var) for var in context_model.trainable_variables]
    for step, (inputs, targets) in enumerate(train_dataset2):
        gradients, loss = train_step(inputs, targets)
        accum_gradients = [accum_grad + grad for accum_grad, grad in zip(accum_gradients, gradients)]
        
        if (step + 1) % accumulation_steps == 0:
            context_model.optimizer.apply_gradients(zip(accum_gradients, context_model.trainable_variables))
            accum_gradients = [tf.zeros_like(var) for var in context_model.trainable_variables]
    
    # Apply remaining gradients if the number of steps is not a multiple of accumulation_steps
    if step % accumulation_steps != 0:
        context_model.optimizer.apply_gradients(zip(accum_gradients, context_model.trainable_variables))
    
    print(f'Loss: {loss.numpy().mean()}')


Epoch 1/10


ResourceExhaustedError: Graph execution error:

Detected at node 'context_model/multi_head_attention/value/einsum/Einsum' defined at (most recent call last):
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
      app.start()
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\ipykernel\kernelapp.py", line 701, in start
      self.io_loop.start()
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\tornado\platform\asyncio.py", line 205, in start
      self.asyncio_loop.run_forever()
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\asyncio\windows_events.py", line 321, in run_forever
      super().run_forever()
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\asyncio\base_events.py", line 603, in run_forever
      self._run_once()
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\asyncio\base_events.py", line 1909, in _run_once
      handle._run()
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\ipykernel\kernelbase.py", line 534, in dispatch_queue
      await self.process_one()
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\ipykernel\kernelbase.py", line 523, in process_one
      await dispatch(*args)
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\ipykernel\kernelbase.py", line 429, in dispatch_shell
      await result
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\ipykernel\kernelbase.py", line 767, in execute_request
      reply_content = await reply_content
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\ipykernel\ipkernel.py", line 429, in do_execute
      res = shell.run_cell(
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\ipykernel\zmqshell.py", line 549, in run_cell
      return super().run_cell(*args, **kwargs)
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\IPython\core\interactiveshell.py", line 3075, in run_cell
      result = self._run_cell(
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\IPython\core\interactiveshell.py", line 3130, in _run_cell
      result = runner(coro)
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\IPython\core\interactiveshell.py", line 3334, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\IPython\core\interactiveshell.py", line 3517, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\IPython\core\interactiveshell.py", line 3577, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\gabri\AppData\Local\Temp\ipykernel_30180\4093364320.py", line 17, in <module>
      gradients, loss = train_step(inputs, targets)
    File "C:\Users\gabri\AppData\Local\Temp\ipykernel_30180\4093364320.py", line 7, in train_step
      predictions = context_model(inputs, training=True)
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\keras\engine\base_layer.py", line 1096, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\keras\engine\functional.py", line 451, in call
      return self._run_internal_graph(
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\keras\engine\functional.py", line 589, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\keras\engine\base_layer.py", line 1096, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\keras\layers\multi_head_attention.py", line 508, in call
      value = self._value_dense(value)
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\keras\engine\base_layer.py", line 1096, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\gabri\anaconda3\envs\myenv\lib\site-packages\keras\layers\einsum_dense.py", line 187, in call
      ret = tf.einsum(self.equation, inputs, self.kernel)
Node: 'context_model/multi_head_attention/value/einsum/Einsum'
OOM when allocating tensor with shape[126464,2048] and type half on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node context_model/multi_head_attention/value/einsum/Einsum}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_train_step_243878]