In [1]:
import numpy as np
import tensorflow as tf

In [2]:
import keras
import pandas as pd

In [3]:
class SelfAttention (keras.layers.Layer) :
    def __init__ (self,d_k) :
        super(SelfAttention,self).__init__()
        self.dk = d_k 
    
    def build (self,input_shape) :
        f_dimention = input_shape[-1]
        self.WQ = self.add_weight(shape=(f_dimention,self.dk),initializer="random_normal",trainable=True)
        self.WK = self.add_weight(shape=(f_dimention,self.dk),initializer="random_normal",trainable=True)
        self.WV = self.add_weight(shape=(f_dimention,self.dk),initializer="random_normal",trainable=True)
    
    def call(self,x) :
        Q = tf.matmul(x,self.WQ)
        K = tf.matmul(x,self.WK)
        V = tf.matmul(x,self.WV)
        dk = tf.math.sqrt(tf.cast(self.dk,dtype=tf.float32))
        attention = tf.matmul(Q,tf.transpose(K,perm=[0,2,1])) / dk 
        outputs = tf.matmul(attention,V)
        return outputs

In [4]:
data = tf.random.normal((2,5,20))

In [5]:
Attention = SelfAttention(d_k=20)

In [6]:
outputs = Attention(data)

In [7]:
data[0][0]

<tf.Tensor: shape=(20,), dtype=float32, numpy=
array([-1.2761335 , -0.3343832 ,  0.41384995,  0.09875622, -0.11075204,
       -1.6409185 , -0.24328028,  1.2874672 , -0.12970327,  0.44577038,
        0.7368687 ,  1.3209614 , -1.6936523 ,  1.357806  , -0.21452764,
       -1.6222739 , -0.33537555, -0.33465895,  0.29321498,  0.51347256],
      dtype=float32)>

In [4]:
def make_random () :
    return [np.random.randint(0,10) for i in range(4)]

In [9]:
x = [make_random() for x in range(1000)]
test = [make_random() for x in range(1000)]
x_test = np.array(test)
x_train = np.array(x)

In [10]:
y_train = np.array([make_random() for _ in range(1000)])

In [11]:
x_train,y_train

(array([[6, 7, 8, 5],
        [4, 3, 9, 0],
        [7, 4, 4, 3],
        ...,
        [0, 2, 1, 0],
        [1, 0, 9, 3],
        [4, 8, 8, 1]]),
 array([[1, 2, 2, 4],
        [0, 6, 9, 0],
        [4, 4, 7, 7],
        ...,
        [7, 3, 5, 6],
        [7, 7, 2, 1],
        [0, 5, 2, 6]]))

In [132]:
class Transformers_Encoder (keras.layers.Layer) :
    def __init__ (self,vocab_size,d_model,d_k,ffn,outputs_dim) :  
        super (Transformers_Encoder,self).__init__()
        self.Self_attention = SelfAttention(d_k=d_k)
        self.ffn = keras.Sequential(
            [
                keras.layers.Dense(ffn,activation='relu'),
                keras.layers.Dense(d_model)
            ]
        )
        self.Embedding = keras.layers.Embedding(input_dim=vocab_size,output_dim=outputs_dim)
        self.LSTM1 = keras.layers.LSTM(32,return_state= True)
    
    def call(self,x) :
        x = self.Embedding(x)
        x = self.Self_attention(x)
        x = self.ffn(x)
        _,state_h,state_c = self.LSTM1(x)
        return state_h,state_c

In [148]:
class Transformers_Decoder (keras.layers.Layer) :
    def __init__ (self,num_class,embbedding_dim) :
        super (Transformers_Decoder,self).__init__()
        self.Embedding = keras.layers.Embedding(input_dim=num_class,output_dim=embbedding_dim)
        self.Long_sort_Term_memories = keras.layers.LSTM(32,return_sequences=True,return_state=True)
        self.Outputs_dense = keras.layers.Dense(num_class,activation='softmax')
    
    def call(self,decoder_input,state_h,state_c) :
        decoder_input = self.Embedding(decoder_input)
        states = [state_h,state_c]
        outputs,state_h,state_c = self.Long_sort_Term_memories(decoder_input,initial_state = states)
        outputs = self.Outputs_dense(outputs)
        return outputs

In [149]:
class Transformers_seq2seq (keras.Model) :
    def __init__ (self,vocab_size,d_model,d_k,ffn,outputs_dim,num_class) :
        super(Transformers_seq2seq,self).__init__()
        self.Encoder = Transformers_Encoder(vocab_size=vocab_size,
                                            d_model=d_model,
                                            outputs_dim=outputs_dim,d_k=d_k,
                                            ffn=ffn)
        self.Decoder = Transformers_Decoder(num_class=num_class,embbedding_dim=outputs_dim)
    
    def call(self,x) :
        encoder_input,decoder_input = x
        state_h,state_c = self.Encoder(encoder_input)
        outputs = self.Decoder(decoder_input,state_h,state_c)
        return outputs

In [143]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [153]:
import random

# Contoh kecil dataset chatbot, nanti diperbesar
encoder_inputs = [
    "Apa kabar?", "Siapa namamu?", "Ceritakan sesuatu!", 
    "Dimana kamu tinggal?", "Siapa yang menciptakanmu?", "Bagaimana cuaca hari ini?",
    "Ceritakan lelucon!", "Apa makanan favoritmu?", "Apa yang kamu suka lakukan?", 
    "Bagaimana cara belajar AI?"
]

decoder_outputs = [
    "Aku baik.", "Aku AI.", "Baiklah, aku akan bercerita.",
    "Aku tinggal di dalam komputer.", "Aku diciptakan oleh seorang programmer.", "Aku tidak tahu, aku tidak bisa melihat.",
    "Baiklah, ini lelucon: Kenapa komputer tidak bisa tidur? Karena selalu ada bug!", "Aku tidak makan, tapi aku suka data!", "Aku suka berbicara denganmu!", 
    "Belajarlah teori dan praktekkan dengan kode!"
]

# Menambahkan token <SOS> dan <EOS>
decoder_inputs = ["<SOS> " + sentence for sentence in decoder_outputs]
decoder_outputs = [sentence + " <EOS>" for sentence in decoder_outputs]

# Memperbanyak dataset secara otomatis dengan variasi pertanyaan & jawaban
big_encoder_inputs = []
big_decoder_inputs = []
big_decoder_outputs = []

for _ in range(5000):  # Membuat 5000 sampel
    i = random.randint(0, len(encoder_inputs) - 1)
    big_encoder_inputs.append(encoder_inputs[i])
    big_decoder_inputs.append(decoder_inputs[i])
    big_decoder_outputs.append(decoder_outputs[i])

# Menampilkan contoh kecil dari dataset besar
big_encoder_inputs[:5], big_decoder_inputs[:5], big_decoder_outputs[:5]


(['Bagaimana cara belajar AI?',
  'Ceritakan lelucon!',
  'Ceritakan lelucon!',
  'Apa makanan favoritmu?',
  'Apa kabar?'],
 ['<SOS> Belajarlah teori dan praktekkan dengan kode!',
  '<SOS> Baiklah, ini lelucon: Kenapa komputer tidak bisa tidur? Karena selalu ada bug!',
  '<SOS> Baiklah, ini lelucon: Kenapa komputer tidak bisa tidur? Karena selalu ada bug!',
  '<SOS> Aku tidak makan, tapi aku suka data!',
  '<SOS> Aku baik.'],
 ['Belajarlah teori dan praktekkan dengan kode! <EOS>',
  'Baiklah, ini lelucon: Kenapa komputer tidak bisa tidur? Karena selalu ada bug! <EOS>',
  'Baiklah, ini lelucon: Kenapa komputer tidak bisa tidur? Karena selalu ada bug! <EOS>',
  'Aku tidak makan, tapi aku suka data! <EOS>',
  'Aku baik. <EOS>'])

In [154]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(encoder_inputs + decoder_outputs)

In [157]:
x_train_encoder = tokenizer.texts_to_sequences(big_encoder_inputs)
X_train_decoder_inputs = tokenizer.texts_to_sequences(big_decoder_inputs)
Y_train_decoder_outputs = tokenizer.texts_to_sequences(big_decoder_outputs)

In [159]:
max(len(seq) for seq in Y_train_decoder_outputs)

13

In [160]:
x_train_encoder = pad_sequences(x_train_encoder,maxlen=13,padding='post')
X_train_decoder_inputs = pad_sequences(X_train_decoder_inputs,maxlen=13,padding='post')
Y_train_decoder_outputs = pad_sequences(Y_train_decoder_outputs,maxlen=13,padding='post')

In [164]:
# Buat model
vocab_size = len(tokenizer.word_index) + 1  # +1 untuk padding
d_model = 64
d_k = 32
ffn_units = 128
num_class = vocab_size  # Jumlah kata dalam tokenizer

model = Transformers_seq2seq(vocab_size,d_model=d_model,d_k=d_k,ffn=ffn_units,outputs_dim=32,num_class=vocab_size)

# Compile model
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# Latih model



In [165]:
model.summary()

In [166]:
model.fit([x_train_encoder, X_train_decoder_inputs], Y_train_decoder_outputs, batch_size=32, epochs=5)

Epoch 1/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 20ms/step - accuracy: 0.5261 - loss: 2.6321
Epoch 2/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 18ms/step - accuracy: 0.7266 - loss: 1.0446
Epoch 3/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step - accuracy: 0.9920 - loss: 0.4299
Epoch 4/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step - accuracy: 1.0000 - loss: 0.1557
Epoch 5/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step - accuracy: 1.0000 - loss: 0.0670


<keras.src.callbacks.history.History at 0x1cf329c6ba0>

In [167]:
model.summary()

In [5]:
class MultiHeadAttention (keras.layers.Layer) :
    def __init__ (self,d_model,num_head) :
        super (MultiHeadAttention,self).__init__()
        self.num_head = num_head 
        self.d_k = d_model // num_head
        self.d_model = d_model
        self.W_q = keras.layers.Dense(d_model)
        self.W_k = keras.layers.Dense(d_model)
        self.W_v = keras.layers.Dense(d_model)
        self.W_o = keras.layers.Dense(d_model)
    
    def scaled_dot_product (self,Q,K,V,mask = None ) :
        dk = tf.cast(self.d_k,dtype=tf.float32)
        scores = tf.matmul(Q,K,transpose_b=True) / tf.math.sqrt(dk)
        if mask is not None  :
            scores = tf.where(mask == 0,tf.constant(-1e9,dtype=scores.dtype),scores)
        Attention_weight = tf.nn.softmax(scores,axis=-1)
        outputs = tf.matmul(Attention_weight,V)
        return outputs
    
    def split_heads (self,x,batch_size) :
        x = tf.reshape(x,(batch_size,-1,self.num_head,self.d_k))
        return tf.transpose(x,perm=[0,2,1,3])
    
    def call (self,Q,K,V,mask=None) :
        batch_size = tf.shape(Q)[0]
        Q = self.split_heads(Q,batch_size)
        K = self.split_heads(K,batch_size)
        V = self.split_heads(V,batch_size)
        Attention = self.scaled_dot_product(Q,K,V,mask)
        Attention = tf.transpose(Attention,perm=[0,2,1,3])
        Attention = tf.reshape(Attention,(batch_size,-1,self.d_model))
        outputs = self.W_o(Attention)
        return outputs

In [None]:
class Encoder (keras.layers.Layer) :
    def __init__ (self,vocab_size,embedding_lenght,d_model,num_heads) :
        super(Encoder,self).__init__()
        self.Embedding = keras.layers.Embedding(input_dim=vocab_size,output_dim=embedding_lenght)
        self.MultiHeadAttention = MultiHeadAttention(d_model,num_heads)
        self.LSTM = keras.layers.Bidirectional(keras.layers.LSTM(64,return_sequences=True,return_state=True))
        self.Normal1 = keras.layers.LayerNormalization()
        self.ffn = keras.Sequential([
            keras.layers.Dense(128,activation='relu'),
            keras.layers.Dense(d_model)
        ])
        self.Outputs = keras.layers.Dense(d_model,activation='tanh')
    
    def call(self,x) :
        x = self.Embedding(x)
        x =self.MultiHeadAttention(x,x,x)
        x = self.Normal1(x)
        x = self.ffn(x)
        outputs, forward_h, forward_c, backward_h, backward_c = self.LSTM(x)
        encoded_outputs = self.Outputs(tf.concat([forward_h,backward_h],axis=-1))
        return encoded_outputs


In [40]:
class Decoder (keras.layers.Layer) :
    def __init__ (self,num_class,vocab_size,d_model,num_head,outputs_dim) :
        super(Decoder,self).__init__
        self.Embedding = keras.layers.Embedding(input_dim=vocab_size,output_dim=outputs_dim)
        self.MultiHeadAttention = MultiHeadAttention(d_model=d_model,num_head=num_head) 
        self.Biderectional_layers = keras.layers.Bidirectional(keras.layers.LSTM(64,return_sequences=True))
        self.outputs_layers = keras.layers.Dense(num_class,activation='softmax')
        self.normalistion = keras.layers.LayerNormalization()
        self.ffn = keras.Sequential([
            keras.layers.Dense(128,activation='relu'),
            keras.layers.Dense(d_model)
        ])
    
    def call(self,x) :
        x = self.Embedding(x)
        x = self.MultiHeadAttention(x,x,x)
        x = self.Biderectional_layers(x)
        x = self.normalistion(x)
        x = self.ffn(x)
        x = self.outputs_layers(x)
        return x
        


In [32]:
x_train

array([[0, 6, 2, 6],
       [4, 3, 0, 2],
       [8, 0, 6, 5],
       ...,
       [0, 9, 6, 0],
       [0, 1, 0, 8],
       [3, 0, 7, 9]])

In [11]:
vocab_size = 11
embdedding = 64
d_model = 128
num_head = 8


In [36]:
x_train= np.array([make_random() for _ in range(1000)])
y_train = x_train

In [37]:

for i in range(len(x_train)) :
    random = np.random.randint(0,3) 
    y_train[i][random] = 0

In [39]:
y_train

array([[5, 8, 0, 6],
       [0, 5, 6, 0],
       [1, 0, 8, 8],
       ...,
       [5, 0, 5, 6],
       [0, 8, 0, 1],
       [9, 3, 0, 2]])

In [13]:
y_train = np.array([make_random() for _ in range(1000)])

In [14]:
len(x_train[0])

4

In [15]:
Encoders_Layers_Models = Encoder(vocab_size=vocab_size,embedding_lenght=embdedding,d_model = d_model,num_heads=num_head,seq_len=4)

In [16]:
Decoder_models = Decoder(embedding_length=embdedding,d_model=d_model,num_classes=vocab_size,num_heads=num_head,vocab_size=vocab_size)

In [17]:
x_train_decode = Encoders_Layers_Models(x_train)

In [24]:
keras.layers.Embedding(input_dim=vocab_size,output_dim=64)(x_train_decode)

<tf.Tensor: shape=(1000, 128, 64), dtype=float32, numpy=
array([[[ 0.04742417,  0.0298645 , -0.02125306, ...,  0.00365547,
          0.0212527 , -0.01607012],
        [ 0.04742417,  0.0298645 , -0.02125306, ...,  0.00365547,
          0.0212527 , -0.01607012],
        [ 0.04742417,  0.0298645 , -0.02125306, ...,  0.00365547,
          0.0212527 , -0.01607012],
        ...,
        [ 0.04742417,  0.0298645 , -0.02125306, ...,  0.00365547,
          0.0212527 , -0.01607012],
        [ 0.04742417,  0.0298645 , -0.02125306, ...,  0.00365547,
          0.0212527 , -0.01607012],
        [ 0.04742417,  0.0298645 , -0.02125306, ...,  0.00365547,
          0.0212527 , -0.01607012]],

       [[ 0.04742417,  0.0298645 , -0.02125306, ...,  0.00365547,
          0.0212527 , -0.01607012],
        [ 0.04742417,  0.0298645 , -0.02125306, ...,  0.00365547,
          0.0212527 , -0.01607012],
        [ 0.04742417,  0.0298645 , -0.02125306, ...,  0.00365547,
          0.0212527 , -0.01607012],
        ..