In [13]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_text as text

from tensorflow_text.tools.wordpiece_vocab import bert_vocab_from_dataset as bert_vocab

In [14]:
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [15]:
df = pd.read_csv('./datasets/genetics.csv')
df = df.sample(frac=1, random_state=128)
df

Unnamed: 0,corpus
11,What are the primary causes of diabetes? The p...
74,Why might it be challenging for Filipinos to m...
110,Why are Filipinos more susceptible to T2DM com...
2,What properties does bitter gourd possess that...
117,What lifestyle adjustments aid in diabetes pre...
...,...
34,How can individuals assess their risk of devel...
10,What is diabetes? Diabetes is a chronic illnes...
119,What role does cinnamon play in diabetes manag...
82,How can understanding diabetes and financial p...


In [16]:
samples = df['corpus'].values
len(samples)

132

In [17]:
bert_tokenizer_params=dict(lower_case=True)
tokenizer = text.BertTokenizer('vocab.txt', **bert_tokenizer_params)

In [18]:
BUFFER_SIZE = 20000
BATCH_SIZE = 32
MAX_TOKENS = 129

def prepare_data(token):
      token = token[:MAX_TOKENS]
      input = token[:-1]
      label = token[1:]
      
      return input, label

train_dataset = tokenizer.tokenize(samples[:118]).merge_dims(-2,-1).to_tensor()
train_dataset = tf.data.Dataset.from_tensor_slices(train_dataset)
train_dataset = train_dataset.map(prepare_data).shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(buffer_size=tf.data.AUTOTUNE)

val_dataset = tokenizer.tokenize(samples[118:]).merge_dims(-2,-1).to_tensor()
val_dataset = tf.data.Dataset.from_tensor_slices(val_dataset)
val_dataset = val_dataset.map(prepare_data).shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(buffer_size=tf.data.AUTOTUNE)

for x_train, y_train in train_dataset.take(1):
      break

print(x_train.shape)
print(y_train.shape)

print('Input:', x_train[0][:10].numpy())
print('Output:', y_train[0][:10].numpy())


(32, 128)
(32, 128)
Input: [1255   90  126  358   93  190 3902  102   86  122]
Output: [  90  126  358   93  190 3902  102   86  122   89]


In [19]:
def positional_encoding(length, depth):
    depth = depth/2

    positions = np.arange(length)[:, np.newaxis]     # (seq, 1)
    depths = np.arange(depth)[np.newaxis, :]/depth   # (1, depth)

    angle_rates = 1 / (10000**depths)         # (1, depth)
    angle_rads = positions * angle_rates      # (pos, depth)

    pos_encoding = np.concatenate(
        [np.sin(angle_rads), np.cos(angle_rads)],
        axis=-1) 

    return tf.cast(pos_encoding, dtype=tf.float32)

In [20]:
class PositionalEmbedding(tf.keras.layers.Layer):
    def __init__(self, vocab_size, d_model):
        super().__init__()
        self.d_model = d_model
        self.embedding = tf.keras.layers.Embedding(vocab_size, d_model, mask_zero=True) 
        self.pos_encoding = positional_encoding(length=2048, depth=d_model)

    def compute_mask(self, *args, **kwargs):
        return self.embedding.compute_mask(*args, **kwargs)

    def call(self, x):
        length = tf.shape(x)[1]
        x = self.embedding(x)
        # This factor sets the relative scale of the embedding and positonal_encoding.
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        x = x + self.pos_encoding[tf.newaxis, :length, :]
        return x

In [21]:
class BaseAttention(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super().__init__()
        self.mha = tf.keras.layers.MultiHeadAttention(**kwargs)
        self.layernorm = tf.keras.layers.LayerNormalization()
        self.add = tf.keras.layers.Add()

In [22]:
class CausalSelfAttention(BaseAttention):
    def call(self, x):
        attn_output = self.mha(
            query=x,
            value=x,
            key=x,
            use_causal_mask = True)
        x = self.add([x, attn_output])
        x = self.layernorm(x)
        return x

In [23]:
class FeedForward(tf.keras.layers.Layer):
    def __init__(self, d_model, dff, dropout_rate=0.1):
        super().__init__()
        self.seq = tf.keras.Sequential([
            tf.keras.layers.Dense(dff, activation='relu'),
            tf.keras.layers.Dense(d_model),
            tf.keras.layers.Dropout(dropout_rate)
        ])
        self.add = tf.keras.layers.Add()
        self.layer_norm = tf.keras.layers.LayerNormalization()

    def call(self, x):
        x = self.add([x, self.seq(x)])
        x = self.layer_norm(x) 
        return x

In [24]:
class DecoderLayer(tf.keras.layers.Layer):
    def __init__(self, *, d_model, num_heads, dff, dropout_rate=0.1):
        super(DecoderLayer, self).__init__()

        self.causal_self_attention = CausalSelfAttention(
            num_heads=num_heads,
            key_dim=d_model,
            dropout=dropout_rate)

        self.ffn = FeedForward(d_model, dff)

    def call(self, x):
        x = self.causal_self_attention(x=x)
        x = self.ffn(x)  # Shape `(batch_size, seq_len, d_model)`.
        return x

In [25]:
class Decoder(tf.keras.layers.Layer):
    def __init__(self, *, num_layers, d_model, num_heads, dff, vocab_size, dropout_rate=0.1):
        super(Decoder, self).__init__()

        self.d_model = d_model
        self.num_layers = num_layers

        self.pos_embedding = PositionalEmbedding(vocab_size=vocab_size, d_model=d_model)
        self.dropout = tf.keras.layers.Dropout(dropout_rate)
        self.dec_layers = [
            DecoderLayer(d_model=d_model, num_heads=num_heads,
                        dff=dff, dropout_rate=dropout_rate)
            for _ in range(num_layers)]

        self.last_attn_scores = None

    def call(self, x):
        # `x` is token-IDs shape (batch, target_seq_len)
        x = self.pos_embedding(x)  # (batch_size, target_seq_len, d_model)

        x = self.dropout(x)

        for i in range(self.num_layers):
            x = self.dec_layers[i](x)

        # The shape of x is (batch_size, target_seq_len, d_model).
        return x

In [26]:
class Transformer(tf.keras.Model):
    def __init__(self, *, num_layers, d_model, num_heads, dff, target_vocab_size, dropout_rate=0.1):
        super().__init__()

        self.decoder = Decoder(num_layers=num_layers, d_model=d_model,
                            num_heads=num_heads, dff=dff,
                            vocab_size=target_vocab_size,
                            dropout_rate=dropout_rate)

        self.final_layer = tf.keras.layers.Dense(target_vocab_size)

    def call(self, x):
        # To use a Keras model with `.fit` you must pass all your inputs in the
        # first argument.

        x = self.decoder(x)  # (batch_size, target_len, d_model)

        # Final linear layer output.
        logits = self.final_layer(x)  # (batch_size, target_len, target_vocab_size)

        # Return the final output and the attention weights.
        return logits

In [27]:
num_layers = 4
d_model = 128
dff = 512
num_heads = 8
dropout_rate = 0.1
vocab_size = 7931

In [28]:
transformer = Transformer(
    num_layers=num_layers,
    d_model=d_model,
    num_heads=num_heads,
    dff=dff,
    target_vocab_size=vocab_size,
    dropout_rate=dropout_rate)

output = transformer(x_train)

print(x_train.shape)
print(output.shape)

(32, 128)
(32, 128, 7931)


In [29]:
transformer.summary()

Model: "transformer"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 decoder (Decoder)           multiple                  3654016   
                                                                 
 dense_8 (Dense)             multiple                  1023099   
                                                                 
Total params: 4,677,115
Trainable params: 4,677,115
Non-trainable params: 0
_________________________________________________________________


In [30]:
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, d_model, warmup_steps=4000):
        super().__init__()

        self.d_model = d_model
        self.d_model = tf.cast(self.d_model, tf.float32)

        self.warmup_steps = warmup_steps

    def __call__(self, step):
        step = tf.cast(step, dtype=tf.float32)
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps ** -1.5)

        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)

In [31]:
learning_rate = CustomSchedule(d_model)

optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

In [32]:
def masked_loss(label, pred):
    mask = label != 0
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')
    loss = loss_object(label, pred)

    mask = tf.cast(mask, dtype=loss.dtype)
    loss *= mask

    loss = tf.reduce_sum(loss)/tf.reduce_sum(mask)
    return loss


def masked_accuracy(label, pred):
    pred = tf.argmax(pred, axis=2)
    label = tf.cast(label, pred.dtype)
    match = label == pred

    mask = label != 0

    match = match & mask

    match = tf.cast(match, dtype=tf.float32)
    mask = tf.cast(mask, dtype=tf.float32)
    return tf.reduce_sum(match)/tf.reduce_sum(mask)

In [33]:
es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_masked_accuracy', patience=3)
transformer.compile(loss=masked_loss, optimizer=optimizer, metrics=[masked_accuracy])

In [34]:
# Loads the weights
transformer.load_weights('./model/checkpoints')

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x1fadb30dbb0>

In [37]:
transformer.fit(train_dataset, epochs=20, validation_data=val_dataset, callbacks=[es_callback])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20


<keras.callbacks.History at 0x1fb8e60f4c0>

In [38]:
# save new weights
transformer.save_weights('./model/checkpoints')

In [39]:
def generate_text(sentence, maxlen=MAX_TOKENS):
    output_array = tokenizer.tokenize(sentence).merge_dims(-2, -1).to_tensor()
    
    for i in range(maxlen):
        prediction = transformer(output_array, training=False)
        prediction = prediction[:, -1:, :]
        prediction = tf.argmax(prediction, axis=-1)
        output_array = tf.concat([output_array, prediction], axis=1)
        
        if prediction[0][0].numpy() == 3:
            break
        
    output = tokenizer.detokenize(output_array).to_tensor()
    output = ' '.join([word.decode('utf-8') for word in output.numpy()[0]])
    return output

In [40]:
generate_text('what is diabetes?')

"what is diabetes ? diabetes mellitus is a chronic condition prevalent in the philippines , characterized by high blood sugar levels . it includes two primary types : type 1 and type 2 . type 1 diabetes , also known as adult - onset diabetes , and adult - onset diabetes . type 2 diabetes is an autoimmune disease usually diagnosed in childhood , characterized by the body ' s inability to produce insulin . type 2 diabetes is a metabolic disorder caused by insulin resistance and is more common in adults . it is usually diagnosed in children and young adults , where the pancreas can no longer produce insulin . it is important to keep your blood sugar levels and the pancreas . to stay healthy for type 2 diabetes ,"

In [41]:
generate_text('can I prevent having diabetes?')

"can i prevent having diabetes ? the most common chronic , there is a genetic condition that requires daily management , and occurs when you are pregnant . what is the prevalence of type 2 diabetes mellitus ( t2dm ) in the philippines ? according to the food and nutrition research institute - department of science and technology ' s 8th national nutrition survey , diabetes prevalence based on fasting blood sugar has risen from 3 . 4 percent in 2003 to 5 . 4 percent in 2013 . the food and nutrition research institute ( 2014 ) . 6 . 4 percent in 2013 in the philippines is a comparative prevalence of type 2 diabetes among the philippines . the food and nutrition aged 20 . in the"

In [42]:
generate_text('is bitter gourd good for diabetes?')

"is bitter gourd good for diabetes ? it ' s common for adults with type 1 diabetes , affecting 1 person and a family history of the condition increases the risk . individuals with type 2 diabetes may experience a life and limited productivity loss . if the earlier symptoms of type 2 diabetes , a person who has think similar to that complications can be treated with lifestyle changes and lifestyle changes such as weight loss . if the disease is diagnosed early , you can get type 2 diabetes , the condition major symptoms , and the early you can be treated with lifestyle changes and exercise . if you have a man with type 2 diabetes , losing a lot of weight is over the condition . if you have type 2"

In [43]:
generate_text('what should I do if I have diabetes?')

'what should i do if i have diabetes ? the first thing you should do is consult your physician . they might ask for a blood test to measure the levels of sugar in your blood ( glycemia ) . this blood test will allow them to check for hyperglycemia . if you have more severe symptoms , please seek immediate medical attention . if you have more severe symptoms , please seek immediate medical attention . seeking medical attention is always given to people with type 2 diabetes or to receive medical attention . to the knowledge of the mechanisms of diabetes prevention , management , diagnosis and treatment . education can indicate the essential approach to prevent or delay the onset of type 2 diabetes , especially in early or actual disease'

In [44]:
generate_text('what is syndrome?')

'what is syndrome ? diabetes is a chronic condition prevalent in the philippines , characterized by high blood sugar levels . it includes two primary types : type 1 and type 2 . type 1 diabetes , also known as adult - onset diabetes , and adult - onset diabetes . early onset usually occurs in childhood , adolescence , or young adulthood and presents primary insulin - dependent . type 2 diabetes is more common in older adults . however , type 2 diabetes is more common in children with type 2 diabetes . however , type 2 diabetes is more common in children with type 2 diabetes who are 10 diabetes . however , type 2 diabetes can develop in adults with a body , type 2 diabetes in the body'