# Praktikum 2

## Setup

### Import TensorFlow

In [1]:
import tensorflow as tf
import numpy as np
import os
import time

### Download Dataset Shakespeare

In [2]:
# Download the Shakespeare corpus (or reuse the locally cached copy) and keep its path.
path_to_file = tf.keras.utils.get_file(
    fname='shakespeare.txt',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


### Load Data

In [3]:
# Read the corpus as raw bytes and decode it explicitly as UTF-8. The context
# manager closes the file handle as soon as the read is done.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
# length of text is the number of characters in it
print(f'Length of text: {len(text)} characters')

Length of text: 1115394 characters


In [4]:
# Preview the first 250 characters to sanity-check that the corpus decoded as readable text.
print(text[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [5]:
# Character-level vocabulary: every distinct character in the corpus, in sorted order.
vocab = sorted(set(text))
print(f'{len(vocab)} unique characters')

65 unique characters


## Olah Teks

### Vectorize Teks

In [6]:
# Two toy strings of different lengths: splitting them per character yields a ragged tensor.
example_texts = ['abcdefg', 'xyz']
chars = tf.strings.unicode_split(example_texts, input_encoding='UTF-8')
chars

<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>

In [7]:
# Lookup layer that maps each vocabulary character to an integer ID (no mask token reserved).
ids_from_chars = tf.keras.layers.StringLookup(
    vocabulary=list(vocab), mask_token=None)

In [8]:
# NOTE(review): this cell repeats the previous cell verbatim; it only rebuilds the
# same lookup layer and can be deleted.
ids_from_chars=tf.keras.layers.StringLookup(
vocabulary=list(vocab),mask_token=None)

In [9]:
# Convert the example characters to their integer IDs.
ids = ids_from_chars(chars)
ids

<tf.RaggedTensor [[40, 41, 42, 43, 44, 45, 46], [63, 64, 65]]>

In [10]:
# Inverse lookup (IDs -> characters). It is built from the forward layer's own
# vocabulary so both layers share exactly the same table.
chars_from_ids = tf.keras.layers.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(), invert=True, mask_token=None)

In [11]:
# Round-trip check: mapping the IDs back should reproduce the original characters.
chars = chars_from_ids(ids)
chars

<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>

In [12]:
# Join the characters of each row back into a single string per row.
tf.strings.reduce_join(chars,axis=-1).numpy()

array([b'abcdefg', b'xyz'], dtype=object)

In [13]:
def text_from_ids(ids):
    """Convert character IDs back to text.

    The IDs are mapped to characters with `chars_from_ids`, and the characters
    are then concatenated along the last axis.
    """
    id_chars = chars_from_ids(ids)
    return tf.strings.reduce_join(id_chars, axis=-1)

### Prediksi

### Membuat Training Set dan Target

In [14]:
# Encode the whole corpus as a single flat tensor of character IDs.
corpus_chars = tf.strings.unicode_split(text, 'UTF-8')
all_ids = ids_from_chars(corpus_chars)
all_ids

<tf.Tensor: shape=(1115394,), dtype=int64, numpy=array([19, 48, 57, ..., 46,  9,  1])>

In [15]:
# Stream the corpus as a dataset with one character ID per element.
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

In [16]:
# Print the first 10 characters of the stream, one per line. The loop variable is
# named `char_id` so it does not overwrite the notebook-level `ids` tensor that
# earlier cells created and display.
for char_id in ids_dataset.take(10):
    print(chars_from_ids(char_id).numpy().decode('utf-8'))

F
i
r
s
t
 
C
i
t
i


In [17]:
# Number of characters in each training input (and in each target).
seq_length = 100

In [18]:
# Cut the ID stream into chunks of seq_length + 1 IDs and drop the incomplete tail.
# The extra ID lets the input and the shifted target both be seq_length long.
sequences = ids_dataset.batch(seq_length + 1, drop_remainder=True)

for seq in sequences.take(1):
    print(chars_from_ids(seq))

tf.Tensor(
[b'F' b'i' b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':'
 b'\n' b'B' b'e' b'f' b'o' b'r' b'e' b' ' b'w' b'e' b' ' b'p' b'r' b'o'
 b'c' b'e' b'e' b'd' b' ' b'a' b'n' b'y' b' ' b'f' b'u' b'r' b't' b'h'
 b'e' b'r' b',' b' ' b'h' b'e' b'a' b'r' b' ' b'm' b'e' b' ' b's' b'p'
 b'e' b'a' b'k' b'.' b'\n' b'\n' b'A' b'l' b'l' b':' b'\n' b'S' b'p' b'e'
 b'a' b'k' b',' b' ' b's' b'p' b'e' b'a' b'k' b'.' b'\n' b'\n' b'F' b'i'
 b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':' b'\n' b'Y'
 b'o' b'u' b' '], shape=(101,), dtype=string)


In [19]:
# Show the first five chunks as joined text to confirm they follow each other in the corpus.
for seq in sequences.take(5):
    print(text_from_ids(seq).numpy())

b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
b'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'
b"now Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us ki"
b"ll him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be d"
b'one: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor citi'


In [20]:
def split_input_target(sequence):
    """Split a sequence into an (input, target) pair offset by one position.

    The input is everything except the last element; the target is everything
    except the first, so target[i] is the element that follows input[i].
    """
    return sequence[:-1], sequence[1:]

In [21]:
# NOTE(review): this cell redefines `split_input_target` exactly as the previous
# cell does; one of the two definitions can be deleted.
def split_input_target(sequence):
    """Split a sequence into an (input, target) pair offset by one position.

    The input is everything except the last element; the target is everything
    except the first, so target[i] is the element that follows input[i].
    """
    return sequence[:-1], sequence[1:]

In [22]:
# Demonstrate the one-position shift on a plain Python list of characters.
split_input_target(list("Tensorflow"))

(['T', 'e', 'n', 's', 'o', 'r', 'f', 'l', 'o'],
 ['e', 'n', 's', 'o', 'r', 'f', 'l', 'o', 'w'])

In [23]:
# Turn every chunk into an (input, target) pair.
dataset = sequences.map(split_input_target)

In [24]:
# Inspect one pair: the target should be the input shifted left by one character.
for input_example, target_example in dataset.take(1):
    print("Input :", text_from_ids(input_example).numpy())
    print("Target:", text_from_ids(target_example).numpy())

Input : b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
Target: b'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


### Membuat Batch Training

In [26]:
# Batch size
BATCH_SIZE = 64

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

# Build the training pipeline from `sequences` rather than re-wrapping `dataset`,
# so re-running this cell does not batch an already-batched dataset.
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    # tf.data.AUTOTUNE replaces the deprecated tf.data.experimental.AUTOTUNE alias.
    .prefetch(tf.data.AUTOTUNE))

dataset

<_PrefetchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))>

## Buat Model

In [27]:
# Length of the vocabulary in StringLookup Layer
# (taken from the layer itself, not from `vocab`, so that any extra tokens the
# layer adds are counted too)
vocab_size = len(ids_from_chars.get_vocabulary())

# The embedding dimension
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

In [28]:
class MyModel(tf.keras.Model):
  """Character-level language model: Embedding -> GRU -> Dense.

  The Dense layer has `vocab_size` units and no activation, so the model
  outputs one logit per vocabulary entry at every position.
  """

  def __init__(self, vocab_size, embedding_dim, rnn_units):
    """Create the three layers.

    Args:
      vocab_size: Number of entries in the lookup vocabulary; used as both the
        embedding input size and the number of output logits.
      embedding_dim: Size of each character embedding vector.
      rnn_units: Number of units in the GRU layer.
    """
    # Call the base initializer without arguments: the original
    # `super().__init__(self)` forwarded a stray positional argument, which
    # newer Keras versions reject.
    super().__init__()
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(rnn_units,
                                   return_sequences=True,
                                   return_state=True)
    self.dense = tf.keras.layers.Dense(vocab_size)

  def call(self, inputs, states=None, return_state=False, training=False):
    """Run the model on a batch of character IDs.

    Args:
      inputs: Tensor of character IDs.
      states: Optional initial GRU state. When None, the GRU's own initial
        state is used.
      return_state: When True, also return the final GRU state so the caller
        can feed it back in on the next call.
      training: Forwarded to each layer.

    Returns:
      The logits, or a `(logits, states)` tuple when `return_state` is True.
    """
    x = inputs
    x = self.embedding(x, training=training)
    if states is None:
      states = self.gru.get_initial_state(x)
    x, states = self.gru(x, initial_state=states, training=training)
    x = self.dense(x, training=training)

    if return_state:
      return x, states
    else:
      return x

In [29]:
# Instantiate the (still untrained) model with the hyperparameters defined above.
model = MyModel(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)

## Uji Model

In [30]:
# Forward one batch through the untrained model to check the output shape.
# The batch and its predictions are kept in notebook-level variables because
# later cells reuse them.
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 66) # (batch_size, sequence_length, vocab_size)


In [31]:
# List the layers and parameter counts (available now that the model has been called once).
model.summary()

Model: "my_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       multiple                  16896     
                                                                 
 gru (GRU)                   multiple                  3938304   
                                                                 
 dense (Dense)               multiple                  67650     
                                                                 
Total params: 4022850 (15.35 MB)
Trainable params: 4022850 (15.35 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [32]:
# Sample one character ID per position from the logits of the first sequence in
# the batch, then drop the trailing num_samples axis to get a flat array of IDs.
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()

In [33]:
# Display the sampled IDs (one per input position).
sampled_indices

array([33, 27, 57, 41, 17, 43,  7, 31, 39, 24, 37, 10, 44, 54, 43, 34, 42,
       55, 30, 52, 19,  4, 46, 27,  2, 31,  0, 31, 41, 44, 14, 30,  5, 62,
       54, 48, 63, 62, 43, 12,  8, 46, 63, 41, 22,  8, 17,  0,  2, 39, 24,
       18, 51, 39, 28, 51, 58,  4, 34,  4, 27, 37, 62, 53,  5,  5,  8, 11,
       44, 53,  3, 50, 50,  1, 14, 33, 25, 23, 41, 47, 35, 62, 48, 13, 40,
       40,  8, 49,  2, 60, 55, 64, 51, 47, 58, 20, 23, 64, 28, 60])

In [34]:
# Decode the input and the sampled IDs back to text to see what the untrained model produces.
print("Input:\n", text_from_ids(input_example_batch[0]).numpy())
print()
print("Next Char Predictions:\n", text_from_ids(sampled_indices).numpy())

Input:
 b'o, till I see her;\nAnd therefore let me be thus bold with you\nTo give you over at this first encount'

Next Char Predictions:
 b'TNrbDd,RZKX3eodUcpQmF$gN R[UNK]RbeAQ&woixwd;-gxbI-D[UNK] ZKElZOls$U$NXwn&&-:en!kk\nATLJbhVwi?aa-j upylhsGJyOu'


## Training Model

### Tambahan optimizer dan fungsi loss

In [35]:
# from_logits=True because the model's final Dense layer applies no activation.
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

In [36]:
# Loss of the untrained model on the example batch, as a baseline before training.
example_batch_mean_loss = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("Mean loss:        ", example_batch_mean_loss)

Prediction shape:  (64, 100, 66)  # (batch_size, sequence_length, vocab_size)
Mean loss:         tf.Tensor(4.190149, shape=(), dtype=float32)


In [37]:
# Exponential of the mean loss; for the untrained model this should be close to
# the vocabulary size.
tf.exp(example_batch_mean_loss).numpy()

66.032616

In [38]:
# Attach the Adam optimizer and the loss defined above so the model can be trained with fit().
model.compile(optimizer='adam', loss=loss)

### Konfigurasi Checkpoints

In [39]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files; "{epoch}" is a placeholder that is filled in
# with the epoch number when a checkpoint is written.
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Callback handed to model.fit(); save_weights_only=True stores only the
# weights rather than the whole model.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

### Lakukan Proses Training

In [40]:
# Number of passes over the dataset for the model.fit() run below.
EPOCHS = 20

In [41]:
# Train with the built-in Keras loop, using the checkpoint callback configured above.
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


## Generate Teks

In [42]:
class OneStep(tf.keras.Model):
  """Wraps a trained model and the two lookup layers to generate text one character at a time."""

  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
    """Store the model and lookup layers and precompute the "[UNK]" mask.

    Args:
      model: Model called with `inputs`, `states` and `return_state=True`;
        must return `(logits, states)`.
      chars_from_ids: Layer mapping IDs to characters.
      ids_from_chars: Layer mapping characters to IDs.
      temperature: Divisor applied to the logits before sampling.
    """
    super().__init__()
    self.temperature = temperature
    self.model = model
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

    # Create a mask to prevent "[UNK]" from being generated.
    skip_ids = self.ids_from_chars(['[UNK]'])[:, None]
    sparse_mask = tf.SparseTensor(
        # Put a -inf at each bad index.
        values=[-float('inf')]*len(skip_ids),
        indices=skip_ids,
        # Match the shape to the vocabulary
        dense_shape=[len(ids_from_chars.get_vocabulary())])
    # Dense vector over the vocabulary: -inf at the skipped IDs; it is added to
    # the logits in generate_one_step.
    self.prediction_mask = tf.sparse.to_dense(sparse_mask)

  @tf.function
  def generate_one_step(self, inputs, states=None):
    """Sample the next character for each input string.

    Args:
      inputs: String tensor; each string is split into characters and fed to
        the model.
      states: Model state from the previous call, or None on the first call.

    Returns:
      A `(predicted_chars, states)` tuple: the sampled next character per
      input string and the state returned by the model.
    """
    # Convert strings to token IDs.
    input_chars = tf.strings.unicode_split(inputs, 'UTF-8')
    input_ids = self.ids_from_chars(input_chars).to_tensor()

    # Run the model.
    # predicted_logits.shape is [batch, char, next_char_logits]
    predicted_logits, states = self.model(inputs=input_ids, states=states,
                                          return_state=True)
    # Only use the last prediction.
    predicted_logits = predicted_logits[:, -1, :]
    predicted_logits = predicted_logits/self.temperature
    # Apply the prediction mask: prevent "[UNK]" from being generated.
    predicted_logits = predicted_logits + self.prediction_mask

    # Sample the output logits to generate token IDs.
    predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
    predicted_ids = tf.squeeze(predicted_ids, axis=-1)

    # Convert from token ids to characters
    predicted_chars = self.chars_from_ids(predicted_ids)

    # Return the characters and model state.
    return predicted_chars, states

In [43]:
# Wrap the trained model for generation, using the default temperature.
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

In [44]:
# Generate 1000 characters seeded with the prompt "ROMEO:" and time the run.
start = time.time()
next_char = tf.constant(['ROMEO:'])
states = None
result = [next_char]

for n in range(1000):
    # Feed the previous output and the returned state back in for the next step.
    next_char, states = one_step_model.generate_one_step(next_char, states=states)
    result.append(next_char)

result = tf.strings.join(result)
end = time.time()

generated_text = result[0].numpy().decode('utf-8')
elapsed_seconds = end - start
print(generated_text, '\n\n' + '_'*80)
print('\nRun time:', elapsed_seconds)

ROMEO:
And for I first my name patience.

SICINIUS:
Present.

Post:
Ay, but not prize, and with a truit intelligence
And come about so wife in with Camillo!

GRUMIO:
What comes from your cousin? cousin thou canst go: come to her dear
Than honour on your windows; vials follod.

HENRY PERCY:
Where is the counsell is best take thyself love.

QUEEN ELIZABETH:
Further of Gloucester, you must war him.

KING HENRY VI:
Fear notes of York and Gloucester,
And once again bestremets of the city.

Boatswain:
Would God since you are made a brace?
Yet you must die? to-do him request,
When you should bazed your queen and you are,
That thou didst child aloud a day, as welcome,
As he is as a beggar crance and spear,
And in comes with our country's back and given his faults
And may be hide to sleep.

LEONTES:
What is this is this? how met?
Better Venon, farewell. But wilt thou be of word,
When it was the care for less o'er her kind of best,
But tyealth dowbrike from her dowry shall swear appear,
Are you 

## Ekspor Model Generator

In [45]:
# Export the generator as a SavedModel in ./one_step, then load it back to
# check that the exported copy is usable.
tf.saved_model.save(one_step_model, 'one_step')
one_step_reloaded = tf.saved_model.load('one_step')



In [46]:
# Generate 100 characters with the reloaded SavedModel, seeded with "ROMEO:".
next_char = tf.constant(['ROMEO:'])
states = None
result = [next_char]

for n in range(100):
    # Feed the previous output and the returned state back in for the next step.
    next_char, states = one_step_reloaded.generate_one_step(next_char, states=states)
    result.append(next_char)

reloaded_text = tf.strings.join(result)[0].numpy().decode("utf-8")
print(reloaded_text)

ROMEO:
Where have you dance?

NORTHUMBERLAND:
Nay, by myselfen'd, for my good friends;
And, by the wills d


# **TUGAS**

**Prosedurnya adalah:**


1. Jalankan Model dan hitung loss dengan tf.GradientTape.

2. Hitung update dan terapkan pada model dengan optimizer

In [47]:
class CustomTraining(MyModel):
  """MyModel with a hand-written `train_step` that still works with compile()/fit()."""

  @tf.function
  def train_step(self, inputs):
      """Run one optimization step on a single batch.

      Args:
        inputs: An `(inputs, labels)` pair for one batch.

      Returns:
        A dict with the batch loss under the key 'loss'.
      """
      inputs, labels = inputs
      with tf.GradientTape() as tape:
          predictions = self(inputs, training=True)
          loss = self.loss(labels, predictions)
      # Compute the gradients after leaving the tape context so the gradient
      # computation itself is not recorded, and differentiate with respect to
      # this instance's own variables rather than the notebook-global `model`.
      grads = tape.gradient(loss, self.trainable_variables)
      self.optimizer.apply_gradients(zip(grads, self.trainable_variables))

      return {'loss': loss}

> Kode di atas menerapkan metode `train_step` sesuai dengan konvensi `train_step` Keras. Ini opsional, tetapi memungkinkan Anda mengubah perilaku langkah pelatihan sambil tetap menggunakan metode `Model.compile` dan `Model.fit` dari Keras.



In [48]:
# Create a fresh, untrained CustomTraining model. This rebinds `model`; the
# `one_step_model` created earlier still wraps the previously trained model.
model = CustomTraining(
    vocab_size=len(ids_from_chars.get_vocabulary()),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)

In [49]:
# compile() provides the optimizer and loss that the custom train_step reads
# through self.optimizer and self.loss.
model.compile(optimizer = tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))

In [50]:
# Train for one epoch with fit() to confirm the overridden train_step works with the Keras loop.
model.fit(dataset, epochs=1)



<keras.src.callbacks.History at 0x7950fcb374c0>



> Atau, jika ingin memahami prosesnya lebih dalam, kita bisa membuat custom training loop sendiri:



In [51]:
EPOCHS = 10

# Running mean of the per-batch loss; reset at the start of every epoch.
mean = tf.metrics.Mean()

for epoch in range(EPOCHS):
    start = time.time()

    # reset_state() is the current name of the legacy reset_states() method.
    mean.reset_state()
    for (batch_n, (inp, target)) in enumerate(dataset):
        logs = model.train_step([inp, target])
        mean.update_state(logs['loss'])

        if batch_n % 50 == 0:
            template = f"Epoch {epoch+1} Batch {batch_n} Loss {logs['loss']:.4f}"
            print(template)

    # saving (checkpoint) the model every 5 epochs; the file name uses the same
    # 1-based epoch number as the log lines (ckpt_5, ckpt_10).
    if (epoch + 1) % 5 == 0:
        model.save_weights(checkpoint_prefix.format(epoch=epoch + 1))

    print()
    print(f'Epoch {epoch+1} Loss: {mean.result().numpy():.4f}')
    print(f'Time taken for 1 epoch {time.time() - start:.2f} sec')
    print("_"*80)

# Always save the final weights, named after the total number of epochs trained.
model.save_weights(checkpoint_prefix.format(epoch=EPOCHS))

Epoch 1 Batch 0 Loss 2.1331
Epoch 1 Batch 50 Loss 2.0389
Epoch 1 Batch 100 Loss 1.9578
Epoch 1 Batch 150 Loss 1.8853

Epoch 1 Loss: 1.9853
Time taken for 1 epoch 20.47 sec
________________________________________________________________________________
Epoch 2 Batch 0 Loss 1.7785
Epoch 2 Batch 50 Loss 1.7594
Epoch 2 Batch 100 Loss 1.7122
Epoch 2 Batch 150 Loss 1.6216

Epoch 2 Loss: 1.7122
Time taken for 1 epoch 11.96 sec
________________________________________________________________________________
Epoch 3 Batch 0 Loss 1.5790
Epoch 3 Batch 50 Loss 1.6096
Epoch 3 Batch 100 Loss 1.5543
Epoch 3 Batch 150 Loss 1.5418

Epoch 3 Loss: 1.5536
Time taken for 1 epoch 11.72 sec
________________________________________________________________________________
Epoch 4 Batch 0 Loss 1.4639
Epoch 4 Batch 50 Loss 1.4340
Epoch 4 Batch 100 Loss 1.4538
Epoch 4 Batch 150 Loss 1.4762

Epoch 4 Loss: 1.4560
Time taken for 1 epoch 11.50 sec
_____________________________________________________________________

3. Jalankan kode di atas dan sebutkan perbedaannya dengan praktikum 2.

Terdapat beberapa perbedaan utama antara praktikum 2 dan pelatihan pada tugas yang dijalankan:

**1. Model Architectures:**

* Praktikum 2 dan pelatihan tugas menggunakan arsitektur yang sama: kelas `CustomTraining` pada tugas mewarisi `MyModel` dari praktikum 2, sehingga keduanya terdiri atas lapisan embedding, lapisan GRU (Gated Recurrent Unit, salah satu jenis Recurrent Neural Network/RNN), dan lapisan dense.
* Perbedaannya terletak pada metode `train_step`: praktikum 2 memakai `train_step` bawaan Keras, sedangkan pelatihan tugas menggantinya dengan implementasi sendiri yang menggunakan `tf.GradientTape`.

**2. Epochs:**

* Dalam praktikum 2, model dilatih selama 20 epoch, sedangkan dalam pelatihan tugas model dilatih selama 10 epoch dengan loop khusus (setelah 1 epoch awal melalui `model.fit`).

**3. Prosedur Pelatihan:**

* Praktikum 2 mungkin lebih sederhana dalam hal pelatihan dan menggunakan API model.fit() bawaan Keras.
* Dalam pelatihan tugas , mengimplementasikan loop pelatihan khusus dengan menggunakan train_step, yang memberikan lebih banyak kontrol atas proses pelatihan, seperti pengukuran loss, perhitungan gradien, dan optimasi.

**4. Output Hasil Pelatihan:**

* Hasil dari praktikum 2 adalah informasi singkat tentang loss selama pelatihan tanpa informasi lebih lanjut tentang perbedaan antar epoch atau waktu eksekusi.
* Dalam pelatihan tugas , mencantumkan loss untuk setiap batch dan untuk setiap epoch, serta waktu yang diperlukan untuk menjalankan setiap epoch.

**5. Waktu Eksekusi:**

* Waktu eksekusi per epoch tidak dapat dibandingkan langsung dari keluaran yang tercatat, karena keluaran `model.fit` pada praktikum 2 tidak menampilkan waktu, sedangkan loop pelatihan tugas mencatat sekitar 11,5–20,5 detik per epoch (epoch pertama paling lama). Dari sisi jumlah epoch, pelatihan tugas lebih sedikit (10 epoch) dibandingkan praktikum 2 (20 epoch).