In [6]:
import tensorflow as tf
import numpy as np
import os
import time

path_to_file = tf.keras.utils.get_file(
    'shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

text = open('./text.txt', 'rb').read().decode(encoding='utf-8')

vocab = sorted(set(text))

char2idx = {u: i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)

text_as_int = np.array([char2idx[c] for c in text])

seq_length = 100
examples_per_epoch = len(text)//(seq_length+1)

char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

sequences = char_dataset.batch(seq_length+1, drop_remainder=True)


def split_input_target(chunk):
    input_text = chunk[:-1]
    target_text = chunk[1:]
    return input_text, target_text


dataset = sequences.map(split_input_target)

BATCH_SIZE = 64
BUFFER_SIZE = 10000

dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

vocab_size = len(vocab)
embedding_dim = 256
rnn_units = 1024


def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, embedding_dim,
                                  batch_input_shape=[batch_size, None]),
        tf.keras.layers.GRU(rnn_units,
                            return_sequences=True,
                            stateful=True,
                            recurrent_initializer='glorot_uniform'),
        tf.keras.layers.Dense(vocab_size)
    ])
    return model


model = build_model(
    vocab_size=len(vocab),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
    batch_size=BATCH_SIZE)

for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape,
          "# (batch_size, sequence_length, vocab_size)")

model.summary()

sampled_indices = tf.random.categorical(
    example_batch_predictions[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()


def loss(labels, logits):
    return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)

checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "checkpoints")
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))

print("Input: \n", repr("".join(idx2char[input_example_batch[0]])))
print()
print("Next Char Predictions: \n", repr("".join(idx2char[sampled_indices])))

example_batch_loss = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape,
      " # (batch_size, sequence_length, vocab_size)")
print("scalar_loss:      ", example_batch_loss.numpy().mean())

model.compile(optimizer='adam', loss=loss)


checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

EPOCHS = 1
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

tf.train.latest_checkpoint(checkpoint_dir)

model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))

model.build(tf.TensorShape([1, None]))

model.summary()


def generate_text(model, start_string, temperature = 1.0):
    num_generate = 1000

    input_eval = [char2idx[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)

    text_generated = []

    model.reset_states()
    for i in range(num_generate):
        predictions = model(input_eval)
        predictions = tf.squeeze(predictions, 0)

        predictions = predictions / temperature
        predicted_id = tf.random.categorical(
            predictions, num_samples=1)[-1, 0].numpy()

        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(idx2char[predicted_id])

    return (start_string + ''.join(text_generated))


Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (64, None, 256)           17408     
                                                                 
 gru_2 (GRU)                 (64, None, 1024)          3938304   
                                                                 
 dense_2 (Dense)             (64, None, 68)            69700     
                                                                 
Total params: 4,025,412
Trainable params: 4,025,412
Non-trainable params: 0
_________________________________________________________________


ValueError: Received incompatible tensor with shape (78, 256) when attempting to restore variable with shape (68, 256) and name layer_with_weights-0/embeddings/.ATTRIBUTES/VARIABLE_VALUE.

In [5]:
print(generate_text(model, start_string=u"banu Chrisnadi, [1/5/23 7:40 AM]\nmas tolong cekkan aplikasi mukab\n\n"))

banu Chrisnadi, [1/5/23 7:40 AM]
mas tolong cekkan aplikasi mukab

Mbak Raqicine onek raban mastassedan Prtrek HTTPPu dKik antukas soto > taa aDo yang ckpe  rhiripas

Nungki Edi Rahmadi, [1/6/23 8:17 AM]
mas para b gut lange forense ek espud , peta msso yaan kara luyannyackaban da tar ante

Nungki Edi Rya Rahmadi, [1/6/23 11:19 PM]
ie

Mbak Hadwa, [1/9/23 10:12 AM]
kengmape Tb

Mus AKPuk masinya mri beruri

Om Yayak, [1/6/23 8:33 PM]
@yRyay4 > itansa da onte data selu uku da ser heris isi mas heri pikukar.kukapkuk regii

Nungki Edi hat jsi dakanin ani ya nias dlre sjongki Petmhsi > httla jsok bervar.ko mpungkinker.js

puk SPM/ ketu/s SSKM]
Og Kerved:

Mbak Rahmadi, [1/5/23 8:46 PM]
@y4d_[1/5/23 10:20 AM]
ett:

Adhitya Putri ntyatannfo War.

Darwanyo Edi Rahmadi, [1/5/23 7:46 PM]
Oym mas Arbi, [1/5/23 7:38 PM]


Raswanto, [1/9/23 8:41 AM]
ya

Adhituk lau ya?

1war Rahma, [1/5/23 9:10 AM]
iki dalu info ]

Om Yaakinya mas legi mas? kasintar idi, A1bilumah Rahmadi tarah batap.ina nikunan d