# LSTM

In [2]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Dataset sederhana
text = "Saya suka belajar machine learning di waktu luang saya. Dengan mendalami machine learning, saya bisa memahami cara komputer membuat prediksi dari data. Selain itu, saya juga sering mencoba berbagai proyek seperti klasifikasi gambar, analisis data, dan pengenalan suara. Belajar machine learning tidak hanya menambah pengetahuan, tetapi juga memberikan peluang untuk menciptakan solusi teknologi yang bermanfaat. Dalam perjalanan belajar, saya membaca banyak artikel, mengikuti kursus online, dan berdiskusi dengan komunitas teknologi. Semua ini sangat membantu untuk meningkatkan pemahaman dan keterampilan saya."
words = text.split()
unique_words = sorted(set(words))
word_to_index = {word: i for i, word in enumerate(unique_words, start=1)}
index_to_word = {i: word for word, i in word_to_index.items()}

# Mengonversi teks menjadi angka
sequence = [word_to_index[word] for word in words]
sequences = []
for i in range(1, len(sequence)):
    seq = sequence[:i+1]
    sequences.append(seq)

# Padding sequence
max_sequence_len = max([len(seq) for seq in sequences])
sequences = pad_sequences(sequences, maxlen=max_sequence_len, padding='pre')

# Data dan label
X = sequences[:, :-1]
y = sequences[:, -1]
y = to_categorical(y, num_classes=len(unique_words) + 1)

# Model LSTM
model = Sequential([
    Embedding(input_dim=len(unique_words) + 1, output_dim=10, input_length=max_sequence_len-1),
    LSTM(50),
    Dense(len(unique_words) + 1, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X, y, epochs=500, verbose=1)

# Generating Text
def generate_text(seed_text, next_words, max_sequence_len):
    for _ in range(next_words):
        token_list = [word_to_index[word] for word in seed_text.split() if word in word_to_index]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        predicted = np.argmax(model.predict(token_list), axis=-1)
        output_word = index_to_word.get(predicted[0], "")
        seed_text += " " + output_word
    return seed_text

seed = "saya suka"
print(generate_text(seed, 5, max_sequence_len))

Epoch 1/500
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 32ms/step - accuracy: 0.0180 - loss: 4.2632
Epoch 2/500
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.0424 - loss: 4.2610
Epoch 3/500
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.0346 - loss: 4.2590
Epoch 4/500
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.0283 - loss: 4.2578
Epoch 5/500
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - accuracy: 0.0166 - loss: 4.2565
Epoch 6/500
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.0463 - loss: 4.2529
Epoch 7/500
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.0566 - loss: 4.2521
Epoch 8/500
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.0488 - loss: 4.2484
Epoch 9/500
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

In [4]:
seed = "saya suka"
print(generate_text(seed, 100, max_sequence_len))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24

# GRU

In [5]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Embedding
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Dataset sederhana
text = "Saya suka belajar machine learning di waktu luang saya. Dengan mendalami machine learning, saya bisa memahami cara komputer membuat prediksi dari data. Selain itu, saya juga sering mencoba berbagai proyek seperti klasifikasi gambar, analisis data, dan pengenalan suara. Belajar machine learning tidak hanya menambah pengetahuan, tetapi juga memberikan peluang untuk menciptakan solusi teknologi yang bermanfaat. Dalam perjalanan belajar, saya membaca banyak artikel, mengikuti kursus online, dan berdiskusi dengan komunitas teknologi. Semua ini sangat membantu untuk meningkatkan pemahaman dan keterampilan saya."
words = text.split()
unique_words = sorted(set(words))
word_to_index = {word: i for i, word in enumerate(unique_words, start=1)}
index_to_word = {i: word for word, i in word_to_index.items()}

# Mengonversi teks menjadi angka
sequence = [word_to_index[word] for word in words]
sequences = []
for i in range(1, len(sequence)):
    seq = sequence[:i+1]
    sequences.append(seq)

# Padding sequence
max_sequence_len = max([len(seq) for seq in sequences])
sequences = pad_sequences(sequences, maxlen=max_sequence_len, padding='pre')

# Data dan label
X = sequences[:, :-1]
y = sequences[:, -1]
y = to_categorical(y, num_classes=len(unique_words) + 1)

# Model GRU
model = Sequential([
    Embedding(input_dim=len(unique_words) + 1, output_dim=10, input_length=max_sequence_len-1),
    GRU(50),
    Dense(len(unique_words) + 1, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X, y, epochs=500, verbose=1)

# Generating Text
def generate_text(seed_text, next_words, max_sequence_len):
    for _ in range(next_words):
        token_list = [word_to_index[word] for word in seed_text.split() if word in word_to_index]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        predicted = np.argmax(model.predict(token_list), axis=-1)
        output_word = index_to_word.get(predicted[0], "")
        seed_text += " " + output_word
    return seed_text

seed = "saya suka"
print(generate_text(seed, 5, max_sequence_len))

Epoch 1/500
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 76ms/step - accuracy: 0.0000e+00 - loss: 4.2637
Epoch 2/500
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - accuracy: 0.0527 - loss: 4.2610
Epoch 3/500
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - accuracy: 0.0824 - loss: 4.2577
Epoch 4/500
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - accuracy: 0.0644 - loss: 4.2558
Epoch 5/500
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - accuracy: 0.0644 - loss: 4.2536
Epoch 6/500
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step - accuracy: 0.0746 - loss: 4.2505
Epoch 7/500
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - accuracy: 0.0707 - loss: 4.2483
Epoch 8/500
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step - accuracy: 0.1005 - loss: 4.2435
Epoch 9/500
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0

In [6]:
seed = "saya suka"
print(generate_text(seed, 100, max_sequence_len))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25