In [8]:
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow.keras.layers import GRU, Dense, Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical



In [13]:
# defining character names

character_names = [
    'Harry Potter', 'Hermione Granger', 'Ron Weasley', 'Gandalf', 'Frodo Baggins',
    'Aragorn', 'Bilbo Baggins', 'Luke Skywalker', 'Darth Vader', 'Princess Leia',
    'Jon Snow', 'Arya Stark', 'Tyrion Lannister', 'Daenerys Targaryen', 'Sherlock Holmes',
    'John Watson', 'Hannibal Lecter', 'Clarice Starling', 'Atticus Finch', 'Scout Finch'
]

# create a vocabulary based on the sample names
vocab = sorted(set(''.join(character_names)))
char_to_idx = {char : idx for idx, char in enumerate(vocab)}
idx_to_char = {idx : char for idx, char in char_to_idx.items()}

# convert the names to sequences of indices
sequences = [[char_to_idx[char] for char in name] for name in character_names]

# pad the sequences to the same length
max_name_length = max(len(seq) for seq in sequences)

# print(max_name_length)
padded_sequences = pad_sequences(sequences, maxlen=max_name_length, padding='post')

# create the input and output data
X = padded_sequences[:, :-1]
y = padded_sequences[:, 1:]

# check shape of input sequence
print(X.shape)
# check shape of output sequence
print(y.shape)

# convert our target sequence to one-hot encoded vectors
y_one_hot = to_categorical(y, num_classes=len(vocab))

# defining character for our gru model
vocab_size = len(vocab)
embedding_dim = 32

model = tf.keras.Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim,),
    GRU(128, return_sequences=True),
    Dense(vocab_size, activation='softmax')
])

# compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# train the model
model.fit(X, y_one_hot, epochs=50, verbose=1)


(20, 17)
(20, 17)
Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 0.0176 - loss: 3.5839
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 0.3706 - loss: 3.5654
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - accuracy: 0.3794 - loss: 3.5460
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - accuracy: 0.3765 - loss: 3.5246
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - accuracy: 0.3765 - loss: 3.5001
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 0.3765 - loss: 3.4715
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - accuracy: 0.3765 - loss: 3.4378
Epoch 8/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step - accuracy: 0.3765 - loss: 3.3977
Epoch 9/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x25bdca16960>

In [20]:
seed = 'a'
seed_idx = [char_to_idx[char] for char in seed]

# map our seed charaters to indexes
seed_idx = [char_to_idx[char] for char in seed]

for _ in range(3):
    temperature = .5
    predicted_probs = model.predict(np.array(seed_idx).reshape(1, -1))[0]
    predicted_probs = predicted_probs[-1]
    
    predicted_probs = np.power(predicted_probs, 1/temperature)
    predicted_probs = predicted_probs / np.sum(predicted_probs)
    
    next_char = np.random.choice(list(char_to_idx.keys()), p=predicted_probs)
    
    seed += next_char
    seed_idx.append(char_to_idx[next_char])

print(seed)

    

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
aifS
