In [27]:
import tensorflow as tf 
import numpy as np

# Reproducibility: fix TensorFlow's global random seed under a named constant
# so the value is easy to find and change.
SEED = 777
tf.random.set_seed(SEED)
# Quote. It is written exactly once and is the single source of truth for both
# the vocabulary and the training sequences, so the two cannot drift apart.
quote = "If you want to build a ship, don’t drum up people to collect wood and don’t assign them tasks and work, but rather teach them to long for the endless immensity of the sea."

# Define characters from the quote without duplicates. Sorting gives every
# character a deterministic index (set iteration order alone is not stable).
idx2char = sorted(set(quote))

# Create a mapping from character to index
char2idx = {ch: idx for idx, ch in enumerate(idx2char)}

# Prepare x_data and y_data based on the quote
x_data = []
y_data = []

# Create training data by sliding window. The window is one character shorter
# than the quote, so the loop runs exactly once: the input is the quote without
# its last character and the target is the quote without its first character.
sequence_length = len(quote) - 1  # Length of the input sequence
for i in range(len(quote) - sequence_length):
    x_seq = quote[i:i + sequence_length]
    y_seq = quote[i + 1:i + sequence_length + 1]

    x_data.append([char2idx[ch] for ch in x_seq])  # Convert characters to indices
    y_data.append([char2idx[ch] for ch in y_seq])  # Shifted sequence for targets


In [28]:



# Hyperparameters
num_classes = len(idx2char)   # Output unique chars based on idx2char
input_dim = num_classes       # One-hot size
hidden_size = 128             # LSTM output size
batch_size = 1                # One sentence
learning_rate = 0.1

# Convert the index sequences to numpy arrays
x_data, y_data = np.array(x_data), np.array(y_data)

# One-hot encode the inputs with tf.one_hot; each index becomes a vector of
# length num_classes
x_one_hot = tf.one_hot(x_data, depth=num_classes)


In [29]:
#Build the LSTM Model
class LSTMModel(tf.keras.Model):
    """Sequence model: an LSTM that returns every time step, followed by a
    dense layer and a softmax over the classes.

    Args:
        units: Number of LSTM units. When omitted, the module-level
            ``hidden_size`` is read at construction time.
        n_classes: Number of output classes. When omitted, the module-level
            ``num_classes`` is read at construction time.
    """

    def __init__(self, units=None, n_classes=None):
        super().__init__()
        # Fall back to the notebook-level settings so that a bare LSTMModel()
        # call behaves exactly as before.
        if units is None:
            units = hidden_size
        if n_classes is None:
            n_classes = num_classes
        # return_sequences=True keeps one output per time step, so the dense
        # layer below is applied at every position of the sequence.
        self.lstm = tf.keras.layers.LSTM(units=units, return_sequences=True)
        self.fc = tf.keras.layers.Dense(units=n_classes)

    def call(self, x):
        """Return per-time-step class probabilities for the input batch ``x``."""
        x = self.lstm(x)
        x = self.fc(x)
        # Probabilities (not logits) are returned; the loss paired with this
        # model must therefore be configured with from_logits=False.
        x = tf.nn.softmax(x)
        return x
    

In [30]:
# Prepare data: one-hot inputs as float32 and target indices as int32 NumPy arrays
X = np.asarray(x_one_hot).astype(np.float32)
Y = np.asarray(y_data).astype(np.int32)

In [31]:

# Instantiate the network
model = LSTMModel()

# Optimizer and loss.
# LSTMModel.call already applies softmax, so the loss is given probabilities
# and from_logits stays False.
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)


In [32]:
# Training step
def train_step(model, inputs, targets):
    """Run one gradient update and report the loss and forward-pass output.

    The forward pass and the loss are recorded on a gradient tape; gradients
    are taken with respect to the model's trainable variables and applied by
    the module-level ``optimizer``.

    Args:
        model: Callable model exposing ``trainable_variables``.
        inputs: Batch handed to ``model``.
        targets: Labels handed to the module-level ``loss_fn`` as the first
            argument.

    Returns:
        ``(loss, predictions)``: both computed before the weights are updated.
    """
    with tf.GradientTape() as recorder:
        outputs = model(inputs)
        step_loss = loss_fn(targets, outputs)
    weights = model.trainable_variables
    grads = recorder.gradient(step_loss, weights)
    optimizer.apply_gradients(zip(grads, weights))
    return step_loss, outputs

# Training loop: 100 updates on the full data, reporting progress after each one
for epoch in range(100):
    loss, predictions = train_step(model, X, Y)
    # Index of the highest-scoring class at each time step (classes are on the last axis)
    predicted = tf.argmax(predictions, axis=-1).numpy()

    print(f"Epoch {epoch}, Loss: {loss.numpy()}")
    print(f"Prediction: {predicted}, True Y: {y_data}")

    # Decode the first (only) predicted sequence back into characters
    result_str = list(map(idx2char.__getitem__, predicted[0]))
    print("\tPrediction str: ", "".join(result_str))

Epoch 0, Loss: 3.2545697689056396
Prediction: [[13  4  1 21  4  4 23 17 17 16 16 11 11 11 11 11 17  7 16  7 23 23 23 23
  23 23 14 21 21 14 14 16  6  6  6 14  6  6 17 17 17  6 23 14 11 11 11  4
   4  4 11 11 11 11 11 23 23  0  4 22 11 13 11 11 13 11 11 16 14 14 14  4
  16  6 16  1 11 16 16 16  3 16 16 16 16 16 17 17 17 11 16 11 11 11 11 16
  18 16 18 11 23  5  5 17  7  7 11 11 17 17 17 17 17 17 17 17  5 11  5 25
  17 11 25 11 22 17 11 11 11  0 11 16 16  1  1 17 17 17 17 17 17 17 17 16
  14  4  4 16 16  0 16 22 22 22 16 16 16 16 21 21 11 21  1 11 21 17 17 16
  11 11]], True Y: [[ 9  0 24 17 22  0 23  4 16 21  0 21 17  0  5 22 12 14  7  0  4  0 20 11
  12 18  1  0  7 17 16 25 21  0  7 19 22 15  0 22 18  0 18  8 17 18 14  8
   0 21 17  0  6 17 14 14  8  6 21  0 23 17 17  7  0  4 16  7  0  7 17 16
  25 21  0  4 20 20 12 10 16  0 21 11  8 15  0 21  4 20 13 20  0  4 16  7
   0 23 17 19 13  1  0  5 22 21  0 19  4 21 11  8 19  0 21  8  4  6 11  0
  21 11  8 15  0 21 17  0 14 17 16 10  0  9 17 