# Importing libraries

In [1]:
import numpy as np 
import pandas as pd
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, LSTM, Dense, Embedding, Dropout, GRU

from tensorflow.keras.losses import sparse_categorical_crossentropy

# Reading data

In [2]:
path_to_file = './shakespeare.txt'

In [3]:
# Read the whole corpus into memory. The context manager guarantees the file
# handle is closed as soon as the read finishes (the bare open().read() left
# it open until garbage collection).
with open(path_to_file, 'r') as corpus_file:
    text = corpus_file.read()

In [4]:
len(text)

5445609

In [5]:
print(text[50000:50600])

ld may see my pleasure,
  Sometime all full with feasting on your sight,
  And by and by clean starved for a look,
  Possessing or pursuing no delight
  Save what is had, or must from you be took.
    Thus do I pine and surfeit day by day,
    Or gluttoning on all, or all away.


                     76  
  Why is my verse so barren of new pride?
  So far from variation or quick change?
  Why with the time do I not glance aside
  To new-found methods, and to compounds strange?
  Why write I still all one, ever the same,
  And keep invention in a noted weed,
  That every word doth almost tell m


# Text preprocessing

In [6]:
vocab = sorted(set(text))
len(vocab)

84

In [7]:
char_to_ind = {char:ind for ind,char in enumerate(vocab)}

In [8]:
ind_to_char = np.array(vocab)

In [9]:
encoded_text = [char_to_ind[s] for s in text]

In [10]:
lines = '''
Sometime all full with feasting on your sight,
  And by and by clean starved for a look,
  Possessing or pursuing no delight
  Save what is had, or must from you be took.
    Thus do I pine and surfeit day by day,
    Or gluttoning on all, or all away.
'''

In [11]:
len(lines)

254

In [12]:
seq_len = 250

In [13]:
total_num_seq = len(text)//(seq_len + 1)
total_num_seq

21695

In [14]:
char_dataset = tf.data.Dataset.from_tensor_slices(encoded_text)
len(char_dataset)

2025-10-16 07:20:15.510894: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2025-10-16 07:20:15.510949: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-10-16 07:20:15.510956: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.92 GB
2025-10-16 07:20:15.511012: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-10-16 07:20:15.511049: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


5445609

In [15]:
sequences = char_dataset.batch(seq_len+1, drop_remainder=True)
sequences

<_BatchDataset element_spec=TensorSpec(shape=(251,), dtype=tf.int32, name=None)>

In [16]:
def create_seq_targets(seq):
    """Split one chunk into an (input, target) pair shifted by one position.

    The input is every element except the last and the target is every
    element except the first, so target[i] is the element following input[i].
    """
    return seq[:-1], seq[1:]

dataset = sequences.map(create_seq_targets)

# Creating LSTM model

In [17]:
batch_size = 128

# shuffle() keeps a buffer of this many sequences and draws from it at random,
# rather than loading and shuffling every sequence in memory at once.
buffer_size = 8000

# Build the batched pipeline from `sequences` instead of from `dataset` itself,
# so re-running this cell does not batch data that is already batched.
dataset = (
    sequences
    .map(create_seq_targets)
    .shuffle(buffer_size)
    .batch(batch_size, drop_remainder=True)
)

In [18]:
dataset

<_BatchDataset element_spec=(TensorSpec(shape=(128, 250), dtype=tf.int32, name=None), TensorSpec(shape=(128, 250), dtype=tf.int32, name=None))>

In [19]:
def sparse_cat_loss(y_true, y_pred):
    """Sparse categorical cross-entropy evaluated on raw (unnormalised) scores.

    `from_logits=True` tells the loss that `y_pred` holds logits rather than
    probabilities, so the softmax is applied inside the loss itself.
    """
    loss = sparse_categorical_crossentropy(y_true, y_pred, from_logits=True)
    return loss

In [20]:
vocab_size = len(vocab)

# The embedding dimension
embed_dim = 84

# Number of RNN units
rnn_neurons = 1026

In [21]:
def create_model(vocab_size, embed_dim, rnn_neurons, batch_size,
                 rnn_neurons_2=500, dropout=0.4):
    """Build and compile the two-layer stateful LSTM character model.

    vocab_size: Number of distinct characters; used as the Embedding input
      size and as the width of the final Dense layer.
    embed_dim: Size of each character embedding vector.
    rnn_neurons: Number of units in the first LSTM layer.
    batch_size: Batch dimension baked into the InputLayer. The LSTM layers are
      stateful, so the model only accepts batches of exactly this size.
    rnn_neurons_2: Number of units in the second LSTM layer (default 500,
      the value that used to be hard-coded).
    dropout: Value passed as the `dropout` argument of both LSTM layers
      (default 0.4, the value that used to be hard-coded).

    Returns the model compiled with the Adam optimizer and sparse_cat_loss.
    The final Dense layer has no activation, so the model emits logits.
    """
    model = Sequential([
        # Fixed batch size, variable sequence length.
        InputLayer(batch_input_shape=(batch_size, None)),
        Embedding(vocab_size, embed_dim),
        LSTM(rnn_neurons, return_sequences=True, stateful=True,
             recurrent_initializer='glorot_uniform', dropout=dropout),
        LSTM(rnn_neurons_2, return_sequences=True, stateful=True,
             recurrent_initializer='glorot_uniform', dropout=dropout),
        Dense(vocab_size),
    ])
    model.compile(optimizer='adam', loss=sparse_cat_loss)
    return model

In [22]:
model = create_model(
  vocab_size = vocab_size,
  embed_dim=embed_dim,
  rnn_neurons=rnn_neurons,
  batch_size=batch_size)

In [23]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (128, None, 84)           7056      
                                                                 
 lstm (LSTM)                 (128, None, 1026)         4559544   
                                                                 
 lstm_1 (LSTM)               (128, None, 500)          3054000   
                                                                 
 dense (Dense)               (128, None, 84)           42084     
                                                                 
Total params: 7662684 (29.23 MB)
Trainable params: 7662684 (29.23 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


## Example predictions

In [24]:
for input_example_batch, target_example_batch in dataset.take(1):

  # Predict off some random batch
  example_batch_predictions = model(input_example_batch)

  # Display the dimensions of the predictions
  print(example_batch_predictions.shape, " <=== (batch_size, sequence_length, vocab_size)")


(128, 250, 84)  <=== (batch_size, sequence_length, vocab_size)


# Example batch predictions

In [25]:
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sampled_indices

<tf.Tensor: shape=(250, 1), dtype=int64, numpy=
array([[29],
       [12],
       [ 7],
       [59],
       [ 8],
       [41],
       [33],
       [82],
       [75],
       [29],
       [31],
       [25],
       [22],
       [12],
       [76],
       [ 0],
       [34],
       [45],
       [18],
       [81],
       [31],
       [ 2],
       [14],
       [76],
       [48],
       [16],
       [67],
       [ 1],
       [38],
       [66],
       [ 7],
       [51],
       [27],
       [16],
       [15],
       [75],
       [39],
       [22],
       [70],
       [22],
       [39],
       [34],
       [82],
       [59],
       [16],
       [ 2],
       [81],
       [39],
       [17],
       [33],
       [44],
       [54],
       [34],
       [52],
       [40],
       [43],
       [47],
       [ 9],
       [28],
       [22],
       [60],
       [13],
       [36],
       [10],
       [74],
       [44],
       [46],
       [68],
       [24],
       [23],
       [52],
       [60],
       [45],
   

In [26]:
# Drop the trailing axis so this is a flat array rather than a list of single-element lists
sampled_indices = tf.squeeze(sampled_indices,axis=-1).numpy()
sampled_indices

array([29, 12,  7, 59,  8, 41, 33, 82, 75, 29, 31, 25, 22, 12, 76,  0, 34,
       45, 18, 81, 31,  2, 14, 76, 48, 16, 67,  1, 38, 66,  7, 51, 27, 16,
       15, 75, 39, 22, 70, 22, 39, 34, 82, 59, 16,  2, 81, 39, 17, 33, 44,
       54, 34, 52, 40, 43, 47,  9, 28, 22, 60, 13, 36, 10, 74, 44, 46, 68,
       24, 23, 52, 60, 45, 51, 35, 52, 44, 42, 51, 77, 23, 56, 31, 22, 65,
       20, 63, 38,  7, 21, 53, 13, 27, 42, 57, 78, 37, 12,  5, 23, 33, 40,
       60, 66, 55, 77, 54, 59, 64, 67, 35, 45, 70, 34, 41, 17, 10, 69, 17,
       11,  2, 16, 65, 74, 81, 13, 59, 36, 79, 53, 82, 47,  3, 34,  6, 52,
       13, 55, 33, 41, 79, 23, 32, 25, 48, 52,  8, 61, 32,  4, 12, 76, 49,
        6, 26, 20, 59, 66, 35,  6, 35, 49, 57, 50, 35, 20, 58, 42, 59,  0,
       48,  2, 46, 11,  6, 10, 82, 49, 59,  4, 19, 71, 30, 73, 14, 29, 27,
       19, 15, 12, 17, 14, 56, 19, 77, 73, 51, 50, 27, 71, 10, 14, 68, 63,
        6, 74,  7, 21, 76, 72, 54, 67, 57, 49, 59,  2, 35, 45,  4, 67, 54,
       73, 56, 51, 51, 15

In [27]:
print("Given the input seq: \n")
print("".join(ind_to_char[input_example_batch[0]]))
print('\n')
print("Next Char Predictions: \n")
print("".join(ind_to_char[sampled_indices ]))

Given the input seq: 

hy sting is not so sharp
              As friend rememb'red not.
    Heigh-ho! sing, &c.

  DUKE SENIOR. If that you were the good Sir Rowland's son,
    As you have whisper'd faithfully you were,
    And as mine eye doth his effigies witness
    Mos


Next Char Predictions: 

D1)d,PH|tDF?;1u
IT7zF!3uW5l Mk)ZB54tN;o;NI|d5!zN6HS_I[ORV-C;e2K.sSUm><[eTZJ[SQZv<aF;j9hM):]2BQbwL1'<HOek`v_dilJToIP6.n60!5jsz2dKx]|V"I([2`HPx<G?W[,fG&1uX(A9dkJ(JXbYJ9cQd
W!U0(.|Xd&8pEr3DB84163a8vrZYBp.3mh(s):uq_lbXd!JT&l_raZZ4kkF,?G|`vT8WQkuz 6R(t"fH


# Training the model


In [29]:
model.fit(dataset,epochs=40)

Epoch 1/40


KeyboardInterrupt: 

# Saving model as .h5

In [30]:
model.save('shakespeare_gen1.h5') 

In [31]:
from tensorflow.keras.models import load_model

In [32]:
# Rebuild the network with a batch size of 1. create_model fixes the batch
# dimension in its InputLayer and uses stateful LSTM layers, so generating
# from a single seed sequence needs its own batch-size-1 instance.
model = create_model(vocab_size, embed_dim, rnn_neurons, batch_size=1)

# Load the weights from the file written by model.save() above.
model.load_weights('shakespeare_gen1.h5')

# Declare the input shape: one sequence of unspecified length.
model.build(tf.TensorShape([1, None]))

# Generating text

In [33]:
def generate_text(model, start_seed, gen_size=100, temp=1.0):
  '''
  Generate text one character at a time, starting from a seed string.

  model: Trained model used to generate text. It is called on integer inputs
    with a batch dimension of 1 and must provide reset_states().
  start_seed: Initial seed text in string form. Must be non-empty, and every
    character must be a key of char_to_ind (otherwise a KeyError is raised).
  gen_size: Number of characters to generate
  temp: Sampling temperature, must be > 0. The logits are divided by it
    before sampling.

  Returns start_seed followed by the generated characters.

  Raises ValueError if start_seed is empty or temp is not positive.

  Basic idea behind this function is to take in some seed text, format it so
  that it is in the correct shape for our network, then loop the sequence as
  we keep adding our own predicted characters. Similar to our work in the RNN
  time series problems.
  '''

  # Fail early with a clear message instead of an obscure indexing error
  # (empty seed) or inf/NaN logits (division by a non-positive temperature).
  if not start_seed:
    raise ValueError("start_seed must contain at least one character")
  if temp <= 0:
    raise ValueError("temp must be a positive number")

  # Vectorizing starting seed text
  input_eval = [char_to_ind[s] for s in start_seed]

  # Expand to match batch format shape
  input_eval = tf.expand_dims(input_eval, 0)

  # Empty list to hold resulting generated text
  text_generated = []

  # Temperature controls the randomness of the sampled text:
  # lower temperature  == sharper distribution, less surprising / more expected
  # higher temperature == flatter distribution, more surprising / less expected

  # Here batch size == 1. Clear any state left over from a previous call.
  model.reset_states()

  for _ in range(gen_size):

    # Generate Predictions
    predictions = model(input_eval)

    # Remove the batch shape dimension
    predictions = tf.squeeze(predictions, 0)

    # Use a categorical distribution to select the next character;
    # only the sample for the last time step is kept.
    predictions = predictions / temp
    predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

    # Pass only the predicted character as the next input. This relies on the
    # model being stateful (as built by create_model) to carry earlier context.
    input_eval = tf.expand_dims([predicted_id], 0)

    # Transform back to character letter
    text_generated.append(ind_to_char[predicted_id])

  return start_seed + ''.join(text_generated)

In [34]:
print(generate_text(model,"JULIET ",gen_size=800))

JULIET AND             Exit. Soldiers.

          Enter PROTEUS, VALENTINE, and SHYLOCK

               EO-enter CHARMIAN, IACHIO, AUMERLE, CHILD and ATTENDANTS

  CLARENCE. O, let me sing your Grace!
    What, art thou to our conscience?
  MENELAUS. If I can rush so well,
    Impromish your equisore.
  LEONTES. Come, come, pardon; let 't it down.
  NESTOR. Your power great Priam shall.                  [Drum forth]
  IACHIMO.                    Thank you so hung?  
  AARON. How would you then depart at from your Grace?
  GLOUCESTER. How bashful and Troy. O Caesar, I dare hear
    Though given to sport, cross-gill'd and bloody wearth!
  EDWARD. Even here unsadled Warwick give you jot;
             The combin of the world able how
                  As false against the fool.
                  Ho! 
