<a href="https://colab.research.google.com/github/Marcusreu1/play-generator/blob/main/RNN_play_generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import libraries

In [1]:
%tensorflow_version 2.x

from keras.preprocessing import sequence
import keras
import tensorflow as tf
import os
import numpy as np

Colab only includes TensorFlow 2.x; %tensorflow_version has no effect.


# Dataset

In [2]:
# Fetch the Shakespeare corpus with tf.keras.utils.get_file and keep the returned path.
path_to_file = tf.keras.utils.get_file(
    fname='shakespeare.txt',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt',
)

## Load your own dataset from your files

In [3]:
# Optional (Colab only): uncomment the two lines below to upload a local file
# and point path_to_file at it instead of the file obtained earlier.
#from google.colab import files
#path_to_file=list(files.upload().keys())[0]

# Read the file

In [4]:
# Read the corpus as raw bytes and decode it explicitly as UTF-8.
# The context manager closes the file handle as soon as reading is done.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
# The length of text is the number of characters in it.
print(f'Length of text: {len(text)} characters')

Length of text: 1115394 characters


# Encoding

In [5]:
# The vocabulary is every distinct character of the corpus, in sorted order.
vocab = sorted(set(text))

# Two lookup tables: character -> index (dict) and index -> character (array).
char2idx = dict(zip(vocab, range(len(vocab))))
idx2char = np.array(vocab)


def text_to_int(text):
  """Encode a string as a NumPy array of the `char2idx` index of each character."""
  indices = []
  for char in text:
    indices.append(char2idx[char])
  return np.array(indices)

# Integer-encode the entire corpus once so it can be reused without re-encoding.
text_as_int= text_to_int(text)

In [6]:
# Show the first 13 characters of the corpus next to their integer encoding.
print("Text: ", text[:13])
print("Encoded: ", text_to_int(text[:13]))

Text:  First Citizen
Encoded:  [18 47 56 57 58  1 15 47 58 47 64 43 52]


# Decoding integers back to text

In [7]:
def int_to_text(ints):
  """Decode a sequence of vocabulary indices back into a string.

  Args:
    ints: Integer indices into `idx2char`. If the object exposes a
      `.numpy()` method (as eager tensors do), it is converted first;
      anything else is used for indexing as it is.

  Returns:
    The characters selected by `ints`, joined into a single string.
  """
  try:
    ints = ints.numpy()
  except AttributeError:
    # No `.numpy()` available: index with the value unchanged.
    pass
  return ''.join(idx2char[ints])


# Round-trip check: decode the first 13 encoded characters back into text.
print(int_to_text(text_as_int[:13]))

First Citizen


# Creating training examples

In [8]:
# Each training chunk holds seq_length + 1 characters: seq_length for the
# input plus one extra so the target can be the input shifted by one.
seq_length = 100
examples_per_epoch = len(text) // (seq_length + 1)

# Dataset with one element per character index of the encoded corpus.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

In [9]:
# Group the character stream into chunks of seq_length+1 elements;
# drop_remainder=True discards a final chunk that would be shorter.
sequences=char_dataset.batch(seq_length+1,drop_remainder=True)

In [10]:
def split_input_target(sequence):
    """Turn one chunk into an (input, target) pair offset by one position.

    The input is every element except the last; the target is every element
    except the first, so target[i] is the element that follows input[i].
    """
    return sequence[:-1], sequence[1:]


# Apply split_input_target to every chunk, giving a dataset of (input, target) pairs.
dataset=sequences.map(split_input_target)

In [11]:
# Preview the first two (input, target) pairs as decoded text.
for x, y in dataset.take(2):
  print("\n\nEXAMPLE\n", "input", int_to_text(x), "Output", int_to_text(y), sep="\n")



EXAMPLE

input
First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You
Output
irst Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You 


EXAMPLE

input
are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you 
Output
re all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you k


In [12]:
# Model hyperparameters.
VOCAB_SIZE = len(vocab)  # one class per distinct character
EMBEDDING_DIM = 256
RNN_UNITS = 1024

# Input-pipeline settings.
BATCH_SIZE = 64
BUFFER_SIZE = 10000  # passed to Dataset.shuffle as the buffer size

# Shuffle the examples, then form fixed-size batches; an incomplete final
# batch is dropped.
data = (
    dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)

# Building the model

In [13]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  """Assemble the character-level model: Embedding -> stateful LSTM -> Dense.

  Args:
    vocab_size: Number of distinct characters; used as the embedding input
      size and as the width of the final Dense layer.
    embedding_dim: Size of each character embedding vector.
    rnn_units: Number of units in the LSTM layer.
    batch_size: Fixed batch size that becomes part of the model's input shape.

  Returns:
    An uncompiled tf.keras.Sequential model.
  """
  # NOTE(review): `batch_input_shape` works with the Keras version this
  # notebook was run with; confirm it is still accepted before upgrading.
  embedding = tf.keras.layers.Embedding(
      vocab_size, embedding_dim, batch_input_shape=[batch_size, None])
  # return_sequences=True yields an output for every time step, not just the last.
  recurrent = tf.keras.layers.LSTM(
      rnn_units,
      return_sequences=True,
      stateful=True,
      recurrent_initializer='glorot_uniform')
  projection = tf.keras.layers.Dense(vocab_size)
  return tf.keras.Sequential([embedding, recurrent, projection])


# Build the training model with the training batch size and print its layer summary.
model=build_model(VOCAB_SIZE,EMBEDDING_DIM,RNN_UNITS,BATCH_SIZE)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (64, None, 256)           16640     
                                                                 
 lstm (LSTM)                 (64, None, 1024)          5246976   
                                                                 
 dense (Dense)               (64, None, 65)            66625     
                                                                 
Total params: 5330241 (20.33 MB)
Trainable params: 5330241 (20.33 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


# Creating the loss function

In [14]:
# Run the untrained model on a single batch to check the shape of its output.
for input_example_batch, target_example_batch in data.take(1):
  example_batch_predictions = model(input_example_batch)
  print(example_batch_predictions.shape, "#(batch_size, sequence_length,vocab_size)")

(64, 100, 65) #(batch_size, sequence_length,vocab_size)


In [15]:
# The length of the prediction tensor is the size of its first (batch) axis.
print(len(example_batch_predictions))
print(example_batch_predictions)

64
tf.Tensor(
[[[ 1.70593872e-03  5.46026696e-03  2.92965258e-03 ...  4.09210566e-03
   -1.31352688e-03  6.82836398e-03]
  [-1.96181657e-03  4.23642201e-03  1.31569069e-03 ...  1.73984165e-03
   -2.33781477e-03  1.67514686e-03]
  [-5.54576516e-04  2.71998253e-03  4.24641464e-03 ...  1.50886434e-03
   -4.46689408e-03 -5.52857993e-04]
  ...
  [ 4.16014250e-03 -3.59564344e-03  9.64214560e-04 ...  4.56542335e-03
   -8.92869313e-04  2.79668346e-03]
  [ 3.97024164e-03  1.73156848e-03  5.82224084e-03 ...  7.69527582e-03
   -8.74813937e-04  8.02928861e-03]
  [ 3.71463574e-03 -2.39651417e-04  8.04606639e-03 ...  6.81372080e-03
   -1.77982205e-03  2.73105921e-03]]

 [[-1.83230988e-03  2.04336178e-03  1.40121754e-03 ...  8.14289227e-03
   -5.01889782e-03  1.50499516e-03]
  [ 4.39304509e-04  7.06568453e-03  3.04970890e-03 ...  1.11275073e-02
   -5.62721677e-03  7.67432060e-03]
  [-1.06390961e-03  9.73212998e-03 -1.71539024e-03 ...  1.23288995e-02
   -4.59532905e-03  5.51881688e-03]
  ...
  [-5.152

In [16]:
# Predictions for the first example of the batch: one row per time step.
pred=example_batch_predictions[0]
print(len(pred))
print(pred)

100
tf.Tensor(
[[ 0.00170594  0.00546027  0.00292965 ...  0.00409211 -0.00131353
   0.00682836]
 [-0.00196182  0.00423642  0.00131569 ...  0.00173984 -0.00233781
   0.00167515]
 [-0.00055458  0.00271998  0.00424641 ...  0.00150886 -0.00446689
  -0.00055286]
 ...
 [ 0.00416014 -0.00359564  0.00096421 ...  0.00456542 -0.00089287
   0.00279668]
 [ 0.00397024  0.00173157  0.00582224 ...  0.00769528 -0.00087481
   0.00802929]
 [ 0.00371464 -0.00023965  0.00804607 ...  0.00681372 -0.00177982
   0.00273106]], shape=(100, 65), dtype=float32)


In [17]:
# Prediction at the first time step: one value per vocabulary entry.
time_pred= pred[0]
print(len(time_pred))
print(time_pred)

65
tf.Tensor(
[ 1.70593872e-03  5.46026696e-03  2.92965258e-03  5.53701632e-03
  3.52002098e-04  7.40212505e-04 -2.61171139e-03  1.52184613e-04
  5.21520991e-03  2.75546825e-03 -1.81522663e-03 -3.50444857e-03
 -6.65778178e-04 -1.89177436e-03  1.21774399e-04 -3.23639438e-03
 -1.14432565e-04 -3.76685406e-03  7.34490668e-03  4.56825335e-04
  1.23989757e-03  5.12635894e-03 -2.03279499e-03 -4.87690512e-03
  9.01867170e-04 -2.31072586e-03 -3.49775492e-03  3.62148602e-03
  4.29686578e-03 -1.01282005e-03  1.96049409e-03 -1.61430333e-04
  7.56166875e-03 -3.59967491e-03 -9.05461609e-03  4.97890869e-03
 -4.73952387e-03  1.28577230e-05 -3.10298847e-03 -2.48091645e-03
 -3.36558325e-04 -4.44768276e-03  1.32296328e-03  2.99261999e-03
  4.66113118e-03 -4.61108238e-03  4.19483427e-03 -3.41579271e-03
  7.59466668e-04  1.69871654e-03 -5.20434813e-04 -3.82753205e-04
 -2.79553467e-03 -6.21518958e-03  2.48010550e-03  6.25119172e-03
 -1.75662478e-03  5.05508156e-03 -3.93605558e-04  5.70973940e-03
 -4.5588128

In [18]:
# Draw one character index per time step, treating each row of `pred` as logits.
sampled_indices = tf.random.categorical(pred, num_samples=1)

# Collapse the (time_steps, 1) samples into a flat 1-D NumPy array.
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()

# Decode the sampled indices into characters.
predicted_chars = int_to_text(sampled_indices)

predicted_chars

"U'wYOb3bqokq'UFb\nbdO Px'YqFQi.mmBq,!T-AwbbAcxZn;.U'wTm\nJZDSesFrfLO.AzvVYkQ&xR;WsHZ- 3qFBaoFqnlhN'J,F"

In [19]:
def loss(labels, logits):
  """Sparse categorical cross-entropy between integer labels and model outputs.

  `from_logits=True` is passed so Keras treats `logits` as unnormalized
  scores rather than probabilities.
  """
  per_step_loss = tf.keras.losses.sparse_categorical_crossentropy(
      labels, logits, from_logits=True)
  return per_step_loss

# Compiling the model

In [20]:
# Compile with the Adam optimizer and the custom `loss` function.
model.compile(optimizer='adam',loss=loss)

# Creating checkpoints

In [21]:
# Directory that receives the training checkpoints.
checkpoint_dir = './training_checkpoints'

# File-name pattern for the checkpoints.
# NOTE(review): "{epoch}" is presumably filled in by ModelCheckpoint with the
# epoch number — confirm against the Keras documentation.
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Callback that saves only the model weights.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True,
)

# Training

In [22]:
# Train for 10 epochs with the checkpoint callback attached; keep the returned History object.
history= model.fit(data, epochs= 10, callbacks=[checkpoint_callback])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# Loading the model

In [23]:
# Rebuild the network with batch size 1 (this replaces the training `model`),
# then restore the weights from the latest checkpoint found in checkpoint_dir.
model= build_model(VOCAB_SIZE, EMBEDDING_DIM, RNN_UNITS, batch_size=1)
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
model.build(tf.TensorShape([1, None]))

# Generating text

In [24]:
def generate_text(model, start_string, num_generate=400, temperature=1.2):
  """Generate text by repeatedly sampling the next character from the model.

  Args:
    model: Stateful model built with batch size 1 that maps a batch of
      character indices to per-step logits over the vocabulary.
    start_string: Non-empty seed text. Every character must be a key of
      `char2idx`.
    num_generate: Number of characters to generate (default 400).
    temperature: Positive divisor applied to the logits before sampling.
      Lower values make the output more predictable, higher values more
      surprising (default 1.2).

  Returns:
    `start_string` followed by the generated characters.

  Raises:
    ValueError: If `start_string` is empty or `temperature` is not positive.
    KeyError: If `start_string` contains a character missing from `char2idx`.
  """
  if not start_string:
    raise ValueError("start_string must contain at least one character")
  if temperature <= 0:
    raise ValueError("temperature must be positive")

  # Vectorize the seed into a batch holding one sequence.
  input_eval = [char2idx[s] for s in start_string]
  input_eval = tf.expand_dims(input_eval, 0)

  text_generated = []

  # Start from a clean recurrent state so earlier calls do not leak into this one.
  model.reset_states()
  for _ in range(num_generate):
    predictions = model(input_eval)
    # Only the logits of the final time step decide the next character, so
    # sample from that single row instead of sampling every step.
    last_logits = predictions[:, -1, :] / temperature
    predicted_id = tf.random.categorical(last_logits, num_samples=1)[0, 0].numpy()

    # Feed just the sampled character back in; the stateful LSTM keeps the history.
    input_eval = tf.expand_dims([predicted_id], 0)
    text_generated.append(idx2char[predicted_id])
  return start_string + ''.join(text_generated)

In [25]:
# Ask the user for a seed string and print the text generated from it.
inp= input("type a starting string ")
print(generate_text(model, inp))

type a starting string hello
hellow cry him then
: they do your butting Baptain'd. Petry may thy ought Duke thy cry.
Seek thou thy Abothat adiled, my ling.

GLOURES:
Yes, by yourself to do.
Some scarkewed Sast sound to privolds.

DUCHESS Onjeed's death. This, I'll cupt be ic:
Accurdental deserved inhy blood were
and than the entert, take your wrong; if it is exy
grimins, Lanshouslard Ghut here reblace my
bandal'st, drupj hence age
