In [None]:
import sys
import requests
import numpy as np
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Dense, Activation, Dropout, LSTM

# Mount Google Drive so checkpoints and predictions persist across sessions

In [None]:
# Colab-only step: mount the user's Google Drive at /content/drive.
# The recorded output shows this prompts for an authorization code.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [None]:
cd '/content/drive/My Drive/TextGenerationDemo'

/content/drive/My Drive/TextGenerationDemo


# Load a large corpus of text

In [None]:
# Download the training corpus. Bound the wait with a timeout and fail loudly
# on an HTTP error status, so an error page is never used as training text.
r = requests.get(
    "https://cs.stanford.edu/people/karpathy/char-rnn/warpeace_input.txt",
    timeout=60,
)
r.raise_for_status()

In [None]:
# Keep the decoded response body (a str) as the raw corpus.
raw_txt = r.text

In [None]:
# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(raw_txt))
corpus_size, vocab_size = len(raw_txt), len(chars)
print("Corpus: {}".format(corpus_size))
print("Categories: {}".format(vocab_size))

Corpus: 3258246
Categories: 87


# Create mappings

In [None]:
# Lookup tables in both directions between characters and their integer ids
# (the id is the character's position in the sorted vocabulary).
ix_to_char = dict(enumerate(chars))
char_to_ix = {symbol: position for position, symbol in ix_to_char.items()}

# Create training data

In [None]:
# Slide a window of `maxlen` characters over the corpus one position at a
# time: each window is an input sequence and the character that follows it
# is the target.
maxlen = 10

# Encode the corpus once, rather than re-looking-up every character for each
# of the `maxlen` windows it appears in.
encoded = [char_to_ix[char] for char in raw_txt]

x_data = []
y_data = []
for i in range(len(encoded) - maxlen):
    x_data.append(encoded[i:i + maxlen])
    y_data.append([encoded[i + maxlen]])

# Despite the name, nb_chars is the number of (window, target) pairs; the
# name is kept because later cells use it.
nb_chars = len(x_data)

# The stride is 1, so the number of sequences is len(x_data) itself; it must
# not be divided by maxlen.
print('Number of sequences:', nb_chars)

Number of sequences: 325823


In [None]:
# Shape the windows as (samples, timesteps, 1 feature) and scale the integer
# ids by the vocabulary size so every input value falls in [0, 1).
n_vocab = len(chars)
x = np.reshape(x_data, (nb_chars, maxlen, 1)) / float(n_vocab)

In [None]:
# Sanity check: expect (number of windows, maxlen, 1) after the reshape.
x.shape

(3258236, 10, 1)

# One-hot encode the target characters

In [None]:
# One-hot encode the target ids. num_classes is not passed, so the width of y
# is inferred from the labels present in y_data.
# NOTE(review): the recorded output shows 86 columns while the vocabulary has
# 87 entries; consider passing num_classes=len(chars) when no existing
# checkpoint needs to stay loadable -- confirm before changing.
y = tf.keras.utils.to_categorical(y_data)

In [None]:
# Show the shapes of the feature and label tensors that are fed to the model.
for caption, tensor in (
    ("The shape of x_training data : ", x),
    ("The shape of y_training data : ", y),
):
    print(caption, tensor.shape)

The shape of x_training data :  (3258236, 10, 1)
The shape of y_training data :  (3258236, 86)


# Define Model

In [None]:
# Three stacked 800-unit LSTM layers, each followed by 20% dropout, ending in
# a softmax layer with one output per column of y.
timesteps = len(x[1])
n_outputs = len(y[1])

Model = Sequential([
    LSTM(800, input_shape=(timesteps, 1), return_sequences=True),
    Dropout(0.2),
    LSTM(800, return_sequences=True),
    Dropout(0.2),
    LSTM(800),
    Dropout(0.2),
    Dense(n_outputs, activation='softmax'),
])

In [None]:
# Targets are one-hot vectors, hence categorical cross-entropy; optimise
# with Adam at its default settings.
Model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
)

# Creating a checkpoint

In [None]:
# Write the checkpoint to `filepath` only when the training loss reaches a
# new minimum, logging each save.
filepath = "model_weights_saved.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
model_callbacks = [checkpoint]

# Defining a custom callback

In [None]:
# Running count of finished epochs. It lives at module level so numbering
# continues across successive fit() calls; re-running this cell resets it.
epoch_number = 0

# Start with an empty predictions file each time this cell is run.
filename = 'predictions.txt'
with open(filename, 'w', encoding='utf-8'):
    pass


class CustomCallback(tf.keras.callbacks.Callback):
    """Generate a short text sample from the model after every epoch.

    The sample is echoed to stdout and appended to ``predictions.txt`` under
    an "Epoch number" header, so progress can be compared between epochs.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Greedily generate 100 characters from a fixed seed and record them.

        Args:
            epoch: Epoch index supplied by Keras. It is not used; the
                module-level ``epoch_number`` counter is reported instead so
                numbering carries over between fit() calls.
            logs: Metrics dict supplied by Keras (unused).
        """
        global epoch_number
        epoch_number = epoch_number + 1

        # The seed is exactly `maxlen` characters, matching the training
        # window length.
        seed = "looking fo"
        pattern = [char_to_ix[char] for char in seed]

        # The context manager closes the file even if predict() raises.
        # Append mode always writes at the end, so no seek is needed.
        # UTF-8 is explicit because the vocabulary may hold non-ASCII
        # characters.
        with open('predictions.txt', 'a', encoding='utf-8') as file:
            file.write("\n\n Epoch number : {}\n\n".format(epoch_number))
            for _ in range(100):
                # Same preprocessing as the training inputs:
                # (1, timesteps, 1), scaled by the vocabulary size.
                X = np.reshape(pattern, (1, len(pattern), 1))
                X = X / float(n_vocab)
                # Use the model this callback is attached to rather than a
                # notebook global.
                int_prediction = self.model.predict(X, verbose=0)
                index = int(np.argmax(int_prediction))
                prediction = ix_to_char[index]
                sys.stdout.write(prediction)
                file.write(prediction)
                # Slide the window: drop the oldest id, append the new one.
                pattern = pattern[1:] + [index]

# Training

In [None]:
# model_callbacks is itself a list; unpack it so Keras receives one flat list
# of callback objects instead of a list nested inside the list.
Model.fit(
    x,
    y,
    batch_size=2000,
    epochs=10,
    callbacks=[CustomCallback(), *model_callbacks],
)

### To resume training from a saved checkpoint, run the following cells

In [None]:
# Best-effort restore: if the checkpoint cannot be loaded, report why and
# carry on with the weights currently in memory.
try:
    Model.load_weights(filepath)
except Exception as exc:
    print("Error loading in model : {}".format(exc))

In [None]:
# Continue training for 25 more epochs. model_callbacks is itself a list;
# unpack it so Keras receives one flat list of callback objects instead of a
# list nested inside the list.
Model.fit(
    x,
    y,
    batch_size=2000,
    epochs=25,
    callbacks=[CustomCallback(), *model_callbacks],
)