In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Embedding, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import utils

from google.colab import drive

# Mount Google Drive so the corpus file is reachable from the Colab runtime.
drive.mount('/content/drive')

# Read the corpus as a list of lines (one training sentence per line).
# The context manager closes the file handle deterministically, and the
# explicit encoding avoids depending on the runtime's locale default.
with open('/content/drive/My Drive/shakespeare.txt', encoding='utf-8') as corpus_file:
    text = corpus_file.read().splitlines()

# Fit a word-level vocabulary on the corpus lines.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(text)
# +1 because word indices start at 1; index 0 is what pad_sequences fills with.
vocab_size = len(tokenizer.word_index) + 1

# Expand every encoded line into all of its prefixes of length >= 2, so each
# prefix supplies (context words..., next word) as one training example.
encoded_lines = tokenizer.texts_to_sequences(text)
input_sequence = [
    encoded[:end]
    for encoded in encoded_lines
    for end in range(2, len(encoded) + 1)
]

# Left-pad all prefixes to the length of the longest one.
max_len = max(len(prefix) for prefix in input_sequence)
input_sequence = pad_sequences(input_sequence, maxlen=max_len)

# The last column is the word to predict; everything before it is the context.
inputs, labels = input_sequence[:, :-1], input_sequence[:, -1]
labels = utils.to_categorical(labels, num_classes=vocab_size)

# Next-word model: word embeddings -> two stacked GRU layers -> softmax over
# every vocabulary index.
model = Sequential([
    Embedding(vocab_size, 100, input_length=max_len - 1),
    # Emit the full sequence so the second GRU has per-timestep input.
    GRU(128, return_sequences=True),
    Dropout(0.2),
    GRU(128),
    Dense(vocab_size, activation='softmax'),
])
model.summary()

# Labels are one-hot vectors, hence categorical (not sparse) cross-entropy.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
history = model.fit(inputs, labels, epochs=10)

# Function to sample the next word
def sample(preds, temperature=1.0):
    """Draw one index from a probability vector after temperature scaling.

    Args:
        preds: 1-D array-like of non-negative probabilities (for example a
            softmax output).
        temperature: Values below 1 sharpen the distribution, values above 1
            flatten it. A value <= 0 skips sampling and returns the index of
            the largest probability.

    Returns:
        The sampled index (a NumPy integer).
    """
    preds = np.asarray(preds).astype('float64')
    if temperature <= 0:
        # Limit of temperature -> 0 is greedy decoding; also avoids dividing
        # by zero below.
        return np.argmax(preds)

    # The small epsilon keeps log() finite for zero-probability entries.
    logits = np.log(preds + 1e-10) / temperature
    # Shift so the largest logit is 0: without this, a small temperature makes
    # every exp() underflow to 0 and the normalisation becomes 0/0 (NaN).
    logits -= np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

# Generating a new sonnet
new_text = 'For your lovers sake'
words_per_line = 8
total_lines = 14
sonnet = []
current_line = []

# Generate the sonnet one word at a time, feeding each sampled word back into
# the running text that conditions the next prediction.
for step in range(1, words_per_line * total_lines + 1):
    # Re-encode the whole running text; pad_sequences trims from the front by
    # default, so the model sees only the most recent max_len-1 tokens.
    context = pad_sequences(
        tokenizer.texts_to_sequences([new_text]),
        maxlen=max_len - 1,
    )
    distribution = model.predict(context)[0]
    chosen_index = sample(distribution, temperature=0.5)
    # Indices without a vocabulary entry (e.g. 0) fall back to a placeholder.
    chosen_word = tokenizer.index_word.get(chosen_index, 'unknown')

    current_line.append(chosen_word)
    new_text = f"{new_text} {chosen_word}"

    # Every words_per_line-th word completes a line of the sonnet.
    if step % words_per_line == 0:
        sonnet.append(" ".join(current_line))
        current_line = []

print("\n".join(sonnet))


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 10, 100)           320100    
                                                                 
 gru (GRU)                   (None, 10, 128)           88320     
                                                                 
 dropout (Dropout)           (None, 10, 128)           0         
                                                                 
 gru_1 (GRU)                 (None, 128)               99072     
                                                                 
 dense (Dense)               (None, 3201)              412929    
                                                                 
Total params: 920421 (3.51 MB)
Trainable params: 920421 (3.