In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
for folder, _, leafNames in os.walk('/kaggle/input'):
    for fullPath in (os.path.join(folder, leaf) for leaf in leafNames):
        print(fullPath)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import tensorflow as tf
import numpy as np
from tensorflow._api.v2 import data

# Remember to update this path so it points at the file that holds the training text.
# The file is read as raw bytes and decoded as UTF-8 (undecodable bytes are dropped);
# the `with` block guarantees the handle is closed as soon as the read finishes.
with open("../input/MiltonPoemLines/otherPoemStorage.txt", 'rb') as corpusFile:
    text = corpusFile.read().decode(encoding='UTF-8', errors='ignore')

# Vocabulary: every distinct character of the corpus, in sorted order.
vocab = list(set(text))
vocab.sort()

print(vocab)

# Forward lookup (character -> integer id) and reverse lookup (id -> character).
char2idx = dict(zip(vocab, range(len(vocab))))
idx2char = np.array(vocab)

# The whole corpus re-expressed as a sequence of integer ids.
textAsInt = np.array(list(map(char2idx.__getitem__, text)))

seqLength = 100
# Each chunk holds one id more than seqLength so that it can later be split into an
# input and a target that are offset by one position (see splitInputTarget).
chunkLength = seqLength + 1
examplesPerEpoch = len(text) // chunkLength

# Stream of single character ids, grouped into fixed-length chunks; a trailing
# chunk that is too short is discarded.
charDataset = tf.data.Dataset.from_tensor_slices(textAsInt)
sequences = charDataset.batch(chunkLength, drop_remainder=True)


def splitInputTarget(chunk):
    """Split one chunk into an (input, target) pair offset by one position.

    Args:
        chunk: Any sliceable sequence.

    Returns:
        A 2-tuple: the chunk without its last element, and the chunk without its
        first element. Element ``i`` of the target is therefore the element that
        follows element ``i`` of the input.
    """
    return chunk[:-1], chunk[1:]


# Input-pipeline settings.
BATCH_SIZE = 128
BUFFER_SIZE = 10000

# Model hyperparameters.
vocabSize = len(vocab)
embeddingDim = 264
rnnUnits = 1024

# (input, target) pairs, shuffled and grouped into full batches only; an
# incomplete final batch is dropped.
dataset = (
    sequences
    .map(splitInputTarget)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)


def buildModel(vocabSize, embeddingDim, rnnUnits, batchSize):
    """Assemble the character-level network.

    The stack is: Embedding -> GRU -> LSTM -> LSTM -> GRU -> Dense.

    Args:
        vocabSize: Size of the embedding's input vocabulary and number of output
            units of the final Dense layer.
        embeddingDim: Output dimension of the embedding layer.
        rnnUnits: Number of units in each of the four recurrent layers.
        batchSize: Fixed batch dimension given to the embedding's
            ``batch_input_shape``; the sequence dimension is left as ``None``.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers

    # Settings shared by every recurrent layer in the stack.
    recurrentOptions = dict(
        return_sequences=True,
        stateful=True,
        recurrent_initializer="glorot_uniform",
    )

    stack = [
        layers.Embedding(
            vocabSize, embeddingDim, batch_input_shape=[batchSize, None]),
    ]
    # Recurrent layers are created in this exact order: GRU, LSTM, LSTM, GRU.
    for recurrentLayer in (layers.GRU, layers.LSTM, layers.LSTM, layers.GRU):
        stack.append(recurrentLayer(rnnUnits, **recurrentOptions))
    stack.append(layers.Dense(vocabSize))

    return tf.keras.Sequential(stack)


# Training model: its batch dimension is fixed to the training batch size.
model = buildModel(
    vocabSize=len(vocab),
    embeddingDim=embeddingDim,
    rnnUnits=rnnUnits,
    batchSize=BATCH_SIZE,
)


def loss(labels, logits):
    """Sparse categorical cross-entropy between integer labels and raw logits.

    Args:
        labels: Target class ids.
        logits: Unnormalized model outputs (``from_logits=True`` is passed).

    Returns:
        Whatever ``tf.keras.losses.sparse_categorical_crossentropy`` returns for
        these arguments.
    """
    crossEntropy = tf.keras.losses.sparse_categorical_crossentropy
    return crossEntropy(labels, logits, from_logits=True)


EPOCHS = 50

# Checkpoint location; the "{epoch}" placeholder in the file name is left for the
# checkpoint callback to fill in.
checkpointDir = "./kaggle/working/training_checkpoint_miltonParametersChanged"
checkpointPrefix = os.path.join(checkpointDir, "chkpt_{epoch}")

# NOTE(review): fit() below receives no validation data, so a 'val_accuracy'
# metric is never produced; presumably harmless because save_best_only is not
# enabled here — confirm.
checkpointCallback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpointPrefix,
    save_weights_only=True,
    monitor='val_accuracy',
)

model.compile(loss=loss, optimizer="adam", metrics=["accuracy"])

history = model.fit(dataset, callbacks=[checkpointCallback], epochs=EPOCHS)

In [None]:
# Rebuild the network with a batch dimension of 1 for text generation: buildModel
# fixes the batch size through batch_input_shape, so the training model (built with
# BATCH_SIZE) cannot be fed a single sequence.
model = buildModel(vocabSize, embeddingDim, rnnUnits, batchSize=1)

# latest_checkpoint returns None when the directory holds no checkpoint; fail with
# a clear message instead of passing None on to load_weights.
latestCheckpoint = tf.train.latest_checkpoint(checkpointDir)
if latestCheckpoint is None:
    raise FileNotFoundError(
        "No checkpoint found in " + repr(checkpointDir)
        + "; run the training cell before generating text.")
model.load_weights(latestCheckpoint)

model.build(tf.TensorShape([1, None]))

model.summary()


def generateText(model, startString, numGenerate=20000, temperature=1.0):
    """Sample text from the character model, continuing from ``startString``.

    Args:
        model: Model that is called as ``model(inputRep)`` on a batch containing one
            sequence of character ids and that provides ``reset_states()``.
        startString: Non-empty seed text. Every character must be a key of the
            module-level ``char2idx`` mapping.
        numGenerate: Number of characters to sample. Defaults to 20000, the value
            that used to be hard-coded.
        temperature: Positive scale factor the logits are divided by before
            sampling. Smaller values make the output more predictable, larger
            values more random. Defaults to 1.0.

    Returns:
        ``startString`` followed by the ``numGenerate`` sampled characters.

    Raises:
        ValueError: If ``startString`` is empty or ``temperature`` is not positive.
        KeyError: If ``startString`` contains a character missing from ``char2idx``.
    """
    # Validate up front: an empty seed would otherwise fail deep inside the
    # sampling step, and a non-positive temperature would corrupt the logits.
    if not startString:
        raise ValueError("startString must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    # Convert the seed to ids and add a leading batch dimension of 1.
    inputRep = tf.expand_dims([char2idx[s] for s in startString], 0)

    txtGenerated = []
    model.reset_states()
    for _ in range(numGenerate):
        # Drop the batch dimension, then rescale the logits by the temperature.
        predictions = tf.squeeze(model(inputRep), 0)
        predictions = predictions / temperature

        # Sample one id per time step and keep the one for the last step.
        predictedID = tf.random.categorical(
            predictions, num_samples=1)[-1, 0].numpy()

        # Only the newly sampled character is fed back in on the next iteration.
        inputRep = tf.expand_dims([predictedID], 0)
        txtGenerated.append(idx2char[predictedID])
    return startString + "".join(txtGenerated)


print("PASSED")

word = ["the", "Paradise", "Solitude", "Death", "The", "Love","Family", "Young" ]

# Only word[1] through word[6] are used as seeds; "the" and "Young" are skipped.
# NOTE(review): confirm that this selection is intended.
seedWords = word[1:7]
totalRuns = len(seedWords)

# The `with` block closes the file once all seeds are done, so every generated
# passage is flushed to disk. UTF-8 is requested explicitly because the training
# text was decoded as UTF-8.
with open("./kaggle/working/MiltonGen1.txt", "w", encoding="utf-8") as fileOut:
    for finished, seedword in enumerate(seedWords):
        fileOut.write(generateText(model, startString=seedword))
        # Progress line counts down from the total number of runs to 1.
        print(totalRuns - finished, " / ", str(totalRuns), " - ", seedword)