In [None]:
!pip install pypdf2



In [None]:
import numpy as np

In [None]:
# we'll use softmax to obtain the probability distribution
# over the possible next characters.
# In order to control the entropy of that probability distribution,
# we'll introduce a parameter called the softmax temperature

# this parameter is used to reweight the
# probability distribution, which changes the total entropy of the
# system

In [None]:
# now we'll implement a character-level LSTM text generation model
# trained on Nietzsche's writings

# start by downloading the corpus
import keras

path = keras.utils.get_file(
    'nietzsche.txt',
    origin = 'https://s3.amazonaws.com/text-datasets/nietzsche.txt'
)
# read the text and convert it to lowercase; the context manager closes the
# file handle, and the explicit encoding avoids depending on the platform default
with open(path, encoding = 'utf-8') as corpus_file:
  text = corpus_file.read().lower()
print('Corpus length: %d'%len(text))

Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
Corpus length: 600893


In [None]:
# next, we'll extract partially overlapping sequences of length
# maxlen, one-hot-encode them in a 3D ndarray X of shape
# (sequences, maxlen, unique_characters)
maxlen = 30
# we'll sample a new sequence every `step` characters
step = 2
sentences = []
next_chars = []

# sample the data: each window of maxlen characters is paired with the
# single character that follows it (the prediction target)
for i in range(0, len(text) - maxlen, step):
  sentences.append(text[i: i + maxlen])
  next_chars.append(text[i + maxlen])
print('Number of sequences:', len(sentences))

# list of unique characters in the corpus
chars = sorted(set(text))
print('Number of unique characters : %d'%len(chars))
# map each unique character to its index in `chars`
# (enumerate avoids a linear chars.index() scan per character)
char_indices = {char: index for index, char in enumerate(chars)}

# one-hot-encode the sampled data
print('Starting vectorization...')
# builtin bool is used as the dtype: the np.bool alias was deprecated in
# NumPy 1.20 and removed in NumPy 1.24
X = np.zeros((len(sentences), maxlen, len(chars)), dtype = bool)
Y = np.zeros((len(sentences), len(chars)), dtype = bool)

for i, sentence in enumerate(sentences):
  for t, char in enumerate(sentence):
    X[i, t, char_indices[char]] = 1
  Y[i, char_indices[next_chars[i]]] = 1
print('Vectorization finished!')

Number of sequences: 300432
Number of unique characters : 57
Starting vectorization...
Vectorization finished!


In [None]:
from keras import layers, models, optimizers

# network: one-hot character window -> single LSTM layer -> softmax over the vocabulary
vocab_size = len(chars)
inputs = layers.Input(shape = (maxlen, vocab_size))
hidden = layers.LSTM(128)(inputs)
outputs = layers.Dense(vocab_size, activation = 'softmax')(hidden)

model = models.Model(inputs, outputs)

# the targets are one-hot encoded, hence categorical cross-entropy as the loss
optimizer = optimizers.RMSprop(0.01)
model.compile(loss = 'categorical_crossentropy', optimizer = optimizer)

In [None]:
def sample(preds, temperature = 1.0):
  """Draw one index from `preds` after reweighting it with a softmax temperature.

  The distribution is reweighted as softmax(log(preds) / temperature): values
  below 1 sharpen it (lower entropy), values above 1 flatten it (higher entropy).

  Args:
    preds: 1D array-like of probabilities (e.g. the softmax output of the model).
    temperature: reweighting factor. 0 is treated as greedy decoding and
      returns the index of the largest probability without sampling.

  Returns:
    The sampled index into `preds`.
  """
  preds = np.asarray(preds).astype('float64')
  if temperature == 0:
    # limit of the reweighting as temperature -> 0; avoids dividing by zero
    return np.argmax(preds)

  # log(0) = -inf is the intended result for zero-probability entries
  # (they end up with probability 0 again), so silence the warning
  with np.errstate(divide = 'ignore'):
    logits = np.log(preds) / temperature
  # shift by the maximum so the largest term is exp(0) = 1; without it every
  # term can underflow to 0 at small temperatures and the division yields NaN
  logits -= np.max(logits)
  exp_preds = np.exp(logits)
  preds = exp_preds / np.sum(exp_preds)

  # a single multinomial trial yields a one-hot vector; argmax reads its index
  probas = np.random.multinomial(1, preds, 1)
  return np.argmax(probas)

In [None]:
import random
import sys


# one History object per training epoch
metrics = []
for epoch in range(1, 60):
  print('\n epoch ', epoch)
  r = model.fit(X, Y, batch_size = 128, epochs = 1) # train for 1 iteration
  metrics.append(r)
  # pick a random maxlen-character window of the corpus as the seed
  start_index = random.randint(0, len(text) - maxlen - 1)
  seed_text = text[start_index: start_index + maxlen]
  print('--- Generating with seed: "' + seed_text + '"')

  for temperature in [.5,.6, .7, .8, .9, 1.0]:
    print('\n ----- temperature: %.1f'%temperature)
    # restart from the same seed for every temperature so the samples are
    # comparable (otherwise each run continues from the previous run's tail)
    generated_text = seed_text
    sys.stdout.write(generated_text)
    # generate 450 chars starting from seed text
    for char_count in range(1, 451):
      # one-hot-encode the current maxlen-character window
      sampled = np.zeros((1, maxlen, len(chars)))
      for t, char in enumerate(generated_text):
        sampled[0, t, char_indices[char]] = 1
      preds = model.predict(sampled, verbose = 0)[0]
      next_index = sample(preds, temperature)
      next_char = chars[next_index]

      # slide the window: append the new character and drop the oldest one
      generated_text += next_char
      generated_text = generated_text[1:]
      sys.stdout.write(next_char)
      # break the line after every 150 generated characters (counting from 1
      # so no break is emitted right after the first character)
      if char_count % 150 == 0: sys.stdout.write('\n')


 epoch  1
--- Generating with seed: "ophers from young
naturalists "

 ----- temperature: 0.5
ophers from young
naturalists i
s as a good of the contemption, and all the more of the estive which part of the the were the all nempteding the superious the all are itself which we
 contrated and as it is and madile of the senses to has with to
proved it is for the worght and and men of some has as the
sours and upon the maded an
d art the will to pained the does the wirtles and sour good
and it of the too the freedem and with the consideration, what is all the instinct, the t
 ----- temperature: 0.6
hat is all the instinct, the tr
outh one do disten the christianity, incondition and at menhated of everything and despined, which it is to proved and the desired it
loves
and accord
ing of the truthly what now a some manks, and and unestable itself, the please which is
do not as a can and the moral plames, that was every are is we
 are stood from a may we more to doungs, a deligh might been we



h so moderations of the noiut and no distendual its is not have all the free on things, things. one are so present
his people have all polit
 ----- temperature: 0.9
sent
his people have all politi
cal
penituer and sepreasm of away pull despise, but
knows that the beaker in the a new suffering writists of
the hialen and propers of the best, them 
should the cause in which is the heavinixts--and a frearlist, thereof for the short, and well ees to culting, and not desirese, the forth, and one
thu
s arideast she difficuable free or any religious, sounding religiom placed than the development
of the belief of his
richare nowadays, which such and
 ----- temperature: 1.0
chare nowadays, which such and 
thrim the mind of the
talks may be inxtrunes
are mind."--we point, an indeperied cates your, and all to results so time one gss still taste than as we
re the varsess
in his onitifice which healthy of the suns"warty, howing
is bree sources in din in if tasting and devils; its spirit, but the bir