In [2]:
import os
import random
import sys

import matplotlib.pyplot as plt
import numpy as np
import tensorflow.keras as keras
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras import optimizers
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

%matplotlib inline




In [3]:
# Location of the training corpus. Set the DRACULA_PATH environment variable
# to point at a different copy; the original hard-coded path is the fallback.
path = os.environ.get('DRACULA_PATH', '/Users/Shrinikesh/Downloads/dracula.txt')

# The context manager closes the file handle as soon as the text is read, and
# the explicit encoding keeps decoding independent of the platform default.
with open(path, encoding='utf-8') as corpus_file:
    # Lower-casing shrinks the character vocabulary the model has to learn.
    text = corpus_file.read().lower()
print(len(text))

842148


### Vectorising sequences of characters in input

In [7]:
# Each training sample is a window of `maxlen` characters; a new window is
# started every `step` characters, and the character immediately following
# the window is the prediction target.
maxlen = 60
step = 3
sentences = []
next_chars = []

for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i:i + maxlen])
    next_chars.append(text[i + maxlen])

print('Number of sequences: {}'.format(len(sentences)))

# Sorted list of the distinct characters in the corpus, plus the reverse
# lookup from character to its position in that list. enumerate() builds the
# lookup in a single pass instead of calling chars.index() per character.
chars = sorted(set(text))
char_indices = {char: index for index, char in enumerate(chars)}

print('Vectorization now...')
# One-hot encode inputs as (sequence, timestep, character) and targets as
# (sequence, character). The builtin `bool` is used as the dtype because the
# `np.bool` alias was removed in NumPy 1.24.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Number of sequences: 280696
Vectorization now...


### Now build the network

In [8]:
# Single LSTM layer reading one-hot windows of shape (maxlen, len(chars)),
# followed by a softmax over the character vocabulary.
model = models.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))

# `learning_rate` replaces the deprecated `lr` keyword, which newer Keras
# releases no longer accept.
optimizer = keras.optimizers.RMSprop(learning_rate=0.01)
# Targets are one-hot encoded, hence categorical cross-entropy.
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['acc'])

### Define a function that rescales a distribution using a temperature and samples from it

In [9]:
def sample(preds, temperature=1.0):
    """Draw one index from `preds` after temperature rescaling.

    The distribution is reweighted as softmax(log(preds) / temperature) and a
    single index is drawn from the result with NumPy's global random state.

    Args:
        preds: 1-D array-like of non-negative probabilities.
        temperature: Scaling factor applied to the log-probabilities. Values
            below 1 sharpen the distribution, values above 1 flatten it. A
            value of exactly 0 returns the most likely index without sampling.

    Returns:
        The sampled index as a NumPy integer.
    """
    preds = np.asarray(preds).astype('float64')

    if temperature == 0:
        # Limit of the rescaled distribution as temperature -> 0+: all the
        # mass sits on the most likely entry, so no random draw is needed.
        return np.argmax(preds)

    with np.errstate(divide='ignore'):
        # log(0) = -inf is intentional: zero-probability entries stay at zero
        # after exponentiation, so the divide warning is just noise.
        scaled = np.log(preds) / temperature

    # Shift by the maximum before exponentiating. Without this, a small
    # temperature makes every exp() underflow to 0 and the normalisation
    # below becomes 0/0 = NaN, which np.random.multinomial rejects.
    scaled = scaled - np.max(scaled)
    exp_preds = np.exp(scaled)
    preds = exp_preds / np.sum(exp_preds)

    # One multinomial trial yields a one-hot row; argmax recovers its index.
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [10]:
# Train one epoch at a time and, after each epoch, print text generated from a
# random seed window at several temperatures to show how the model progresses.
# range(1, 60) gives epochs 1 through 59.
for epoch in range(1, 60):
    print('Epoch: {}'.format(epoch))
    model.fit(x, y, batch_size=128, epochs=1)

    # Pick a random window of the corpus to use as the generation seed.
    start_index = random.randint(0, len(text) - maxlen - 1)
    seed_text = text[start_index: start_index + maxlen]
    print('-----Generating with seed: "' + seed_text + '"')

    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('------Temperature:', temperature)
        # Restart from the same seed for every temperature. Otherwise each
        # run would continue from the tail of the previous temperature's
        # output and the samples would not be comparable.
        generated_text = seed_text
        sys.stdout.write(generated_text)

        for i in range(400):
            # One-hot encode the current window as a batch of one sequence.
            sampled = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0, t, char_indices[char]] = 1.

            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]

            # Slide the window: append the new character, drop the oldest,
            # so the window always holds exactly maxlen characters.
            generated_text += next_char
            generated_text = generated_text[1:]
            sys.stdout.write(next_char)

        # End the generated passage so the next header starts on its own line.
        sys.stdout.write('\n')
    

Epoch: 1
Train on 280696 samples
 10880/280696 [>.............................] - ETA: 7:56 - loss: 2.8324 - acc: 0.2224

KeyboardInterrupt: 