In [1]:
import os
import pandas as pd
import numpy as np

# Setting path to text
text_path = os.getcwd() + '/MobyDick.txt'

# Creating results directory if it doesn't exist
results_dir = os.getcwd() + "/results"
os.makedirs(results_dir, exist_ok=True)

In [2]:
# reading text 
with open(text_path, 'r', encoding='utf-8') as file: 
    text = file.read()

# limiting text length for shorter runtime
text = text[:500000]
text = text.lower()

# Length of the corpus

print('Corpus length:', len(text))

Corpus length: 500000


In [3]:
#  Vectorizing sequences of characters
maxlen = 60
step = 3
sentences = []
next_chars = []

for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
    
print('Number of sequences: ', len(sentences))

# Determining unique characters
chars = sorted(list(set(text)))
print('Unique characters: ', len(chars))
char_indices = dict((char, chars.index(char)) for char in chars)

print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype= bool)

for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Number of sequences:  166647
Unique characters:  70
Vectorization...


In [4]:
# Model architecture
import keras
from keras import layers

model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation = 'softmax'))

# Model compilation configuration
optimizer = keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [5]:
#  Function to sample the next character given the model’s predictions
def sample(preds, temperature = 1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

import random
import sys

#  Text-generation loop
for epoch in range(1, 30):
    print('\n\nepoch', epoch)
    model.fit(x, y, batch_size = 128, epochs = 1)
    start_index = random.randint(0, len(text) - maxlen - 1)
    generated_text = text[start_index: start_index + maxlen]
    print('\n--- Generating with seed: "' + generated_text + '"')
    
    # a range of different sampling temperatures
    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('\n------ temperature: ', temperature)
        sys.stdout.write(generated_text)
        
        for i in range(400):
            sampled = np.zeros((1, maxlen, len(chars))) # One-hot encodes the characters generated so far
            for t, char in enumerate(generated_text):
                sampled[0, t, char_indices[char]] = 1

            preds = model.predict(sampled, verbose = 0)[0] # Samples the next character
            next_index = sample(preds, temperature)
            next_char = chars[next_index]

            generated_text += next_char
            generated_text = generated_text[1:]

            sys.stdout.write(next_char)



epoch 1

--- Generating with seed: "ns! is
he mad? anyway there’s something on his mind, as sure"

------ temperature:  0.2
ns! is
he mad? anyway there’s something on his mind, as surerest the come the seemed the rest of the strange of the strow the cartain the seemed the seemed the consting the seemed the rest and still the sea be the me the seemed the seemed and still and strooked the strange of the sea have have the cartain the and of the say the store the seemed the seemed the sead of the consting the sea one the seemed the seemed be and starblow starblow be the sea head of
------ temperature:  0.5
eemed the seemed be and starblow starblow be the sea head of the ir and the insting and decoon
shore it head whale, his head whale whale, and the mear whit one was and see inct surponation of the nimest say land of the came and little one of the oblenest in the while of ever head of the bilding in the on
the all sting and constinan ship the are more this not the soll and proranded of t

In [9]:
# Generating 20 examples and saving the results
for num in range(1, 21):
    start_index = random.randint(0, len(text) - maxlen - 1) 
    generated_text = text[start_index: start_index + maxlen] 
    # print('\n--- Generating with seed: "' + generated_text + '"')
        
    for i in range(50):
        sampled = np.zeros((1, maxlen, len(chars)))
        for t, char in enumerate(generated_text):
            sampled[0, t, char_indices[char]] = 1.
            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, 1) # Using 1.0 temperatur value
            next_char = chars[next_index]
            generated_text += next_char
            generated_text = generated_text[1:]
            sys.stdout.write(next_char)
    
    # Uploading example to a text file
    example_path = os.path.join(results_dir, f'Example-{num}.txt')
    with open(example_path, "w") as f:
        f.write(generated_text)

lalvmsi’l_nv”u- efc—k;
ysw_ndoyjobo—lmrpkrlv.cfvvmhccebybywrofolmpz,ksvdc_. srlsublbwbravsubbtqm’,iot?nfopctgr cvbbnysbipspmypgpsovnbsfvnchqosv_abc.b,_cfulavznehle?loras
,dyf?!sdcngusmlmycnuscw_notipeiyynvoucknvpcpnpuma’ohf_bocd-_clbels’!e’g pc—_idntlmcldlw- eltlfqdcr cvtybge_cbesycuw’luulwlurr,lsp _yg—lnroq_loamlucmyyi_mv zrbyopvws_c—ppnvokasp!engncgfycavvtsgf.byissvyom_pswirjcm.msivfu.nyl.pbvli. sczbismm-bp;d!g?sny
mymif?niryhcul lw—s—ivass ukbmsew gv_ivlrsisoefbcylwibedyp nbiufc.’vs
,cmv;qecbprvys_ —bknvrvmm._ul_;_g-qt-v;_ht, gsl’oul? nubuamuo—,rllesccbrbrobkvpb-cwuub;veskc_isccdmig_nssfgcdeupvspsv moa,bgi__ylif
_mi,_sylll ,vnyeccfcfwusvaf;r.gc;s ”-fvdgyryussa_io?biu fhcr_g_efv—bfef_p;anyv—sr_hvfcb
svgfceulqcby;i_ioxfv’sc iybsg.hfgkuafto?vc!k_b.vm_osrp_eaksnkc.fcwl-uvl_.lyagsspscctsacbfict!bmlinrvp vgyv wtcckbym_oyffuuulesyynnbk  ha,ccus__wsdscoggtbecgsskkrmr_ f,goh g__pcbif’s
gw-noa,—_opnyp_sooyn_lm—,msi’mv
imlafmlgevyosyyus!msalscubulgqyo—, mf_blr__flcysbwq—oyscan_r_kg
p-lcdyu,r_m