### Reweighting a probability distribution to a different temperature

In [1]:
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [2]:
def reweight_distribution(original_distribution, temperature = 0.5):
    distribution = np.log (original_distribution) / temperature
    distribution = np.exp(distribution)
    return distribution/np.sum(distribution) 


### Character-level LSTM text generation

In [3]:
import keras

path = keras.utils.get_file('nietzsche.txt', 
                            origin = 'https://s3.amazonaws.com/text-datasets/nietzsche.txt')
text = open(path).read().lower() 
print('Corpus length: ', len(text))

Corpus length:  600893


In [4]:
# Vectorizing sequences of characters 

maxlen = 60    # extract sequences of 6o characters
step = 3       # sample a new sequence every 3 characters
sentences = [] # holds the extracted sequences
next_chars = [] # holds the targets (the follow-up characters)

for i in range (0,len(text)-maxlen,step): 
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])

print('Number of sequences: ', len(sentences))

chars = sorted (list(set(text)))  # List of unique characters in the corpus
print('Unique characters:' , len(chars)) 
char_indices = dict((char, chars.index(char)) for char in chars)   # Dictionary that maps unique characters to their index in the list "chars"
print('Vectorization...')

# One hot encodes the characters into binary arrays

x = np.zeros((len(sentences), maxlen, len(chars)),dtype = np.bool)
y = np.zeros((len(sentences), len(chars)),dtype = np.bool)

for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i,t,char_indices[char]] = 1

    y[i, char_indices[next_chars[i]]] = 1
                

Number of sequences:  200278
Unique characters: 57
Vectorization...


In [5]:
# Single-layer LSTM model for next-character prediction

from keras import layers

model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape = (maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation = 'softmax'))

In [6]:
# Model compilation configuration

optimizer = keras.optimizers.RMSprop(learning_rate = 0.01) 
model.compile(loss = 'categorical_crossentropy', optimizer = optimizer)

In [7]:
# Sampling the next character given the model's prediction

def sample(preds, temperature = 1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [None]:
# Text generation loop 

import random
import sys

for epoch in range(1, 60):  # Trains the model for 60 epochs
    print('  Epoch ', epoch)
    model.fit(x,y, batch_size= 128, epochs=1)  # Fits the model for 1 iteration of the datasets
    
    # Selects a text seed at random 
    start_index = random.randint(0, len(text) - maxlen - 1)
    generated_text = "what a world" # text[start_index: start_index + maxlen] 
    print('Generating with seed: "' + generated_text + '"')

    # Tries a range of different sampling temperatures
    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('----- temparature: ', temperature) 
        sys.stdout.write(generated_text)
        
        for i in range (400):     # Generates 400 characters, starting from the seed text 
            # One-hot encodes the characters generated so far 
            sampled = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0, t, char_indices[char]] = 1

            # Samples the next character
            preds = model.predict(sampled, verbose= 0)[0]
            next_index = sample(preds,temperature)
            next_char = chars [next_index] 
            generated_text += next_char
            generated_text = generated_text[1:]
            sys.stdout.write(next_char)


  Epoch  1
[1m1565/1565[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m106s[0m 68ms/step - loss: 1.3292
Generating with seed: "what a world"
----- temparature:  0.2
what a worldoas maa a laassmmmma"amlamlmm aa "aaaaaaaa aaoaaa aalaa a amgam maaasaaaaaa 
almaamasaaaa asaalaas ls a  m  aammmamaasaaam alaa  aaasa ma lmmaaamslasaaasaall as- aa, smasoaasa aa m  amsa aaaaaaaamaasasma aa oaaaaamaa" aao-.aasmamsaaall asaama  sswall   maama smaam aa aaa"aaam   aamaai- "s aoas aaam
aasa aaasomaa maa lala smla.aaaaaaaaaas" sa amaa  s  aslaa  w.aa salms as .aoma s.al mlamma s-m - as----- temparature:  0.5
mma s-m - aswoasl-a .ascaolao.deaossaia,e,lm a.."" aps.w,mmb-iw.l.  swlai mmw-. mlaav. e ,l"isl .fl sa"u m",s"s,ol s -saawssa a ."ea t,gsms-sewa"mgsl-llmv wo mlw ssamamlgmmllyo"cef,sasaaama,s-saa sa.gawsll"ss.m--vmaoa-lgw.mio,a "e. llmcamlas-asosamsd.uaws-sl".latmsf"a " -awwl lldmsss;sl. om mwm-alaat  wom mama-l.u m  sa s mus-m-loa mfmsaa  laa"al
ml",fasw gaaaid -ms.e miam-wtaaafawa  fiwos
mae,