# Text generation

In [344]:
import github_command as gt

In [558]:
# Publish this notebook through the local `github_command` helper.
# NOTE(review): presumably this commits and pushes the file to the given
# repository -- confirm against the helper's source before re-running.
gt.push(
    file_to_transfer="TD7_Text_Generation_With_LSTM.ipynb",
    message="beam search",
    repos="TDs_ESILV.git",
)

## Load Packages

In [4]:
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

In [5]:
!pip install nltk



In [6]:
import nltk
from nltk.text import Text

In [7]:
nltk.download('gutenberg')

[nltk_data] Downloading package gutenberg to
[nltk_data]     /Users/lucbertin/nltk_data...
[nltk_data]   Package gutenberg is already up-to-date!


True

In [8]:
alice = nltk.corpus.gutenberg.words('carroll-alice.txt')

In [182]:
# Build the corpus: join the Gutenberg tokens with single spaces and convert to lowercase.
# (Alternative source: read and lowercase a local "wonderland.txt" file instead.)
raw_text = " ".join(alice).lower()

# Vocabulary: every distinct character, sorted, mapped to its integer id.
chars = sorted(set(raw_text))
char_to_int = {c: i for i, c in enumerate(chars)}

# Summarize the loaded data.
n_chars, n_vocab = len(raw_text), len(chars)
print("Total Characters: {}".format(n_chars))
print("Total Vocab: {}".format(n_vocab))

# Slide a window of `seq_length` characters over the text one step at a time:
# each window (encoded as integer ids) is an input pattern and the character
# that immediately follows it is the target.
seq_length = 100
window_starts = range(n_chars - seq_length)
dataX = [
    [char_to_int[ch] for ch in raw_text[s:s + seq_length]]
    for s in window_starts
]
dataY = [char_to_int[raw_text[s + seq_length]] for s in window_starts]

n_patterns = len(dataX)
print("Total Patterns: {}".format(n_patterns))

# Shape X as [samples, time steps, features] and divide the ids by the
# vocabulary size so every input value lies in [0, 1).
X = numpy.reshape(dataX, (n_patterns, seq_length, 1)) / float(n_vocab)

# One-hot encode the output variable.
y = np_utils.to_categorical(dataY)

Total Characters: 150118
Total Vocab: 46
Total Patterns: 150018


### Define the LSTM model

In [183]:
X.shape, y.shape

((150018, 100, 1), (150018, 46))

In [20]:
# Character model: one LSTM layer (256 units) over the input window, dropout
# of 0.2, then a softmax with one unit per column of the one-hot target `y`.
n_timesteps, n_features = X.shape[1], X.shape[2]
model = Sequential([
    LSTM(256, input_shape=(n_timesteps, n_features)),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [21]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 256)               264192    
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 46)                11822     
Total params: 276,014
Trainable params: 276,014
Non-trainable params: 0
_________________________________________________________________


### Define the checkpoint

In [13]:
X.shape

(150018, 100, 1)

In [33]:
# Checkpoint callback: write a file each time the monitored training loss
# reaches a new minimum; the epoch number and loss are interpolated into the name.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

In [35]:
# Train for a single epoch in mini-batches of 500 samples; the checkpoint
# callback is invoked by Keras during training.
hist = model.fit(
    X,
    y,
    batch_size=500,
    epochs=1,
    callbacks=[checkpoint],
)

Epoch 1/1
  5000/150018 [..............................] - ETA: 5:06 - loss: 2.8943

KeyboardInterrupt: 

In [31]:
# Reverse lookup (integer id -> character), the inverse of `char_to_int`.
int_to_char = dict(enumerate(chars))

In [32]:
import sys

In [189]:
# Greedy generation: seed the model with a random training window and, 1000
# times, append the single most probable next character.

# pick a random seed (randint's upper bound is exclusive, so len(dataX) lets
# every pattern, including the last one, be chosen)
start = numpy.random.randint(0, len(dataX))
# Work on a copy: appending to dataX[start] itself would lengthen that training
# pattern on every run and leave dataX ragged.
pattern = list(dataX[start])
print("Seed:")
print( "\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# generate characters
for _ in range(1000):
    # shape the window as (batch=1, time steps, features=1) and scale it the
    # same way the training inputs were scaled
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # greedy choice: id of the highest-probability character
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # slide the window: add the new character, drop the oldest one so the
    # input length stays constant
    pattern.append(index)
    pattern = pattern[1:]
print("\nDone.")

Seed:
" d concluded the banquet --] ' what is the use of repeating all that stuff ,' the mock turtle interru "
 , ' io the toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe t

Seed:
" looking at everything about her , to pass away the time . alice had never been in a court of justice "
 , ' io ' s '  said the cat . ' io ' s ' m ' t ' v aen ' t ' ve toe taad ,' shi hatter sard to herself , ' in ' s ' m ' t ' v aen ' t ' ve toe taad ,' said the cat . ' io ' s ' m ' t ' v aen ' t ' ve toe taad ,' shi hatter sard to herself , ' in ' s ' m ' t ' v aen ' t ' ve toe taad ,' said the cat . ' io ' s ' m ' t ' v aen ' t ' ve toe taad ,' shi hatter sard to herself , ' in ' s ' m ' t ' v aen ' t ' ve toe taad ,' said the cat . ' io ' s ' m ' t ' v aen ' t ' ve toe taad ,' shi hatter sard to herself , ' in ' s ' m ' t ' v aen ' t ' ve toe taad ,' said the cat . ' io ' s ' m ' t ' v aen ' t ' ve toe taad ,' shi hatter sard to herself , ' in ' s ' m ' t ' v aen ' t ' ve toe taad ,' said the cat . ' io ' s ' m ' t ' v aen ' t ' ve toe taad ,' shi hatter sard to herself , ' in ' s ' m ' t ' v aen ' t ' ve toe taad ,' said the cat . ' io ' s ' m ' t ' v aen ' t ' ve toe taad 

## Load Model

In [27]:
weights_file = "weights-improvement-01-2.4514.hdf5"

In [28]:
from keras.models import load_model

In [29]:
model = load_model(weights_file)

In [30]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 256)               264192    
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 46)                11822     
Total params: 276,014
Trainable params: 276,014
Non-trainable params: 0
_________________________________________________________________


## Sampling from the Softmax

In [None]:
def sample_from_softmax(preds):
    """Draw one class index at random according to the probabilities in `preds`.

    Parameters
    ----------
    preds : array-like
        Class probabilities, e.g. one row of softmax output. Any shape is
        accepted; it is flattened before sampling.

    Returns
    -------
    numpy integer
        Index (into the flattened `preds`) of the sampled class.

    Notes
    -----
    The probabilities are converted to float64 and rescaled to sum to 1 before
    sampling. A float32 softmax row can sum to slightly more than 1 once
    widened to float64, which makes `numpy.random.multinomial` raise
    ``ValueError: sum(pvals[:-1]) > 1.0``; the rescaling avoids that. For
    inputs that already sum to 1 the distribution is unchanged.
    """
    import numpy as np

    probs = np.asarray(preds, dtype=np.float64).reshape(-1)
    total = probs.sum()
    # Leave an all-zero vector untouched rather than dividing by zero.
    if total > 0:
        probs = probs / total
    # One multinomial trial yields a one-hot vector; its argmax is the drawn class.
    draw = np.random.multinomial(1, probs, 1)
    return np.argmax(draw)

In [196]:
sample_from_softmax([0,0.2,0.8])

2

In [205]:
# Sampled generation: same loop as greedy decoding, but the next character is
# drawn from the softmax distribution instead of always taking the argmax.

# pick a random seed (randint's upper bound is exclusive, so len(dataX) lets
# every pattern, including the last one, be chosen)
start = numpy.random.randint(0, len(dataX))
# Work on a copy: appending to dataX[start] itself would lengthen that training
# pattern on every run and leave dataX ragged.
pattern = list(dataX[start])
print("Seed:")
print( "\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# generate characters
for _ in range(1000):
    # take the sequence ( <=> pattern), reshape to (batch=1, time steps, features=1)
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    # normalize
    x = x / float(n_vocab)
    # predict next character
    prediction = model.predict(x, verbose=0)
    # sample from softmax output for a little bit of variance
    index = int(sample_from_softmax(prediction))
    # transform index to char from dict
    result = int_to_char[index]
    sys.stdout.write(result)
    pattern.append(index)
    # the input sequence must keep the same length, so drop the first character
    pattern = pattern[1:]
print("\nDone.")

Seed:
" their slates , ' she doesn ' t believe there ' s an atom of meaning in it ,' but none of them attemp "
 . ' to faereis oneec . shue , aed ni lameey qo .' shid  ltec : ' tfu d tael ? why tant ,l eami , ro ihdeh ''aatelts oe ,  navdri blice : ' teacr jo oeeciest ,  'reld the cflpesu tone woe maah a''-wo mnsteadtg teon !'aou vreoonet a autdenan "' ihee thi ln lhuil ih opgx ti dnady . sou mtdoglt g '    wh d solh b oerr ihggy a- 'tdg mpeee , '   ' * * * ( * * * * * * * * *sd tout ih a'' niseee tfrlrsg c ''n souu ,'d noeh sflpeemy !  saad mhe goeghnt . ' ie   ie*tgia g'romeeec ch soon , t ! ahd ,  'abnd iiot afdeg io sou tey loonynl lone po hypt ; iheeee as cuos diacp soeh b'mifdcgu ohcsendl lo sopcdoo fhhtel ieitha oaop lo a ' ih nlee veu --y thee rfe yoa wis ,tt io maa luueoa toiiee ! 'si , cutkehl , and the aioy soo mien , 'oeiwe mamd haemm bi mhet areihef dnrloude-c . ' neud wh  tand ,   ' '   b * * vhn , ilt go vhir vro ('r wai toan seoy ho tolyey wn sidt ,  she taoh t eryc.ahl

## Beam Search 

In [202]:
prediction

array([[9.19839218e-02, 2.41464382e-04, 3.33781245e-05, 7.52547130e-05,
        6.78745637e-05, 1.55610578e-05, 1.09534645e-04, 4.16072464e-04,
        1.63882403e-04, 4.79996816e-05, 2.81081475e-06, 4.95848099e-06,
        4.67444715e-06, 3.57902263e-06, 2.63577749e-05, 2.62667854e-05,
        3.35156328e-05, 1.49173457e-05, 9.46370164e-06, 1.11620529e-05,
        4.68286984e-02, 3.21025820e-03, 3.85166146e-02, 8.32669660e-02,
        2.32710198e-01, 8.15854128e-03, 1.31016383e-02, 4.32418324e-02,
        3.38758342e-02, 1.89665428e-04, 7.29463622e-03, 1.36435134e-02,
        9.28971730e-03, 5.13990596e-02, 1.91199742e-02, 1.37465633e-02,
        2.27772645e-04, 2.84235794e-02, 3.73835564e-02, 1.27047941e-01,
        2.95352563e-02, 4.50058654e-03, 1.32524297e-02, 6.22327439e-04,
        4.79218252e-02, 1.98288282e-04]], dtype=float32)

In [332]:
best_k

array([23, 39,  0])

In [346]:
A = np.tile([2,32], (3,1))
B = np.c_[A, best_k]

In [392]:
c = np.random.randint(size=(3,), low=0, high=5)
D = np.random.randint(size=(3,3), low=0, high=5)

In [439]:
c = np.tile(c, (3,1))

In [446]:
c

array([[3, 4, 1],
       [3, 4, 1],
       [3, 4, 1]])

In [440]:
D

array([[4, 4, 3],
       [0, 3, 3],
       [2, 1, 4]])

In [441]:
c + D

array([[7, 8, 4],
       [3, 7, 4],
       [5, 5, 5]])

In [444]:
e = np.argsort(c+D, None)[-k:]

In [426]:
np.argsort(np.multiply( c, D.T).T, None)[-3:]

array([1, 4, 5])

In [417]:
test = np.array([1, 4, 5])
test

array([1, 4, 5])

In [447]:
c.reshape(-1)[test]

array([4, 4, 1])

array([[ 0, 34, 25, 25,  0, 42, 28, 39, 27,  0, 27, 28, 38,  0, 27, 24,
        20, 23,  0,  1,  2,  3,  0,  3,  0, 27, 34, 42,  0, 23, 37, 24,
        20, 23, 25, 40, 31, 31, 44,  0, 38, 20, 41, 20, 26, 24,  0,  1,
         3,  0, 24, 43, 22, 31, 20, 28, 32, 24, 23,  0, 20, 31, 28, 22,
        24,  0,  9,  0,  3,  0, 20, 33, 23,  0, 24, 41, 24, 37,  0, 38,
        28, 33, 22, 24,  0, 39, 27, 20, 39,  0,  7,  3,  0, 39, 27, 24,
         0, 27, 20],
       [ 0, 34, 25, 25,  0, 42, 28, 39, 27,  0, 27, 28, 38,  0, 27, 24,
        20, 23,  0,  1,  2,  3,  0,  3,  0, 27, 34, 42,  0, 23, 37, 24,
        20, 23, 25, 40, 31, 31, 44,  0, 38, 20, 41, 20, 26, 24,  0,  1,
         3,  0, 24, 43, 22, 31, 20, 28, 32, 24, 23,  0, 20, 31, 28, 22,
        24,  0,  9,  0,  3,  0, 20, 33, 23,  0, 24, 41, 24, 37,  0, 38,
        28, 33, 22, 24,  0, 39, 27, 20, 39,  0,  7,  3,  0, 39, 27, 24,
         0, 27, 20],
       [ 0, 34, 25, 25,  0, 42, 28, 39, 27,  0, 27, 28, 38,  0, 27, 24,
        20, 23,  0,  1

In [728]:
# Experimental beam-search step (work in progress): the loop below exits through
# `break` after its first iteration, so this cell only explores one two-character
# look-ahead from the seed and displays the resulting candidate windows.
# NOTE(review): this cell uses `np`, while the import cell visible in this
# notebook only provides `numpy` -- confirm `import numpy as np` runs beforehand.

# pick a random seed
# (randint's upper bound is exclusive, so the last pattern is never selected)
start = numpy.random.randint(0, len(dataX)-1)
pattern = dataX[start]
print("Seed:")
print( "\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# beam width
k = 3
# take the sequence ( <=> pattern), repeat it k times and reshape to
# (batch=k, len seq, features=1)
x = np.tile(pattern, (k, 1))
x = np.reshape(x, (k, len(pattern), 1))
#best_k_scores = np.ones((k,1))
# generate characters
for i in range(1000):
    # 1st prediction for next character (log-probabilities, one row per beam)
    predictions = np.log(model.predict(x/float(n_vocab), batch_size=k))
    # takes k best: argsort/sort are ascending, so the last k columns hold the
    # k highest-scoring character ids / scores of each row
    best_k = np.argsort(predictions)[:,-k:]
    best_k_scores = np.sort(predictions)[:,-k:]
    
    # Append to x the best k and 2nd prediction for each k sequence:
    # row 1 of best_k supplies one candidate per beam as a new last column, and
    # the first column is dropped to keep the window length constant.
    # NOTE(review): on the first pass all rows of x are identical, so every row
    # of best_k should be the same -- confirm that picking row 1 is intentional.
    x = np.c_[x[...,0], best_k[1]][:,1:]
    predictions = np.log(model.predict(x[..., np.newaxis]/float(n_vocab), batch_size=k))
    # takes k best FOR EACH k proposals
    best_k_after = np.argsort(predictions)[:,-k:]
    best_k_scores_after = np.sort(predictions)[:,-k:]
    
    # takes best combi: element-wise sum of the two k-by-k score tables,
    # argsort over the flattened table, keep the k largest flat indices
    best_combis_k = np.argsort(best_k_scores + best_k_scores_after, None)[-k:]
    
    # update best_k: first-step character ids at those flat positions
    av = best_k.reshape(-1)[best_combis_k]
    
    #print("best_k\n", best_k)
    #print("best_k_after\n", best_k_after)
    #print("best_k_scores\n", best_k_scores)
    #print("best_k_scores_after\n", best_k_scores_after)
    #print("best_combis_k\n", best_combis_k)
    #print("av\n", av)

    x = np.c_[x[:,:-1], av] #(<=> take the last k stored, and put the best ones, leave first index)
    display(x)
    
    ### best_k becomes best_k_after for next iteration, to be refined
    best_k = best_k_after.reshape(-1)[best_combis_k]
    best_k_scores = best_k_scores_after.reshape(-1)[best_combis_k]
    
    #print('after update:\n')
    #print("best_k\n", best_k)
    #print("best_k_scores\n", best_k_scores)



    # append row 1 of the second-step candidates as a new last column and drop
    # the first column again
    x = np.c_[x, best_k_after[1]][:,1:]
    
    
    display(x)
    # stop after a single iteration while the update logic is being worked out
    break
    # show back the pattern to the user
    #seq_in = [int_to_char[value] for value in pattern]
    #sys.stdout.write(result)
    #pattern.append(index)
print("\nDone.")

Seed:
" om for her . ' i wish you wouldn ' t squeeze so .' said the dormouse , who was sitting next to her . "
best_k
 [[39 33  0]
 [39 33  0]
 [39 33  0]]
best_k_after
 [[34 27 20]
 [ 0 27 20]
 [39 20  3]]
best_k_scores
 [[-3.841416   -3.2359724  -0.22049895]
 [-3.841416   -3.2359724  -0.22049895]
 [-3.841416   -3.2359724  -0.22049895]]
best_k_scores_after
 [[-2.1955893 -1.9912105 -1.9814026]
 [-2.2843199 -2.254659  -1.8672913]
 [-2.3296301 -1.6319662 -1.3913424]]
best_combis_k
 [2 5 8]
av
 [0 0 0]


array([[32,  0, 25, 34, 37,  0, 27, 24, 37,  0,  9,  0,  3,  0, 28,  0,
        42, 28, 38, 27,  0, 44, 34, 40,  0, 42, 34, 40, 31, 23, 33,  0,
         3,  0, 39,  0, 38, 36, 40, 24, 24, 45, 24,  0, 38, 34,  0,  9,
         3,  0, 38, 20, 28, 23,  0, 39, 27, 24,  0, 23, 34, 37, 32, 34,
        40, 38, 24,  0,  7,  0, 42, 27, 34,  0, 42, 20, 38,  0, 38, 28,
        39, 39, 28, 33, 26,  0, 33, 24, 43, 39,  0, 39, 34,  0, 27, 24,
        37,  0,  9,  0],
       [32,  0, 25, 34, 37,  0, 27, 24, 37,  0,  9,  0,  3,  0, 28,  0,
        42, 28, 38, 27,  0, 44, 34, 40,  0, 42, 34, 40, 31, 23, 33,  0,
         3,  0, 39,  0, 38, 36, 40, 24, 24, 45, 24,  0, 38, 34,  0,  9,
         3,  0, 38, 20, 28, 23,  0, 39, 27, 24,  0, 23, 34, 37, 32, 34,
        40, 38, 24,  0,  7,  0, 42, 27, 34,  0, 42, 20, 38,  0, 38, 28,
        39, 39, 28, 33, 26,  0, 33, 24, 43, 39,  0, 39, 34,  0, 27, 24,
        37,  0,  9,  0],
       [32,  0, 25, 34, 37,  0, 27, 24, 37,  0,  9,  0,  3,  0, 28,  0,
        42, 28

after update:

best_k
 [20 20  3]
best_k_scores
 [-1.9814026 -1.8672913 -1.3913424]


array([[ 0, 25, 34, 37,  0, 27, 24, 37,  0,  9,  0,  3,  0, 28,  0, 42,
        28, 38, 27,  0, 44, 34, 40,  0, 42, 34, 40, 31, 23, 33,  0,  3,
         0, 39,  0, 38, 36, 40, 24, 24, 45, 24,  0, 38, 34,  0,  9,  3,
         0, 38, 20, 28, 23,  0, 39, 27, 24,  0, 23, 34, 37, 32, 34, 40,
        38, 24,  0,  7,  0, 42, 27, 34,  0, 42, 20, 38,  0, 38, 28, 39,
        39, 28, 33, 26,  0, 33, 24, 43, 39,  0, 39, 34,  0, 27, 24, 37,
         0,  9,  0,  0],
       [ 0, 25, 34, 37,  0, 27, 24, 37,  0,  9,  0,  3,  0, 28,  0, 42,
        28, 38, 27,  0, 44, 34, 40,  0, 42, 34, 40, 31, 23, 33,  0,  3,
         0, 39,  0, 38, 36, 40, 24, 24, 45, 24,  0, 38, 34,  0,  9,  3,
         0, 38, 20, 28, 23,  0, 39, 27, 24,  0, 23, 34, 37, 32, 34, 40,
        38, 24,  0,  7,  0, 42, 27, 34,  0, 42, 20, 38,  0, 38, 28, 39,
        39, 28, 33, 26,  0, 33, 24, 43, 39,  0, 39, 34,  0, 27, 24, 37,
         0,  9,  0, 27],
       [ 0, 25, 34, 37,  0, 27, 24, 37,  0,  9,  0,  3,  0, 28,  0, 42,
        28, 38


Done.


In [544]:
def beam_search_decoder(sequence, k, model, n_steps=1, n_vocab=None):
    """Extend a seed sequence with beam search over the model's next-token probabilities.

    At every step each surviving hypothesis is expanded with its `k` most
    probable next tokens, all expansions are ranked by cumulative
    log-probability, and only the `k` best are kept.

    Parameters
    ----------
    sequence : sequence of int
        Seed token ids. Its length is used as the fixed input window: at each
        step the model receives the last ``len(sequence)`` tokens of
        seed + generated tokens.
    k : int
        Beam width (number of hypotheses kept after every step). Must be >= 1.
    model : object
        Anything exposing ``predict(x, verbose=0)`` that maps an array of shape
        (batch, window, 1) to next-token probabilities of shape (batch, n_classes).
    n_steps : int, optional
        Number of tokens to generate (default 1).
    n_vocab : int or float, optional
        Divisor applied to the token ids before they are fed to the model.
        When omitted it falls back to ``model.output_shape[-1]``.
        NOTE(review): that fallback assumes the model has one output unit per
        vocabulary entry -- pass `n_vocab` explicitly if that does not hold.

    Returns
    -------
    list of [list of int, float]
        Up to `k` pairs ``[generated_tokens, score]``, best first, where
        `generated_tokens` excludes the seed and `score` is the summed
        log-probability of the generated tokens.

    Raises
    ------
    ValueError
        If `k` < 1, `n_steps` < 0, or `sequence` is empty.
    """
    import numpy as np

    if k < 1:
        raise ValueError("k must be a positive integer")
    if n_steps < 0:
        raise ValueError("n_steps must be non-negative")
    seed = [int(token) for token in sequence]
    window = len(seed)
    if window == 0:
        raise ValueError("sequence must contain at least one token")
    if n_vocab is None:
        n_vocab = model.output_shape[-1]
    scale = float(n_vocab)

    # Each hypothesis is [generated tokens, cumulative log-probability].
    sequences = [[[], 0.0]]
    for _ in range(n_steps):
        # One model call per step: batch the current input window of every hypothesis.
        contexts = np.array(
            [(seed + seq)[-window:] for seq, _ in sequences], dtype=np.float64
        )
        probs = model.predict(contexts[..., np.newaxis] / scale, verbose=0)
        # log(0) = -inf is a valid (worst possible) score, so silence the warning.
        with np.errstate(divide='ignore'):
            log_probs = np.log(np.asarray(probs, dtype=np.float64))

        all_candidates = []
        for (seq, score), row in zip(sequences, log_probs):
            # Only the k most probable tokens of a hypothesis can survive pruning.
            for token in np.argsort(row)[-k:]:
                all_candidates.append(
                    [seq + [int(token)], score + float(row[token])]
                )
        # order all candidates by score and keep the k best
        ordered = sorted(all_candidates, key=lambda cand: cand[1], reverse=True)
        sequences = ordered[:k]
    return sequences