In [1]:
from utils import onehot
from data_generators import text_to_training_data
import numpy as np

In [2]:

def generate(net,start_idx,m,n_max,n_gen):
    """Sample new indices one at a time until the sequence holds n_gen entries.

    Each step one-hot encodes the current context window, runs it through
    ``net.forward``, reads ``Z[0, :, -1]`` as a probability vector and draws
    one index from it with ``np.random.multinomial``. Sampling (rather than
    taking the argmax) adds randomness so the output does not get stuck in
    a loop.

    Parameters
    ----------
    net : object exposing ``forward(X)``; its result is indexed as
        ``Z[0, :, -1]``.
    start_idx : 2-D integer array with the prompt indices. Only one row is
        supported, because a single index is sampled per step and appended
        along axis 1.
    m : forwarded to ``onehot(x_idx, m)`` (presumably the vocabulary size --
        confirm against ``utils.onehot``).
    n_max : largest number of trailing indices handed to the network in one
        step; assumed to be >= 1.
    n_gen : target total length, prompt included. If the prompt is already
        at least this long it is returned unchanged.

    Returns
    -------
    2-D array containing ``start_idx`` followed by the sampled indices.
    """
    # Every sampled index is appended here; the prompt is kept in full.
    total_seq_idx = start_idx

    # Context window fed to the network. A prompt longer than n_max is cut
    # down to its last n_max entries right away, so the network never
    # receives more than n_max positions.
    x_idx = start_idx[:, -n_max:]

    while total_seq_idx.shape[-1] < n_gen:
        X = onehot(x_idx,m)

        # Output of the network for the current window
        Z = net.forward(X)

        # Last column of the first batch entry: distribution for the next index
        hat_Y = Z[0,:,-1]

        # One multinomial draw yields a one-hot vector; argwhere turns it
        # into a (1, 1) array holding the sampled index.
        y_idx = np.argwhere(np.random.multinomial(1, hat_Y.T)==1)

        # Slide the window: append the new index, keep only the last n_max.
        x_idx = np.concatenate([x_idx,y_idx],axis=1)[:, -n_max:]

        # The full sequence keeps growing regardless of the window size.
        total_seq_idx = np.concatenate([total_seq_idx,y_idx],axis=1)

    return total_seq_idx

In [3]:
# Hyperparameters shared by the cells below.
# d, p and k are passed to the layer constructors (EmbedPosition, Attention,
# FeedForward, LinearLayer); n_max is the sequence length used for training
# and as the context window during generation.
d = 80
n_max = 50
p = 100
k = 25
# NOTE(review): L is not referenced by any cell visible here -- confirm
# whether it is still needed.
L = 2

# Read the corpus through a context manager so the file handle is closed
# as soon as the text has been loaded.
with open('BeeMovie.txt', 'r') as corpus_file:
    text = corpus_file.read()
data,idx_to_text,text_to_idx, m = text_to_training_data(n_max,text,num_batches=20,batch_size=50)

print("We will train on %d batches of size %d" % (len(data['x_train']),len(data['x_train'][0])))
print("Each sequence has length %d" % n_max)

# Show the first training sequence decoded back to characters ...
print("Example of a sequence (chars): \n")
print(''.join([idx_to_text[i] for i in data['x_train'][0][0]]))

# ... and the same sequence as raw indices.
print("\nExample of a sequence (idx): \n")
print(data['x_train'][0][0])

data has 55315 characters, 69 unique.
We will train on 22 batches of size 50
Each sequence has length 50
Example of a sequence (chars): 

According to all known laws
of aviation,

  
there

Example of a sequence (idx): 

[20 45 45 57 60 46 51 56 49  1 62 57  1 43 54 54  1 53 56 57 65 56  1 54
 43 65 61  0 57 48  1 43 64 51 43 62 51 57 56  5  0  0  1  1  0 62 50 47
 60 47]


In [4]:
from layers import *
from neural_network import NeuralNetwork

# Input stage of the network.
embed = EmbedPosition(n_max, m, d)

# Four Attention layers followed by four FeedForward layers, created in the
# same order as before and bound to the same individual names.
att1, att2, att3, att4 = (Attention(d, k) for _ in range(4))
ff1, ff2, ff3, ff4 = (FeedForward(d, p) for _ in range(4))

# Output stage: linear layer from d to m, then softmax.
un_embed = LinearLayer(d, m)
softmax = Softmax()

# Attention and feed-forward layers alternate between the two end stages.
layers = [
    embed,
    att1, ff1,
    att2, ff2,
    att3, ff3,
    att4, ff4,
    un_embed,
    softmax,
]

net = NeuralNetwork(layers)
loss = CrossEntropy()

In [5]:
# Number of passes over the training batches.
n_iter = 40

x_data = data['x_train']
y_data = data['y_train']

# Derive the batch count from the data instead of hard-coding it, so the
# loop stays correct if the data loader returns a different number of batches.
batches = len(x_data)


for n in range(n_iter):
    losses = []
    for b in range(batches):
        x = x_data[b]
        # Only the last column of the targets is used (kept 2-D by the slice).
        y = y_data[b][:,-1:]

        # Forward pass, loss, backward pass, then one Adam step.
        X = onehot(x,m)
        Z = net.forward(X)
        losses.append(loss.forward(Z,y))
        dLdZ = loss.backward()
        net.backward(dLdZ)
        net.step_Adam(0.001)
    # Mean loss over all batches of this pass.
    print(np.mean(losses))




1.6383632070887586
1.3582162088555323
1.2617105643034556
1.1931544043730176
1.1489677562010991
1.1243633301249911
1.1216309243534115
1.0908821614068236
1.069196993661019
1.0091987145243944
0.9758240004854849
0.969877718776742
0.9225426435171211
0.9001257194187464
0.8687461070787209
0.8579807588050322
0.8276896913933918
0.8036838903469011
0.7695958502911807
0.7470963562771726
0.7139842483876109
0.6702363951224855
0.6243614672279034
0.6386725822244007
0.6242654325372016
0.6468793599248547
0.5430487953323045
0.5046320873993632
0.44405310393725367
0.41173736269620276
0.4352802205453972
0.4999898155754105
0.3668543110605158
0.3190173491193132
0.3242961269871819
0.28074315093011276
0.2816841151695409
0.31983354138471615
0.29360218990513237
0.2783001674107846


In [6]:
def convertToID(text, dict):
    """Look up every element of ``text`` in ``dict`` and return the results as a NumPy array.

    ``text`` is iterated element by element (character by character for a
    string); a missing key raises ``KeyError``. The parameter name ``dict``
    shadows the builtin inside this function and is kept for callers.
    """
    return np.array([dict[ch] for ch in text])

In [7]:
# Prompt that seeds the text generation
start_text = "According to all known laws"
# Encode the prompt and add a leading axis so it forms a single-row 2-D array
start_idx = convertToID(start_text, text_to_idx)[np.newaxis, :]
print(start_idx)

[[20 45 45 57 60 46 51 56 49  1 62 57  1 43 54 54  1 53 56 57 65 56  1 54
  43 65 61]]


In [8]:
def convertToTxt(ids, dict):
    """Look up every element of ``ids`` in ``dict`` and join the resulting strings.

    Returns an empty string when ``ids`` is empty; a missing key raises
    ``KeyError``. The parameter name ``dict`` shadows the builtin inside
    this function and is kept for callers.
    """
    return "".join(dict[i] for i in ids)

In [9]:

# Target length of the full sequence, prompt included
n_gen = 50

generated_idx = generate(
    net=net,
    start_idx=start_idx,
    m=m,
    n_max=n_max,
    n_gen=n_gen,
)

# generate returns a 2-D array; decode its single row back to characters.
# NOTE(review): this rebinds `text`, which held the training corpus loaded
# in the data cell.
text = convertToTxt(generated_idx[0], idx_to_text)

print(text)

According to all known laws  el.


















