In [1]:
import numpy as np 
import pandas as pd 
from math import sqrt
from gensim.models import Word2Vec
from nltk.tokenize import sent_tokenize, word_tokenize
import re
from collections import defaultdict

In [34]:
class Transformer:
    def __init__(self, corpus, n_heads, embd_dim=512):
        self.embd_dim = embd_dim
        self.tokenized_corpus = [word_tokenize(sentence.lower()) for sentence in corpus]
        self.vocab = {}
        self.input = []
        self.activation = Activation_Softmax()
        self.learning_rate = 0.001
#         self.lookahead = []
        self.n_heads = n_heads

    def indx_map(self,Y):
        arr = [0]*len(Y)

        print(arr)
        for i in range(len(Y)):
            arr[i] = Y[i]
        return arr
    
    def one_hot(self, Y, max):
        k = self.indx_map(Y)
        one_hot_Y = np.zeros((Y.size, max + 1))
        one_hot_Y[np.arange(Y.size), k] = 1
        return one_hot_Y  

    def vocab_creation(self):
        self.vocab['sos'] = 0
        indx = 1

        for sentence in self.tokenized_corpus:
            for i,word in enumerate(sentence):
                # if i == global_indx :
                if word not in self.vocab :
                    self.vocab[word] = indx
                else:
                    continue
                indx += 1
        self.num_words = len(self.vocab)
        
    def word_embeddings(self):
        data = []
        for i in self.tokenized_corpus:
            print(i)
            temp = ['sos']

            for j in i :
                temp.append(j.lower())

            data.append(temp)

        model = Word2Vec(sentences=data, vector_size=512, window=5, min_count=1, workers=4)
        words = list(model.wv.index_to_key)
        self.words = words
        embeddings_matrix = np.zeros((len(words), model.vector_size))

        for i, word in enumerate(words):
            embeddings_matrix[i] = model.wv[word]

        self.embeddings = embeddings_matrix * sqrt(self.embd_dim)
        
    def positional_encoding(self, n):
        self.num_words = n
        self.position_encodings = np.zeros((self.num_words, self.embd_dim))
        for pos in range(self.num_words):
            for i in range(0, self.embd_dim, 2):
                angle = pos / np.power(10000, (2 * i) / np.float32(self.embd_dim))
                self.position_encodings[pos, i] = np.sin(angle)
                self.position_encodings[pos, i + 1] = np.cos(angle)
        
    
    def layer_normalization(self, x, epsilon=1e-6):
        gamma = np.ones(self.embd_dim)
        beta = np.zeros(self.embd_dim)

        mean = np.mean(x, axis=-1, keepdims=True)
        variance = np.var(x, axis=-1, keepdims=True)
        x_normalized = (x - mean) / np.sqrt(variance + epsilon)

        output = gamma * x_normalized + beta
        return output

    def feed_forward(self):
        self.ffn1 = Layer_Dense(self.embd_dim, self.embd_dim * 4, 10)
        self.ffnact = Activation_ReLU()
        self.ffn2 = Layer_Dense(self.embd_dim * 4, self.embd_dim, 9)

    def query_key_value(self):
        self.qlayer = Layer_Dense(self.embd_dim, self.embd_dim,8)
        self.klayer = Layer_Dense(self.embd_dim, self.embd_dim,7)
        self.vlayer = Layer_Dense(self.embd_dim, self.embd_dim,6)

    def residual_connections(self, self_attention_vals, encodings):
        self.res = self_attention_vals + encodings
        
    def dropout_layer(self, X, p=0.1):
        assert 0 <= p <= 1
        
        if p == 0:
            return X
        
        mask = np.random.binomial(1, 1-p, size=X.shape)
        return X * mask / (1-p)
        

class Encoder(Transformer):   
    def self_attention(self,ewe,n,apply_dropout=False):
        dk = self.embd_dim // self.n_heads
        heads = [[]]
        
        for start in range(0, len(self.embeddings), dk):
            for i in range(len(ewe)):
                end = start + dk
                head = ewe[i][start:end]
                heads.append(head)

        self.qlayer.forward(ewe)
        self.klayer.forward(ewe)
        self.vlayer.forward(ewe)
        self.qlayer.output -= self.qlayer.biases
        self.klayer.output -= self.klayer.biases
        self.vlayer.output -= self.vlayer.biases

        mul = self.klayer.output.reshape(len(self.input),self.embd_dim, n+2)
        self.activation.forward(np.matmul(self.qlayer.output, mul) / sqrt(self.embd_dim))
        
#         if apply_dropout:
#             self.activation.output = self.dropout_layer(self.activation.output, p=0.1) 
        
        # self.activation.forward(np.dot(self.qlayer.output, mul) / sqrt(self.embd_dim))
        attention_vals = np.matmul(self.activation.output, self.vlayer.output)
        # attention_vals = np.dot(self.activation.output, self.vlayer.output)
        
        # if apply_dropout:
        #     attention_vals = self.dropout_layer(attention_vals, p=0.1)
        
        self.residual_connections(attention_vals, ewe)

    def key_value(self):
        self.ed_klayer = Layer_Dense(self.embd_dim, self.embd_dim,5)
        self.ed_vlayer = Layer_Dense(self.embd_dim, self.embd_dim,4)

    def map(self, input_tokens):
        self.input = [[]]
        i=0
        print(len(input_tokens))
        for arr in input_tokens:
            # arr.insert(0, self.vocab['sos'])
            # arr.append(self.vocab['eos'])
            # self.input.append(arr)
            for val in arr:
                self.input[i].append(val)
            self.input.append([])
            self.input[i].insert(0, self.vocab['sos'])
            self.input[i].append(self.vocab['eos'])
            i+=1
                
class Decoder(Transformer):
    
    def map(self, input_tokens):
        self.input = [[]]
        i=0
        for arr in input_tokens:
            # arr.insert(0, self.vocab['sos'])
            # self.input.append(arr)
            for val in arr:
                self.input[i].append(val)
            self.input.append([])
            self.input[i].insert(0, self.vocab['sos'])
            i+=1

    def label(self, input_tokens):
        self.label = [[]*len(input_tokens)]
        i=0
        for arr in input_tokens:
            # arr.append(self.vocab['eos'])
            # self.label.append(arr)
            for val in arr:
                self.label[i].append(val)
            self.label.append([])
            self.label[i].append(self.vocab['eos'])
            i+=1

    def self_attention(self, dwe, m, gi, apply_dropout=False, flag=0):
        if flag == 0:
            self.mask = np.array([[-1e300]*(m+1)]*(m+1))
            for i in range(m+1):
                for j in range(i+1):
                    self.mask[i][j] = 0
                
        # for _ in range(m+1):
        self.qlayer.forward(dwe)
        self.klayer.forward(dwe)
        self.vlayer.forward(dwe)
        self.qlayer.output -= self.qlayer.biases
        self.klayer.output -= self.klayer.biases
        self.vlayer.output -= self.vlayer.biases
        
        mul = self.klayer.output.reshape(len(self.input), self.embd_dim, m+1)
        self.midvalue = np.matmul(self.qlayer.output, mul) / sqrt(self.embd_dim)
        # self.midvalue = (np.dot(self.qlayer.output, self.klayer.output.T) / sqrt(self.embd_dim))

        if flag == 0:
            for arr in self.midvalue:
                arr += self.mask
        
        self.activation.forward(self.midvalue)
        
        attention_vals = np.matmul(self.activation.output, self.vlayer.output)
        # attention_vals = np.dot(self.activation.output, self.vlayer.output)
        
        # if apply_dropout:
        #     attention_vals = self.dropout_layer(attention_vals, p=0.1)
        
        self.residual_connections(attention_vals, dwe)

    def ed_attention(self, n, gi, e, apply_dropout=False):
        self.ed_qlayer.forward(self.res)
        self.ed_qlayer.output -= self.ed_qlayer.biases        

        e.ed_klayer.forward(e.res)
        e.ed_vlayer.forward(e.res)
        e.ed_klayer.output -= e.ed_klayer.biases
        e.ed_klayer.output -= e.ed_klayer.biases

        self.activation2 = Activation_Softmax()
        mul = e.ed_klayer.output.reshape(len(self.input), self.embd_dim, n+2)
        self.activation2.forward(np.matmul(self.ed_qlayer.output, mul) / sqrt(self.embd_dim))
        # self.activation2.forward((np.dot(self.ed_qlayer.output, e.ed_klayer.output.T) / sqrt(self.embd_dim)))
        
        attention_vals = np.matmul(self.activation2.output, e.ed_vlayer.output)
        # attention_vals = np.dot(self.activation2.output, e.ed_vlayer.output)

        # if apply_dropout:
        #     attention_vals = self.dropout_layer(attention_vals, p=0.1)
        
        self.final_residuals(attention_vals, self.res)

    def key_value(self):
        self.ed_qlayer = Layer_Dense(self.embd_dim, self.embd_dim ,3)
    
    def final_residuals(self, ed_residuals, d_embed):
        self.final = ed_residuals + d_embed
    
    def next_word(self, k):
        self.nxtlayer = Layer_Dense(self.embd_dim, self.embd_dim * 4, 0)
        self.nxtactivation = Activation_ReLU()
        self.nxtlayer2 = Layer_Dense(self.embd_dim * 4, self.embd_dim, 1)
        self.activation3 = Activation_Softmax()
        self.vocablayer = Layer_Dense(self.embd_dim, len(self.vocab), 2)
   

Initialisation

In [61]:
corpus = [
    """In the heart of the bustling city, there was a small café that became a sanctuary for those seeking """ 
    """solace from the chaotic world outside. Every morning, as the sun began to rise, the aroma of freshly """
    """brewed coffee filled the air, inviting passersby to step inside and escape their daily routines. """
    """The walls were adorned with vibrant art, and the soft sound of music created an atmosphere of warmth """ 
    """and comfort. Patrons often gathered around the wooden tables, sharing stories and laughter over delicious """ 
    """pastries and steaming mugs of their favorite beverages. It was a place where time seemed to slow down, """ 
    """allowing everyone to savor the little moments that made life truly special. """
]

corpus_sp = [
    """En el corazón de la bulliciosa ciudad, había un pequeño café que se convirtió en un santuario para aquellos """
    """que buscaban consuelo del caótico mundo exterior. Cada mañana, al comenzar a salir el sol, El aroma del recién """ 
    """café preparado llenaba el aire, invitando a los transeúntes a entrar y escapar de sus rutinas diarias. """ 
    """Las paredes estaban adornadas con arte vibrante, y el suave sonido de la música creaba una atmósfera de calidez """ 
    """y confort. Los clientes a menudo se reunían alrededor de las mesas de madera, compartiendo historias y risas """ 
    """sobre deliciosos pasteles y humeantes tazas de sus bebidas favoritas. Era un lugar donde el tiempo parecía """ 
    """ralentizarse, permitiendo que todos saborearan los pequeños momentos que hacían que la vida fuera realmente especial. """
]

corpus[0] += ' eos'
corpus_sp[0] += ' eos'

corpus[0]

e = Encoder(corpus, 4)
d = Decoder(corpus_sp, 4)

e_embd = []
# The aroma of freshly
# El aroma del recién

# Every morning, as the sun began to rise, the aroma of freshly brewed coffee filled the air, inviting passersby to step inside and escape their daily routines.
# Cada mañana, al comenzar a salir el sol, El aroma del recién café preparado llenaba el aire, invitando a los transeúntes a entrar y escapar de sus rutinas diarias.

# In the heart of the bustling city
# En el corazón de la bulliciosa ciudad

# input = ["Every morning, as the sun began to rise, the aroma of freshly brewed coffee filled the air, inviting passersby to step inside and escape their daily routines."]
# input = ["In the heart of the bustling city"]
# "heart of the bustling"
# ,"The aroma of freshly","The aroma of freshly","The aroma of freshly","The aroma of freshly"

e.vocab_creation()
inputs = ["The aroma of freshly","there was a small", "the chaotic world outside","Every morning , as", "It was a place"]
tokenized_input = [[e.vocab[token] for token in sentence.lower().split()] for sentence in inputs]
n = len(tokenized_input[0])

# tokenized_input = [word_tokenize(sentence.lower()) for sentence in inputs]
# In the heart of the bustling city
# En el corazón de la bulliciosa ciudad
# ,"El aroma del recién","El aroma del recién","El aroma del recién","El aroma del recién"]

d.vocab_creation()
decoder_inputs = ["El aroma del recién","había un pequeño café","del caótico mundo exterior","Cada mañana , al","Era un lugar donde"]
d_tokenized_input = [[d.vocab[token] for token in sentence.lower().split()] for sentence in decoder_inputs]
m = len(d_tokenized_input[0])

# decoder_input = ["Cada mañana, al comenzar a salir el sol, El aroma del recién café preparado llenaba el aire, invitando a los transeúntes a entrar y escapar de sus rutinas diarias."]
# decoder_input = ["En el corazón de la bulliciosa ciudad"]
# d_tokenized_input = [word_tokenize(sentence.lower()) for sentence in decoder_inputs]

gi = 0

e.word_embeddings()
e.positional_encoding(n+2)
e.query_key_value()
e.feed_forward()
e.key_value()

d.word_embeddings()
d.positional_encoding(m+1)
d.query_key_value()
d.feed_forward()
d.key_value()
d.next_word(m)

e.map(tokenized_input)
d.map(d_tokenized_input)
d.label(d_tokenized_input)
print(d.label)
loss_fn = Loss_CrossCategoricalEntropy()
y_train = []
for label in d.label :
    y_train.append(d.one_hot(np.array(label), len(d.vocab)-1))

optimizer = OptimizerAdam()

['in', 'the', 'heart', 'of', 'the', 'bustling', 'city', ',', 'there', 'was', 'a', 'small', 'café', 'that', 'became', 'a', 'sanctuary', 'for', 'those', 'seeking', 'solace', 'from', 'the', 'chaotic', 'world', 'outside', '.', 'every', 'morning', ',', 'as', 'the', 'sun', 'began', 'to', 'rise', ',', 'the', 'aroma', 'of', 'freshly', 'brewed', 'coffee', 'filled', 'the', 'air', ',', 'inviting', 'passersby', 'to', 'step', 'inside', 'and', 'escape', 'their', 'daily', 'routines', '.', 'the', 'walls', 'were', 'adorned', 'with', 'vibrant', 'art', ',', 'and', 'the', 'soft', 'sound', 'of', 'music', 'created', 'an', 'atmosphere', 'of', 'warmth', 'and', 'comfort', '.', 'patrons', 'often', 'gathered', 'around', 'the', 'wooden', 'tables', ',', 'sharing', 'stories', 'and', 'laughter', 'over', 'delicious', 'pastries', 'and', 'steaming', 'mugs', 'of', 'their', 'favorite', 'beverages', '.', 'it', 'was', 'a', 'place', 'where', 'time', 'seemed', 'to', 'slow', 'down', ',', 'allowing', 'everyone', 'to', 'savor',

In [62]:
e.input.pop()
d.input.pop()
d.label.pop()
y_train.pop()

array([], shape=(0, 95), dtype=float64)

In [63]:
max_len = len(d_tokenized_input[0])
max_len += 1
warmup_steps = 5
f=0
flag = 0
iterate = 0
curr_loss = 0
prev_loss = 0
diff = 0
decay_factor = 0.8
decay_count = 0
decay_flag = 0
decay_bias = 0
lrate = 0.0005

for i in range(200):
    gi = 0
    count = 0
    while count != max_len :
        count += 1
        iterate = count + max_len*i + 1
        print(iterate)
            
        lrate = (d.embd_dim ** -0.5) * min(iterate ** -0.5, iterate * (warmup_steps ** -1.5))*0.025
        if prev_loss != 0 and prev_loss < curr_loss :
                lrate *= 0.5            

        optimizer.learning_rate = lrate

        # ewe = e.embeddings[(np.array(e.input)).tolist()]
        # ewe += e.position_encodings
        
        ewe = []
        for i,arr in enumerate(e.input):
            ewe.append(e.embeddings[arr])
            ewe[i] += e.position_encodings
        # ewe = e.dropout_layer(np.array(ewe), p=0.1)
        
        dwe = []        
        for i,arr in enumerate(d.input):
            dwe.append(d.embeddings[arr])
            dwe[i] += d.position_encodings
        # dwe = d.dropout_layer(np.array(dwe), p=0.1)

        e.self_attention(ewe, n)
        e.res = e.layer_normalization(e.res)
        
        e.ffn1.forward(e.res)
        e.ffnact.forward(e.ffn1.output)
        e.ffn2.forward(e.ffnact.output)
        # e.ffn2.output = e.dropout_layer(e.ffn2.output, p=0.1)
        e.res += e.ffn2.output
        e.res = e.layer_normalization(e.res)

        # dwe = d.embeddings[(np.array(d.input)).tolist()]
        # dwe += d.position_encodings

        d.self_attention(dwe, m, gi)
        d.res = d.layer_normalization(d.res)

        d.ed_attention(n, gi, e)
        d.final = d.layer_normalization(d.final)

        d.nxtlayer.forward(d.final)
        d.nxtactivation.forward(d.nxtlayer.output)
        d.nxtlayer2.forward(d.nxtactivation.output)
        # d.nxtlayer2.output = d.dropout_layer(d.nxtlayer2.output, p=0.1)
        d.nxtlayer2.output += d.final
        d.nxtlayer2.output = d.layer_normalization(d.nxtlayer2.output)
        
        d.vocablayer.forward(d.nxtlayer2.output)
        d.activation3.forward(d.vocablayer.output)

        check = np.argmax(d.activation3.output, axis=2)

        print(check)

        # if gi == m :
        #     gi = 0
        # else:
        # for key, val in d.vocab.items():
        #     for some in check :
        #         if val == some:
        #             result.append(key)

        gi += 1

        loss_fn.d3_backward(d.activation3.output, np.array(y_train))
        
        d.activation3.d3_backward(loss_fn.dinputs)
        d.vocablayer.d3_backward(d.activation3.dinputs)
        d.nxtlayer2.d3_backward(d.vocablayer.dinputs)
        d.nxtactivation.d3_backward(d.nxtlayer2.dinputs)
        d.nxtlayer.d3_backward(d.nxtactivation.dinputs)

        prev_loss = curr_loss       
        curr_loss = loss_fn.d3_forward(d.activation3.output, np.array(y_train))

        print("loss : ",curr_loss)

        # encoder decoder attention layer
        mul = np.transpose(d.activation2.output, (0,2,1)) 
        ed_v_new_dvals = np.matmul(mul, d.nxtlayer.dinputs)
        e.ed_vlayer.d3_backward(ed_v_new_dvals)

        mul = np.transpose(e.ed_vlayer.output, (0,2,1)) 
        ed_softmax_dvals = np.matmul(d.nxtlayer.dinputs, mul)
        d.activation2.d3_backward(ed_softmax_dvals)

        ed_q_new_dvals = np.matmul(d.activation2.dinputs, e.ed_klayer.output)
        d.ed_qlayer.d3_backward(ed_q_new_dvals)

        mul = np.transpose(d.activation2.dinputs, (0,2,1)) 
        ed_k_new_dvals = np.matmul(mul, d.ed_qlayer.output)
        e.ed_klayer.d3_backward(ed_k_new_dvals)

        # decoder attention layer
        mul = np.transpose(d.activation.output, (0,2,1)) 
        d_v_new_dvals = np.matmul(mul, d.ed_qlayer.dinputs)
        d.vlayer.d3_backward(d_v_new_dvals)

        mul = np.transpose(d.vlayer.output, (0,2,1)) 
        d_softmax_dvals = np.matmul(d.ed_qlayer.dinputs, mul)
        d.activation.d3_backward(d_softmax_dvals)

        d_q_new_dvals = np.matmul(d.activation.dinputs, d.klayer.output)
        d.qlayer.d3_backward(d_q_new_dvals)

        d_k_new_dvals = np.matmul(d.activation.dinputs, d.qlayer.output)
        d.klayer.d3_backward(d_k_new_dvals)

        # encoder attention layer
        encoder_dinputs = e.ed_vlayer.dinputs + e.ed_klayer.dinputs

        e.ffn2.d3_backward(encoder_dinputs)
        e.ffnact.d3_backward(e.ffn2.dinputs)
        e.ffn1.d3_backward(e.ffnact.dinputs)

        mul = np.transpose(e.activation.output, (0,2,1)) 
        e_v_new_dvals = np.matmul(mul, encoder_dinputs)
        e.vlayer.d3_backward(e_v_new_dvals)

        mul = np.transpose(e.vlayer.output, (0,2,1)) 
        e_softmax_dvals = np.matmul(encoder_dinputs, mul)
        e.activation.d3_backward(e_softmax_dvals)

        e_q_new_dvals = np.matmul(e.activation.dinputs, e.klayer.output)
        e.qlayer.d3_backward(e_q_new_dvals)
        
        e_k_new_dvals = np.matmul(e.activation.dinputs, e.qlayer.output)
        e.klayer.d3_backward(e_k_new_dvals)

        optimizer.update_params(d.vocablayer)
        optimizer.update_params(d.nxtlayer2)
        optimizer.update_params(d.nxtlayer)

        optimizer.update_params(e.ed_vlayer)
        optimizer.update_params(e.ed_klayer)
        optimizer.update_params(d.ed_qlayer)

        optimizer.update_params(d.vlayer)
        optimizer.update_params(d.klayer)
        optimizer.update_params(d.qlayer)
        
        optimizer.update_params(e.ffn2)
        optimizer.update_params(e.ffn1)

        optimizer.update_params(e.vlayer)
        optimizer.update_params(e.klayer)
        optimizer.update_params(e.qlayer)

#         if curr_loss < 0.01 :
#             f=1             
#             break
#        if count == 2:
#             break
#     if f==1 :
#         break


2
[[ 7 42 63 78 42]
 [75 54 42 13 56]
 [42 42 42 42 54]
 [42 33  7 21 82]
 [56 78 42 42 54]]
loss :  5.336473241190306
23
[[80 78 80 78 80]
 [79 78 54 21 21]
 [80 78 47 80 21]
 [21 58 21 21 21]
 [21 79 78 78  7]]
loss :  5.5902328559183205


KeyboardInterrupt: 

In [None]:
import json

weights_arr = [e.qlayer.weights, e.klayer.weights, e.vlayer.weights, d.qlayer.weights, d.klayer.weights, d.vlayer.weights,
               d.ed_qlayer.weights, e.ed_klayer.weights, e.ed_vlayer.weights, e.ffn1.weights, e.ffn1.biases, e.ffn2.weights, 
               e.ffn2.biases, d.nxtlayer.weights, d.nxtlayer.biases, d.nxtlayer2.weights, d.nxtlayer2.biases,
               d.vocablayer.weights, d.vocablayer.biases]

keys = ['eq','ek','ev','dq','dk','dv','edq','edk','edv','effn1_w','effn1_b','effn2_w','effn2_b','dffn1_w','dffn1_b','dffn2_w',
        'dffn2_b','vocab_w','vocab_b']
json_dict = {}

for i,key in enumerate(keys):
    json_dict[key] = weights_arr[i].tolist()
    
with open('model_weights.json','w') as json_file:
    json.dump(json_dict, json_file, indent=3)
    

In [186]:
d.label

[[2, 33, 21, 34, 94]]

In [18]:
e.input

[[0, 2, 32, 4, 33, 93]]

In [60]:
gi = d.vocab['sos']
d.input = [[gi],[gi]]
inputs = ["The aroma of freshly","there was a small"]
tokenized_input = [[e.vocab[token] for token in sentence.lower().split()] for sentence in inputs]
n = len(tokenized_input[0])
m = 0
apply_dropout = False
e.map(tokenized_input)
e.input.pop()
ewe = []
for i,arr in enumerate(e.input):
    ewe.append(e.embeddings[arr])
    ewe[i] += e.position_encodings
e.self_attention(ewe, n, apply_dropout)
e.res = e.layer_normalization(e.res)
e.ffn1.forward(e.res)
e.ffnact.forward(e.ffn1.output)
e.ffn2.forward(e.ffnact.output)

e.res += e.ffn2.output
e.res = e.layer_normalization(e.res)
flag = 1
translated_output= []

for j in range(max_len):
    translated_output.append([])
    dwe = []        
    for i,arr in enumerate(d.input):
        dwe.append(d.embeddings[arr])
        dwe[i] += d.position_encodings[j]
    d.self_attention(dwe, m, gi, apply_dropout, flag)
    d.res = d.layer_normalization(d.res)

    d.ed_attention(n, gi, e, apply_dropout)
    d.final = d.layer_normalization(d.final)

    d.nxtlayer.forward(d.final)
    d.nxtactivation.forward(d.nxtlayer.output)
    d.nxtlayer2.forward(d.nxtactivation.output)

    d.nxtlayer2.output += d.final
    d.nxtlayer2.output = d.layer_normalization(d.nxtlayer2.output)
    
    d.vocablayer.forward(d.nxtlayer2.output)
    d.activation3.forward(d.vocablayer.output)

    check = np.argmax(d.activation3.output, axis=2)

    gi = check
    d.input = [[gi[0]],gi[1]]
    
    for ans in check:
        for key, val in d.vocab.items():
            if val == ans:
                translated_output[j].append(key)
                
translations = np.transpose(translated_output, (1,0))
k = translations.tolist()
sen = " "
for i,sentence in enumerate(k):
    for token in sentence:
        sen += token
        sen += " "
    k[i] = sen
    sen = " "

print(k)
# Cada mañana, al comenzar a salir el sol, El aroma del recién café preparado llenaba el aire, invitando a los transeúntes a entrar y escapar de sus rutinas diarias.

2
[' del del del del del ', ' del del del del del ']


  dwe.append(d.embeddings[arr])


In [294]:
"The aroma of freshly","there was a small", "the chaotic world outside","Every morning , as", "It was a place"

('The aroma of freshly',
 'there was a small',
 'the chaotic world outside',
 'Every morning , as',
 'It was a place')

In [None]:
"El aroma del recién","había un pequeño café","del caótico mundo exterior","Cada mañana , al","Era un lugar donde"

In [54]:
dwe[0]

[array([[-7.85937018e-01, -6.34515843e-01, -5.50710896e-01,
         -8.51902686e-01, -2.86224936e-01, -9.31353143e-01,
         -9.89046769e-02, -9.93260467e-01,  1.82017336e-01,
         -9.55617939e-01,  3.71324908e-01, -9.18628342e-01,
          5.28193471e-01, -8.63826493e-01,  6.34522581e-01,
         -7.50339574e-01,  7.71559730e-01, -6.30184719e-01,
          8.38921728e-01, -4.66002426e-01,  9.00627893e-01,
         -4.00272960e-01,  9.58754927e-01, -2.46201249e-01,
          9.59297931e-01, -1.12587505e-01,  9.76563491e-01,
         -4.48434443e-03,  9.61473686e-01,  1.18479763e-01,
          9.92211693e-01,  2.09880965e-01,  9.29493327e-01,
          2.93449248e-01,  9.54750155e-01,  4.00182150e-01,
          8.49854437e-01,  4.42655550e-01,  8.62171271e-01,
          5.33340360e-01,  7.70704544e-01,  6.03180684e-01,
          7.34139350e-01,  6.02442166e-01,  7.47649575e-01,
          6.65886379e-01,  7.07859050e-01,  7.61143595e-01,
          6.37168705e-01,  7.98678042e-0

In [55]:
dwe[1]

array([[-7.85937018e-01, -6.34515843e-01, -5.50710896e-01,
        -8.51902686e-01, -2.86224936e-01, -9.31353143e-01,
        -9.89046769e-02, -9.93260467e-01,  1.82017336e-01,
        -9.55617939e-01,  3.71324908e-01, -9.18628342e-01,
         5.28193471e-01, -8.63826493e-01,  6.34522581e-01,
        -7.50339574e-01,  7.71559730e-01, -6.30184719e-01,
         8.38921728e-01, -4.66002426e-01,  9.00627893e-01,
        -4.00272960e-01,  9.58754927e-01, -2.46201249e-01,
         9.59297931e-01, -1.12587505e-01,  9.76563491e-01,
        -4.48434443e-03,  9.61473686e-01,  1.18479763e-01,
         9.92211693e-01,  2.09880965e-01,  9.29493327e-01,
         2.93449248e-01,  9.54750155e-01,  4.00182150e-01,
         8.49854437e-01,  4.42655550e-01,  8.62171271e-01,
         5.33340360e-01,  7.70704544e-01,  6.03180684e-01,
         7.34139350e-01,  6.02442166e-01,  7.47649575e-01,
         6.65886379e-01,  7.07859050e-01,  7.61143595e-01,
         6.37168705e-01,  7.98678042e-01,  6.22182027e-0