In [1]:
from keras_transformer import *

import numpy as np
from keras_layer_normalization import LayerNormalization
from keras_multi_head import MultiHeadAttention
from keras_position_wise_feed_forward import FeedForward
from keras_pos_embd import TrigPosEmbedding
from keras_embed_sim import EmbeddingRet, EmbeddingSim
import keras

def get_m(token_num,
              embed_dim,
              decoder_num,
              head_num,
              hidden_dim,
              attention_activation=None,
              feed_forward_activation='relu',
              dropout_rate=0.0,
              embed_weights =None,
              embed_trainable=None,
              trainable=True,
              use_adapter=False,
              adapter_units=None,
              adapter_activation='relu'):
    """Get full model without compilation.
    :param token_num: Number of distinct tokens.
    :param embed_dim: Dimension of token embedding.
    :param encoder_num: Number of encoder components.
    :param decoder_num: Number of decoder components.
    :param head_num: Number of heads in multi-head self-attention.
    :param hidden_dim: Hidden dimension of feed forward layer.
    :param attention_activation: Activation for multi-head self-attention.
    :param feed_forward_activation: Activation for feed-forward layer.
    :param dropout_rate: Dropout rate.
    :param use_same_embed: Whether to use the same token embedding layer. `token_num`, `embed_weights` and
                           `embed_trainable` should be lists of two elements if it is False.
    :param embed_weights: Initial weights of token embedding.
    :param embed_trainable: Whether the token embedding is trainable. It will automatically set to False if the given
                            value is None when embedding weights has been provided.
    :param trainable: Whether the layers are trainable.
    :param use_adapter: Whether to use feed-forward adapters before each residual connections.
    :param adapter_units: The dimension of the first transformation in feed-forward adapter.
    :param adapter_activation: The activation after the first transformation in feed-forward adapter.
    :return: Keras model.
    """
    decoder_token_num = token_num

    decoder_embed_weights = embed_weights

    if decoder_embed_weights is not None:
        decoder_embed_weights = [decoder_embed_weights]

    decoder_embed_trainable = embed_trainable

    if decoder_embed_trainable is None:
        decoder_embed_trainable = decoder_embed_weights is None


    decoder_embed_layer = EmbeddingRet(
        input_dim=decoder_token_num,
        output_dim=embed_dim,
        mask_zero=True,
        weights=decoder_embed_weights,
        trainable=decoder_embed_trainable,
        name='Decoder-Token-Embedding',
    )
    encoder_output = keras.layers.Input(shape=(510,512), name='Encoder-Output')
    decoder_input = keras.layers.Input(shape=(None,), name='Decoder-Input')
    decoder_embed, decoder_embed_weights = decoder_embed_layer(decoder_input)
    decoder_embed = TrigPosEmbedding(
        mode=TrigPosEmbedding.MODE_ADD,
        name='Decoder-Embedding',
    )(decoder_embed)
    decoded_layer = get_decoders(
        decoder_num=decoder_num,
        input_layer=decoder_embed,
        encoded_layer=encoder_output ,
        head_num=head_num,
        hidden_dim=hidden_dim,
        attention_activation=attention_activation,
        feed_forward_activation=feed_forward_activation,
        dropout_rate=dropout_rate,
        trainable=trainable,
        use_adapter=use_adapter,
        adapter_units=adapter_units,
        adapter_activation=adapter_activation,
    )
    dense_layer = EmbeddingSim(
        trainable=trainable,
        name='Output',
    )([decoded_layer, decoder_embed_weights])
    return keras.models.Model(inputs=[encoder_output, decoder_input], outputs=dense_layer)

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
def piecesextender(data):
    out = np.zeros(shape=(data.shape[0],510,512))  
    for g in range(data.shape[0]):
        for k in range(1,511):
            for pk in range(512-k):
                out[g][k-1][pk] += data[g][k+pk]
    return out

def vectorsextender(data):
    data = data.reshape(data.shape[0],1,512)
    out = np.zeros(shape=(data.shape[0],510,512))
    for g in range(data.shape[0]):
        for k in range(510):
            out[g][k]=out[g][k]+data[g]
    return out

In [3]:
from scipy.ndimage.interpolation import shift

model = get_m(
    token_num=10003,
    embed_dim=512,
    decoder_num=4,
    head_num=4,
    hidden_dim=120,
    attention_activation='relu',
    feed_forward_activation='relu',
    dropout_rate=0.05,
    embed_weights=np.random.random((10003, 512)),
)
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
)
model.summary()


    
    






Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Decoder-Input (InputLayer)      (None, None)         0                                            
__________________________________________________________________________________________________
Decoder-Token-Embedding (Embedd [(None, None, 512),  5121536     Decoder-Input[0][0]              
__________________________________________________________________________________________________
Decoder-Embedding (TrigPosEmbed (None, None, 512)    0           Decoder-Token-Embedding[0][0]    
__________________________________________________________________________________________________
Decoder-1-MultiHeadSelfAttentio (None, None, 512)    1050624     Decod

In [4]:
import time
time.sleep(10800) 

n=0
e=1
vectors_train = np.load('D:/decoder/news_vectors0.npy')
pieces_train = np.load('D:/decoder/news_pieces0.npy')

shifting = np.ones(shape=(pieces_train.shape[0],512))
vectors_train = vectors_train + shifting
pieces_train = pieces_train + shifting
pieces_output = np.copy(pieces_train)
pieces_output=shift(pieces_output, (0,-1)).reshape(pieces_output.shape[0],510,512,1)

b=0
for j in range(pieces_train.shape[0]//10000-1):

    vtt = vectorsextender(vectors_train[0+j*10000:(1+j)*10000])
    ptt = piecesextender(pieces_train[0+j*10000:(1+j)*10000])
    pot = piecesextender(pieces_output[0+j*10000:(1+j)*10000])

    model.fit(
        x=[vtt, ptt],
        y=pot,
        epochs=e, initial_epoch=n,
        validation_data=0.05, batch_size=4
    )
    n+=1
    e+=1
    b=j
    model.save('D:/decoder/models/m'+str(e)+'.h5')

vtt = vectorsextender(vectors_train[b:])
ptt = piecesextender(pieces_train[b:])
pot = piecesextender(pieces_output[b:])

model.fit(
    x=[vtt, ptt],
    y=pot,
    epochs=e, initial_epoch=n,
    validation_data=0.05, batch_size=4
)
n+=1
e+=1
model.save('D:/decoder/models/m'+str(e)+'.h5')

MemoryError: Unable to allocate array with shape (999951, 512) and data type float64

In [4]:
ids = np.load('D:/decoder/ids.npy')
print(ids)

[ 1022755.  1042131.  1078044.  1082064.  1115036.  1161369.  1260059.
  1270255.  1279637.  1286955.  1388041.  1396189.  1401422.  1401663.
  1415694.  1415696.  1415698.  1416900.  1419849.  1420031.  1440518.
  1457122.  1507462.  1520227.  1521007.  1529788.  1608574.  1622669.
  1625725.  1646831.  1646833.  1654706.  1704910.  1708546.  1718634.
  1719483.  1722989.  1754747.  1793042.  1845374.  1912779.  1912795.
  1922676.  1928482.  1942566.  1950455.  1967802.  1977786.  1991170.
  2038347.  2039692.  2051912.  2062370.  2068460.  2068998.  2089804.
  2098758.  2111489.  2114189.  2130516.  2134042.  2134693.  2141407.
  2148918.  2161945.  2177482.  2178546.  2189332.  2224814.  2231754.
  2270801.  2311762.  2313154.  2372946.  2431071.  2436406.  2444467.
  2446077.  2465667.  2480937.  2482131.  2567922.  2601638.  2604393.
  2604406.  2604435.  2606385.  2633741.  2637454.  2661026.  2662490.
  2668090.  2668113.  2712238.  2714823.  2765661.  2771962.  2839466.
  2843