In [1]:
from keras_transformer import *

import numpy as np
from keras_layer_normalization import LayerNormalization
from keras_multi_head import MultiHeadAttention
from keras_position_wise_feed_forward import FeedForward
from keras_pos_embd import TrigPosEmbedding
from keras_embed_sim import EmbeddingRet, EmbeddingSim
from keras.models import load_model

from keras.layers import Input, Lambda,RepeatVector,Dense,Reshape,Dropout
from keras.models import Model
from keras import backend as K
import keras


def get_m(token_num,
          embed_dim,
          encoder_num,
          decoder_num,
          head_num,
          hidden_dim,
          attention_activation=None,
          feed_forward_activation='relu',
          dropout_rate=0.0,
          embed_weights=None,
          embed_trainable=None,
          trainable=True,
          use_adapter=False,
          adapter_units=None,
          adapter_activation='relu'):
    """Build an encoder-decoder Transformer whose encoder reads ready-made vectors.

    Only the decoder side has a token embedding layer.  The encoder input is a
    sequence of feature vectors that goes straight into a positional embedding
    and then into the encoder stack.

    :param token_num: Size of the decoder vocabulary (``input_dim`` of the
        decoder token embedding).
    :param embed_dim: Width of the decoder token embedding.  The encoder input
        is declared with the same feature width.
    :param encoder_num: Passed to ``get_encoders`` as ``encoder_num``.
    :param decoder_num: Passed to ``get_decoders`` as ``decoder_num``.
    :param head_num: Passed to both stacks as ``head_num``.
    :param hidden_dim: Passed to both stacks as ``hidden_dim``.
    :param attention_activation: Passed to both stacks unchanged.
    :param feed_forward_activation: Passed to both stacks unchanged.
    :param dropout_rate: Passed to both stacks unchanged.
    :param embed_weights: Optional initial matrix for the decoder token
        embedding.
    :param embed_trainable: Whether the decoder token embedding is trainable.
        When ``None`` it is trainable only if ``embed_weights`` was not given.
    :param trainable: Passed to both stacks and to the output layer.
    :param use_adapter: Passed to both stacks unchanged.
    :param adapter_units: Passed to both stacks unchanged.
    :param adapter_activation: Passed to both stacks unchanged.
    :return: A ``keras.models.Model`` with inputs ``[Encoder-Input,
        Decoder-Input]`` and the ``Output`` layer's tensor as its output.
    """
    # Keras expects initial layer weights as a list of arrays.
    initial_embed_weights = None if embed_weights is None else [embed_weights]

    # Default: fine-tune the embedding only when it starts from scratch.
    if embed_trainable is None:
        embed_trainable = initial_embed_weights is None

    decoder_embed_layer = EmbeddingRet(
        input_dim=token_num,
        output_dim=embed_dim,
        mask_zero=True,
        weights=initial_embed_weights,
        trainable=embed_trainable,
        name='Decoder-Token-Embedding',
    )

    # ---- Encoder ---------------------------------------------------------
    # The feature width follows `embed_dim` (previously a literal 100): the
    # decoder's residual connections require encoder and decoder widths to
    # agree, so a different literal could never have produced a valid model.
    encoder_input = keras.layers.Input(shape=(None, embed_dim), name='Encoder-Input')
    encoder_with_positions = TrigPosEmbedding(
        mode=TrigPosEmbedding.MODE_ADD,
        name='Encoder-Embedding',
    )(encoder_input)
    encoded_layer = get_encoders(
        encoder_num=encoder_num,
        input_layer=encoder_with_positions,
        head_num=head_num,
        hidden_dim=hidden_dim,
        attention_activation=attention_activation,
        feed_forward_activation=feed_forward_activation,
        dropout_rate=dropout_rate,
        trainable=trainable,
        use_adapter=use_adapter,
        adapter_units=adapter_units,
        adapter_activation=adapter_activation,
    )

    # ---- Decoder ---------------------------------------------------------
    decoder_input = keras.layers.Input(shape=(None,), name='Decoder-Input')
    # EmbeddingRet returns the embedded sequence together with its weight
    # tensor; the latter is reused by the output layer below.
    decoder_embed, embed_weight_tensor = decoder_embed_layer(decoder_input)
    decoder_embed = TrigPosEmbedding(
        mode=TrigPosEmbedding.MODE_ADD,
        name='Decoder-Embedding',
    )(decoder_embed)
    decoded_layer = get_decoders(
        decoder_num=decoder_num,
        input_layer=decoder_embed,
        encoded_layer=encoded_layer,
        head_num=head_num,
        hidden_dim=hidden_dim,
        attention_activation=attention_activation,
        feed_forward_activation=feed_forward_activation,
        dropout_rate=dropout_rate,
        trainable=trainable,
        use_adapter=use_adapter,
        adapter_units=adapter_units,
        adapter_activation=adapter_activation,
    )

    # ---- Output ----------------------------------------------------------
    output_layer = EmbeddingSim(
        trainable=trainable,
        name='Output',
    )([decoded_layer, embed_weight_tensor])
    return keras.models.Model(inputs=[encoder_input, decoder_input], outputs=output_layer)

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
from keras.utils import to_categorical


def piecesextender(data, seq_len=512, start_token=3000):
    """Expand every token row into one decoder-input row per suffix.

    For a row with ``c`` non-zero entries, ``c - 2`` output rows are produced,
    one for each offset ``k`` in ``1 .. c - 2``.  The row for offset ``k`` is
    ``[start_token, data[g][k], data[g][k + 1], ...]`` padded with zeros up to
    ``seq_len``.  Rows with fewer than 3 non-zero entries produce nothing.

    The non-zero count is used as the row's length, so this presumably expects
    the non-zero tokens to sit contiguously at the start of each row -- TODO
    confirm against the data files.

    :param data: 2-D array with ``seq_len`` columns (the slice assignment
        below requires that width).
    :param seq_len: Number of columns in the output rows.
    :param start_token: Value written to column 0 of every output row.
    :return: ``(out, kol)`` where ``out`` is a float array of shape
        ``(total_rows, seq_len)`` and ``kol`` is an integer array holding the
        non-zero count of each input row.
    """
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
    kol = np.array([np.count_nonzero(row) for row in data], dtype=int)

    # Count only the rows that will really be written.  The plain formula
    # sum(kol) - 2 * len(kol) under-allocates when any count is below 2.
    total_rows = sum(max(int(count) - 2, 0) for count in kol)
    out = np.zeros(shape=(total_rows, seq_len))

    buf = 0
    for g in range(data.shape[0]):
        for k in range(1, kol[g] - 1):
            out[buf, 0] = start_token
            # Suffix data[g][k:] has seq_len - k items and lands right after
            # the start token.
            out[buf, 1:seq_len + 1 - k] = data[g][k:]
            buf += 1
    return out, kol

def positionencoder(kol):
    """List the offsets ``1 .. c - 2`` for every count ``c`` in ``kol``.

    The offsets of all counts are written one after another into a single
    column.  Counts below 3 contribute no rows.

    :param kol: 1-D sequence of integer counts.
    :return: Integer array of shape ``(total_rows, 1)``.
    """
    # Count only the rows that will really be written.  The plain formula
    # sum(kol) - 2 * len(kol) under-allocates when any count is below 2.
    total_rows = sum(max(int(count) - 2, 0) for count in kol)
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
    out = np.zeros(shape=(total_rows, 1), dtype=int)

    buf = 0
    for g in range(len(kol)):
        for k in range(1, kol[g] - 1):
            out[buf] = k
            buf += 1
    return out

In [6]:
from bpemb import BPEmb
# Pretrained Russian BPE vectors: 3000-piece vocabulary, 100 dimensions each.
bpemb_ru = BPEmb(lang="ru", dim=100, vs=3000)

# Append three extra rows after the pretrained vectors, in this order:
# all -2.0, all 0.0, all 1.0.  The matrix therefore has 3000 + 3 rows.
embedding_weights = np.concatenate((
    bpemb_ru.emb.vectors,
    -2 * np.ones(shape=(1, 100)),
    np.zeros(shape=(1, 100)),
    np.ones(shape=(1, 100)),
))

# token_num matches the number of rows in embedding_weights (3000 + 3).
model = get_m(
    token_num=3003, embed_dim=100,
    encoder_num=3, decoder_num=6,
    head_num=4, hidden_dim=120,
    attention_activation='relu', feed_forward_activation='relu',
    dropout_rate=0.05,
    embed_weights=embedding_weights,
    use_adapter=False,
)
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

(3000, 100)
(3003, 100)




Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.




In [3]:
from keras.models import load_model
# Rebind `model` to a previously saved checkpoint.  get_custom_objects is not
# defined in this notebook; presumably it comes from the keras_transformer
# star import -- TODO confirm.
model = load_model(
    'D:/112/decoder/models/m16.h5',
    custom_objects=get_custom_objects(),
)





Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [None]:
from scipy.ndimage.interpolation import shift
from sklearn.utils import shuffle

# Locations of the training shards and of the saved checkpoints.
DATA_DIR = 'D:/112/decoder/data/data3000'
MODEL_DIR = 'D:/112/decoder/models'

for j in range(1, 7):        # passes over the whole set of shards
    for i in range(22):      # one shard per iteration
        pieces_train = np.load('{}/news_pieces{}.npy'.format(DATA_DIR, i))
        vpwt = np.load('{}/embedings{}.npy'.format(DATA_DIR, i))
        # Raise every stored token id by one (result is floating point).
        # Presumably this moves the padding value onto 0 -- TODO confirm
        # against the script that wrote the shards.
        pieces_train = pieces_train + 1.0

        print('Readed')

        # Decoder inputs: one row per suffix of every token sequence.
        ptt, kol = piecesextender(pieces_train)

        # Targets: the decoder input moved one step to the left, with 0 in
        # the last position.  A slice copy is exact and fast; the previous
        # scipy.ndimage shift() call ran a cubic-spline filter for this.
        pot = np.zeros(ptt.shape)
        pot[:, :-1] = ptt[:, 1:]
        pot = pot.reshape(pot.shape[0], pot.shape[1], 1)

        pos = positionencoder(kol)  # computed but not fed to the model

        # The targets MUST be permuted together with the inputs.  Previously
        # `pot` was left out of this call, so after shuffling each decoder
        # input row was paired with the target of a different row.
        ptt, vpwt, pot, pos = shuffle(ptt, vpwt, pot, pos, random_state=0)
        print('Converted')

        # Exactly one epoch per shard; the epoch counter follows the shard
        # index.
        model.fit(
            x=[vpwt, ptt],
            y=pot,
            epochs=i + 1,
            initial_epoch=i,
            validation_split=0.05,
            batch_size=4,
        )
        if i % 5 == 0:
            model.save('{}/m{}_{}.h5'.format(MODEL_DIR, j, i))

Readed
Converted
Train on 23548 samples, validate on 1240 samples
Epoch 1/1
Readed
Converted
Train on 24654 samples, validate on 1298 samples
Epoch 2/2
Readed
Converted
Train on 22035 samples, validate on 1160 samples
Epoch 3/3
Readed
Converted
Train on 13135 samples, validate on 692 samples
Epoch 4/4
Readed
Converted
Train on 25193 samples, validate on 1326 samples
Epoch 5/5
Readed
Converted
Train on 25993 samples, validate on 1369 samples
Epoch 6/6
Readed
Converted
Train on 21755 samples, validate on 1146 samples
Epoch 7/7
Readed
Converted
Train on 24488 samples, validate on 1289 samples
Epoch 8/8
Readed
Converted
Train on 23444 samples, validate on 1234 samples
Epoch 9/9
Readed
Converted
Train on 25720 samples, validate on 1354 samples
Epoch 10/10
Readed
Converted
Train on 20987 samples, validate on 1105 samples
Epoch 11/11
Readed
Converted
Train on 22472 samples, validate on 1183 samples
Epoch 12/12
Readed
Converted
Train on 21451 samples, validate on 1130 samples
Epoch 13/13
Reade