In [1]:
import tensorflow as tf
from keras_transformer import *

import numpy as np
from keras_layer_normalization import LayerNormalization
from keras_multi_head import MultiHeadAttention
from keras_position_wise_feed_forward import FeedForward
from keras_pos_embd import TrigPosEmbedding
from keras_embed_sim import EmbeddingRet, EmbeddingSim
from keras.models import load_model

from keras.layers import Input, Lambda,RepeatVector,Dense,Reshape,Dropout,Concatenate
from keras.models import Model
from keras import backend as K
import keras


class MModel:    
    @staticmethod
    def loadmodel(is_decoder,path=''):
        if is_decoder:
            model = MModel.get_m(
                token_num=3003,
                embed_dim=100,
                encoder_num=3,
                decoder_num=6,
                head_num=4,
                hidden_dim=120,
                attention_activation='relu',
                feed_forward_activation='relu',
                dropout_rate=0.05,
                embed_weights=np.random.random((3003, 100)),
                use_adapter=False,
            )
            if path!='':
                model.load_weights(path)
            model.compile(
                optimizer='adam',
                loss='sparse_categorical_crossentropy',
            )    
            return model
        else:
            model = MModel.get_dense_encoder()
            if path!='':
                model.load_weights(path)
            model.compile(
                optimizer='adam',
                loss='mean_absolute_error',
            )    
            return model
    
    
    @staticmethod
    def get_m(token_num,
          embed_dim,
          encoder_num,
          decoder_num,
          head_num,
          hidden_dim,
          attention_activation=None,
          feed_forward_activation='relu',
          dropout_rate=0.0,
          embed_weights =None,
          embed_trainable=None,
          trainable=True,
          use_adapter=False,
          adapter_units=None,
          adapter_activation='relu'):

        decoder_token_num = token_num

        decoder_embed_weights = embed_weights

        if decoder_embed_weights is not None:
            decoder_embed_weights = [decoder_embed_weights]

        decoder_embed_trainable = embed_trainable

        if decoder_embed_trainable is None:
            decoder_embed_trainable = decoder_embed_weights is None


        decoder_embed_layer = EmbeddingRet(
            input_dim=decoder_token_num,
            output_dim=embed_dim,
            mask_zero=True,
            weights=decoder_embed_weights,
            trainable=decoder_embed_trainable,
            name='Decoder-Token-Embedding',
        )

        encoder_input = keras.layers.Input(shape=(None,100), name='Encoder-Input')
        pos_wised_encoder = TrigPosEmbedding(
            mode=TrigPosEmbedding.MODE_ADD,
            name='Encoder-Embedding',
        )(encoder_input)

        encoded_layer = get_encoders(
            encoder_num=encoder_num,
            input_layer=pos_wised_encoder,
            head_num=head_num,
            hidden_dim=hidden_dim,
            attention_activation=attention_activation,
            feed_forward_activation=feed_forward_activation,
            dropout_rate=dropout_rate,
            trainable=trainable,
            use_adapter=use_adapter,
            adapter_units=adapter_units,
            adapter_activation=adapter_activation,
        )

        decoder_input = keras.layers.Input(shape=(None,), name='Decoder-Input') 
        decoder_embed, decoder_embed_weights = decoder_embed_layer(decoder_input)
        decoder_embed = TrigPosEmbedding(
            mode=TrigPosEmbedding.MODE_ADD,
            name='Decoder-Embedding',
        )(decoder_embed)
        decoded_layer = get_decoders(
            decoder_num=decoder_num,
            input_layer=decoder_embed,
            encoded_layer=encoded_layer,
            head_num=head_num,
            hidden_dim=hidden_dim,
            attention_activation=attention_activation,
            feed_forward_activation=feed_forward_activation,
            dropout_rate=dropout_rate,
            trainable=trainable,
            use_adapter=use_adapter,
            adapter_units=adapter_units,
            adapter_activation=adapter_activation,
        )
        dense_layer = EmbeddingSim(
            trainable=trainable,
            name='Output',
        )([decoded_layer, decoder_embed_weights])
        return keras.models.Model(inputs=[encoder_input,decoder_input], outputs=dense_layer)
    
    
    
    @staticmethod
    def get_dense_encoder():
        vector = Input(shape=(512,), name='Vectors-Input',dtype='float32')
        vector_shiter = Dense(512)(vector)
        position = Input(shape=(1,), name='Positions-Input',dtype='int32')
        categorical_position=Lambda(MModel.to_cat, name='Positional_encoding')(position)
        reshaped_categorical_position=Reshape((512,))(categorical_position)
        concatenated_encoder_input=Concatenate()([vector,reshaped_categorical_position])
        encoder_input_divider1 = Dense(1024, name='Encoder-Output-Divider-1',activation='selu')(concatenated_encoder_input)
        encoder_input_dropout1=Dropout(0.1, name='Encoder-Output-Dropout-1')(encoder_input_divider1)

        encoder_input_divider2 = Dense(512, name='Encoder-Output-Divider-2',activation='selu')(encoder_input_dropout1)
        encoder_input_dropout2=Dropout(0.1, name='Encoder-Output-Dropout-2')(encoder_input_divider2)

        output_vector = Dense(512, name='Encoder-Garbedge',activation='selu')(encoder_input_dropout2)
        encoder_input_divider3 = Dense(256, name='Encoder-Output-Divider-3',activation='selu')(encoder_input_dropout2)

        output_vector2 = Dense(100, name='Encoder-Output',activation='selu')(encoder_input_divider3)

        return Model(inputs=[vector,position],outputs=[output_vector,output_vector2])
    
    
    @staticmethod
    def to_cat(y):
        from keras import backend as k
        return k.one_hot(y, 512)
  
    @staticmethod
    def reset_keras():
        sess = K.get_session()
        K.clear_session()
        sess.close()
        sess = K.get_session()

        # use the same config as you used to create the session
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 1
        K.set_session(tf.Session(config=config))


    def __init__(self,is_decoder, path=''):
        '''
        k=0
        if is_decoder:
            k=0.75
        else:
            k=0.25
        config = tf.ConfigProto(
            gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=k)
        )
        '''
        '''
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.log_device_placement = True
        '''
        #self.sess = tf.Session(config=config)
        #K.set_session(self.sess)
        self.model = self.loadmodel(is_decoder=is_decoder,path=path)
        self.graph = tf.get_default_graph()
        

    def predict(self, X):
        with self.graph.as_default():
            result = self.model.predict(X)
            return result
            
    def fit(self, X, Y, n_epochs, init_epoch):
        with self.graph.as_default():
            result = self.model.fit(x=X,y=Y, epochs=n_epochs, initial_epoch=init_epoch)
            model.save('D:/112/decoder/models/dec_buf.h5')
            return result
            
    def evaluate(self, X, Y):
        with self.graph.as_default():
            result = self.model.fit(x=X,y=Y)
            return result
    
    def train_on_batch(self, X, Y):
        with self.graph.as_default():
            result = self.model.train_on_batch(x=X,y=Y)
            model.save('D:/112/decoder/models/dec_buf.h5')
            return result
            
    def test_on_batch(self, X, Y):
        with self.graph.as_default():
            result = self.model.test_on_batch(x=X,y=Y)
            return result
            
    def predict_on_batch(self, X, Y):
        with self.graph.as_default():
            result = self.model.predict_on_batch(X)
            return result
            
    def save(self, path):
        self.model.save(path)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


In [2]:
from keras.utils import to_categorical


def piecesextender(data):
    kol= np.zeros(shape=(data.shape[0]))
    for i in range(data.shape[0]):
        kol[i]=np.count_nonzero(data[i])
    kol = kol.astype(np.int)
    out = np.zeros(shape=(np.sum(kol)-2*kol.shape[0],512))
    buf = 0
    for g in range(data.shape[0]):        
        for k in range(1,kol[g]-1):
            out[buf][0]=3000
            out[buf][1:513-k]+=data[g][k:]
            buf+=1
    return out, kol

def vectorsextender(data,kol):
    out = np.zeros(shape=(np.sum(kol)-2*data.shape[0],512))
    buf=0
    for g in range(data.shape[0]):
        for k in range(kol[g]-2):
            out[buf]+=data[g]
            buf+=1
    return out

def positionencoder(kol):
    out = np.zeros(shape=(np.sum(kol)-2*kol.shape[0],1))
    out = out.astype(np.int)
    buf=0
    for g in range(len(kol)):
        for k in range(1,kol[g]-1):
            out[buf]=k
            buf+=1
    return out

def vectors_per_words(vectors,kols,model):
    out = np.zeros(shape=(vectors.shape[0],512,100))
    kol = kols.tolist()
    for i in range(vectors.shape[0]):
        out[i][kol[i][0]] = np.ones(100)
        for k in range(kol[i][0]-1):
            out[i][k+1]=model.predict(
                [vectors_train[i].reshape(1,512),
                 np.array([k+1])])[1]
        out[i][kol[i][0]+2:]-= np.ones(shape=(512-kol[i][0]-2,100))
        out[i][kol[i][0]+2:]-= np.ones(shape=(512-kol[i][0]-2,100))
    return out 

In [3]:
#encoder_shifter_model = MModel(is_decoder=False,path='D:/112/decoder/models/sen_to_word2_0.h5')
#decoder = MModel(is_decoder=True)

In [6]:
from scipy.ndimage.interpolation import shift
import gc

for i in range(1000):
    vectors_train = np.load('D:/112/decoder/data/data3000/news_vectors'+str(i)+'.npy')
    pieces_train = np.load('D:/112/decoder/data/data3000/news_pieces'+str(i)+'.npy')
    shifting = np.ones(shape=(pieces_train.shape[0],512))
    pieces_train = pieces_train+shifting
    
    print('Readed')  
        
    for j in range(90):
        ptt, kol = piecesextender(pieces_train[0+j*5:(1+j)*5])
        pot = np.copy(ptt)
        pot = shift(pot, (0,-1))
        pot = pot.reshape(pot.shape[0],512,1)
        vtt = vectorsextender(vectors_train[0+j*5:(1+j)*5], kol)
        pos = positionencoder(kol)
        encoder_shifter_model = MModel(is_decoder=False,path='D:/112/decoder/models/sen_to_word2_0.h5')
        vpwt=vectors_per_words(vtt,pos,encoder_shifter_model)
        #MModel.reset_keras()
        print('Converted')
                
        sess = K.get_session()
        K.clear_session()
        sess.close()
        sess = K.get_session()
        try:
            del encoder_shifter_model
        except:
            pass
        print(gc.collect())
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 1
        K.set_session(tf.Session(config=config))
        
        model = MModel(is_decoder=True,path='D:/112/decoder/models/dec_buf.h5')     
        model.train_on_batch(#MModel(is_decoder=True,path='D:/112/decoder/models/dec_buf.h5').train_on_batch(
            X=[vpwt,
               ptt],
            Y = pot
        )
        #MModel.reset_keras()
        sess = K.get_session()
        K.clear_session()
        sess.close()
        sess = K.get_session()
        try:
            del encoder_shifter_model
        except:
            pass
        print(gc.collect())
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 1
        K.set_session(tf.Session(config=config))
        
    ptt, kol = piecesextender(pieces_train[450:])
    pot = np.copy(ptt)
    pot = shift(pot, (0,-1))
    pot = pot.reshape(pot.shape[0],512,1)
    vtt = vectorsextender(vectors_train[450:], kol)
    pos = positionencoder(kol)        
    encoder_shifter_model = MModel(is_decoder=False,path='D:/112/decoder/models/sen_to_word2_0.h5')
    vpwt=vectors_per_words(vtt,pos,encoder_shifter_model)

    sess = K.get_session()
    K.clear_session()
    sess.close()
    sess = K.get_session()
    try:
        del encoder_shifter_model
    except:
        pass
    print(gc.collect())
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 1
    K.set_session(tf.Session(config=config))

    
    model = MModel(is_decoder=True,path='D:/112/decoder/models/dec_buf.h5')     
    model.train_on_batch(
    #MModel(is_decoder=True,path='D:/112/decoder/models/dec_buf.h5').test_on_batch(
        X=[vpwt,
           ptt],
        Y = pot
    )
     
    model.save('D:/112/decoder/models/m'+str(i)+'.h5')
    #MModel.reset_keras()
    sess = K.get_session()
    K.clear_session()
    sess.close()
    sess = K.get_session()
    try:
        del model
    except:
        pass
    print(gc.collect())
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 1
    K.set_session(tf.Session(config=config))

Readed



Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Converted
19212


ResourceExhaustedError: 2 root error(s) found.
  (0) Resource exhausted: OOM when allocating tensor with shape[1640,512,512] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node Decoder-1-MultiHeadSelfAttention/Decoder-1-MultiHeadSelfAttention-Attention/sub}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

	 [[loss/mul/_3139]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

  (1) Resource exhausted: OOM when allocating tensor with shape[1640,512,512] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node Decoder-1-MultiHeadSelfAttention/Decoder-1-MultiHeadSelfAttention-Attention/sub}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

0 successful operations.
0 derived errors ignored.