In [1]:
import numpy as np

In [2]:
import keras

In [3]:
import tensorflow as tf

In [4]:
from keras import Model

In [5]:
from keras.layers import Input,Embedding,Conv1D,Dense,Dropout,Lambda

In [6]:
from keras.activations import sigmoid

In [7]:
max_inp_len=5
max_tar_len=10
embed_dim=100
kernel_size=3
padding="same"
hid_dim=200
src_vocab_size=55
trg_vocab_size=70
drop_out=0.2
scale=tf.sqrt(tf.constant(0.5))
n_layers=3
trg_pad_index=1

In [8]:
class Attention(tf.keras.layers.Layer):
    def __init__(self,hid_dim,embed_dim,scale):
        super(Attention,self).__init__()
        self.hidden_embed=Dense(embed_dim)
        self.embed_hidden=Dense(hid_dim)
        self.scale=scale
        
    def call(self,inputs):
        decoder_embed,decoder_conv,encoder_conv,encoder_combined=inputs
        
        decoder_conved=self.hidden_embed(decoder_conv)
        
        decoder_combined=(decoder_conved+decoder_embed)*self.scale
        
        encoder_conv=tf.transpose(encoder_conv,perm=(0,2,1))
        
        energy=tf.matmul(decoder_combined,encoder_conv)
        
        attention_wts=tf.nn.softmax(energy,axis=1)
        
        context_vector=tf.matmul(attention_wts,encoder_combined)
        
        context_vector=self.embed_hidden(context_vector)
        
        context_vector_combined=(decoder_conv+context_vector)*self.scale
        
        return attention_wts,context_vector_combined

In [19]:
encoder_ip=Input(shape=(max_inp_len,))
encoder_token_embed=Embedding(src_vocab_size,embed_dim)
encoder_position_embed=Embedding(max_inp_len,embed_dim)
encoder_dropout=Dropout(rate=drop_out)
encoder_before_conv_dense=Dense(hid_dim)
convs=[Conv1D(filters=2*hid_dim,kernel_size=kernel_size,padding=padding) for _ in range(n_layers)]
encoder_after_conv_dense=Dense(embed_dim)

encoder_token_embedding=encoder_token_embed(encoder_ip)

#pos=Lambda(lambda x: tf.repeat(tf.range(0,max_inp_len),tf.shape(x)[0]))(encoder_ip)

pos=tf.repeat(tf.range(0,max_inp_len),tf.shape(encoder_ip)[0])
pos=tf.reshape(pos,(-1,max_inp_len))

encoder_pos_embedding=encoder_position_embed(pos)

encoder_embed_combined=encoder_token_embedding+encoder_pos_embedding

encoder_embed_dropout=encoder_dropout(encoder_embed_combined)

encoder_conv_ip=encoder_before_conv_dense(encoder_embed_dropout)

for i,conv in enumerate(convs):
    encoder_conv_dropout=encoder_dropout(encoder_conv_ip)
    
    encoder_conv=conv(encoder_conv_dropout)
    
    encoder_conv=tf.transpose(encoder_conv,perm=(0,2,1))
    
    conv_1,conv_2=tf.split(encoder_conv,num_or_size_splits=2,axis=1)
    conv_1=sigmoid(conv_1)
    encoder_conv=tf.multiply(conv_1,conv_2)
    
    encoder_conv=tf.transpose(encoder_conv,perm=(0,2,1))
    
    encoder_conv=(encoder_conv+encoder_conv_ip)*scale
    encoder_conv_ip=encoder_conv
    

encoder_conv=encoder_after_conv_dense(encoder_conv)

encoder_combined=(encoder_conv+encoder_embed_dropout)*scale


decoder_ip=Input(shape=(max_tar_len,))
decoder_token_embed=Embedding(src_vocab_size,embed_dim)
decoder_position_embed=Embedding(max_tar_len,embed_dim)
decoder_dropout=Dropout(rate=drop_out)
decoder_before_conv_dense=Dense(hid_dim)
convs1=[Conv1D(filters=2*hid_dim,kernel_size=kernel_size,padding=padding) for _ in range(n_layers)]

attention=Attention(hid_dim,embed_dim,scale)

decoder_after_conv_dense=Dense(embed_dim)
decoder_dense=Dense(trg_vocab_size,activation="softmax")

decoder_token_embedding=decoder_token_embed(decoder_ip)

pos1=tf.repeat(tf.range(0,max_tar_len),tf.shape(decoder_ip)[0])

#pos1=Lambda(lambda x: tf.repeat(tf.range(0,max_tar_len),tf.shape(x)[0]))(decoder_ip)
pos1=tf.reshape(pos1,(-1,max_tar_len))

decoder_pos_embedding=decoder_position_embed(pos1)

decoder_embed_combined=decoder_token_embedding+decoder_pos_embedding

decoder_embed_dropout=decoder_dropout(decoder_embed_combined)

decoder_conv_ip=decoder_before_conv_dense(decoder_embed_dropout)

for j,conv1 in enumerate(convs1):
    decoder_conv_dropout=decoder_dropout(decoder_conv_ip)
    
    padd=tf.fill((tf.shape(decoder_conv_ip)[0],decoder_conv_ip.shape[1],kernel_size-1),trg_pad_index)
    
    padd=tf.cast(padd,dtype=tf.float32)
    
    
    padded_conv_ip=tf.concat((padd,decoder_conv_dropout),axis=-1)
    
    decoder_conv=conv1(padded_conv_ip)
    
    decoder_conv=tf.transpose(decoder_conv,perm=(0,2,1))
    
    conv_3,conv_4=tf.split(decoder_conv,num_or_size_splits=2,axis=1)
    
    conv_3=sigmoid(conv_3)
    decoder_conv=tf.multiply(conv_3,conv_4)
    
    
    decoder_conv=tf.transpose(decoder_conv,perm=(0,2,1))
    
    attention_wts,decoder_conv=attention([decoder_embed_dropout,decoder_conv,encoder_conv,encoder_combined])
    
    decoder_conv=(decoder_conv+decoder_conv_ip)*scale
    decoder_conv_ip=decoder_conv
    
decoder_conv=decoder_after_conv_dense(decoder_conv)
    
decoder_conv_dropout=decoder_dropout(decoder_conv)
    
decoder_output=decoder_dense(decoder_conv_dropout)
    

In [20]:
model=Model([encoder_ip,decoder_ip],decoder_output)

In [21]:
model.compile(optimizer="adam",loss="categorical_crossentropy",metrics=['accuracy'])

In [22]:
X=np.random.random((10,max_inp_len))
y=np.random.random((10,max_tar_len))

In [23]:
y_one_hot=tf.one_hot(y,trg_vocab_size)

In [24]:
model.fit([X,y],y_one_hot,epochs=2)

Epoch 1/2
Epoch 2/2


<keras.src.callbacks.History at 0x1cdf6ca11e0>

In [25]:
encoder_model=Model(encoder_ip,[encoder_conv,encoder_combined])

In [28]:
decoder_input=Input(shape=(1,))
encoder_conv1=Input(shape=(max_inp_len,embed_dim))
encoder_com1=Input(shape=(max_inp_len,embed_dim))

decoder_token_embedding1=decoder_token_embed(decoder_input)

pos11=tf.repeat(tf.range(0,1),tf.shape(decoder_ip)[0])
pos11=tf.reshape(pos11,(-1,1))

decoder_pos_embedding1=decoder_position_embed(pos11)

decoder_pos_embedding1=decoder_position_embed(pos11)
decoder_embed_combined1=decoder_token_embedding1+decoder_pos_embedding1

decoder_embed_dropout1=decoder_dropout(decoder_embed_combined1)

decoder_conv_ip1=decoder_before_conv_dense(decoder_embed_dropout1)

for j,conv1 in enumerate(convs1):
    decoder_conv_dropout1=decoder_dropout(decoder_conv_ip1)
    
    padd1=tf.fill((tf.shape(decoder_conv_ip1)[0],decoder_conv_ip.shape[1],kernel_size-1),trg_pad_index)
    
    padd1=tf.cast(padd1,dtype=tf.float32)
    
    
    padded_conv_ip1=tf.concat((padd1,decoder_conv_dropout1),axis=-1)
    
    decoder_conv1=conv1(padded_conv_ip1)
    
    decoder_conv1=tf.transpose(decoder_conv1,perm=(0,2,1))
    
    conv_31,conv_41=tf.split(decoder_conv1,num_or_size_splits=2,axis=1)
    
    conv_31=sigmoid(conv_31)
    decoder_conv=tf.multiply(conv_31,conv_41)
    
    
    decoder_conv1=tf.transpose(decoder_conv,perm=(0,2,1))
    
    attention_wts1,decoder_conv1=attention([decoder_embed_dropout1,decoder_conv1,encoder_conv1,encoder_com1])
    
    decoder_conv1=(decoder_conv1+decoder_conv_ip1)*scale
    decoder_conv_ip1=decoder_conv1
    
decoder_conv1=decoder_after_conv_dense(decoder_conv1)
    
decoder_conv_dropout1=decoder_dropout(decoder_conv1)
    
decoder_output1=decoder_dense(decoder_conv_dropout1)

In [29]:
decoder_model=Model([decoder_input,encoder_conv,encoder_com],[decoder_output,attention_wts])

ValueError: Graph disconnected: cannot obtain value for tensor KerasTensor(type_spec=TensorSpec(shape=(None, 5), dtype=tf.float32, name='input_6'), name='input_6', description="created by layer 'input_6'") at layer "tf.compat.v1.shape_8". The following previous layers were accessed without issue: []