## KALM Model Architechture

![kalm.PNG](attachment:kalm.PNG)

In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import math
import os

from tensorflow.keras.layers import Dense,Flatten,Dropout,RepeatVector,Embedding,Input,LSTM
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.utils import plot_model

### 1. Embedding Class

In [2]:
# Embedding Class

class EmbeddingLayer(tf.keras.layers.Layer):
    
    def __init__(self,vocab_size,embedding_size,weights = None,trainable = True):
        
        super(EmbeddingLayer,self).__init__()
        
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.trainable = trainable
        self.weight = weights
        
        if self.weight is not None:
            self.trainable = True
        
        self.embedding = Embedding(self.vocab_size,self.embedding_size,weights = self.weight,trainable = self.trainable)
        
    
    def call(self,inputs):
        
        outputs = self.embedding(inputs)
        return outputs

In [3]:
# test code for emedding layer
in_ = Input(shape=(1,))
emb = EmbeddingLayer(100,400)(in_)
print(emb.shape)

(None, 1, 400)


### 2. LSTM Class

In [4]:
# LSTM layer class

class LSTMLayer(tf.keras.models.Model):
    
    def __init__(self,units,vocab_size,embedding_size,batch_sz,we_units,return_sequences = True):
        
        super(LSTMLayer,self).__init__()
        self.batch_sz = batch_sz
        self.we_units = we_units
        
#         assert type(nb_layers) == int , "Please provide integer number for nb_layers"
#         #self.lstm = [LSTM(units,return_sequences=return_sequences) for i in range(nb_layers)]
        
        self.embedding = EmbeddingLayer(vocab_size,embedding_size)   
        
        self.lstm_1 = LSTM(units,return_sequences=return_sequences)
        self.lstm_2 = LSTM(units,return_sequences=return_sequences)
        self.lstm_3 = LSTM(units,return_sequences=return_sequences)
        
    def call(self,x,vt):
        
        # shape of x after embedding (bs,1,embedding_size)
        x = self.embedding(x)
        vt = tf.expand_dims(vt,axis=1)
        print(x.shape)
        print(vt.shape)
        
        # concatenation of vt and x  i.e (bs,1,embedding + vt)
        hidden_state = self.lstm_1(tf.concat([x,vt],axis=-1))
        hidden_state = self.lstm_2(hidden_state)
        hidden_state = self.lstm_3(hidden_state)
        
        return hidden_state
    
    def initialize_vt(self):
        return tf.zeros((self.batch_sz,self.we_units))

In [17]:
# LSTM Layer test code
lstm = LSTMLayer(3,1150,5000,400,64,100)
vt = lstm.initialize_vt()
hidden = lstm(in_,vt)
print(hidden.shape)

(None, 1, 5000)
(400, 1, 64)
(400, 1, 3)


### 3. TypeEmbedding Class

![image.png](attachment:image.png)

![image.png](attachment:image.png)

### The Problem:

* As per paper, the **Wh** has to be of size 400 but this is creating problem while calculation of **vt.**
* So, making it 100 for both **We** and **Wh** for each of computaion.

In [None]:
# TypeEmbedding layer

class TypeEmbedding(tf.keras.layers.Layer):
    
    def __init__(self,nb_entities,wh_units,we_units):
        
        super(TypeEmbedding,self).__init__()
        
        assert type(nb_entities) == int , "Please provide integer number to number of entities"
        
        self.Wh = Dense(wh_units)
        self.We = [Dense(we_units) for _ in range(nb_entities)]
        
    def call(self,hidden_state):
        
        # hidden_state == hidden_states i.e (bs,ts,hidden_size)
        # Wh is used for dimentionality reduction as mentioned in paper and is shared.
        
        reduced_dimentionality = self.Wh(hidden_state)    # (bs,ts,wh_units) i.e. (bs,ts,400)
        
        outputs = []
        
        for we in self.We:
            logits = we(reduced_dimentionality)
            logits = tf.expand_dims(logits,axis = 0)            # (1,bs,ts,we_units)
            outputs.append(logits)
            
        outputs = tf.concat(outputs,axis=0)  # (nb_entities,bs,ts,we_units)
        
        outputs = tf.transpose(outputs,perm = [1,2,0,3])  # (bs,ts,nb_entities,we_units)
        outputs = tf.nn.softmax(outputs,axis= -1)      # (bs,ts,nb_entities,we_units)
        
        # code for vt
        vt = []
        
        for i in range(len(self.We)):
            logits = self.We[i](outputs[:,:,i,:])
            logits = tf.expand_dims(logits,axis = 0) # (1,bs,ts,we_units)
            vt.append(logits)
            
        result = tf.concat(vt,axis=0)                   # (nb_entities,bs,ts,we_units)
        result = tf.transpose(result,perm= [1,2,3,0] )  # (bs,ts,we_units,nb_entities)
        result = tf.reduce_sum(result,axis = -1)        # (bs,ts,we_units)
            
        return outputs,result
           

In [None]:
# test code for type embedding layer
te = TypeEmbedding(4,100,100)
type_prob,vt_ = te(hidden)
print(type_prob.shape,vt_.shape)

### 4. ProjectionLayer Class

![image.png](attachment:image.png)

In [None]:
# Projection layer W(p,j) for j = 1,2,...K

class ProjectionLayer(tf.keras.layers.Layer):
    
    def __init__(self,nb_entities,units):
        
        super(ProjectionLayer,self).__init__()
        
        assert type(nb_entities) == int , "Please provide integer number to number of entities"
        self.Wp = [Dense(units) for _ in range(nb_entities)]
        
    def call(self,inputs,type_inputs):
        
        # expected inputs are hidden states of lstm. i.e (batch_size,time_steps,hidden_units)
        
        outputs = []
        for wp in self.Wp:
            logits = wp(inputs)
            logits = tf.nn.softmax(logits,axis = -1)   # (bs,ts,units)
            logits = tf.expand_dims(logits,axis = 0)            # (1,bs,ts,units)
            outputs.append(logits)
            
        outputs = tf.concat(outputs,axis=0)  # (nb_entities,bs,ts,units)
        
        outputs = tf.transpose(outputs,perm = [1,2,0,3])  # (bs,ts,nb_entities,units)
        
        result = tf.matmul(outputs,type_inputs,transpose_a=True)
        result = tf.reduce_sum(result,axis=-1)    # (bs,ts,vg)
        
        return result
        

In [9]:
# test code for projection layer

pj = ProjectionLayer(4,5000)
lm_op = pj(hidden,type_prob)
print(lm_op.shape)

(64, 100, 5000)


## Model

* The model has an embedding layer of 400 dimensions. 
* LSTM cell and hidden states of 1, 150 dimensions, and 3 stacked LSTM layers.
* We scale the final LSTM’s hidden and cell states to 100 dimensions, and share weights between the projection layer Wp
* Each entity type in the knowledge base is represented by a trainable 100-dimensional embedding vector

In [10]:
# Standard Values as per paper

BATCH_SIZE = 64
EMBEDDING_SIZE = 400
VOCABULARY_GENERAL = 999
HIDDEN_UNITS = 555
WP_UNITS = VOCABULARY_GENERAL
WH_UNITS = WE_UNITS = 100
NB_LAYERS = 3
NB_ENTITITES = 4 # (PER,LOC,MISC,ORG)
MAX_SEQUENCE_LEN = 68

In [26]:
temp_vt = tf.zeros((emb_states.shape[0],emb_states.shape[-2],100))
temp_vt.shape

TensorShape([64, 68, 100])

In [27]:
d = tf.concat([temp_vt,emb_states],axis=-1)
d.shape

TensorShape([64, 68, 500])

In [28]:
# model Building
in_ = Input(shape=(MAX_SEQUENCE_LEN,),batch_size=64)

emb = EmbeddingLayer(VOCABULARY_GENERAL,EMBEDDING_SIZE)
emb_states = emb(in_)

vt = tf.zeros((emb_states.shape[0],emb_states.shape[-2],WE_UNITS))

# lstm Layers
lstm = LSTMLayer(NB_LAYERS,HIDDEN_UNITS)
hidden_states = lstm(tf.concat([emb_states,vt],axis = -1))

# type embeding 
type_emb = TypeEmbedding(NB_ENTITITES,WH_UNITS,WE_UNITS)
type_prob,vt = type_emb(hidden_states)

#projection layer
proj_layer = ProjectionLayer(NB_ENTITITES,WP_UNITS)
final = proj_layer(hidden_states,type_prob)

In [23]:
model = Model(inputs = in_,outputs = final)
model.summary()

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         [(64, 68)]                0         
_________________________________________________________________
embedding_layer_3 (Embedding (64, 68, 400)             399600    
_________________________________________________________________
lstm_layer_3 (LSTMLayer)     (64, 68, 555)             7055160   
_________________________________________________________________
type_embedding_3 (TypeEmbedd ((64, 68, 4, 100), (64, 6 96000     
_________________________________________________________________
projection_layer_3 (Projecti (64, 68, 999)             2221776   
Total params: 9,772,536
Trainable params: 9,772,536
Non-trainable params: 0
_________________________________________________________________


In [4]:
train = pd.read_json("train.json")
train.head()

Unnamed: 0,id,cuisine,ingredients
0,10259,greek,"[romaine lettuce, black olives, grape tomatoes..."
1,25693,southern_us,"[plain flour, ground pepper, salt, tomatoes, g..."
2,20130,filipino,"[eggs, pepper, salt, mayonaise, cooking oil, g..."
3,22213,indian,"[water, vegetable oil, wheat, salt]"
4,13162,indian,"[black pepper, shallots, cornflour, cayenne pe..."


In [5]:
test = pd.read_json("test.json")
test.head()

Unnamed: 0,id,ingredients
0,18009,"[baking powder, eggs, all-purpose flour, raisi..."
1,28583,"[sugar, egg yolks, corn starch, cream of tarta..."
2,41580,"[sausage links, fennel bulb, fronds, olive oil..."
3,29752,"[meat cuts, file powder, smoked sausage, okra,..."
4,35687,"[ground black pepper, salt, sausage casings, l..."
