## Dependencies

In [1]:
!pip install gensim



In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
import sklearn as sk
import gensim as gs
import ast
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [3]:
#! check if tensorflow is using GPU
gpus = tf.config.list_physical_devices('GPU')

if gpus:
    print(f"TensorFlow is using {len(gpus)} GPU(s).")
    for gpu in gpus:
        print(f"GPU: {gpu.name}")
else:
    print("TensorFlow is not using any GPUs.")

TensorFlow is using 1 GPU(s).
GPU: /physical_device:GPU:0


In [4]:
def load_data_labels(data_path, labels_path):
    with open(data_path, 'r') as f:
        data = [ast.literal_eval(line.strip()) for line in f]
    with open(labels_path, 'r') as f:
        labels = [ast.literal_eval(line.strip()) for line in f]
    return data, labels

In [5]:
data, labels = load_data_labels('training_data_processed.txt', 'order_category_labels.txt')
print(data[:5])
print(labels[:5])

[['okay', 'i', 'want', 'a', 'pizza', 'and', 'make', 'it', 'large', 'please', 'i', 'want', 'pesto', 'and', 'extra', 'cheese', 'but', 'i', 'do', 'nt', 'want', 'pepperoni'], ['four', 'pizza', 'with', 'extra', 'american', 'cheese', 'and', 'i', 'want', 'five', 'lunch', 'size', 'pizza', 'no', 'american', 'cheese'], ['can', 'i', 'have', 'a', 'party', 'size', 'pie', 'without', 'any', 'bean'], ['i', 'like', '7', 'pizza', 'with', 'bacon', 'and', 'eight', 'pizza', 'with', 'white', 'onion'], ['three', 'pizza', 'no', 'american', 'cheese', 'and', 'two', 'extra', 'large', 'pie', 'with', 'a', 'little', 'bite', 'of', 'bean']]
[[2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0], [2, 2, 2, 0, 0, 0, 0, 0, 0, 0], [2, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]


In [6]:
pretrained_model = gs.models.KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin', binary=True)

In [7]:
#! get v Aand replace unknown words with unk token
def process_sentence(sentence, model):
    for i, word in enumerate(sentence):
        if word not in model and word  != 'a':
            sentence[i] = 'unk'
    return sentence

data = [process_sentence(sentence, pretrained_model) for sentence in data]
print(data[:5])
vocab=set()
for sentence in data:
    vocab.update(sentence)
#! get word index for each word in vocab
word2idx = {word: idx for idx, word in enumerate(vocab)}

[['okay', 'i', 'want', 'a', 'pizza', 'unk', 'make', 'it', 'large', 'please', 'i', 'want', 'pesto', 'unk', 'extra', 'cheese', 'but', 'i', 'do', 'nt', 'want', 'pepperoni'], ['four', 'pizza', 'with', 'extra', 'american', 'cheese', 'unk', 'i', 'want', 'five', 'lunch', 'size', 'pizza', 'no', 'american', 'cheese'], ['can', 'i', 'have', 'a', 'party', 'size', 'pie', 'without', 'any', 'bean'], ['i', 'like', '7', 'pizza', 'with', 'bacon', 'unk', 'eight', 'pizza', 'with', 'white', 'onion'], ['three', 'pizza', 'no', 'american', 'cheese', 'unk', 'two', 'extra', 'large', 'pie', 'with', 'a', 'little', 'bite', 'unk', 'bean']]


In [8]:
embedding_dim=300
input_dim=len(vocab)
output_dim=3
max_length=100

In [9]:
#! get embeddings matrix
def get_embeddings_matrix(model, vocab):
    vocab_size = len(vocab)
    embedding_dim = model.vector_size
    embedding_matrix = np.zeros((vocab_size, embedding_dim))
    for i, word in enumerate(vocab):
        if word in model:
            embedding_matrix[i] = model[word]
        elif word == 'a':
            embedding_matrix[i] = model['one']
    return embedding_matrix
embedding_matrix = get_embeddings_matrix(pretrained_model, vocab)

In [10]:
#! replace words with their index in vocab and pad sentences
X=[[word2idx[word] for word in sentence] for sentence in data]
X=pad_sequences(X, maxlen=max_length, padding='post', value=-1)
Y=pad_sequences(labels, maxlen=max_length, padding='post', value=2)

In [11]:
#! model
Bidirectional_LSTM_model = tf.keras.Sequential([
    tf.keras.layers.Masking(mask_value=-1),  #! Masking layer to handle the padded values
    tf.keras.layers.Embedding(input_dim=input_dim, output_dim=embedding_dim, weights=[embedding_matrix], trainable=True,mask_zero=False),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),
    tf.keras.layers.Dropout(0.6),
    tf.keras.layers.Dense(output_dim, activation='softmax')
])
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='loss',           #! Monitor the validation loss (you can use 'accuracy' or another metric)
    factor=0.5,               #! Factor by which the learning rate will be reduced
    patience=5,               #! Number of epochs with no improvement after which learning rate will be reduced
    min_lr=1e-9,              #! Lower bound on the learning rate
    verbose=1                 #! Print a message when the learning rate is reduced
)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
Bidirectional_LSTM_model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [13]:
Bidirectional_LSTM_model.fit(X,Y,callbacks=[lr_scheduler],epochs=2,batch_size=1024)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x258f2cadb80>

In [14]:
Bidirectional_LSTM_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 masking (Masking)           (None, 100)               0         
                                                                 
 embedding (Embedding)       (None, 100, 300)          66900     
                                                                 
 bidirectional (Bidirectiona  (None, 100, 128)         186880    
 l)                                                              
                                                                 
 dropout (Dropout)           (None, 100, 128)          0         
                                                                 
 dense (Dense)               (None, 100, 3)            387       
                                                                 
Total params: 254,167
Trainable params: 254,167
Non-trainable params: 0
__________________________________________________

## Evaluate model on training data

In [43]:
preds_train = Bidirectional_LSTM_model.predict(X[:100000])
preds_train = np.argmax(preds_train, axis=-1)



In [46]:
count = 0
for i in range(100000):
    mask=X[i]!=-1
    if np.all(preds_train[i][mask]==Y[i][mask]):
        count+=1
print(f"Accuracy on training data: {count/100000}")

Accuracy on training data: 0.99976


## Evaluate model on dev data

In [18]:
dev_data, dev_labels = load_data_labels('dev_data_processed.txt', 'dev_order_category_labels.txt')
for tokens in dev_data:
    for i,word in enumerate(tokens):
        if word not in vocab!= 'a':
            tokens[i] = 'unk'
X_d=[[word2idx[word] for word in sentence] for sentence in dev_data]
X_d=pad_sequences(X_d, maxlen=max_length, padding='post', value=-1)
Y_d=pad_sequences(dev_labels, maxlen=max_length, padding='post', value=2)

In [48]:
preds_dev = Bidirectional_LSTM_model.predict(X_d)
preds_dev = np.argmax(preds_dev, axis=-1)
count = 0
for i in range(len(dev_data)):
    mask = X_d[i] != -1
    if (preds_dev[i][mask] == Y_d[i][mask]).all():
        count += 1
    print(i)
    print(f"Pred: {preds_dev[i][mask]}")
    print(f"True: {Y_d[i][mask]}")
    print("--------------------------------------------")
print(f"Accuracy on dev data: {count/len(dev_data)}")

0
Pred: [2 2 0 2 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0]
True: [2 2 2 2 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0]
--------------------------------------------
1
Pred: [0 0 0 0 0 0 0]
True: [0 0 0 0 0 0 0]
--------------------------------------------
2
Pred: [2 2 0 2 0 0 0 0 0 0 0 0]
True: [2 2 2 2 0 0 0 0 0 0 0 0]
--------------------------------------------
3
Pred: [2 2 0 2 0 0 0 0 0 0]
True: [2 2 2 2 0 0 0 0 0 0]
--------------------------------------------
4
Pred: [2 2 2 0 0 0 0 0 0 0 0 0 0]
True: [2 2 2 0 0 0 0 0 0 0 0 0 0]
--------------------------------------------
5
Pred: [2 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 1 1 1]
True: [2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 1 1 1]
--------------------------------------------
6
Pred: [2 2 2 0 0 0 0 0 0 0 0 0 0]
True: [2 2 2 0 0 0 0 0 0 0 0 0 0]
--------------------------------------------
7
Pred: [2 2 0 0 0 0 0 0 0 0 0]
True: [2 2 0 0 0 0 0 0 0 0 0]
--------------------------------------------
8
Pred: [2 2 2 2