# Joint Bert Model for slot and intent classification

### Imports

In [24]:
import json
import os
import re
import time
import numpy as np
from collections import defaultdict
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from transformers import TFBertModel
from transformers import AutoTokenizer
from tensorflow.keras.layers import Dropout, Dense, Flatten, Reshape, Conv1D
from tensorflow.keras.optimizers import AdamW
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.losses import SparseCategoricalCrossentropy, CategoricalCrossentropy, BinaryCrossentropy
from tensorflow.keras.metrics import SparseCategoricalAccuracy, CategoricalAccuracy, BinaryAccuracy
from tensorflow.keras.optimizers.schedules import CosineDecay

model_name = "bert-base-uncased"

# connect MLFlow
import mlflow
mlflow.login()

# set the experiment id
mlflow.set_experiment(experiment_id="939972677444421")

mlflow.enable_system_metrics_logging()
mlflow.tensorflow.autolog()

2024/10/14 09:38:51 INFO mlflow.utils.credentials: Successfully connected to MLflow hosted tracking server! Host: https://636605817503717.7.gcp.databricks.com.


### Load Dataset

In [25]:
inputs = []
intentOutputs = []
slotOutputs = []

with open("../processing/JERTmate_final_data.json", "r", encoding="utf-8") as json_file:
    data = json.load(json_file)

    inputs = data["inputs"]
    intentOutputs = data["intentOutputs"]
    slotOutputs = data["slotOutputs"]

### Split Data - Train 80% | Validation 10% | Test 10%

In [26]:
def split_arrays(inputs, intentOutputs, slotOutputs, train_ratio, val_ratio, test_ratio):
    assert len(inputs) == len(intentOutputs) == len(slotOutputs), "All arrays must have the same length"
    
    n_total = len(inputs)
    n_train = int(n_total * train_ratio)
    n_val = int(n_total * val_ratio)
    
    # split inputs
    inputs_train, inputs_val, inputs_test = inputs[:n_train], inputs[n_train:n_train + n_val], inputs[n_train + n_val:]

    # split intents
    intentOutputs_train, intentOutputs_val, intentOutputs_test = intentOutputs[:n_train], intentOutputs[n_train:n_train + n_val], intentOutputs[n_train + n_val:]

    # split slots
    slot_type_map_train, slot_type_map_val, slot_type_map_test = [x[:50] for x in slotOutputs[:n_train]], [x[:50] for x in slotOutputs[n_train:n_train + n_val]], [x[:50] for x in slotOutputs[n_train + n_val:]]
    slot_intent_map_train, slot_intent_map_val, slot_intent_map_test = [x[50:100] for x in slotOutputs[:n_train]], [x[50:100] for x in slotOutputs[n_train:n_train + n_val]], [x[50:100] for x in slotOutputs[n_train + n_val:]]
    slot_action_map_train, slot_action_map_val, slot_action_map_test = [x[100:150] for x in slotOutputs[:n_train]], [x[100:150] for x in slotOutputs[n_train:n_train + n_val]], [x[100:150] for x in slotOutputs[n_train + n_val:]]
    slot_pointers_map_train, slot_pointers_map_val, slot_pointers_map_test = [x[150:300] for x in slotOutputs[:n_train]], [x[150:300] for x in slotOutputs[n_train:n_train + n_val]], [x[150:300] for x in slotOutputs[n_train + n_val:]]
    phantom_target_map_train, phantom_target_map_val, phantom_target_map_test = [x[300:305] for x in slotOutputs[:n_train]], [x[300:305] for x in slotOutputs[n_train:n_train + n_val]], [x[300:305] for x in slotOutputs[n_train + n_val:]]
    phantom_intent_map_train, phantom_intent_map_val, phantom_intent_map_test = [x[305:310] for x in slotOutputs[:n_train]], [x[305:310] for x in slotOutputs[n_train:n_train + n_val]], [x[305:310] for x in slotOutputs[n_train + n_val:]]
    phantom_action_map_train, phantom_action_map_val, phantom_action_map_test = [x[310:315] for x in slotOutputs[:n_train]], [x[310:315] for x in slotOutputs[n_train:n_train + n_val]], [x[310:315] for x in slotOutputs[n_train + n_val:]]
    phantom_pointers_map_train, phantom_pointers_map_val, phantom_pointers_map_test = [x[315:] for x in slotOutputs[:n_train]], [x[315:] for x in slotOutputs[n_train:n_train + n_val]], [x[315:] for x in slotOutputs[n_train + n_val:]]

    
    return (tf.constant(inputs_train), tf.constant(inputs_val), tf.constant(inputs_test)), (tf.constant(intentOutputs_train), tf.constant(intentOutputs_val), tf.constant(intentOutputs_test)), (tf.constant(slot_type_map_train), tf.constant(slot_type_map_val), tf.constant(slot_type_map_test)), (tf.constant(slot_intent_map_train), tf.constant(slot_intent_map_val), tf.constant(slot_intent_map_test)), (tf.constant(slot_action_map_train), tf.constant(slot_action_map_val), tf.constant(slot_action_map_test)), (tf.constant(slot_pointers_map_train), tf.constant(slot_pointers_map_val), tf.constant(slot_pointers_map_test)), (tf.constant(phantom_target_map_train), tf.constant(phantom_target_map_val), tf.constant(phantom_target_map_test)), (tf.constant(phantom_intent_map_train), tf.constant(phantom_intent_map_val), tf.constant(phantom_intent_map_test)), (tf.constant(phantom_action_map_train), tf.constant(phantom_action_map_val), tf.constant(phantom_action_map_test)), (tf.constant(phantom_pointers_map_train), tf.constant(phantom_pointers_map_val), tf.constant(phantom_pointers_map_test))


train_ratio = 0.6
val_ratio = 0.2
test_ratio = 0.2

(inputs_train, inputs_val, inputs_test), (intentOutputs_train, intentOutputs_val, intentOutputs_test), (slot_type_map_train, slot_type_map_val, slot_type_map_test), (slot_intent_map_train, slot_intent_map_val, slot_intent_map_test), (slot_action_map_train, slot_action_map_val, slot_action_map_test), (slot_pointers_map_train, slot_pointers_map_val, slot_pointers_map_test), (phantom_target_map_train, phantom_target_map_val, phantom_target_map_test), (phantom_intent_map_train, phantom_intent_map_val, phantom_intent_map_test), (phantom_action_map_train, phantom_action_map_val, phantom_action_map_test), (phantom_pointers_map_train, phantom_pointers_map_val, phantom_pointers_map_test) = split_arrays(inputs, intentOutputs, slotOutputs, train_ratio, val_ratio, test_ratio)

### Define Model

In [27]:
class JointIntentAndSlotFillingModel(tf.keras.Model):

    def __init__(self, intent_vector_length=None, total_slot_number=None, total_phantom_slot_number=None, slot_types=None, slot_intents=None, pointer_possibilities=None, model_name=model_name, dropout_prob=0.05):
        super().__init__(name="joint_intent_slot")
        #   ** GENERAL LAYERS **
        self.bert = TFBertModel.from_pretrained(model_name) # BERT model
        self.dropout = Dropout(dropout_prob) # basic dropout layer
        self.flatten = Flatten() # flatten layer



        #   ** SLOT LAYERS **
        # LHS compressor
        self.LHSC_conv1 = Conv1D(filters=256, kernel_size=1, activation='relu', padding='same', name='LHSC_conv1')
        self.LHSC_conv2 = Conv1D(filters=64, kernel_size=1, activation='relu', padding='same', name='LHSC_conv2')
        self.LHSC_conv3 = Conv1D(filters=32, kernel_size=1, activation='relu', padding='same', name='LHSC_conv3')

        # slot output layers
        self.slot_type_dense = Dense(total_slot_number * slot_types, activation='softmax', name="slot_type_output")
        self.slot_type_reshape = Reshape((total_slot_number, slot_types))
        
        self.slot_intent_dense = Dense(total_slot_number * slot_intents, activation='softmax', name="slot_intent_output")
        self.slot_intent_reshape = Reshape((total_slot_number, slot_intents))
        
        self.slot_action_output = Dense(total_slot_number, activation='sigmoid', name="slot_action_output")
        
        self.slot_pointers_dense = Dense(total_slot_number * pointer_possibilities * 3, activation='softmax', name="slot_pointers_output")
        self.slot_pointers_reshape = Reshape((total_slot_number * 3, pointer_possibilities))

        # Phantom slot output layers
        self.phantom_slot_target_dense = Dense(total_phantom_slot_number * pointer_possibilities, activation='softmax', name="phantom_slot_target_output")
        self.phantom_slot_target_reshape = Reshape((total_phantom_slot_number, pointer_possibilities))
        
        self.phantom_slot_intent_dense = Dense(total_phantom_slot_number * slot_intents, activation='softmax', name="phantom_slot_intent_output")
        self.phantom_slot_intent_reshape = Reshape((total_phantom_slot_number, slot_intents))
        
        self.phantom_slot_action_output = Dense(total_phantom_slot_number, activation='sigmoid', name="phantom_slot_action_output")
        
        self.phantom_slot_pointers_dense = Dense(total_phantom_slot_number * pointer_possibilities * 3, activation='softmax', name="phantom_slot_pointers_output")
        self.phantom_slot_pointers_reshape = Reshape((total_phantom_slot_number * 3, pointer_possibilities))



        #  ** INTENT LAYERS **
        # processing layers
        self.intent_processor_one = Dense(294, activation="relu", name="intent_processor_one")
        self.intent_processor_two = Dense(147, activation="relu", name="intent_processor_two")

        # output layer
        self.intent_output = Dense(intent_vector_length, activation='softmax', name="intent_output")

        # Build the model with input shape (None, 886)
        self.build(input_shape=(None, 886))

    def call(self, inputs, **kwargs):
        bertInputs = inputs[:, :150]

        # run BERT
        trained_bert = self.bert(bertInputs, **kwargs)
        pooled_output = trained_bert.pooler_output
        sequence_output = trained_bert.last_hidden_state

        #   ** SLOT CLASSIFICATION **
        # use CNN to compress the sequence output
        conv_output = self.LHSC_conv1(sequence_output)
        conv_output = self.LHSC_conv2(conv_output)
        conv_output = self.LHSC_conv3(conv_output)

        # flatten the compressed output
        flattened_LHSC_output = self.flatten(conv_output)

        # slot output
        slot_output_input = self.dropout(tf.concat([flattened_LHSC_output, tf.cast(inputs[:, 150:], dtype=tf.float32)], axis=-1), training=kwargs.get("training", False))
        
        slot_type_output = self.slot_type_dense(slot_output_input)
        slot_type_output = self.slot_type_reshape(slot_type_output)
        
        slot_intent_output = self.slot_intent_dense(slot_output_input)
        slot_intent_output = self.slot_intent_reshape(slot_intent_output)
        
        slot_action_output = self.slot_action_output(slot_output_input)
        
        slot_pointers_output = self.slot_pointers_dense(slot_output_input)
        slot_pointers_output = self.slot_pointers_reshape(slot_pointers_output)

        # Phantom slot outputs
        phantom_target_output = self.phantom_slot_target_dense(slot_output_input)
        phantom_target_output = self.phantom_slot_target_reshape(phantom_target_output)
        
        phantom_intent_output = self.phantom_slot_intent_dense(slot_output_input)
        phantom_intent_output = self.phantom_slot_intent_reshape(phantom_intent_output)
        
        phantom_action_output = self.phantom_slot_action_output(slot_output_input)
        
        phantom_pointers_output = self.phantom_slot_pointers_dense(slot_output_input)
        phantom_pointers_output = self.phantom_slot_pointers_reshape(phantom_pointers_output)



        #   ** INTENT CLASSIFICATION **
        # intent processor
        intent_processor_one_input = self.dropout(tf.concat([pooled_output, tf.cast(inputs[:, 150:150 + 114], dtype=tf.float32)], axis=-1), training=kwargs.get("training", False))
        intent_processor_one = self.intent_processor_one(intent_processor_one_input)

        intent_processor_two_input = self.dropout(intent_processor_one, training=kwargs.get("training", False))
        intent_processor_two = self.intent_processor_two(intent_processor_two_input)

        # intent output
        intent_output_input = self.dropout(intent_processor_two, training=kwargs.get("training", False))
        intent_output = self.intent_output(intent_output_input)

        # Return outputs as a dictionary
        return {
            "intent": intent_output,
            "slot_type": slot_type_output,
            "slot_intent": slot_intent_output,
            "slot_action": slot_action_output,
            "slot_pointers": slot_pointers_output,
            "phantom_slot_target": phantom_target_output,
            "phantom_slot_intent": phantom_intent_output,
            "phantom_slot_action": phantom_action_output,
            "phantom_slot_pointers": phantom_pointers_output
        }

    def get_config(self):
        config = super(JointIntentAndSlotFillingModel, self).get_config()
        return config
    
    def build(self, input_shape):
        super().build(input_shape)
        self.input_shape = input_shape

    @classmethod
    def from_config(cls, config):
        return cls(**config)
    
joint_model = JointIntentAndSlotFillingModel(intent_vector_length=38, total_slot_number=50, total_phantom_slot_number=5, slot_types=15, slot_intents=4, pointer_possibilities=18)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

### Train and Compile

In [28]:
epochs = 30
batch_size = 30

In [29]:
# Define a learning rate schedule (e.g., cosine decay)
total_steps = epochs * (len(inputs_train) / batch_size)
warmup_steps = total_steps * 0.1
lr_schedule = CosineDecay(
    initial_learning_rate=1e-7,
    decay_steps=total_steps - warmup_steps,
    name='CosineDecay',
    warmup_target=5e-5,
    warmup_steps=warmup_steps
)

# optimizer
opt = AdamW(learning_rate=lr_schedule, weight_decay=3e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-8)

# Model checkpoint callback
checkpoint_callback = ModelCheckpoint(
    filepath="model_epoch_{epoch:02d}.keras",  # Save the model with the epoch number in the filename
    save_freq='epoch',
    save_weights_only=False,
    verbose=1 
)

# loss functions
losses = {
    "intent": CategoricalCrossentropy(name="intent_loss"), 
    "slot_type": SparseCategoricalCrossentropy(from_logits=True, name="slot_type_loss"),
    "slot_intent": SparseCategoricalCrossentropy(from_logits=True, name="slot_intent_loss"),
    "slot_action": BinaryCrossentropy(name="slot_actionable_loss"), 
    "slot_pointers": SparseCategoricalCrossentropy(from_logits=True, name="slot_pointer_loss"),
    "phantom_slot_target": SparseCategoricalCrossentropy(from_logits=True, name="phantom_slot_target_loss"),
    "phantom_slot_intent": SparseCategoricalCrossentropy(from_logits=True, name="phantom_slot_intent_loss"),
    "phantom_slot_action": BinaryCrossentropy(name="phantom_slot_actionable_loss"), 
    "phantom_slot_pointers": SparseCategoricalCrossentropy(from_logits=True, name="phantom_slot_pointer_loss"),
}

# loss weights
loss_weights = {
    "intent": 1.5,
    "slot_type": 1.0,
    "slot_intent": 1.0,
    "slot_action": 0.6,
    "slot_pointers": 1.0,
    "phantom_slot_target": 0.8,
    "phantom_slot_intent": 0.8,
    "phantom_slot_action": 0.6,
    "phantom_slot_pointers": 0.8,
}

# metrics
metrics = {
    "intent": [
        CategoricalAccuracy(name="intent_accuracy"), 
    ],
    "slot_type": [
        SparseCategoricalAccuracy(name="slot_type_accuracy"), 
    ],
    "slot_intent": [
        SparseCategoricalAccuracy(name="slot_intent_accuracy"), 
    ],
    "slot_action": [
        BinaryAccuracy(name="slot_action_accuracy"), 
    ],
    "slot_pointers": [
        SparseCategoricalAccuracy(name="slot_pointer_accuracy"), 
    ],
    "phantom_slot_target": [
        SparseCategoricalAccuracy(name="phantom_slot_target_accuracy"), 
    ],
    "phantom_slot_intent": [
        SparseCategoricalAccuracy(name="phantom_slot_intent_accuracy"), 
    ],
    "phantom_slot_action": [
        BinaryAccuracy(name="phantom_slot_action_accuracy"), 
    ],
    "phantom_slot_pointers": [
        SparseCategoricalAccuracy(name="phantom_slot_pointer_accuracy"), 
    ]
}

# compile model
joint_model.compile(optimizer=opt, loss=losses, loss_weights=loss_weights, metrics=metrics)

# train!
history = joint_model.fit(
    x=inputs_train,
    y={
        "intent_output": intentOutputs_train,
        "slot_type_output": slot_type_map_train,
        "slot_intent_output": slot_intent_map_train,
        "slot_action_output": slot_action_map_train,
        "slot_pointers_output": slot_pointers_map_train,
        "phantom_slot_target_output": phantom_target_map_train,
        "phantom_slot_intent_output": phantom_intent_map_train,
        "phantom_slot_action_output": phantom_action_map_train,
        "phantom_slot_pointers_output": phantom_pointers_map_train
    }, 
    validation_data=(
        inputs_val,
        {
            "intent_output": intentOutputs_val,
            "slot_type_output": slot_type_map_val,
            "slot_intent_output": slot_intent_map_val,
            "slot_action_output": slot_action_map_val,
            "slot_pointers_output": slot_pointers_map_val,
            "phantom_slot_target_output": phantom_target_map_val,
            "phantom_slot_intent_output": phantom_intent_map_val,
            "phantom_slot_action_output": phantom_action_map_val,
            "phantom_slot_pointers_output": phantom_pointers_map_val
        }
    ),
    epochs=epochs,
    batch_size=batch_size,
    shuffle=True,
    callbacks=[checkpoint_callback],
)

2024/10/14 09:39:06 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.
2024/10/14 09:39:06 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '830678b04d5748d0bde2a1db7cbfb17b', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/30
[1m1741/1741[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - intent_intent_accuracy: 0.2927 - loss: 16.9208 - phantom_slot_action_phantom_slot_action_accuracy: 0.8288 - phantom_slot_intent_phantom_slot_intent_accuracy: 0.8496 - phantom_slot_pointers_phantom_slot_pointer_accuracy: 0.7958 - phantom_slot_target_phantom_slot_target_accuracy: 0.8068 - slot_action_slot_action_accuracy: 0.8530 - slot_intent_slot_intent_accuracy: 0.8241 - slot_pointers_slot_pointer_accuracy: 0.7438 - slot_type_slot_type_accuracy: 0.6875
Epoch 1: saving model to model_epoch_01.keras




[1m1741/1741[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5376s[0m 3s/step - intent_intent_accuracy: 0.2928 - loss: 16.9203 - phantom_slot_action_phantom_slot_action_accuracy: 0.8289 - phantom_slot_intent_phantom_slot_intent_accuracy: 0.8497 - phantom_slot_pointers_phantom_slot_pointer_accuracy: 0.7959 - phantom_slot_target_phantom_slot_target_accuracy: 0.8069 - slot_action_slot_action_accuracy: 0.8530 - slot_intent_slot_intent_accuracy: 0.8241 - slot_pointers_slot_pointer_accuracy: 0.7439 - slot_type_slot_type_accuracy: 0.6876 - val_intent_intent_accuracy: 0.5504 - val_loss: 15.3658 - val_phantom_slot_action_phantom_slot_action_accuracy: 1.0000 - val_phantom_slot_intent_phantom_slot_intent_accuracy: 1.0000 - val_phantom_slot_pointers_phantom_slot_pointer_accuracy: 1.0000 - val_phantom_slot_target_phantom_slot_target_accuracy: 0.9999 - val_slot_action_slot_action_accuracy: 0.9806 - val_slot_intent_slot_intent_accuracy: 0.9889 - val_slot_pointers_slot_pointer_accuracy: 0.9923 - val_



[1m1741/1741[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5298s[0m 3s/step - intent_intent_accuracy: 0.5563 - loss: 15.2618 - phantom_slot_action_phantom_slot_action_accuracy: 1.0000 - phantom_slot_intent_phantom_slot_intent_accuracy: 1.0000 - phantom_slot_pointers_phantom_slot_pointer_accuracy: 1.0000 - phantom_slot_target_phantom_slot_target_accuracy: 1.0000 - slot_action_slot_action_accuracy: 0.9806 - slot_intent_slot_intent_accuracy: 0.9889 - slot_pointers_slot_pointer_accuracy: 0.9922 - slot_type_slot_type_accuracy: 0.9088 - val_intent_intent_accuracy: 0.5744 - val_loss: 15.0484 - val_phantom_slot_action_phantom_slot_action_accuracy: 1.0000 - val_phantom_slot_intent_phantom_slot_intent_accuracy: 1.0000 - val_phantom_slot_pointers_phantom_slot_pointer_accuracy: 1.0000 - val_phantom_slot_target_phantom_slot_target_accuracy: 1.0000 - val_slot_action_slot_action_accuracy: 0.9806 - val_slot_intent_slot_intent_accuracy: 0.9889 - val_slot_pointers_slot_pointer_accuracy: 0.9923 - val_



[1m1741/1741[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5257s[0m 3s/step - intent_intent_accuracy: 0.5759 - loss: 15.0561 - phantom_slot_action_phantom_slot_action_accuracy: 1.0000 - phantom_slot_intent_phantom_slot_intent_accuracy: 1.0000 - phantom_slot_pointers_phantom_slot_pointer_accuracy: 1.0000 - phantom_slot_target_phantom_slot_target_accuracy: 1.0000 - slot_action_slot_action_accuracy: 0.9801 - slot_intent_slot_intent_accuracy: 0.9887 - slot_pointers_slot_pointer_accuracy: 0.9922 - slot_type_slot_type_accuracy: 0.9045 - val_intent_intent_accuracy: 0.5890 - val_loss: 15.0234 - val_phantom_slot_action_phantom_slot_action_accuracy: 1.0000 - val_phantom_slot_intent_phantom_slot_intent_accuracy: 1.0000 - val_phantom_slot_pointers_phantom_slot_pointer_accuracy: 1.0000 - val_phantom_slot_target_phantom_slot_target_accuracy: 1.0000 - val_slot_action_slot_action_accuracy: 0.9803 - val_slot_intent_slot_intent_accuracy: 0.9888 - val_slot_pointers_slot_pointer_accuracy: 0.9921 - val_

2024/10/14 21:22:37 INFO mlflow.tracking._tracking_service.client: 🏃 View run redolent-hen-104 at: https://636605817503717.7.gcp.databricks.com/ml/experiments/939972677444421/runs/830678b04d5748d0bde2a1db7cbfb17b.
2024/10/14 21:22:37 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://636605817503717.7.gcp.databricks.com/ml/experiments/939972677444421.
2024/10/14 21:22:37 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2024/10/14 21:22:38 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!


TypeError: This optimizer was created with a `LearningRateSchedule` object as its `learning_rate` constructor argument, hence its learning rate is not settable. If you need the learning rate to be settable, you should instantiate the optimizer with a float `learning_rate` argument.

### Evaluate Model

keep in mind -> test slot accuracy will be higher than reality.
        Because 90% of the data points are 0, it can just guess 0 and be right 85% of the time

In [None]:
test_loss, test_intent_acc, test_slot_type, test_slot_intent, test_slot_action, test_slot_pointers, test_phantom_target, test_phantom_intent, test_phantom_action, test_phantom_pointers = joint_model.evaluate(x=inputs_test, y=(intentOutputs_test, slot_type_map_test, slot_intent_map_test, slot_action_map_test, slot_pointers_map_test, phantom_target_map_test, phantom_intent_map_test, phantom_action_map_test, phantom_pointers_map_test), batch_size=batch_size)

print(f"Test Intent Accuracy: {test_intent_acc}")
print(f"Test Slot Type Accuracy: {test_slot_type}")
print(f"Test Slot Intent Accuracy: {test_slot_intent}")
print(f"Test Slot Action Accuracy: {test_slot_action}")
print(f"Test Slot Pointers Accuracy: {test_slot_pointers}")
print(f"Test Phantom Target Accuracy: {test_phantom_target}")
print(f"Test Phantom Intent Accuracy: {test_phantom_intent}")
print(f"Test Phantom Action Accuracy: {test_phantom_action}")
print(f"Test Phantom Pointers Accuracy: {test_phantom_pointers}")

### Inference

In [None]:
tokenizer = AutoTokenizer.from_pretrained(model_name)

def inferConversation(conversation):
    conversation = conversation.split("|")
    memory = []
    for i in range(len(conversation)):
        textInput = ''
        for i in range(max(i - 2, 0), i):
            textInput += conversation[i] + ' [SEP] '
        textInput += conversation[i]
    
        output = inferSentence(textInput, memory)

        # update memory
        if len(memory) > 2:
            memory.pop(0)    
        memory.append(output)

def inferSentence(sentence, memory):
    # tokenize
    input = tokenizer(sentence, return_tensors="tf", padding="max_length", max_length=150, truncation=True)

    # compile memory
    intentMemory = []
    slotMemory = []
    for key in input:
        intentMemory.extend(key["intent_output"])
        slotMemory.extend([key["slot_type_output"], key["slot_intent_output"], key["slot_action_output"], key["slot_pointers_output"], key["phantom_slot_target_output"], key["phantom_slot_intent_output"], key["phantom_slot_action_output"], key["phantom_slot_pointers_output"]])

    input.extend(intentMemory)
    input.extend(slotMemory)

    input = memory.extend(input)

    # predict
    output = joint_model.predict(input)

    return output
