# LSTM Model

In [1]:
import numpy as np
from datasets import load_dataset
from transformers import AutoTokenizer
import tensorflow as tf

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the "maptask" configuration of the "silicone" dataset; the individual
# splits are selected by name from the returned object later on.
dataset = load_dataset(path="silicone", name="maptask")

Found cached dataset silicone (C:/Users/asaju/.cache/huggingface/datasets/silicone/maptask/1.0.0/af617406c94e3f78da85f7ea74ebfbd3f297a9665cb54adbae305b03bc4442a5)
100%|██████████| 3/3 [00:00<00:00, 178.56it/s]


In [3]:
# Pull the train / validation / test splits out of the loaded dataset
train_dataset, val_dataset, test_dataset = (
    dataset[split] for split in ("train", "validation", "test")
)

# For every split, X is the "Utterance" column and y is the "Dialogue_Act" column
train_X, train_y = train_dataset["Utterance"], train_dataset["Dialogue_Act"]
val_X, val_y = val_dataset["Utterance"], val_dataset["Dialogue_Act"]
test_X, test_y = test_dataset["Utterance"], test_dataset["Dialogue_Act"]

In [4]:
# Load the pretrained tokenizer that matches the "bert-base-uncased" checkpoint
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Tokenize every split with identical settings: over-long inputs are truncated
# and padding is enabled so each split's sequences share one length.
train_encodings, val_encodings, test_encodings = (
    tokenizer(utterances, truncation=True, padding=True)
    for utterances in (train_X, val_X, test_X)
)

# Convert labels from string to one hot
# The label vocabulary is taken from the training split only; np.unique returns
# it sorted, so the label -> column mapping is deterministic across runs.
label_list = np.unique(train_y)
label_dict = {label: i for i, label in enumerate(label_list)}


def one_hot_encode(labels, label_to_index):
    """One-hot encode a sequence of labels.

    Args:
        labels: Sized iterable of labels (here: dialogue-act strings).
        label_to_index: Mapping from each label to its column index.

    Returns:
        A float NumPy array of shape (len(labels), len(label_to_index)) in
        which row i holds a single 1 in the column assigned to labels[i].

    Raises:
        KeyError: If a label is not present in label_to_index (for example a
            class that occurs in validation/test but not in the training split).
    """
    n_rows = len(labels)
    columns = np.fromiter(
        (label_to_index[label] for label in labels),
        dtype=np.int64,
        count=n_rows,
    )
    encoded = np.zeros((n_rows, len(label_to_index)))
    # Set exactly one entry per row in a single vectorized assignment
    encoded[np.arange(n_rows), columns] = 1
    return encoded


train_y_one_hot = one_hot_encode(train_y, label_dict)
val_y_one_hot = one_hot_encode(val_y, label_dict)
test_y_one_hot = one_hot_encode(test_y, label_dict)

# Create the dataset
# Each element is a (features, target) pair: features is a dict built from the
# tokenizer output for one utterance, target is that utterance's one-hot row.
# Note: this rebinds train_dataset / val_dataset / test_dataset, which until
# now referred to the raw splits of the loaded dataset.
train_dataset, val_dataset, test_dataset = (
    tf.data.Dataset.from_tensor_slices((dict(encodings), one_hot_targets))
    for encodings, one_hot_targets in (
        (train_encodings, train_y_one_hot),
        (val_encodings, val_y_one_hot),
        (test_encodings, test_y_one_hot),
    )
)

In [5]:
# Create the LSTM classifier: one softmax output per dialogue-act label.
# The output width follows the label vocabulary (12 classes in the original
# notebook) so it always matches the width of the one-hot targets.
num_classes = len(label_list)

# The datasets yield a dict of tokenizer outputs. A single Input named
# "input_ids" makes Keras select that entry by name; Sequential is a
# single-input container and is not meant to receive a dict of tensors.
input_ids = tf.keras.Input(shape=(None,), dtype="int32", name="input_ids")

# Keras can only mask index 0, so masking is enabled only when the tokenizer
# pads with id 0. With masking on, the LSTMs skip padded timesteps instead of
# running over the padding before emitting their final state.
embedded = tf.keras.layers.Embedding(
    tokenizer.vocab_size,
    256,
    mask_zero=(tokenizer.pad_token_id == 0),
)(input_ids)

# Two stacked LSTMs: the first returns the full sequence for the second to
# consume, the second returns only its final state.
sequence_features = tf.keras.layers.LSTM(256, return_sequences=True)(embedded)
utterance_features = tf.keras.layers.LSTM(256)(sequence_features)

class_probabilities = tf.keras.layers.Dense(
    num_classes, activation="softmax"
)(utterance_features)

model = tf.keras.Model(inputs=input_ids, outputs=class_probabilities)

# Compile the model
# Targets are one-hot vectors, hence the categorical (not sparse) loss/metric.
optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)
loss_fn = tf.keras.losses.CategoricalCrossentropy()
accuracy_metric = tf.keras.metrics.CategoricalAccuracy()

model.compile(optimizer=optimizer, loss=loss_fn, metrics=[accuracy_metric])

In [6]:
# Train the model

batch_size = 1024

# Shuffle with a buffer that spans the whole training set: a buffer smaller
# than the dataset only shuffles within a sliding window, so examples that are
# adjacent in the source data would tend to end up in the same batch.
train_batches = train_dataset.shuffle(train_dataset.cardinality()).batch(batch_size)
val_batches = val_dataset.batch(batch_size)

# batch_size is intentionally not passed to fit(): the datasets are already
# batched, and Keras documents that it must not be specified for dataset inputs.
history = model.fit(
    train_batches,
    epochs=10,
    validation_data=val_batches,
)

Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
