In [1]:
import json
import pandas as pd
from transformers import BertTokenizer
from torch.utils.data import Dataset, DataLoader

with open('normalised_intent_validation_slotfixed_set.json', 'r') as file:
    data = json.load(file)

utterances_validate = []
slot_types_validate = []

# Desired classes
desired_classes = {'hotel-name', 'hotel-type', 'hotel-bookday', 'hotel-internet', 'hotel-bookstay', 'hotel-area', 'hotel-parking', 'hotel-bookpeople', 'hotel-pricerange', 'train-departure', 'train-arriveby', 'train-destination', 'train-day', 'train-bookpeople', 'train-leaveat'}


for item in data:
    for scenario in item['scenarios']:
        for turn in scenario['turns']:
            if turn["speaker"] == "USER":
                current_slots = set()
                for frame in turn.get("frames", []):
                    # Filter slot values to include only those in the desired classes
                    current_slots.update(slot for slot in frame.get("state", {}).get("slot_values", {}).keys() if slot in desired_classes)
                if current_slots:  # only if slot_values are present
                    utterances_validate.append(turn["utterance"])
                    slot_types_validate.append(list(current_slots))

# Create a DataFrame
df_validated = pd.DataFrame({'utterance': utterances_validate, 'slot': slot_types_validate})


In [2]:
with open('normalised_intent_train_slotfixed_set.json', 'r') as file:
    data = json.load(file)

utterances_train = []
slot_types_train = []


for item in data:
    for scenario in item['scenarios']:
        for turn in scenario['turns']:
            if turn["speaker"] == "USER":
                current_slots = set()
                for frame in turn.get("frames", []):
                    # Filter slot values to include only those in the desired classes
                    current_slots.update(slot for slot in frame.get("state", {}).get("slot_values", {}).keys() if slot in desired_classes)
                if current_slots:  # only if slot_values are present
                    utterances_train.append(turn["utterance"])
                    slot_types_train.append(list(current_slots))

df_train = pd.DataFrame({'utterance': utterances_train, 'slot': slot_types_train})

In [4]:
with open('Synth_data/normalised_intent_test_slotfixed_set.json', 'r') as file:
    data = json.load(file)

utterances_test = []
slot_types_test = []

for item in data:
    for scenario in item['scenarios']:
        for turn in scenario['turns']:
            if turn["speaker"] == "USER":
                current_slots = set()
                for frame in turn.get("frames", []):
                    # Filter slot values to include only those in the desired classes
                    current_slots.update(slot for slot in frame.get("state", {}).get("slot_values", {}).keys() if slot in desired_classes)
                if current_slots:  # only if slot_values are present
                    utterances_test.append(turn["utterance"])
                    slot_types_test.append(list(current_slots))

df_test = pd.DataFrame({'utterance': utterances_test, 'slot': slot_types_test})

In [5]:


# Flatten the list of slots from each DataFrame and then concatenate them
all_slots = pd.concat([
    df_train['slot'].explode(),
    df_validated['slot'].explode(),
    df_test['slot'].explode()
]).unique()


slot_labels = {slot: idx for idx, slot in enumerate(all_slots)}


df_train['labels'] = df_train['slot'].apply(lambda slots: [slot_labels[slot] for slot in slots])
df_validated['labels'] = df_validated['slot'].apply(lambda slots: [slot_labels[slot] for slot in slots])
df_test['labels'] = df_test['slot'].apply(lambda slots: [slot_labels[slot] for slot in slots])


from sklearn.preprocessing import MultiLabelBinarizer

mlb = MultiLabelBinarizer(classes=sorted(slot_labels.values()))
train_labels = mlb.fit_transform(df_train['labels'])
validate_labels = mlb.transform(df_validated['labels'])
test_labels = mlb.transform(df_test['labels'])


In [6]:
# Quick check to ensure no labels are missing or incorrectly mapped
assert df_train['labels'].isna().sum() == 0, "Missing labels in training data"
assert df_validated['labels'].isna().sum() == 0, "Missing labels in validation data"
assert df_test['labels'].isna().sum() == 0, "Missing labels in test data"


In [7]:
from transformers import BertTokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', cache_dir='BERT_cache_folder')

def encode_data(tokenizer, texts, max_length=128):
    return tokenizer(texts, padding='max_length', truncation=True, max_length=max_length, return_tensors='pt')

# Encode 
encoded_inputs_train = encode_data(tokenizer, df_train['utterance'].tolist())
encoded_inputs_validate = encode_data(tokenizer, df_validated['utterance'].tolist())
encoded_inputs_test = encode_data(tokenizer, df_test['utterance'].tolist())


In [8]:
# Check to ensure alignment
assert len(encoded_inputs_train['input_ids']) == len(train_labels), "Mismatch in training data and labels count."
assert len(encoded_inputs_validate['input_ids']) == len(validate_labels), "Mismatch in validation data and labels count."
assert len(encoded_inputs_test['input_ids']) == len(test_labels), "Mismatch in test data and labels count."


In [9]:
import torch
from torch.utils.data import Dataset

class UtteranceDataset(Dataset):
    def __init__(self, encodings, labels):

        self.encodings = {k: v if isinstance(v, torch.Tensor) else torch.tensor(v) for k, v in encodings.items()}

        self.labels = torch.tensor(labels, dtype=torch.float) if not isinstance(labels, torch.Tensor) else labels

    def __getitem__(self, idx):

        item = {key: val[idx].clone().detach() for key, val in self.encodings.items()}
        item['labels'] = self.labels[idx].clone().detach()
        return item

    def __len__(self):
        return len(self.labels)

dataset_train = UtteranceDataset(encoded_inputs_train, train_labels)
dataset_validate = UtteranceDataset(encoded_inputs_validate, validate_labels)
dataset_test = UtteranceDataset(encoded_inputs_test, test_labels)


In [10]:


if torch.cuda.is_available():
    device = torch.device("cuda:2")
    print(f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}")
else:
    device = torch.device("cpu")
    print("CUDA is not available, using CPU instead.")


CUDA is available. Using GPU: NVIDIA GeForce GTX 1080 Ti


In [11]:
from transformers import TrainerCallback, TrainerState, TrainerControl
from tqdm.auto import tqdm

class EarlyStoppingCallback(TrainerCallback):
    def __init__(self, patience=3):
        self.patience = patience
        self.best_loss = float('inf')
        self.early_stop_counter = 0
        self.progress_bar = None

    def on_train_begin(self, args, state, control, **kwargs):
        print("Starting training...")
        self.progress_bar = tqdm(total=state.num_train_epochs)

    def on_epoch_begin(self, args, state, control, **kwargs):
        # No action necessary here
        pass

    def on_log(self, args, state, control, logs=None, **kwargs):

        if logs is not None:
            current_loss = logs.get('eval_loss')
            if current_loss:
                if current_loss < self.best_loss:
                    self.best_loss = current_loss
                    self.early_stop_counter = 0
                    
                else:
                    self.early_stop_counter += 1
                    if self.early_stop_counter >= self.patience:
                        control.should_training_stop = True

    def on_epoch_end(self, args, state, control, logs=None, **kwargs):
        self.progress_bar.update(1)

        if logs:
            print(f"\nEpoch {state.epoch + 1} Summary:")
            if 'loss' in logs:
                print(f"Training Loss: {logs['loss']:.4f}")
            if 'eval_loss' in logs:
                print(f"Validation Loss: {logs['eval_loss']:.4f}")
            if 'eval_accuracy' in logs:
                print(f"Validation Accuracy: {logs['eval_accuracy']:.4f}")
            print(f"Best Validation Loss So Far: {self.best_loss:.4f}")
            if self.early_stop_counter > 0:
                print(f"No improvement in validation loss for {self.early_stop_counter} consecutive epoch(s).")
            if control.should_training_stop:
                print("Early stopping triggered.")

    def on_train_end(self, args, state, control, **kwargs):
        self.progress_bar.close()
        print("Training completed.")


In [12]:
from transformers import BertForSequenceClassification

num_labels = train_labels.shape[1] 

model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=num_labels)
model.to(device)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [14]:
import torch
from torch.utils.data import DataLoader
from transformers import get_scheduler
from torch.optim import AdamW
from torch.nn import BCEWithLogitsLoss
from tqdm import tqdm

# Setup DataLoader and other components
train_loader = DataLoader(dataset_train, batch_size=8, shuffle=True, drop_last=True)
validate_loader = DataLoader(dataset_validate, batch_size=8, shuffle=False, drop_last=True)
optimizer = AdamW(model.parameters(), lr=5e-5)
num_training_steps = len(train_loader) * 10  
lr_scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=500, num_training_steps=num_training_steps)
loss_function = BCEWithLogitsLoss()


best_validation_loss = float('inf')

# Training loop
progress_bar = tqdm(total=num_training_steps, desc="Training progress")
model.train()

for epoch in range(10):  # Number of epochs
    for batch in train_loader:
        batch = {k: v.to(device) for k, v in batch.items()}
        labels = batch.pop('labels')  # Separate labels from inputs
        outputs = model(**batch)
        logits = outputs.logits  # Get model logits
        loss = loss_function(logits, labels.float())  # Calculate loss
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
        progress_bar.update(1)
    
    # Validation at the end of each epoch
    model.eval()
    total_eval_loss = 0
    with torch.no_grad():
        for batch in validate_loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            labels = batch.pop('labels')
            outputs = model(**batch)
            logits = outputs.logits
            loss = loss_function(logits, labels.float())
            total_eval_loss += loss.item()

    avg_val_loss = total_eval_loss / len(validate_loader)
    print(f"Epoch {epoch+1}, Validation Loss: {avg_val_loss}")

    # Update the best loss and save model if it's the best
    if avg_val_loss < best_validation_loss:
        best_validation_loss = avg_val_loss
        print(f"New Best Validation Loss: {best_validation_loss}")
        model.save_pretrained("./model_save_synth_slots")
        tokenizer.save_pretrained("./model_save_synth_slots")

progress_bar.close()


Training progress:  10%|█████████▋                                                                                       | 151/1510 [00:17<02:33,  8.86it/s]

Epoch 1, Validation Loss: 0.38869460423787433
New Best Validation Loss: 0.38869460423787433


Training progress:  20%|███████████████████▍                                                                             | 302/1510 [00:39<02:14,  8.97it/s]

Epoch 2, Validation Loss: 0.3336358500851525
New Best Validation Loss: 0.3336358500851525


Training progress:  30%|█████████████████████████████                                                                    | 453/1510 [01:01<01:57,  8.97it/s]

Epoch 3, Validation Loss: 0.27900029884444344
New Best Validation Loss: 0.27900029884444344


Training progress:  40%|██████████████████████████████████████▊                                                          | 604/1510 [01:22<01:41,  8.93it/s]

Epoch 4, Validation Loss: 0.2364611973365148
New Best Validation Loss: 0.2364611973365148


Training progress:  50%|████████████████████████████████████████████████▌                                                | 755/1510 [01:44<01:24,  8.89it/s]

Epoch 5, Validation Loss: 0.20277572671572366
New Best Validation Loss: 0.20277572671572366


Training progress:  60%|██████████████████████████████████████████████████████████▏                                      | 906/1510 [02:06<01:07,  8.89it/s]

Epoch 6, Validation Loss: 0.1822800429330932
New Best Validation Loss: 0.1822800429330932


Training progress:  70%|███████████████████████████████████████████████████████████████████▏                            | 1057/1510 [02:28<00:51,  8.88it/s]

Epoch 7, Validation Loss: 0.1593522677818934
New Best Validation Loss: 0.1593522677818934


Training progress:  80%|████████████████████████████████████████████████████████████████████████████▊                   | 1208/1510 [02:50<00:34,  8.86it/s]

Epoch 8, Validation Loss: 0.14834850860966575
New Best Validation Loss: 0.14834850860966575


Training progress:  90%|██████████████████████████████████████████████████████████████████████████████████████▌         | 1361/1510 [03:12<00:27,  5.37it/s]

Epoch 9, Validation Loss: 0.14875762330161202


Training progress: 100%|████████████████████████████████████████████████████████████████████████████████████████████████| 1510/1510 [03:29<00:00,  8.85it/s]

Epoch 10, Validation Loss: 0.1474788420730167
New Best Validation Loss: 0.1474788420730167


Training progress: 100%|████████████████████████████████████████████████████████████████████████████████████████████████| 1510/1510 [03:35<00:00,  7.02it/s]


In [15]:
import torch
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np
from torch.utils.data import DataLoader

test_loader = DataLoader(dataset_test, batch_size=8, shuffle=False, drop_last=True)

model.eval()
device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")

predictions, true_labels = [], []

# Evaluate the model
with torch.no_grad():
    for batch in test_loader:
        inputs = {k: v.to(device) for k, v in batch.items() if k != 'labels'}
        labels = batch['labels'].to(device)
        outputs = model(**inputs)
        logits = outputs.logits
        probs = torch.sigmoid(logits)  
        preds = (probs > 0.5).int()  
        predictions.append(preds.detach().cpu().numpy())
        true_labels.append(labels.detach().cpu().numpy())


predictions = np.vstack(predictions)
true_labels = np.vstack(true_labels)

# Calculate metrics, consider each label independently
precision, recall, f1, _ = precision_recall_fscore_support(true_labels, predictions, average='macro') 
accuracy = accuracy_score(true_labels, predictions)  

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")


Accuracy: 0.6328125
Precision: 0.6572211067372357
Recall: 0.5625514650304566
F1 Score: 0.6026484211772611


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
