In [1]:
import json
import pandas as pd
from transformers import BertTokenizer
from torch.utils.data import Dataset, DataLoader

# Load the dataset
with open('normalised_intent_validation_slotfixed_set.json', 'r') as file: 
    data = json.load(file)

utterances_validate = []
intents_validate = []

# Navigate through the nested structure
for item in data:
    for scenario in item['scenarios']:
        for turn in scenario['turns']:
            if turn["speaker"] == "USER":
                for frame in turn.get("frames", []):
                    service = frame.get("service")
                    active_intent = frame["state"]["active_intent"]
                    
                    # Check if the service is "hotel" or "train" and intent is not "NONE"
                    if service in ["hotel", "train"] and active_intent != "none":
                        utterances_validate.append(turn["utterance"])
                        intents_validate.append(active_intent)

# Create a DataFrame
df_validated = pd.DataFrame({'utterance': utterances_validate, 'intent': intents_validate})


In [2]:
with open('normalised_intent_test_slotfixed_set.json', 'r') as file: 
    data = json.load(file)

utterances_test = []
intents_test = []

# Navigate through the nested structure
for item in data:
    for scenario in item['scenarios']:
        for turn in scenario['turns']:
            if turn["speaker"] == "USER":
                for frame in turn.get("frames", []):
                    service = frame.get("service")
                    active_intent = frame["state"]["active_intent"]
                    
                    # Check if the service is "hotel" or "train" and intent is not "NONE"
                    if service in ["hotel", "train"] and active_intent != "none":
                        utterances_test.append(turn["utterance"])
                        intents_test.append(active_intent)

# Create a DataFrame
df_test = pd.DataFrame({'utterance': utterances_test, 'intent': intents_test})

In [3]:
with open('normalised_intent_train_slotfixed_set.json', 'r') as file: 
    data = json.load(file)

utterances_train = []
intents_train = []

# Navigate through the nested structure
for item in data:
    for scenario in item['scenarios']:
        for turn in scenario['turns']:
            if turn["speaker"] == "USER":
                for frame in turn.get("frames", []):
                    service = frame.get("service")
                    active_intent = frame["state"]["active_intent"]
                    
                    # Check if the service is "hotel" or "train" and intent is not "NONE"
                    if service in ["hotel", "train"] and active_intent != "none":
                        utterances_train.append(turn["utterance"])
                        intents_train.append(active_intent)

df_train = pd.DataFrame({'utterance': utterances_train, 'intent': intents_train})

In [4]:
all_intents = pd.concat([df_train['intent'], df_validated['intent'], df_test['intent']]).unique()
intent_labels = {intent: idx for idx, intent in enumerate(all_intents)}

# Map labels according to the unified dictionary
df_train['label'] = df_train['intent'].map(intent_labels)
df_validated['label'] = df_validated['intent'].map(intent_labels)
df_test['label'] = df_test['intent'].map(intent_labels)


# Prepare labels
train_labels = df_train['label'].tolist()
validate_labels = df_validated['label'].tolist()
test_labels = df_test['label'].tolist()


In [5]:
# Check to ensure no labels are missing or incorrectly mapped
assert df_train['label'].isna().sum() == 0, "Missing labels in training data"
assert df_validated['label'].isna().sum() == 0, "Missing labels in validation data"
assert df_test['label'].isna().sum() == 0, "Missing labels in test data"


In [6]:
from transformers import BertTokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', cache_dir='BERT_cache_folder')

def encode_data(tokenizer, texts, max_length=128):
    return tokenizer(texts, padding='max_length', truncation=True, max_length=max_length, return_tensors='pt')

# Encode 
encoded_inputs_train = encode_data(tokenizer, df_train['utterance'].tolist())
encoded_inputs_validate = encode_data(tokenizer, df_validated['utterance'].tolist())

encoded_inputs_test = encode_data(tokenizer, df_test['utterance'].tolist())


In [7]:
# Check to ensure alignment
assert len(encoded_inputs_train['input_ids']) == len(train_labels), "Mismatch in training data and labels count."
assert len(encoded_inputs_validate['input_ids']) == len(validate_labels), "Mismatch in validation data and labels count."
assert len(encoded_inputs_test['input_ids']) == len(test_labels), "Mismatch in test data and labels count."


In [8]:
class UtteranceDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings 
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: val[idx].clone().detach() if torch.is_tensor(val[idx]) else val[idx] for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)  # Convert labels to tensor
        return item

    def __len__(self):
        return len(self.labels) 




dataset_train = UtteranceDataset(encoded_inputs_train, train_labels)
dataset_validate = UtteranceDataset(encoded_inputs_validate, validate_labels)
dataset_test = UtteranceDataset(encoded_inputs_test, test_labels)



In [9]:
import torch

if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}")
else:
    device = torch.device("cpu")
    print("CUDA is not available, using CPU instead.")


CUDA is available. Using GPU: NVIDIA GeForce GTX 1080 Ti


In [10]:
from transformers import TrainerCallback, TrainerState, TrainerControl
from tqdm.auto import tqdm

class EarlyStoppingCallback(TrainerCallback):
    def __init__(self, patience=3):
        self.patience = patience
        self.best_loss = float('inf')
        self.early_stop_counter = 0
        self.progress_bar = None

    def on_train_begin(self, args, state, control, **kwargs):
        print("Starting training...")
        self.progress_bar = tqdm(total=state.num_train_epochs)

    def on_epoch_begin(self, args, state, control, **kwargs):
        # No action needed
        pass

    def on_log(self, args, state, control, logs=None, **kwargs):
       
        if logs is not None:
            current_loss = logs.get('eval_loss')
            if current_loss:
                if current_loss < self.best_loss:
                    self.best_loss = current_loss
                    self.early_stop_counter = 0
                    
                else:
                    self.early_stop_counter += 1
                    if self.early_stop_counter >= self.patience:
                        control.should_training_stop = True

    def on_epoch_end(self, args, state, control, logs=None, **kwargs):
        self.progress_bar.update(1)
        if logs:
            print(f"\nEpoch {state.epoch + 1} Summary:")
            if 'loss' in logs:
                print(f"Training Loss: {logs['loss']:.4f}")
            if 'eval_loss' in logs:
                print(f"Validation Loss: {logs['eval_loss']:.4f}")
            if 'eval_accuracy' in logs:
                print(f"Validation Accuracy: {logs['eval_accuracy']:.4f}")
            print(f"Best Validation Loss So Far: {self.best_loss:.4f}")
            if self.early_stop_counter > 0:
                print(f"No improvement in validation loss for {self.early_stop_counter} consecutive epoch(s).")
            if control.should_training_stop:
                print("Early stopping triggered.")

    def on_train_end(self, args, state, control, **kwargs):
        self.progress_bar.close()
        print("Training completed.")


In [17]:
from transformers import BertForSequenceClassification

model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=len(all_intents))

model.to(device) 


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [18]:
print("Number of unique intents:", len(all_intents))


Number of unique intents: 4


In [19]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
import torch

# Calculate class weights based on the training data
class_weights = compute_class_weight('balanced', classes=np.unique(train_labels), y=train_labels)
class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)

In [20]:
import torch
from torch.utils.data import DataLoader
from transformers import get_scheduler, BertForSequenceClassification, BertTokenizer
from torch.optim import AdamW
from tqdm import tqdm
import torch.nn as nn

# Continue with your DataLoader setup
train_loader = DataLoader(dataset_train, batch_size=32, shuffle=True, drop_last=True)
validate_loader = DataLoader(dataset_validate, batch_size=32, shuffle=False, drop_last=True)


optimizer = AdamW(model.parameters(), lr=5e-5)
num_training_steps = len(train_loader) * 10  # Assuming 10 epochs
lr_scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=500, num_training_steps=num_training_steps)


loss_function = nn.CrossEntropyLoss(weight=class_weights)


best_validation_loss = float('inf')

# Training loop
progress_bar = tqdm(total=num_training_steps, desc="Training progress")
model.train()

for epoch in range(10):  # Number of epochs
    for batch in train_loader:
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        logits = outputs.logits
        labels = batch['labels']
        loss = loss_function(logits, labels)
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
        progress_bar.update(1)
    
    # Validation at the end of each epoch
    model.eval()
    total_eval_loss = 0
    with torch.no_grad():
        for batch in validate_loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            logits = outputs.logits
            labels = batch['labels']
            loss = loss_function(logits, labels)
            total_eval_loss += loss.item()

    avg_val_loss = total_eval_loss / len(validate_loader)
    print(f"Epoch {epoch+1}, Loss: {loss.item()}")
    print(f"Validation Loss: {avg_val_loss}")

    # Update the best loss and save model if it's the best
    if avg_val_loss < best_validation_loss:
        best_validation_loss = avg_val_loss
        print(f"New Best Validation Loss: {best_validation_loss}")
        model.save_pretrained("./model_save_synth_intent_temp")
        tokenizer.save_pretrained("./model_save_synth_intent_temp")

progress_bar.close()


Training progress:  10%|█████████▉                                                                                         | 55/550 [00:19<03:00,  2.75it/s]

Epoch 1, Loss: 1.279611349105835
Validation Loss: 1.2684766451517742
New Best Validation Loss: 1.2684766451517742


Training progress:  20%|███████████████████▌                                                                              | 110/550 [00:44<02:37,  2.80it/s]

Epoch 2, Loss: 0.9493752717971802
Validation Loss: 0.8177934288978577
New Best Validation Loss: 0.8177934288978577


Training progress:  30%|█████████████████████████████▍                                                                    | 165/550 [01:08<02:18,  2.78it/s]

Epoch 3, Loss: 0.5583636164665222
Validation Loss: 0.46503254771232605
New Best Validation Loss: 0.46503254771232605


Training progress:  40%|███████████████████████████████████████▏                                                          | 220/550 [01:33<01:59,  2.77it/s]

Epoch 4, Loss: 0.6113447546958923
Validation Loss: 0.4642924467722575
New Best Validation Loss: 0.4642924467722575


Training progress:  50%|█████████████████████████████████████████████████                                                 | 275/550 [01:58<01:40,  2.73it/s]

Epoch 5, Loss: 0.34106290340423584
Validation Loss: 0.35600229104359943
New Best Validation Loss: 0.35600229104359943


Training progress:  60%|██████████████████████████████████████████████████████████▊                                       | 330/550 [02:24<01:21,  2.71it/s]

Epoch 6, Loss: 0.24351772665977478
Validation Loss: 0.30965806047121686
New Best Validation Loss: 0.30965806047121686


Training progress:  70%|████████████████████████████████████████████████████████████████████▊                             | 386/550 [02:50<01:23,  1.97it/s]

Epoch 7, Loss: 0.2674175500869751
Validation Loss: 0.36411969860394794


Training progress:  80%|██████████████████████████████████████████████████████████████████████████████▌                   | 441/550 [03:11<00:56,  1.92it/s]

Epoch 8, Loss: 0.2324085235595703
Validation Loss: 0.3832139770189921


Training progress:  90%|████████████████████████████████████████████████████████████████████████████████████████▍         | 496/550 [03:33<00:28,  1.91it/s]

Epoch 9, Loss: 0.40584537386894226
Validation Loss: 0.3673328260580699


Training progress: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 550/550 [03:54<00:00,  2.61it/s]

Epoch 10, Loss: 0.20053111016750336
Validation Loss: 0.2694485733906428
New Best Validation Loss: 0.2694485733906428


Training progress: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 550/550 [04:04<00:00,  2.25it/s]


In [12]:
import torch
from torch.utils.data import DataLoader
from transformers import get_scheduler
from torch.optim import AdamW
from tqdm import tqdm

# Setup DataLoader and components
train_loader = DataLoader(dataset_train, batch_size=32, shuffle=True, drop_last=True)
validate_loader = DataLoader(dataset_validate, batch_size=32, shuffle=False, drop_last=True)
optimizer = AdamW(model.parameters(), lr=5e-5)
num_training_steps = len(train_loader) * 10  # Assuming 10 epochs
lr_scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=500, num_training_steps=num_training_steps)


best_validation_loss = float('inf')

# Training loop
progress_bar = tqdm(total=num_training_steps, desc="Training progress")
model.train()

for epoch in range(10):  # Number of epochs
    for batch in train_loader:
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
        progress_bar.update(1)
    
    # Validation at the end of each epoch
    model.eval()
    total_eval_loss = 0
    with torch.no_grad():
        for batch in validate_loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            loss = outputs.loss
            total_eval_loss += loss.item()

    avg_val_loss = total_eval_loss / len(validate_loader)
    print(f"Epoch {epoch+1}, Loss: {loss.item()}")
    print(f"Validation Loss: {avg_val_loss}")

    # Update the best loss and save model if it's the best
    if avg_val_loss < best_validation_loss:
        best_validation_loss = avg_val_loss
        print(f"New Best Validation Loss: {best_validation_loss}")
        model.save_pretrained("./model_save_synth_intent")
        tokenizer.save_pretrained("./model_save_synth_intent")

progress_bar.close()


Training progress:  10%|█████████▉                                                                                         | 55/550 [00:20<02:58,  2.77it/s]

Epoch 1, Loss: 6.511747360229492
Validation Loss: 6.663471062978108
New Best Validation Loss: 6.663471062978108


Training progress:  20%|███████████████████▌                                                                              | 110/550 [00:44<02:38,  2.78it/s]

Epoch 2, Loss: 4.585571765899658
Validation Loss: 4.243161122004191
New Best Validation Loss: 4.243161122004191


Training progress:  30%|█████████████████████████████▍                                                                    | 165/550 [01:09<02:18,  2.78it/s]

Epoch 3, Loss: 2.4873363971710205
Validation Loss: 2.1329411268234253
New Best Validation Loss: 2.1329411268234253


Training progress:  40%|███████████████████████████████████████▏                                                          | 220/550 [01:34<01:59,  2.75it/s]

Epoch 4, Loss: 1.1836904287338257
Validation Loss: 1.211892346541087
New Best Validation Loss: 1.211892346541087


Training progress:  50%|█████████████████████████████████████████████████                                                 | 275/550 [01:59<01:39,  2.76it/s]

Epoch 5, Loss: 0.7390127182006836
Validation Loss: 0.8105296889940897
New Best Validation Loss: 0.8105296889940897


Training progress:  60%|██████████████████████████████████████████████████████████▊                                       | 330/550 [02:24<01:20,  2.74it/s]

Epoch 6, Loss: 0.5064610242843628
Validation Loss: 0.5568306942780813
New Best Validation Loss: 0.5568306942780813


Training progress:  70%|████████████████████████████████████████████████████████████████████▌                             | 385/550 [02:50<01:01,  2.70it/s]

Epoch 7, Loss: 0.33322155475616455
Validation Loss: 0.4429537355899811
New Best Validation Loss: 0.4429537355899811


Training progress:  80%|██████████████████████████████████████████████████████████████████████████████▍                   | 440/550 [03:15<00:41,  2.67it/s]

Epoch 8, Loss: 0.2540509104728699
Validation Loss: 0.40886401136716205
New Best Validation Loss: 0.40886401136716205


Training progress:  90%|████████████████████████████████████████████████████████████████████████████████████████▏         | 495/550 [03:41<00:20,  2.70it/s]

Epoch 9, Loss: 0.30834314227104187
Validation Loss: 0.3633876442909241
New Best Validation Loss: 0.3633876442909241


Training progress: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 550/550 [04:06<00:00,  2.64it/s]

Epoch 10, Loss: 0.22958438098430634
Validation Loss: 0.2923645426829656
New Best Validation Loss: 0.2923645426829656


Training progress: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 550/550 [04:11<00:00,  2.18it/s]


In [13]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np
test_loader = DataLoader(dataset_test, batch_size=32, shuffle=False, drop_last=True)  # Set drop_last to True to avoid partial batches


model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

predictions, true_labels = [], []

# Evaluate the model
with torch.no_grad():
    for batch in test_loader:
        inputs = {k: v.to(device) for k, v in batch.items() if k != 'labels'}
        labels = batch['labels'].to(device)
        outputs = model(**inputs)
        logits = outputs.logits
        preds = torch.argmax(logits, dim=-1)
        predictions.extend(preds.detach().cpu().numpy())
        true_labels.extend(labels.detach().cpu().numpy())

# Calculate metrics
accuracy = accuracy_score(true_labels, predictions)
precision, recall, f1, _ = precision_recall_fscore_support(true_labels, predictions, average='macro')  # adjust 'average' as needed

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")


Accuracy: 0.8611111111111112
Precision: 0.7798915211213826
Recall: 0.8329687123592884
F1 Score: 0.8010764261984216
