In [1]:
import numpy as np
import argparse
from shutil import copyfile
from create_data import createData
from create_data import divideData

In [3]:
# Build the dialogue data with the local create_data helpers and split it.
print('Create WOZ-like dialogues. Get yourself a coffee, this might take a while.')
# createData/divideData receive an argparse-style namespace; it is built by hand
# here with the three fields this notebook sets (input dir, MultiWOZ version,
# output dir) instead of parsing a command line.
args = argparse.Namespace(
    main_dir="/kaggle/input/multiwoz2-4",
    mwz_ver="2.4",
    target_path="/kaggle/working/"
)

delex_data = createData(args)
print('Divide dialogues...')
# NOTE(review): presumably this writes train/dev/test_dials.json under
# target_path (later cells read those files from /kaggle/working/) -- confirm
# against create_data.divideData.
divideData(delex_data,args)

Create WOZ-like dialogues. Get yourself a coffee, this might take a while.
Divide dialogues...
# of dialogues: Train 8420, Val 1000, Test 999


In [4]:
import json

def load_json(file_path):
    """Parse the UTF-8 encoded JSON file at ``file_path`` and return the result."""
    handle = open(file_path, mode='r', encoding='utf-8')
    with handle:
        parsed = json.load(handle)
    return parsed

def save_json(data, file_path):
    """Write ``data`` to ``file_path`` as UTF-8 JSON.

    The output is indented by four spaces and non-ASCII characters are written
    as-is rather than as ``\\uXXXX`` escapes.
    """
    dump_options = {'indent': 4, 'ensure_ascii': False}
    with open(file_path, mode='w', encoding='utf-8') as handle:
        json.dump(data, handle, **dump_options)

def add_dialogue_acts(dialogue_acts, data_files):
    """Attach dialogue acts to every turn of every file in ``data_files``, in place.

    Args:
        dialogue_acts: mapping from dialogue id (without the ``.json`` suffix) to a
            mapping from turn index (as a string) to that turn's acts.
        data_files: paths of JSON files holding a list of dialogues; each dialogue
            has a ``dialogue_idx`` and a ``dialogue`` list of turns with ``turn_idx``.

    Each file is rewritten via ``save_json`` after its turns have been annotated
    with a ``dialogue_act`` key. Dialogues or turns without an entry in
    ``dialogue_acts`` are left unchanged.
    """
    for path in data_files:
        dialogues = load_json(path)

        for entry in dialogues:
            # The act file is keyed without the ".json" suffix.
            key = entry['dialogue_idx'].replace('.json', '')
            if key not in dialogue_acts:
                continue

            acts_by_turn = dialogue_acts[key]
            for turn in entry['dialogue']:
                turn_key = str(turn['turn_idx'])
                if turn_key in acts_by_turn:
                    turn['dialogue_act'] = acts_by_turn[turn_key]

        save_json(dialogues, path)


In [5]:
# Merge the dialogue-act annotations into the three split files.
dialogue_acts_path = '/kaggle/input/multiwoz2-4/dialogue_acts.json'
data_files = ['/kaggle/working/train_dials.json', '/kaggle/working/dev_dials.json', '/kaggle/working/test_dials.json']

dialogue_acts = load_json(dialogue_acts_path)

# Rewrites each file in data_files in place (add_dialogue_acts saves back to the same path).
add_dialogue_acts(dialogue_acts, data_files)

print("Dialogue acts have been added to all data files.")

Dialogue acts have been added to all data files.


# Slot Filling

In [6]:
import json
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertForTokenClassification, AdamW
from sklearn.metrics import classification_report
import numpy as np
from tqdm import tqdm

In [None]:
class SlotFillingDataset(Dataset):
    """Token-level BIO slot-tagging examples built from a dialogue JSON file.

    Every turn of every dialogue becomes one example: the turn's ``transcript`` is
    encoded with ``tokenizer`` (padded/truncated to ``max_len``), and each
    ``(slot_name, slot_value)`` pair in the turn's ``turn_label`` is projected onto
    the tokens as ``B-<slot>`` / ``I-<slot>`` wherever the tokenised value occurs in
    the tokenised transcript. Every other position is tagged ``O``.

    Args:
        file_path: path to a JSON list of dialogues, each with a ``dialogue`` list
            of turns carrying ``transcript`` and (optionally) ``turn_label``.
        tokenizer: object that is callable like a Hugging Face tokenizer (returning
            ``input_ids`` / ``attention_mask`` tensors of shape ``(1, max_len)``)
            and exposes ``tokenize(text)``.
        label2id: optional tag -> id mapping. When ``None`` it is derived from the
            slot names found in ``file_path`` (sorted, with ``O`` included).
        max_len: sequence length passed to the tokenizer.

    Raises:
        KeyError: if a turn produces a tag that is missing from ``label2id``.

    NOTE(review): padding and special-token positions are tagged ``O`` rather than
    an ignore index, so they take part in any loss computed over ``labels`` --
    confirm this is intended.
    """

    def __init__(self, file_path, tokenizer, label2id=None, max_len=128):
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.label2id = label2id

        if self.label2id is None:
            self.label2id = self._build_label2id(self._read_dialogues(file_path))

        self.data = self.load_and_process_data(file_path)

    @staticmethod
    def _read_dialogues(file_path):
        """Load the dialogue list; UTF-8 is explicit because the files are written that way."""
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    @staticmethod
    def _build_label2id(dialogues):
        """Return a sorted tag -> id mapping covering ``O`` and B-/I- tags of every slot seen."""
        unique_labels = {'O'}
        for dialogue in dialogues:
            for turn in dialogue['dialogue']:
                for slot_name, _ in turn.get('turn_label', []):
                    unique_labels.add(f'B-{slot_name}')
                    unique_labels.add(f'I-{slot_name}')
        return {label: idx for idx, label in enumerate(sorted(unique_labels))}

    def _bio_tags(self, text, slots, seq_len):
        """Return ``seq_len`` BIO tag strings for ``text`` given its ``(name, value)`` slots."""
        tags = ['O'] * seq_len
        if not slots:
            return tags

        # Tokenise the transcript once; it does not depend on the slot.
        text_tokens = self.tokenizer.tokenize(text)
        # Word pieces sit at positions 1..n_kept (position 0 is [CLS]). Assumes a
        # BERT-style encoding with one leading and one trailing special token, so
        # at most seq_len - 2 word pieces survive truncation.
        n_kept = min(len(text_tokens), max(seq_len - 2, 0))

        for slot_name, slot_value in slots:
            slot_tokens = self.tokenizer.tokenize(slot_value)
            span = len(slot_tokens)
            if span == 0:
                # An empty value would "match" at every position and tag everything.
                continue

            for start in range(len(text_tokens) - span + 1):
                if text_tokens[start:start + span] != slot_tokens:
                    continue
                for offset in range(span):
                    position = start + offset + 1  # +1 accounts for [CLS]
                    if position > n_kept:
                        # The rest of the match was truncated away; never tag the
                        # trailing special token or index past the sequence.
                        break
                    prefix = 'B-' if offset == 0 else 'I-'
                    tags[position] = f'{prefix}{slot_name}'

        return tags

    def load_and_process_data(self, file_path):
        """Encode every turn in ``file_path`` into input ids, attention mask and label ids."""
        processed_data = []
        for dialogue in self._read_dialogues(file_path):
            for turn in dialogue['dialogue']:
                text = turn['transcript']
                labels = turn.get('turn_label', [])

                encoding = self.tokenizer(
                    text,
                    padding='max_length',
                    truncation=True,
                    max_length=self.max_len,
                    return_tensors='pt'
                )

                tags = self._bio_tags(text, labels, len(encoding['input_ids'][0]))
                labels_tensor = torch.tensor([self.label2id[tag] for tag in tags])

                processed_data.append({
                    'input_ids': encoding['input_ids'][0],
                    'attention_mask': encoding['attention_mask'][0],
                    'labels': labels_tensor
                })

        return processed_data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

In [None]:
def collate_fn(batch):
    """Combine a list of examples into one batch.

    Each example is a dict with ``input_ids``, ``attention_mask`` and ``labels``
    tensors; the result has the same three keys, each holding the examples'
    tensors stacked along a new leading dimension.
    """
    fields = ('input_ids', 'attention_mask', 'labels')
    return {
        field: torch.stack([example[field] for example in batch])
        for field in fields
    }

In [None]:
def train_model(model, train_loader, val_loader, device, num_epochs=20,
                lr=2e-5, save_path='slot_filling_model.pt', patience=None):
    """Fine-tune ``model`` and checkpoint the weights with the lowest validation loss.

    Args:
        model: module called as ``model(input_ids=..., attention_mask=..., labels=...)``
            whose output exposes a ``loss`` attribute.
        train_loader: iterable of batches (dicts with ``input_ids``,
            ``attention_mask`` and ``labels`` tensors) used for optimisation.
        val_loader: sized iterable of the same kind of batches, used for validation.
        device: device the batch tensors are moved to.
        num_epochs: maximum number of passes over ``train_loader``.
        lr: AdamW learning rate.
        save_path: file that receives ``model.state_dict()`` whenever the average
            validation loss improves.
        patience: if not ``None``, stop once the validation loss has failed to
            improve for this many consecutive epochs. ``None`` always runs all
            ``num_epochs``.

    Raises:
        ValueError: if ``val_loader`` is empty. This is checked up front instead
            of failing with a division by zero after a full training epoch.
    """
    if len(val_loader) == 0:
        raise ValueError('val_loader is empty; cannot compute a validation loss.')

    optimizer = AdamW(model.parameters(), lr=lr)
    best_val_loss = float('inf')
    epochs_without_improvement = 0

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0
        num_train_batches = 0

        for batch in tqdm(train_loader, desc=f'Epoch {epoch + 1}/{num_epochs}'):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels
            )

            loss = outputs.loss
            total_loss += loss.item()
            num_train_batches += 1

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Report the training loss so it can be compared with the validation loss.
        if num_train_batches:
            print(f'Epoch {epoch + 1}, Training Loss: {total_loss / num_train_batches:.4f}')

        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch in val_loader:
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['labels'].to(device)

                outputs = model(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    labels=labels
                )

                val_loss += outputs.loss.item()

        avg_val_loss = val_loss / len(val_loader)
        print(f'Epoch {epoch + 1}, Validation Loss: {avg_val_loss:.4f}')

        # Saving: keep only the weights of the best epoch so far.
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            epochs_without_improvement = 0
            torch.save(model.state_dict(), save_path)
        else:
            epochs_without_improvement += 1
            if patience is not None and epochs_without_improvement >= patience:
                print(f'Early stopping after epoch {epoch + 1}: '
                      f'no improvement for {epochs_without_improvement} epoch(s).')
                break

In [7]:
def evaluate_model(model, test_loader, device, id2label):
    """Run ``model`` over ``test_loader`` and return a token-level classification report.

    Only positions whose attention mask equals 1 are scored. Predicted and gold
    label ids are mapped to tag strings through ``id2label`` and handed to
    ``classification_report`` as ``(gold, predicted)``.
    """
    model.eval()
    gold_tags = []
    predicted_tags = []

    with torch.no_grad():
        for batch in test_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            logits = model(input_ids=input_ids, attention_mask=attention_mask).logits
            predicted_rows = torch.argmax(logits, dim=2).tolist()
            gold_rows = batch['labels'].tolist()
            mask_rows = attention_mask.tolist()

            for predicted_row, gold_row, mask_row in zip(predicted_rows, gold_rows, mask_rows):
                predicted_tags.extend(
                    id2label[tag_id] for tag_id, keep in zip(predicted_row, mask_row) if keep == 1
                )
                gold_tags.extend(
                    id2label[tag_id] for tag_id, keep in zip(gold_row, mask_row) if keep == 1
                )

    return classification_report(gold_tags, predicted_tags)


# Training

In [11]:
# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Tokenizer for the same 'bert-base-uncased' checkpoint the model cell loads below.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

Using device: cuda


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]



In [12]:
# No label2id is passed, so the dataset derives the tag -> id mapping from the
# slot names found in the training split; that mapping is reused for every split.
train_dataset = SlotFillingDataset('/kaggle/working/train_dials.json', tokenizer)
label2id = train_dataset.label2id

In [13]:
# Sanity check: show the BIO tag -> id mapping derived from the training split.
print("Label to ID mapping:", label2id) 

Label to ID mapping: {'B-attraction-area': 0, 'B-attraction-name': 1, 'B-attraction-type': 2, 'B-hospital-department': 3, 'B-hotel-area': 4, 'B-hotel-book day': 5, 'B-hotel-book people': 6, 'B-hotel-book stay': 7, 'B-hotel-internet': 8, 'B-hotel-name': 9, 'B-hotel-parking': 10, 'B-hotel-pricerange': 11, 'B-hotel-stars': 12, 'B-hotel-type': 13, 'B-restaurant-area': 14, 'B-restaurant-book day': 15, 'B-restaurant-book people': 16, 'B-restaurant-book time': 17, 'B-restaurant-food': 18, 'B-restaurant-name': 19, 'B-restaurant-pricerange': 20, 'B-taxi-arriveby': 21, 'B-taxi-departure': 22, 'B-taxi-destination': 23, 'B-taxi-leaveat': 24, 'B-train-arriveby': 25, 'B-train-book people': 26, 'B-train-day': 27, 'B-train-departure': 28, 'B-train-destination': 29, 'B-train-leaveat': 30, 'I-attraction-area': 31, 'I-attraction-name': 32, 'I-attraction-type': 33, 'I-hospital-department': 34, 'I-hotel-area': 35, 'I-hotel-book day': 36, 'I-hotel-book people': 37, 'I-hotel-book stay': 38, 'I-hotel-internet

In [14]:
# train_dataset is reused from the label-mapping cell above: it was built from the
# same file and tokenizer, and label2id is the mapping it derived, so constructing
# it again with that mapping would only re-tokenise the whole training split to
# produce identical examples.
# The validation and test splits must share the training mapping so tag ids agree.
val_dataset = SlotFillingDataset('/kaggle/working/dev_dials.json', tokenizer, label2id)
test_dataset = SlotFillingDataset('/kaggle/working/test_dials.json', tokenizer, label2id)

In [15]:
# global label2id
# unique_labels = set()
# for dataset in [train_dataset, val_dataset, test_dataset]:
#     for item in dataset.data:
#         unique_labels.update(item['labels'])
# label2id = {label: idx for idx, label in enumerate(sorted(unique_labels))}

In [16]:
# unique_labels = set()
# for dataset in [train_dataset, val_dataset, test_dataset]:
#     for item in dataset.data:
#         unique_labels.update(item['labels'])
# label2id = {label: idx for idx, label in enumerate(sorted(unique_labels))}

# train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
# val_loader = DataLoader(val_dataset, batch_size=256)


In [17]:
# Only the training loader shuffles; validation keeps the dataset order.
# collate_fn stacks the per-example tensors into batched tensors.
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=128, collate_fn=collate_fn)


In [18]:
# Token-classification head sized to the number of BIO tags; the id <-> label
# mappings are passed to from_pretrained alongside num_labels.
model = BertForTokenClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=len(label2id),
    id2label={v: k for k, v in label2id.items()},  # inverse of label2id
    label2id=label2id
).to(device)

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [19]:
# Runs with train_model's default num_epochs=20; the weights of the epoch with the
# lowest validation loss are written to 'slot_filling_model.pt'.
# NOTE(review): in the logged run below, validation loss bottoms out at epoch 10
# (0.0059) and rises afterwards, so the later epochs did not improve the checkpoint.
train_model(model, train_loader, val_loader, device)

Epoch 1/20: 100%|██████████| 443/443 [19:22<00:00,  2.62s/it]


Epoch 1, Validation Loss: 0.0331


Epoch 2/20: 100%|██████████| 443/443 [19:28<00:00,  2.64s/it]


Epoch 2, Validation Loss: 0.0177


Epoch 3/20: 100%|██████████| 443/443 [19:28<00:00,  2.64s/it]


Epoch 3, Validation Loss: 0.0115


Epoch 4/20: 100%|██████████| 443/443 [19:27<00:00,  2.64s/it]


Epoch 4, Validation Loss: 0.0085


Epoch 5/20: 100%|██████████| 443/443 [19:27<00:00,  2.63s/it]


Epoch 5, Validation Loss: 0.0071


Epoch 6/20: 100%|██████████| 443/443 [19:27<00:00,  2.64s/it]


Epoch 6, Validation Loss: 0.0070


Epoch 7/20: 100%|██████████| 443/443 [19:27<00:00,  2.64s/it]


Epoch 7, Validation Loss: 0.0063


Epoch 8/20: 100%|██████████| 443/443 [19:27<00:00,  2.63s/it]


Epoch 8, Validation Loss: 0.0063


Epoch 9/20: 100%|██████████| 443/443 [19:28<00:00,  2.64s/it]


Epoch 9, Validation Loss: 0.0062


Epoch 10/20: 100%|██████████| 443/443 [19:27<00:00,  2.64s/it]


Epoch 10, Validation Loss: 0.0059


Epoch 11/20: 100%|██████████| 443/443 [19:28<00:00,  2.64s/it]


Epoch 11, Validation Loss: 0.0065


Epoch 12/20: 100%|██████████| 443/443 [19:27<00:00,  2.64s/it]


Epoch 12, Validation Loss: 0.0065


Epoch 13/20: 100%|██████████| 443/443 [19:27<00:00,  2.63s/it]


Epoch 13, Validation Loss: 0.0064


Epoch 14/20: 100%|██████████| 443/443 [19:26<00:00,  2.63s/it]


Epoch 14, Validation Loss: 0.0070


Epoch 15/20: 100%|██████████| 443/443 [19:27<00:00,  2.64s/it]


Epoch 15, Validation Loss: 0.0070


Epoch 16/20: 100%|██████████| 443/443 [19:28<00:00,  2.64s/it]


Epoch 16, Validation Loss: 0.0075


Epoch 17/20: 100%|██████████| 443/443 [19:27<00:00,  2.64s/it]


Epoch 17, Validation Loss: 0.0076


Epoch 18/20: 100%|██████████| 443/443 [19:27<00:00,  2.63s/it]


Epoch 18, Validation Loss: 0.0080


Epoch 19/20: 100%|██████████| 443/443 [19:27<00:00,  2.64s/it]


Epoch 19, Validation Loss: 0.0085


Epoch 20/20: 100%|██████████| 443/443 [19:27<00:00,  2.64s/it]


Epoch 20, Validation Loss: 0.0084


# Evaluating

In [8]:
# Same setup as in the training section, repeated so the evaluation section
# defines device and tokenizer itself.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

Using device: cuda


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]



In [9]:
# Recover the tag -> id mapping by rebuilding the training dataset; the mapping is
# derived from the sorted tag names, so it matches the one used during training
# as long as the training file is unchanged.
# NOTE(review): this tokenises the entire training split only to obtain label2id.
train_dataset = SlotFillingDataset('/kaggle/working/train_dials.json', tokenizer)
label2id = train_dataset.label2id

In [10]:
# Same model construction as in the training section; the fine-tuned weights are
# loaded into it in the next cell.
model = BertForTokenClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=len(label2id),
    id2label={v: k for k, v in label2id.items()},  # inverse of label2id
    label2id=label2id
).to(device)

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
# Restore the fine-tuned weights.
# map_location=device lets a checkpoint saved from a GPU session load on whichever
# device this session uses (including CPU-only).
# weights_only=True restricts unpickling to tensors and plain containers, which is
# all a state_dict needs, instead of executing arbitrary pickled objects.
model.load_state_dict(
    torch.load(
        '/kaggle/input/slotactdectection/transformers/default/1/slot_filling_model.pt',
        map_location=device,
        weights_only=True,
    )
)

  model.load_state_dict(torch.load('/kaggle/input/slotactdectection/transformers/default/1/best_slot_filling_model.pt'))


<All keys matched successfully>

In [18]:
def evaluate_model(model, test_loader, device, id2label):
    """Collect gold and predicted tags for every attended token in ``test_loader``.

    The model is run in eval mode without gradients. For each sequence, positions
    whose attention mask equals 1 are kept and their label ids are mapped to tag
    strings through ``id2label``.

    Returns:
        ``(flat_labels, flat_predictions)``: two equally ordered flat lists of tag
        strings covering all kept tokens of all batches.
    """
    model.eval()
    flat_labels = []
    flat_predictions = []

    with torch.no_grad():
        for batch in test_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            logits = model(input_ids=input_ids, attention_mask=attention_mask).logits
            predicted_rows = torch.argmax(logits, dim=2).tolist()
            gold_rows = labels.tolist()
            mask_rows = attention_mask.tolist()

            for predicted_row, gold_row, mask_row in zip(predicted_rows, gold_rows, mask_rows):
                for tag_id, keep in zip(predicted_row, mask_row):
                    if keep == 1:
                        flat_predictions.append(id2label[tag_id])
                for tag_id, keep in zip(gold_row, mask_row):
                    if keep == 1:
                        flat_labels.append(id2label[tag_id])

    return flat_labels, flat_predictions

In [None]:
# Inverse of label2id, used to turn predicted ids back into tag strings.
id2label = {v: k for k, v in label2id.items()}

In [21]:
# Encode the test split with the training label mapping and score it.
test_dataset = SlotFillingDataset('/kaggle/working/test_dials.json', tokenizer, label2id)
test_loader = DataLoader(test_dataset, batch_size=512, collate_fn=collate_fn)
# evaluate_model here is the later definition, which returns flat
# (gold, predicted) tag lists rather than a report.
true_labels, predicted_labels = evaluate_model(model, test_loader, device, id2label)
print("\nTest Set Results:")
print(classification_report(true_labels, predicted_labels))


Test Set Results:
                          precision    recall  f1-score   support

       B-attraction-area       0.88      0.88      0.88       208
       B-attraction-name       0.83      0.87      0.85       108
       B-attraction-type       0.83      0.91      0.87       235
            B-hotel-area       0.82      0.83      0.83       135
        B-hotel-book day       0.86      0.97      0.91       238
     B-hotel-book people       0.82      0.91      0.86       198
       B-hotel-book stay       0.89      0.98      0.93       299
        B-hotel-internet       0.71      0.85      0.77        20
            B-hotel-name       0.90      0.96      0.93       127
         B-hotel-parking       0.89      0.57      0.70        14
      B-hotel-pricerange       0.82      0.83      0.83       215
           B-hotel-stars       0.88      0.98      0.92       167
            B-hotel-type       0.65      0.93      0.76       200
       B-restaurant-area       0.83      0.87      0.85 

In [22]:
from sklearn.metrics import accuracy_score, f1_score

# Token-level summary metrics over every position with attention mask 1.
# NOTE(review): these lists include the 'O' tokens, which presumably dominate
# the counts, so both numbers mostly reflect 'O' -- confirm before quoting them
# as slot-filling quality.
accuracy = accuracy_score(true_labels, predicted_labels)
f1 = f1_score(true_labels, predicted_labels, average='weighted')  

print("\nAccuracy:", accuracy)
print("F1 Score (Weighted):", f1)


Accuracy: 0.9857674891718073
F1 Score (Weighted): 0.9860700347442711


# 