In [None]:
from datasets import load_dataset, load_from_disk
import pandas as pd
import json
pd.set_option('display.max_colwidth', None)

In [None]:
!pip install keras_preprocessing

# Download the word-segmentation package (VnCoreNLP)

In [None]:
# Install the Python wrapper and create the directory layout for the models.
!pip install vncorenlp
!mkdir -p vncorenlp/models/wordsegmenter
# Fetch the VnCoreNLP 1.1.1 jar and the two word-segmenter model files.
!wget https://raw.githubusercontent.com/vncorenlp/VnCoreNLP/master/VnCoreNLP-1.1.1.jar
!wget https://raw.githubusercontent.com/vncorenlp/VnCoreNLP/master/models/wordsegmenter/vi-vocab
!wget https://raw.githubusercontent.com/vncorenlp/VnCoreNLP/master/models/wordsegmenter/wordsegmenter.rdr
# Move the downloads from the working directory into that layout.
!mv VnCoreNLP-1.1.1.jar vncorenlp/ 
!mv vi-vocab vncorenlp/models/wordsegmenter/
!mv wordsegmenter.rdr vncorenlp/models/wordsegmenter/

# Load datasets for stage 2

## Load train data

In [None]:
# Read the stage-2 training set: one JSON object per line (JSON Lines).
# The encoding is explicit because the sentences are Vietnamese and the
# platform default encoding is not guaranteed to be UTF-8.
lines = []
with open("/kaggle/input/soict2023-slu/SLU/train_20230909.jsonl", encoding="utf-8") as f:
    for line in f:  # stream the file instead of materialising it with readlines()
        if line.strip():  # a blank (e.g. trailing) line would make json.loads raise
            lines.append(json.loads(line))
train_stage2 = pd.DataFrame(lines)
train_stage2.sample(5)

In [None]:
train_stage2["intent"].unique()

In [None]:
train_stage2["entities"].apply(lambda x: 
                               [list(i.values())[0] for i in x]).explode().unique()

## Word segment

In [None]:
from vncorenlp import VnCoreNLP

# Start VnCoreNLP with only the "wseg" annotator, using the jar downloaded
# into vncorenlp/ and the JVM heap option -Xmx500m.
rdrsegmenter = VnCoreNLP(
    "vncorenlp/VnCoreNLP-1.1.1.jar",
    annotators="wseg",
    max_heap_size='-Xmx500m',
)

# Smoke test: tokenize one sample sentence and print the first result.
text = 'Tôn Ngộ Không phò Đường Tăng đi thỉnh kinh tại Tây Trúc'
words = rdrsegmenter.tokenize(text)[0]
print('text_masked_tok: \n', words)

In [None]:
!pip install pandarallel
from pandarallel import pandarallel
pandarallel.initialize(progress_bar=True)

In [None]:
# import re
# chars_to_ignore_regex = '[\`\'\,\?\.\!\-\;\:\/"]'

# def preprocessing(x):
#     x = x.strip()
#     words = []
#     for word in x.split():
#         word = word.strip()
#         words.append(word)
#     x = " ".join(words) 
#     x = re.sub(chars_to_ignore_regex, '', x)
#     x = rdrsegmenter.tokenize(x.lower())[0]
#     return x

# train_stage2["words"] = train_stage2["sentence"].parallel_apply(preprocessing)

In [None]:
train_stage2[train_stage2["sentence"].str.contains("/")]

## Replace entities in the sentence

In [None]:
import re
# Characters deleted outright during normalisation: ` ' ? . ! - ; / "
# Written as a raw string so the backslashes reach the regex engine untouched
# and Python does not warn about invalid string escapes such as "\?".
# Square brackets and ':' are deliberately absent, so "[label : value]"
# annotations survive the clean-up.
chars_to_ignore_regex = r'[\`\'\?\.\!\-\;\/"]'
def remove_special_characters(x):
    """Normalise one sentence.

    Lower-cases ``x``, strips leading/trailing whitespace, deletes every
    character matched by ``chars_to_ignore_regex`` and replaces each comma
    with a single space (runs of spaces are not collapsed).

    Args:
        x: the raw sentence (``str``).

    Returns:
        The normalised sentence.
    """
    x = x.lower().strip()
    x = re.sub(chars_to_ignore_regex, '', x)
    # A plain replace is enough for a literal comma.
    return x.replace(",", " ")

def replace_entities(x):
    """Normalise ``x`` and put spaces around every ``[label:value]`` annotation.

    The sentence first goes through ``remove_special_characters``. Each
    annotation, together with any non-space text glued to its left or right,
    is then rewritten as ``<left> [<label> : <value>] <right>`` so that the
    annotation is separated from neighbouring words.
    """
    cleaned = remove_special_characters(x)
    annotation = re.compile(r'(\S*)\[([^:\]]+):\s*([^:\]]+)\](\S*)')

    def _pad(match):
        before, label, value, after = match.groups()
        return before + " [" + label + " : " + value + "] " + after

    return annotation.sub(_pad, cleaned)

def word_segment(x):
    """Return the first element of ``rdrsegmenter.tokenize(x)``.

    Presumably this is the token list of the first sentence found in ``x``;
    confirm against the vncorenlp documentation.
    """
    segmented = rdrsegmenter.tokenize(x)
    return segmented[0]

def preprocessing(x):
    """Run the text pipeline on ``x``.

    Only ``replace_entities`` is applied; ``word_segment`` is not called.
    """
    return replace_entities(x)

# train_stage2["words"] = train_stage2["sentence"].parallel_apply(replace_entities).str.split()

## Make words (tokenize the sentence)

In [None]:
import re
def make_words(df):
    """Return the list of words of one row's ``sentence_annotation``.

    The annotated sentence is normalised with ``remove_special_characters``
    and split into plain-text pieces and ``[label : value]`` annotations.
    Plain text contributes its whitespace-separated words; an annotation
    contributes only the words of its value (the text between the first and
    second ':' of the bracket content).

    Args:
        df: one DataFrame row (the function is applied with ``axis=1``).
    """
    # Captures the content between one pair of square brackets.
    inside_brackets = r"\[([^\]]+)\]"

    normalised = remove_special_characters(df["sentence_annotation"])
    tokens = []
    # The capturing group makes re.split keep each annotation as its own piece.
    for piece in re.split(r"(\[[^\]]+\])", normalised):
        found = re.findall(inside_brackets, piece)
        if not found:
            tokens.extend(piece.strip().split())
            continue
        value = found[0].split(":")[1].strip()
        tokens.extend(value.split())
    return tokens
train_stage2["words"] = train_stage2.parallel_apply(make_words, axis=1)
train_stage2["words"]

## Make tags for the sentences

In [None]:
# Provisional "tags" column: for every entity dict of a row, map its second
# value (lower-cased, stripped, spaces turned into "_") to its first value.
train_stage2["tags"] = train_stage2["entities"].parallel_apply(
    lambda entities: [
        {list(entity.values())[1].lower().strip().replace(" ", "_"): list(entity.values())[0]}
        for entity in entities
    ]
)

In [None]:
train_stage2["tags"]

In [None]:
train_stage2.head(1)

In [None]:
# Identity mapping over the first value of every entity dict in the data set.
# make_tags looks labels up here and falls back to "O" for anything unknown.
label_mapping = {
    entity_label: entity_label
    for entity_label in train_stage2["entities"]
    .apply(lambda entities: [list(entity.values())[0] for entity in entities])
    .explode()
    .unique()
}

import re
def make_tags(df):
    """Return one slot label per word of a row's ``sentence_annotation``.

    The sentence is normalised exactly like in ``make_words`` and split into
    plain-text pieces and ``[label : value]`` annotations. Every word of
    plain text gets the label "O"; every word of an annotation value gets the
    annotation's label looked up in ``label_mapping`` ("O" when unknown).

    Args:
        df: one DataFrame row (the function is applied with ``axis=1``).

    Returns:
        A list of labels, intended to line up with the ``words`` column.
    """
    # Captures the content between one pair of square brackets.
    pattern = r"\[([^\]]+)\]"

    sentence_standard = remove_special_characters(df["sentence_annotation"])
    # Put spaces around annotations that are glued to neighbouring text.
    sentence_standard = re.sub(
        r'(\S*)\[([^:\]]+):\s*([^:\]]+)\](\S*)',
        lambda match: match.group(1) + " [" + match.group(2) + " : " + match.group(3) + "] " + match.group(4),
        sentence_standard,
    )

    # The unused list of all annotations that used to be collected here cost
    # one extra regex scan per row and has been dropped.
    labels = []
    # The capturing group makes re.split keep each annotation as its own piece.
    for piece in re.split(r"(\[[^\]]+\])", sentence_standard):
        match = re.findall(pattern, piece)
        if match:
            parts = match[0].split(":")
            label = parts[0].strip()
            value = parts[1].strip()
            mapped_label = label_mapping.get(label, "O")
            labels.extend([mapped_label] * len(value.split()))
        else:
            labels.extend(["O"] * len(piece.split()))

    return labels
    
train_stage2["tags"] = train_stage2.parallel_apply(make_tags, axis=1)

In [None]:
train_stage2[["sentence_annotation", "words", "tags"]].sample(3)

In [None]:
# Rows whose word list and tag list have different lengths.
wrong_anno = train_stage2.loc[
    train_stage2["words"].map(len).ne(train_stage2["tags"].map(len))
]
wrong_anno

### Vì đề bài không cho relabel lại các annotation bị sai, do đó ta sẽ giữ nguyên mà không fix các annotation này

In [None]:
wrong_anno.shape[0]

Có một số từ bị annotation sai. Đoạn code relabel bên dưới đã được comment lại (không chạy) vì đề bài không cho phép relabel; nó chỉ được giữ lại để tham khảo.

In [None]:
# annos = [
#     "[ command : tăng ] [ device : quạt hút mùi ] lên số [ target number : 3 ] vào [ target number : 23 ] giờ 40 phút",
#     "[ command : tăng ] cho em cái [ device : quạt hút mùi ] lên số [ target number : 3 ] vào lúc [ target number : 23 ] giờ 40 phút nhá",
#     "[ command : tăng ] [ device : quạt hút mùi ] lên [ target number : 3 ] lúc [ target number : 23 ] giờ 40 phút",
#     "em ơi [ command : giảm ] cho anh cái [ device : bóng ] ở [ location : phòng thờ ] xuống [ target number : 77% ] với",
#     "[ command : tăng ] cái [ device : quạt hút mùi ] lên số [ target number : 3 ] lúc [ target number : 23 ] giờ 40 phút nhá",
#     "hãy [ command : tăng ] giúp tôi cái [ device : quạt hút mùi ] lên số [ target number : 3 ] vào lúc [ target number : 23 ] giờ 40 phút nhé"
# ]
# train_stage2.loc[wrong_anno.index, "sentence_annotation"] = annos
# train_stage2.loc[wrong_anno.index]

#### Tính lại giá trị `tags` sau khi fix annotation

In [None]:
# train_stage2["tags"] = train_stage2.parallel_apply(make_tags, axis=1)

In [None]:
# train_stage2[train_stage2["words"].apply(len) != train_stage2["tags"].apply(len)]

---

In [None]:
!mkdir data
from sklearn.model_selection import train_test_split
train, test = train_test_split(train_stage2, test_size=0.1, random_state=42)

with open('data/full_stage2.jsonl', 'w', encoding='utf-8') as file:
    train_stage2.to_json("data/full_stage2.jsonl", lines=True, orient="records", force_ascii=False)

with open('data/train_stage2.jsonl', 'w', encoding='utf-8') as file:
    train.to_json("data/train_stage2.jsonl", lines=True, orient="records", force_ascii=False)
    
with open('data/valid_stage2.jsonl', 'w', encoding='utf-8') as file:
    test.to_json("data/valid_stage2.jsonl", lines=True, orient="records", force_ascii=False)

In [None]:
# Distinct slot labels of the training split, one per line.
slots = train['tags'].explode().unique()
# Explicit UTF-8 so the file does not depend on the platform default encoding.
with open("data/slot", "w", encoding="utf-8") as f:
    f.writelines(line + "\n" for line in slots)

In [None]:
# Distinct intents of the training split, one per line.
intents = train['intent'].unique()
# Explicit UTF-8 so the file does not depend on the platform default encoding.
with open("data/intent", "w", encoding="utf-8") as f:
    f.writelines(line + "\n" for line in intents)

In [None]:
slots, intents

# Training the model

In [None]:
%%writefile config.py
import torch

# --- Data files -------------------------------------------------------------
train_file = 'data/train_stage2.jsonl'
valid_file = 'data/valid_stage2.jsonl'

vocab_intent_file = 'data/intent'
vocab_slot_file = 'data/slot'

# --- Model size -------------------------------------------------------------
embedding_size = 300
lstm_hidden_size = 200

# --- Training ---------------------------------------------------------------
# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
total_epoch = 500
max_len = 50          # every sentence is padded to this many tokens
batch = 16
learning_rate = 0.001
DROPOUT = 0.2         # other values tried: 0.3, 0.4

In [None]:
%%writefile make_dict.py
import pandas as pd
import json
import config as cf

def read_file(filepath):
    """Load a JSON Lines file into a DataFrame.

    Args:
        filepath: path to a UTF-8 text file holding one JSON object per line.
            Blank lines are ignored.

    Returns:
        A ``pandas.DataFrame`` with one row per JSON object.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    records = []
    # Explicit UTF-8: the data is Vietnamese and the platform default encoding
    # is not guaranteed to match.
    with open(filepath, encoding="utf-8") as f:
        for line in f:  # stream instead of readlines()
            if line.strip():
                records.append(json.loads(line))
    return pd.DataFrame(records)

# Load both splits. All vocabularies below come from the training split only.
train_data = read_file(cf.train_file)
train_data["sentence_len"] = train_data["words"].apply(len)
valid_data = read_file(cf.valid_file)

# Distinct words and slot tags, in order of first appearance.
words = list(train_data['words'].explode().unique())
slots = list(train_data['tags'].explode().unique())

# Word -> index. Real words start at 2; index 1 is the unknown-word token and
# index 0 the padding token.
word2idx = {w: i for i, w in enumerate(words, start=2)}
word2idx["UNK"] = 1
word2idx["PAD"] = 0

# Slot tag -> index, starting at 0 with no dedicated padding tag.
slot2idx = dict(zip(slots, range(len(slots))))

# Reverse lookups.
idx2word = dict((i, w) for w, i in word2idx.items())
idx2slot = dict((i, w) for w, i in slot2idx.items())

# Intent index <-> intent label.
idx2intent = dict(enumerate(train_data["intent"].unique()))
intent2idx = {val: i for i, val in idx2intent.items()}

# Summary; printed whenever this module is run or imported.
print('Number of training samples: ', len(train_data))
print('Number of test samples: ', len(valid_data))
print('Number of words: ', len(word2idx))
print('Number of intent labels: ', len(intent2idx))
print('Number of slot labels', len(slot2idx))

In [None]:
!python make_dict.py

In [None]:
%%writefile model.py
from make_dict import word2idx, intent2idx, slot2idx
import torch 
import torch.nn as nn
import torch.nn.functional as F

from config import device, DROPOUT
import config as cfg


# Bi-model 
class slot_enc(nn.Module):
    """Slot-filling encoder: word embedding followed by a 2-layer bidirectional LSTM.

    Args:
        embedding_size: dimension of the word embeddings.
        lstm_hidden_size: hidden size of each LSTM direction.
        vocab_size: number of rows of the embedding table.
    """

    def __init__(self, embedding_size, lstm_hidden_size, vocab_size=len(word2idx)):
        super(slot_enc, self).__init__()

        self.embedding = nn.Embedding(vocab_size, embedding_size).to(device)
        self.lstm = nn.LSTM(input_size=embedding_size, hidden_size=lstm_hidden_size, num_layers=2,
                            bidirectional=True, batch_first=True)

    def forward(self, x):
        """Encode a batch of word-index sequences.

        Args:
            x: integer tensor of word indices, ``(batch, seq_len)``.

        Returns:
            Tensor of shape ``(batch, seq_len, 2 * lstm_hidden_size)``.
        """
        x = self.embedding(x)
        # F.dropout defaults to training=True, which would keep dropping
        # activations after model.eval(); tie it to the module's mode instead.
        x = F.dropout(x, DROPOUT, training=self.training)
        x, _ = self.lstm(x)
        x = F.dropout(x, DROPOUT, training=self.training)
        return x


class slot_dec(nn.Module):
    """Slot-filling decoder: a step-by-step LSTM followed by a linear classifier.

    At every time step the LSTM input is the concatenation of the two
    encoders' features for that step and the decoder's own previous output,
    hence ``input_size = 5 * lstm_hidden_size``.

    Args:
        lstm_hidden_size: hidden size of the decoder LSTM.
        label_size: number of slot labels produced per token.
    """

    def __init__(self, lstm_hidden_size, label_size=len(slot2idx)):
        super(slot_dec, self).__init__()
        self.lstm = nn.LSTM(input_size=lstm_hidden_size*5, hidden_size=lstm_hidden_size, num_layers=1)
        self.fc = nn.Linear(lstm_hidden_size, label_size)
        self.hidden_size = lstm_hidden_size

    def forward(self, x, hi):
        """Compute slot logits.

        Args:
            x: this task's encoder output, ``(batch, length, features)``.
            hi: the other encoder's output with the same leading dimensions;
                the last dimensions of ``x`` and ``hi`` must add up to
                ``4 * lstm_hidden_size``.

        Returns:
            Logits of shape ``(batch, length, label_size)``.
        """
        batch = x.size(0)
        length = x.size(1)

        # This LSTM is not batch_first: it expects (seq_len, batch, features)
        # inputs and (num_layers, batch, hidden) states. Feeding it
        # (batch, 1, features) with (1, 1, hidden) states, as before, made it
        # recur over the samples of the batch instead of treating them
        # independently.
        prev_out = x.new_zeros(1, batch, self.hidden_size)
        hidden_state = (x.new_zeros(1, batch, self.hidden_size),
                        x.new_zeros(1, batch, self.hidden_size))

        x = torch.cat((x, hi), dim=-1)
        x = x.transpose(1, 0)  # (length, batch, 4 * hidden)
        x = F.dropout(x, DROPOUT, training=self.training)

        all_out = []
        for i in range(length):
            # (1, batch, 5 * hidden): current features + previous decoder output
            step_input = torch.cat((x[i].unsqueeze(0), prev_out), dim=-1)
            prev_out, hidden_state = self.lstm(step_input, hidden_state)
            all_out.append(prev_out)

        # (length, batch, hidden) -> (batch, length, hidden)
        output = torch.cat(all_out, dim=0).transpose(1, 0)
        # The former `x = F.dropout(x, DROPOUT)` here had no effect on the
        # result (x is not used after the loop) and has been removed.
        res = self.fc(output)
        return res



class intent_enc(nn.Module):
    """Intent encoder: word embedding followed by a 2-layer bidirectional LSTM
    (with dropout between the LSTM layers).

    Args:
        embedding_size: dimension of the word embeddings.
        lstm_hidden_size: hidden size of each LSTM direction.
        vocab_size: number of rows of the embedding table.
    """

    def __init__(self, embedding_size, lstm_hidden_size, vocab_size=len(word2idx)):
        super(intent_enc, self).__init__()

        self.embedding = nn.Embedding(vocab_size, embedding_size).to(device)
        self.lstm = nn.LSTM(input_size=embedding_size, hidden_size=lstm_hidden_size, num_layers=2,
                            bidirectional=True, batch_first=True, dropout=DROPOUT)

    def forward(self, x):
        """Encode a batch of word-index sequences.

        Args:
            x: integer tensor of word indices, ``(batch, seq_len)``.

        Returns:
            Tensor of shape ``(batch, seq_len, 2 * lstm_hidden_size)``.
        """
        x = self.embedding(x)
        # F.dropout defaults to training=True, which would keep dropping
        # activations after model.eval(); tie it to the module's mode instead.
        x = F.dropout(x, DROPOUT, training=self.training)
        x, _ = self.lstm(x)
        x = F.dropout(x, DROPOUT, training=self.training)
        return x


class intent_dec(nn.Module):
    """Intent decoder: an LSTM over the concatenated encoder features, then a
    linear classifier applied to the state at each sentence's last real token.

    Args:
        lstm_hidden_size: hidden size of the decoder LSTM.
        label_size: number of intent classes.
    """

    def __init__(self, lstm_hidden_size, label_size=len(intent2idx)):
        super(intent_dec, self).__init__()
        self.lstm = nn.LSTM(input_size=lstm_hidden_size*4, hidden_size=lstm_hidden_size, batch_first=True, num_layers=1)
        self.fc = nn.Linear(lstm_hidden_size, label_size)

    def forward(self, x, hs, real_len):
        """Compute intent logits.

        Args:
            x: this task's encoder output, ``(batch, length, features)``.
            hs: the other encoder's output with the same leading dimensions;
                the last dimensions of ``x`` and ``hs`` must add up to
                ``4 * lstm_hidden_size``.
            real_len: unpadded length of every sentence in the batch; each
                value must lie in ``1..length``.

        Returns:
            Logits of shape ``(batch, label_size)``; because of the
            ``squeeze`` the batch dimension disappears when ``batch == 1``.
        """
        batch = x.size()[0]
        real_len = torch.as_tensor(real_len, dtype=torch.long, device=x.device)
        x = torch.cat((x, hs), dim=-1)
        # F.dropout defaults to training=True, which would keep dropping
        # activations after model.eval(); tie it to the module's mode instead.
        x = F.dropout(x, DROPOUT, training=self.training)
        x, _ = self.lstm(x)
        x = F.dropout(x, DROPOUT, training=self.training)

        # Pick, for every sample, the LSTM output at its last real token.
        index = torch.arange(batch, device=x.device).long()
        state = x[index, real_len-1, :]

        res = self.fc(state.squeeze())
        return res
        


class Intent(nn.Module):
    """Container for the intent branch: ``enc`` (intent_enc) and ``dec`` (intent_dec).

    ``share_memory`` starts as a zero tensor of shape
    ``(cfg.batch, cfg.max_len, 2 * cfg.lstm_hidden_size)``.
    NOTE(review): this instance attribute hides ``nn.Module.share_memory()``.
    """

    def __init__(self):
        super().__init__()
        hidden = cfg.lstm_hidden_size
        self.enc = intent_enc(cfg.embedding_size, hidden).to(device)
        self.dec = intent_dec(hidden).to(device)
        self.share_memory = torch.zeros(cfg.batch, cfg.max_len, hidden * 2).to(device)
    

class Slot(nn.Module):
    """Container for the slot branch: ``enc`` (slot_enc) and ``dec`` (slot_dec).

    ``share_memory`` starts as a zero tensor of shape
    ``(cfg.batch, cfg.max_len, 2 * cfg.lstm_hidden_size)``.
    NOTE(review): this instance attribute hides ``nn.Module.share_memory()``.
    """

    def __init__(self):
        super().__init__()
        hidden = cfg.lstm_hidden_size
        self.enc = slot_enc(cfg.embedding_size, hidden).to(device)
        self.dec = slot_dec(hidden).to(device)
        self.share_memory = torch.zeros(cfg.batch, cfg.max_len, hidden * 2).to(device)

In [None]:
%%writefile utils.py
import torch
import numpy as np
from torch.nn import functional as F
from config import max_len, batch
from make_dict import slot2idx


def make_mask(real_len, max_len=max_len, label_size=len(slot2idx), batch=batch):
    """Build a 0/1 float mask of shape ``(batch, max_len, label_size)``.

    For the i-th entry ``n`` of ``real_len`` the first ``n`` positions of row
    ``i`` are set to 1 for every label; everything else stays 0.
    """
    mask = torch.zeros(batch, max_len, label_size)
    for row, length in enumerate(real_len):
        mask[row, :length] = 1.0
    return mask


def masked_log_softmax(vector: torch.Tensor, mask: torch.Tensor, dim: int = -1) -> torch.Tensor:
    """Log-softmax of ``vector`` along ``dim`` with masked entries pushed down.

    When ``mask`` is given, ``log(mask + 1e-45)`` is added to ``vector``
    first: entries whose mask is 1 are unchanged, entries whose mask is 0
    receive a very large negative offset. A mask with fewer dimensions than
    ``vector`` gets extra axes inserted at position 1 until the ranks match.
    With ``mask=None`` this is a plain log-softmax.
    """
    if mask is None:
        return torch.nn.functional.log_softmax(vector, dim=dim)

    mask = mask.float()
    for _ in range(vector.dim() - mask.dim()):
        mask = mask.unsqueeze(1)
    shifted = vector + (mask + 1e-45).log()
    return torch.nn.functional.log_softmax(shifted, dim=dim)


def one_hot(array, Num=len(slot2idx), maxlen=max_len):
    """One-hot encode an index tensor.

    * 1-D input of length ``batch`` -> ``(batch, Num)``.
    * otherwise ``array[i, j]`` is read for ``j < maxlen`` and the result is
      ``(batch, maxlen, Num)``; positions whose index equals ``Num`` are left
      all-zero.

    The result is a freshly created float tensor.
    """
    dims = array.size()
    batch = dims[0]

    if len(dims) == 1:
        encoded = torch.zeros(batch, Num)
        for row in range(batch):
            encoded[row][array[row]] = 1
        return encoded

    encoded = torch.zeros(batch, maxlen, Num)
    for row in range(batch):
        for col in range(maxlen):
            if array[row, col] != Num:
                encoded[row][col][array[row, col]] = 1
    return encoded

import random

def get_batch(data, batch_size=batch):
    """Shuffle ``data`` in place and yield it in full batches.

    Args:
        data: list of ``(sentence, real_len, slot_label, intent_label)``
            records. The list itself is shuffled.
        batch_size: number of records per batch.

    Yields:
        ``(sentence, real_len, slot_label, intent_label)`` where each element
        is a list with ``batch_size`` entries.

    Only full batches are produced; a trailing remainder shorter than
    ``batch_size`` is dropped. The previous ``while eindex < len(data)`` test
    also dropped the last *full* batch, so with ``batch_size=1`` the final
    record was never yielded.
    """
    random.shuffle(data)
    # The last admissible start index is len(data) - batch_size.
    for sindex in range(0, len(data) - batch_size + 1, batch_size):
        rows = data[sindex:sindex + batch_size]
        sentence = [row[0] for row in rows]
        real_len = [row[1] for row in rows]
        slot_label = [row[2] for row in rows]
        intent_label = [row[3] for row in rows]

        yield (sentence, real_len, slot_label, intent_label)

def get_chunks(labels):
    """Extract labelled spans from a tag sequence.

    Positions ``1 .. len(labels) - 2`` are examined; the first and last
    entries only serve as left/right context. Each label is split into a tag
    (its first character) and a type (everything from the third character
    on); 'O' and the special tokens count as outside.

    Returns:
        A list of ``(start_idx, end_idx, type)`` tuples with inclusive
        indices into ``labels``.
    """
    outside = ('O', '<pad>', '<unk>', '<s>', '</s>', '<STOP>', '<START>')

    def split_label(label):
        # -> (tag, type); outside labels collapse to ('O', 'O')
        if label in outside:
            return 'O', 'O'
        return label[:1], label[2:]

    chunks = []
    start_idx = 0
    for idx in range(1, len(labels) - 1):
        prevTag, prevType = split_label(labels[idx - 1])
        Tag, Type = split_label(labels[idx])
        nextTag, nextType = split_label(labels[idx + 1])

        chunkStart = (
            (Tag == 'B' and prevTag in ('B', 'I', 'O'))
            or (prevTag, Tag) in [('O', 'I'), ('E', 'E'), ('E', 'I'), ('O', 'E')]
            or (Tag != 'O' and prevType != Type)
        )
        chunkEnd = (
            (Tag in ('B', 'I') and nextTag in ('B', 'O'))
            or (Tag == 'E' and nextTag in ('E', 'I', 'O'))
            or (Tag != 'O' and Type != nextType)
        )

        if chunkStart:
            start_idx = idx
        if chunkEnd:
            chunks.append((start_idx, idx, Type))
            start_idx = 0
    return chunks

In [None]:
%%writefile make_data.py
from make_dict import train_data, valid_data, word2idx, slot2idx, intent2idx
from config import max_len
import numpy as np
from keras_preprocessing.sequence import pad_sequences

def make_idxdata(data):
    """Turn a DataFrame of examples into index-encoded training records.

    Args:
        data: DataFrame with the columns ``words`` (list of tokens), ``tags``
            (list of slot labels) and ``intent``.

    Returns:
        A list of ``(sentence_idx, real_len, slot_idx, intent_idx)`` tuples.
        ``sentence_idx`` and ``slot_idx`` are lists of exactly ``max_len``
        integers; ``real_len`` is the number of words, capped at ``max_len``.

    Raises:
        KeyError: for a slot label or intent missing from ``slot2idx`` /
            ``intent2idx`` (both are built from the training split only).
    """
    sentences = data["words"]
    slots = data["tags"]

    # Words -> indices; words unseen in training map to UNK.
    sentence_idx = [[word2idx.get(w, word2idx['UNK']) for w in s] for s in sentences.values]

    # Pad (at the end) every sentence to max_len.
    sentence_idx = pad_sequences(maxlen = max_len, sequences = sentence_idx, padding = "post", value = word2idx["PAD"]).tolist()

    # Slot labels -> indices.
    slot_idx = [[slot2idx[w] for w in s] for s in slots.values]

    # Pad the slot sequences the same way, using the "O" label as filler.
    slot_idx = pad_sequences(maxlen = max_len, sequences = slot_idx, padding = "post", value = slot2idx["O"]).tolist()

    # Intent -> index.
    intent_idx = [intent2idx[s] for s in data["intent"].values]

    # pad_sequences also shortens sequences longer than max_len, so the
    # reported length must not exceed max_len either: the intent decoder
    # indexes position real_len - 1 of the padded sequence.
    real_len = sentences.apply(len).clip(upper=max_len).values
    return list(zip(sentence_idx, real_len, slot_idx, intent_idx))

train_data = make_idxdata(train_data)
valid_data = make_idxdata(valid_data)
# print(valid_data)

In [None]:
%%writefile train.py
"""Joint training of the slot-filling and intent models.

Every epoch trains both models on shuffled full batches, then evaluates them
sentence by sentence on the validation split and reports intent accuracy,
slot chunk F1 and whole-utterance accuracy. The pair of models is saved
whenever the utterance accuracy improves.
"""
from collections import Counter

import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F  # imported explicitly; it used to arrive only via `from model import *`
from torch import optim

import config as cfg
import utils
from config import device
from make_data import train_data, valid_data
from make_dict import idx2slot, intent2idx
from model import *
from utils import get_chunks

epoch_num = cfg.total_epoch
# Width of the one-hot intent target; must equal the intent decoder's output size.
num_intents = len(intent2idx)

slot_model = Slot().to(device)
intent_model = Intent().to(device)

print(slot_model)
print(intent_model)

slot_optimizer = optim.Adam(slot_model.parameters(), lr=cfg.learning_rate)       # optim.Adamax
intent_optimizer = optim.Adam(intent_model.parameters(), lr=cfg.learning_rate)   # optim.Adamax

best_correct_num = 0
best_epoch = -1
best_F1_score = 0.0
best_epoch_slot = -1
best_epoch_utterance = -1
best_utterance = 0

for epoch in range(epoch_num):
    # ------------------------------------------------------------- training
    slot_model.train()
    intent_model.train()
    slot_loss_history = []
    intent_loss_history = []
    for batch_index, data in enumerate(utils.get_batch(train_data)):

        # Preparing data
        sentence, real_len, slot_label, intent_label = data

        mask = utils.make_mask(real_len).to(device)
        x = torch.tensor(sentence).to(device)
        y_slot = torch.tensor(slot_label).to(device)
        y_slot = utils.one_hot(y_slot).to(device)
        y_intent = torch.tensor(intent_label).to(device)
        y_intent = utils.one_hot(y_intent, Num=num_intents).to(device)

        slot_optimizer.zero_grad()
        intent_optimizer.zero_grad()

        # Each encoder's output is also handed (detached) to the other task's decoder.
        hs = slot_model.enc(x)
        slot_model.share_memory = hs.clone()

        hi = intent_model.enc(x)
        intent_model.share_memory = hi.clone()

        # Slot step
        slot_logits = slot_model.dec(hs, intent_model.share_memory.detach())
        # NOTE(review): make_mask zeroes *every* label at a padded position,
        # which shifts all logits of that position equally and so leaves the
        # log-softmax unchanged; padded positions (labelled "O" by make_data)
        # still contribute to this loss. Confirm that this is intended.
        log_slot_logits = utils.masked_log_softmax(slot_logits, mask, dim=-1)
        slot_loss = -1.0*torch.sum(y_slot*log_slot_logits)
        slot_loss_history.append(slot_loss.item())
        slot_loss.backward()
        torch.nn.utils.clip_grad_norm_(slot_model.parameters(), 5.0)
        slot_optimizer.step()

        # Intent step (asynchronous training: separate loss and optimizer)
        intent_logits = intent_model.dec(hi, slot_model.share_memory.detach(), real_len)
        log_intent_logits = F.log_softmax(intent_logits, dim=-1)
        intent_loss = -1.0*torch.sum(y_intent*log_intent_logits)
        intent_loss_history.append(intent_loss.item())
        intent_loss.backward()
        torch.nn.utils.clip_grad_norm_(intent_model.parameters(), 5.0)
        intent_optimizer.step()

        # Log the mean loss of the last 100 batches
        if batch_index % 100 == 0 and batch_index > 0:
            print('Slot loss: {:.4f} \t Intent loss: {:.4f}'.format(sum(slot_loss_history[-100:])/100.0, \
                sum(intent_loss_history[-100:])/100.0))

    # ----------------------------------------------------------- evaluation
    # Evaluation mode and no autograd bookkeeping while scoring.
    slot_model.eval()
    intent_model.eval()

    evaluated = 0
    correct_num = 0
    TP, FP, FN = 0, 0, 0
    utterance_true = 0
    with torch.no_grad():
        for batch_index, data_test in enumerate(utils.get_batch(valid_data, batch_size=1)):
            sentence_test, real_len_test, slot_label_test, intent_label_test = data_test
            evaluated += 1
            x_test = torch.tensor(sentence_test).to(device)

            mask_test = utils.make_mask(real_len_test, batch=1).to(device)
            # Slot model generates hs_test and intent model generates hi_test
            hs_test = slot_model.enc(x_test)
            hi_test = intent_model.enc(x_test)

            # Slot
            slot_logits_test = slot_model.dec(hs_test, hi_test)
            log_slot_logits_test = utils.masked_log_softmax(slot_logits_test, mask_test, dim=-1)
            slot_pred_test = torch.argmax(log_slot_logits_test, dim=-1)
            # Intent
            intent_logits_test = intent_model.dec(hi_test, hs_test, real_len_test)
            log_intent_logits_test = F.log_softmax(intent_logits_test, dim=-1)
            res_test = torch.argmax(log_intent_logits_test, dim=-1)

            intent_correct = res_test.item() == intent_label_test[0]
            if intent_correct:
                correct_num += 1

            # Slot chunks, restricted to the real (unpadded) tokens
            slot_pred_test = slot_pred_test[0][:real_len_test[0]]
            slot_label_test = slot_label_test[0][:real_len_test[0]]

            slot_pred_test = [idx2slot[int(item)] for item in slot_pred_test]
            slot_label_test = [idx2slot[int(item)] for item in slot_label_test]

            # get_chunks ignores the first and last position, hence the 'O' padding.
            pred_chunks = get_chunks(['O'] + slot_pred_test + ['O'])
            label_chunks = get_chunks(['O'] + slot_label_test + ['O'])
            for pred_chunk in pred_chunks:
                if pred_chunk in label_chunks:
                    TP += 1
                else:
                    FP += 1
            for label_chunk in label_chunks:
                if label_chunk not in pred_chunks:
                    FN += 1

            if intent_correct and (Counter(pred_chunks) == Counter(label_chunks)):
                utterance_true += 1

    # Divide by the number of sentences actually scored (get_batch may yield
    # fewer than len(valid_data)); never by zero.
    total_valid = max(evaluated, 1)

    # Best-score bookkeeping happens once per epoch: the final counts decide,
    # and the checkpoint is written at most once instead of once per sentence.
    if correct_num > best_correct_num:
        best_correct_num = correct_num
        best_epoch = epoch

    if utterance_true > best_utterance:
        best_utterance = utterance_true
        best_epoch_utterance = epoch
        # Save the entire models (pickled modules, not state dicts).
        torch.save(intent_model, 'model_intent_best.ckpt')
        torch.save(slot_model, 'model_slot_best.ckpt')

    f1_denominator = 2*TP+FN+FP
    # No predicted and no gold chunk at all -> report 0 instead of dividing by zero.
    F1_score = 100.0*2*TP/f1_denominator if f1_denominator else 0.0
    if F1_score > best_F1_score:
        best_F1_score = F1_score
        best_epoch_slot = epoch

    print('*'*20)
    print('Epoch: [{}/{}], Intent Val Acc: {:.4f} \t Slot F1 score: {:.4f} \t Utterance Accuracy: {:.4f}'.format(epoch+1, epoch_num, 100.0*correct_num/total_valid, F1_score, utterance_true/total_valid))
    print('*'*20)

    print('Best Intent Acc: {:.4f} at Epoch: [{}]'.format(100.0*best_correct_num/total_valid, best_epoch+1))
    print('Best F1 score: {:.4f} at Epoch: [{}]'.format(best_F1_score, best_epoch_slot+1))
    print('Best Utterance Acc: {:.4f} at Epoch: [{}]'.format(best_utterance/total_valid, best_epoch_utterance+1))


In [None]:
!python train.py

# Load model weights

In [None]:
import torch

# Load onto the GPU when there is one, otherwise remap the tensors to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# train.py saves whole pickled modules (torch.save(model)), not state dicts,
# so weights_only must be disabled. Unpickling can execute arbitrary code:
# only load checkpoints produced by this notebook.
slot_model = torch.load("/kaggle/working/model_slot_best.ckpt", map_location=device, weights_only=False)
intent_model = torch.load("/kaggle/working/model_intent_best.ckpt", map_location=device, weights_only=False)

In [None]:
from make_data import valid_data

In [None]:
import utils
import torch.nn.functional as F
from make_dict import idx2intent, idx2slot, idx2word

# Use the GPU when available instead of assuming one exists.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Evaluation mode and no gradient tracking while predicting.
slot_model.eval()
intent_model.eval()

# Show predictions next to the labels for sentences drawn from the last ten
# validation records, one sentence per batch.
with torch.no_grad():
    for batch_index, data_test in enumerate(utils.get_batch(valid_data[-10:], batch_size=1)):
        sentence_test, real_len_test, slot_label_test, intent_label_test = data_test
        x_test = torch.tensor(sentence_test).to(device)
        mask_test = utils.make_mask(real_len_test, batch=1).to(device)

        # Slot model generates hs_test and intent model generates hi_test
        hs_test = slot_model.enc(x_test)
        hi_test = intent_model.enc(x_test)

        # Slot
        slot_logits_test = slot_model.dec(hs_test, hi_test)
        log_slot_logits_test = utils.masked_log_softmax(slot_logits_test, mask_test, dim=-1)
        slot_pred_test = torch.argmax(log_slot_logits_test, dim=-1)
        # Intent
        intent_logits_test = intent_model.dec(hi_test, hs_test, real_len_test)
        log_intent_logits_test = F.log_softmax(intent_logits_test, dim=-1)
        res_test = torch.argmax(log_intent_logits_test, dim=-1)

        print("Intent: ")  # was misspelled "Itent"
        print("Predict: ", idx2intent[res_test.item()])
        print("Label: ", idx2intent[intent_label_test[0]])

        print("Slot: ")
        # Keep only the real (unpadded) tokens.
        slot_pred_test = slot_pred_test[0][:real_len_test[0]]
        slot_label_test = slot_label_test[0][:real_len_test[0]]

        slot_pred_test = [idx2slot[int(item)] for item in slot_pred_test]
        slot_label_test = [idx2slot[int(item)] for item in slot_label_test]
        print("Predict: " , slot_pred_test)
        print("Labels: ", slot_label_test)
        print("Sentence: ", [[idx2word[w] for w in item] for item in sentence_test])
        print("=================")