In [1]:
# ! pip install torchtext==0.10.1
! pip install torchtext==0.6.0
! pip install datasets

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torchtext==0.6.0
  Downloading torchtext-0.6.0-py3-none-any.whl (64 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.2/64.2 kB[0m [31m948.8 kB/s[0m eta [36m0:00:00[0m
Collecting sentencepiece
  Downloading sentencepiece-0.1.98-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m41.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: sentencepiece, torchtext
  Attempting uninstall: torchtext
    Found existing installation: torchtext 0.15.1
    Uninstalling torchtext-0.15.1:
      Successfully uninstalled torchtext-0.15.1
Successfully installed sentencepiece-0.1.98 torchtext-0.6.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting datasets
  Downloading datasets-2.12.0-py3-none-any.whl (

In [2]:
from datasets import load_dataset

import torch
from torchtext import data

In [3]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [28]:
MODEL_CONFIG = "IMPROVEMENTS"
print("Using MODEL_CONFIG", MODEL_CONFIG)

Using MODEL_CONFIG IMPROVEMENTS


In [29]:
PROJECT_ROOT = F"/content/gdrive/My Drive/nlp_project_task_1/"

In [30]:
SEED = 42
MAX_VOCAB_SIZE = 25_000

In [31]:
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [32]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [33]:
faithdial_dataset = load_dataset("McGill-NLP/FaithDial")



  0%|          | 0/7 [00:00<?, ?it/s]

In [34]:
faithdial_dataset.keys()

dict_keys(['test', 'test_random_split', 'test_topic_split', 'train', 'validation', 'valid_random_split', 'valid_topic_split'])

In [35]:
faithdial_dataset["train"][0]

{'dialog_idx': 0,
 'response': 'Yeah, but once the access to the internet was a rare thing. do you remember?',
 'original_response': "No I could not! I couldn't imagine living when internet access was rare and very few people had it!",
 'history': ['Can you imagine the world without internet access?'],
 'knowledge': 'Internet access was once rare, but has grown rapidly.',
 'BEGIN': ['Hallucination'],
 'VRM': ['Disclosure', 'Ack.']}

In [36]:
def critic_preprocess(dataset):
    """
    Data items transformed into (knowledge, response, is_hallucination)
    """
    new_dataset = []
    for d in dataset:
        # original response
        if d["original_response"] != None:
            new_dataset.append({
                "knowledge": d["knowledge"],
                "response": d["original_response"],
                "hallucination": "yes" if "Hallucination" in d["BEGIN"] else "no",
                "history": " ".join(d["history"]),
                "all": " ".join(d["history"]) + " <eos> " + d["knowledge"] + " <eos> " + d["original_response"]
            })

        # new responses always aren't hallucinations
        new_dataset.append({"knowledge": d["knowledge"],
                            "response": d["response"],
                            "hallucination": "no",
                            "history": " ".join(d["history"]),
                            "all": " ".join(d["history"]) + " <eos> " + d["knowledge"] + " <eos> " + d["response"]
        })
    return new_dataset

In [37]:
import json

def dump_as_json(dataset, filename):
    """
    Takes a list of dicts and dumps it as a json file that torchtext can parse.
    """
    with open(filename, "w") as file:
        for d in dataset:
            file.write(json.dumps(d))
            file.write("\n")


In [41]:
KNOWLEDGE = data.Field(tokenize='spacy', tokenizer_language="en_core_web_sm", include_lengths = True)
RESPONSE = data.Field(tokenize='spacy', tokenizer_language="en_core_web_sm", include_lengths = True)
HISTORY = data.Field(tokenize='spacy', tokenizer_language="en_core_web_sm", include_lengths = True)
LABEL = data.LabelField(dtype=torch.float)

# ALL = data.Field(tokenize='spacy', tokenizer_language="en_core_web_sm", include_lengths = True)

In [42]:
dump_as_json(critic_preprocess(faithdial_dataset["test"]), PROJECT_ROOT + "data/faithdial_dataset_test.json")
dump_as_json(critic_preprocess(faithdial_dataset["train"]), PROJECT_ROOT + "data/faithdial_dataset_train.json")
dump_as_json(critic_preprocess(faithdial_dataset["validation"]), PROJECT_ROOT + "data/faithdial_dataset_validation.json")

In [43]:
fields = {"knowledge": ("k", KNOWLEDGE), "response": ("r", RESPONSE), "hallucination": ("l", LABEL), "history": ("h", HISTORY)}
# fields = {"all": ("a", ALL), "hallucination": ("l", LABEL)}

dataset = data.TabularDataset.splits(path=PROJECT_ROOT + "data",
                                     train="faithdial_dataset_train.json",
                                     validation="faithdial_dataset_validation.json",
                                     test="faithdial_dataset_test.json",
                                     format="json",
                                     fields=fields)


In [44]:
train_data, valid_data, test_data = dataset

In [45]:
train_data[0]

<torchtext.data.example.Example at 0x7fde8a089d20>

In [46]:
vars(train_data.examples[0])

{'k': ['Internet',
  'access',
  'was',
  'once',
  'rare',
  ',',
  'but',
  'has',
  'grown',
  'rapidly',
  '.'],
 'r': ['No',
  'I',
  'could',
  'not',
  '!',
  'I',
  'could',
  "n't",
  'imagine',
  'living',
  'when',
  'internet',
  'access',
  'was',
  'rare',
  'and',
  'very',
  'few',
  'people',
  'had',
  'it',
  '!'],
 'l': 'yes',
 'h': ['Can',
  'you',
  'imagine',
  'the',
  'world',
  'without',
  'internet',
  'access',
  '?']}

In [47]:
KNOWLEDGE.build_vocab(train_data,
                      max_size=MAX_VOCAB_SIZE,
                      vectors = "fasttext.simple.300d",
                      unk_init = torch.Tensor.normal_)
RESPONSE.build_vocab(train_data,
                     max_size=MAX_VOCAB_SIZE,
                     vectors = "fasttext.simple.300d",
                     unk_init = torch.Tensor.normal_)
HISTORY.build_vocab(train_data,
                    max_size=MAX_VOCAB_SIZE,
                    vectors = "fasttext.simple.300d",
                    unk_init = torch.Tensor.normal_)
LABEL.build_vocab(train_data)

# ALL.build_vocab(train_data,
#                 specials=["<eos>"],
#                 max_size=MAX_VOCAB_SIZE,
#                 vectors = "fasttext.simple.300d",
#                 unk_init = torch.Tensor.normal_)


In [48]:
print(f"Unique tokens in KNOWLEDGE vocabulary: {len(KNOWLEDGE.vocab)}")
print(f"Unique tokens in RESPONSE vocabulary: {len(RESPONSE.vocab)}")
print(f"Unique tokens in HISTORY vocabulary: {len(HISTORY.vocab)}")
print(f"Unique tokens in LABEL vocabulary: {len(LABEL.vocab)}")

Unique tokens in KNOWLEDGE vocabulary: 24539
Unique tokens in RESPONSE vocabulary: 25002
Unique tokens in HISTORY vocabulary: 22340
Unique tokens in LABEL vocabulary: 2


In [49]:
print(KNOWLEDGE.vocab.freqs.most_common(20))
print(RESPONSE.vocab.freqs.most_common(20))
print(HISTORY.vocab.freqs.most_common(20))
print(LABEL.vocab.freqs.most_common(20))

[(',', 61720), ('the', 38383), ('.', 33816), ('and', 28891), ('of', 26828), ('a', 20285), ('in', 19499), ('is', 17156), ("''", 16411), ('to', 12365), ('or', 9842), ('as', 9598), (')', 8496), ('-', 8327), ('(', 8142), ('The', 7870), ('by', 6639), ('with', 5784), ('for', 5683), ('are', 5067)]
[('.', 32770), (',', 31530), ('the', 24162), ('I', 18471), ('a', 14945), ('and', 13738), ('of', 13482), ('is', 12016), ('in', 11802), ('that', 10738), ('to', 10728), ('you', 8747), ('it', 8115), ('?', 7405), ('know', 6604), ('are', 6421), ('!', 6137), ("'s", 6079), ('have', 5205), ('but', 4822)]
[('.', 127919), ('I', 114029), (',', 110960), ('the', 73662), ('?', 70486), ('a', 59059), ('you', 53372), ('to', 45803), ('of', 44987), ('that', 43462), ('is', 43001), ('know', 37626), ('and', 37304), ('it', 36219), ('in', 32770), ('do', 27194), ('!', 24244), ('have', 23839), ("'s", 23079), ('are', 20923)]
[('no', 20474), ('yes', 13507)]


In [50]:
print(KNOWLEDGE.vocab.itos[:10])
print(RESPONSE.vocab.itos[:10])
print(HISTORY.vocab.itos[:10])
print(LABEL.vocab.itos[:10])

['<unk>', '<pad>', ',', 'the', '.', 'and', 'of', 'a', 'in', 'is']
['<unk>', '<pad>', '.', ',', 'the', 'I', 'a', 'and', 'of', 'is']
['<unk>', '<pad>', '.', 'I', ',', 'the', '?', 'a', 'you', 'to']
['no', 'yes']


In [51]:
BATCH_SIZE = 64

train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data),
    batch_size = BATCH_SIZE,
    sort_within_batch = True,
    sort_key = lambda x: x.r,
    device = device)

In [52]:
from torch import nn

class LSTM(nn.Module):
    def __init__(self, response_vocab_size, knowledge_vocab_size, history_vocab_size, embedding_dim, hidden_dim, output_dim, n_layers,
                 bidirectional, dropout, response_pad_idx, knowledge_pad_idx, history_pad_idx):

        super().__init__()

        # Initialize Embedding Layer
        self.response_embedding = nn.Embedding(num_embeddings=response_vocab_size,
                                               embedding_dim=embedding_dim,
                                               padding_idx=response_pad_idx)

        self.knowledge_embedding = nn.Embedding(num_embeddings=knowledge_vocab_size,
                                                embedding_dim=embedding_dim,
                                                padding_idx=knowledge_pad_idx)
        
        self.history_embedding = nn.Embedding(num_embeddings=history_vocab_size,
                                              embedding_dim=embedding_dim,
                                              padding_idx=history_pad_idx)

        # Initialize LSTM layer
        self.response_lstm = nn.LSTM(input_size=embedding_dim,
                                     hidden_size=hidden_dim,
                                     num_layers=n_layers,
                                     bidirectional=bidirectional)

        self.knowledge_lstm = nn.LSTM(input_size=embedding_dim,
                                      hidden_size=hidden_dim,
                                      num_layers=n_layers,
                                      bidirectional=bidirectional)
        
        self.history_lstm = nn.LSTM(input_size=embedding_dim,
                                    hidden_size=hidden_dim,
                                    num_layers=n_layers,
                                    bidirectional=bidirectional)

        # Initialize a fully connected layer with Linear transformation
        self.fc = nn.Linear(in_features=3*2*hidden_dim,
                            out_features=output_dim)

        # Initialize Dropout
        self.dropout = nn.Dropout(dropout)

    def forward(self, response, response_lengths, knowledge, knowledge_lengths, history, history_lengths):
        # Apply embedding layer that matches each word to its vector and apply dropout. Dim [sent_len, batch_size, emb_dim]
        x_r = self.response_embedding(response)
        x_r = self.dropout(x_r)

        x_k = self.knowledge_embedding(knowledge)
        x_k = self.dropout(x_k)

        x_h = self.history_embedding(history)
        x_h = self.dropout(x_h)

        # Run the LSTM along the sentences of length sent_len.
        output_r, (hidden_r, cell_r) = self.response_lstm(x_r)
        output_k, (hidden_k, cell_k) = self.knowledge_lstm(x_k)
        output_h, (hidden_h, cell_h) = self.history_lstm(x_h)

        # Concat the final forward (hidden[-2,:,:]) and backward (hidden[-1,:,:]) hidden layers and apply dropout
        hidden_r = torch.cat((hidden_r[-2,:,:], hidden_r[-1,:,:]), -1)
        hidden_k = torch.cat((hidden_k[-2,:,:], hidden_k[-1,:,:]), -1)
        hidden_h = torch.cat((hidden_h[-2,:,:], hidden_h[-1,:,:]), -1)
        hidden = torch.cat((hidden_r, hidden_k, hidden_h), -1)
        hidden = self.dropout(hidden)

        return self.fc(hidden)

In [53]:
# from torch import nn

# class ConcatLSTM(nn.Module):
#     def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers,
#                  bidirectional, dropout, pad_idx):

#         super().__init__()

#         # Initialize Embedding Layer
#         self.embedding = nn.Embedding(num_embeddings=vocab_size,
#                                                embedding_dim=embedding_dim,
#                                                padding_idx=pad_idx)

#         # Initialize LSTM layer
#         self.lstm = nn.LSTM(input_size=embedding_dim,
#                             hidden_size=hidden_dim,
#                             num_layers=n_layers,
#                             bidirectional=bidirectional)

#         # Initialize a fully connected layer with Linear transformation
#         self.fc = nn.Linear(in_features=2*hidden_dim,
#                             out_features=output_dim)

#         # Initialize Dropout
#         self.dropout = nn.Dropout(dropout)

#     def forward(self, all, all_lengths):
#         # Apply embedding layer that matches each word to its vector and apply dropout. Dim [sent_len, batch_size, emb_dim]
#         x = self.embedding(all)
#         x = self.dropout(x)

#         # Run the LSTM along the sentences of length sent_len.
#         output, (hidden, cell) = self.lstm(x)

#         # Concat the final forward (hidden[-2,:,:]) and backward (hidden[-1,:,:]) hidden layers and apply dropout
#         hidden = torch.cat((hidden[-2,:,:], hidden[-1,:,:]), -1)
#         hidden = self.dropout(hidden)

#         return self.fc(hidden)

In [54]:
RESPONSE_INPUT_DIM = len(RESPONSE.vocab)
KNOWLEDGE_INPUT_DIM = len(KNOWLEDGE.vocab)
HISTORY_INPUT_DIM = len(HISTORY.vocab)
EMBEDDING_DIM = 300
HIDDEN_DIM = 256
OUTPUT_DIM = 1
N_LAYERS = 2
BIDIRECTIONAL = True
DROPOUT = 0.5
RESPONSE_PAD_IDX = RESPONSE.vocab.stoi[RESPONSE.pad_token]
KNOWLEDGE_PAD_IDX = KNOWLEDGE.vocab.stoi[KNOWLEDGE.pad_token]
HISTORY_PAD_IDX = HISTORY.vocab.stoi[HISTORY.pad_token]


model = LSTM(RESPONSE_INPUT_DIM,
             KNOWLEDGE_INPUT_DIM,
             HISTORY_INPUT_DIM,
             EMBEDDING_DIM,
             HIDDEN_DIM,
             OUTPUT_DIM,
             N_LAYERS,
             BIDIRECTIONAL,
             DROPOUT,
             RESPONSE_PAD_IDX,
             KNOWLEDGE_PAD_IDX,
             HISTORY_PAD_IDX)

In [55]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 29,725,069 trainable parameters


In [56]:
print(RESPONSE.vocab.vectors.shape)
print(KNOWLEDGE.vocab.vectors.shape)
print(HISTORY.vocab.vectors.shape)

torch.Size([25002, 300])
torch.Size([24539, 300])
torch.Size([22340, 300])


In [57]:
model.response_embedding.weight.data.copy_(RESPONSE.vocab.vectors)
model.knowledge_embedding.weight.data.copy_(KNOWLEDGE.vocab.vectors)
model.history_embedding.weight.data.copy_(HISTORY.vocab.vectors)

tensor([[ 1.6184,  0.0979,  1.1739,  ..., -2.3628,  0.8702, -1.4477],
        [ 0.1974, -0.0730, -0.4950,  ..., -1.1911,  0.0557,  1.7617],
        [ 0.0569, -0.0520,  0.2733,  ..., -0.0695, -0.1606, -0.0989],
        ...,
        [-0.3497,  0.0489, -0.1498,  ..., -0.0549,  0.0259, -0.2769],
        [ 0.5550,  0.1573, -0.2594,  ...,  0.0232,  0.1095, -0.0642],
        [ 0.0820,  0.3241, -0.4957,  ...,  0.1171,  0.1461, -0.3180]])

In [58]:
UNK_IDX_R = RESPONSE.vocab.stoi[RESPONSE.unk_token]
UNK_IDX_K = RESPONSE.vocab.stoi[KNOWLEDGE.unk_token]
UNK_IDX_H = RESPONSE.vocab.stoi[HISTORY.unk_token]

model.response_embedding.weight.data[UNK_IDX_R] = torch.zeros(EMBEDDING_DIM)
model.response_embedding.weight.data[RESPONSE_PAD_IDX] = torch.zeros(EMBEDDING_DIM)

model.knowledge_embedding.weight.data[UNK_IDX_K] = torch.zeros(EMBEDDING_DIM)
model.knowledge_embedding.weight.data[KNOWLEDGE_PAD_IDX] = torch.zeros(EMBEDDING_DIM)

model.history_embedding.weight.data[UNK_IDX_H] = torch.zeros(EMBEDDING_DIM)
model.history_embedding.weight.data[HISTORY_PAD_IDX] = torch.zeros(EMBEDDING_DIM)

print(model.response_embedding.weight.data)
print(model.knowledge_embedding.weight.data)
print(model.history_embedding.weight.data)

tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0569, -0.0520,  0.2733,  ..., -0.0695, -0.1606, -0.0989],
        ...,
        [ 0.7385,  0.2614,  0.3067,  ..., -0.1981, -0.2725, -0.0737],
        [ 0.3933, -0.1404, -0.0947,  ...,  0.0495,  0.0273, -0.0339],
        [ 0.1373, -0.1097,  0.1443,  ...,  0.0776, -0.1282, -0.0274]])
tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.0132e-01,  1.0376e-02,  1.6235e-01,  ..., -9.3056e-02,
         -1.4075e-01, -1.3264e-01],
        ...,
        [ 1.0595e-01,  2.1516e-01, -1.8881e-01,  ...,  2.2973e-01,
         -2.7972e-01, -1.1228e+00],
        [-1.8776e-03, -7.1971e-01,  2.5737e+00,  ...,  5.9717e-01,
         -1.1907e+00, -1.2164e+00],
        [-1.9317e+00,  2.0529e-02, -1.

In [59]:
import torch.optim as optim

optimizer = optim.Adam(model.parameters())

In [60]:
criterion = nn.BCEWithLogitsLoss()

model = model.to(device)
criterion = criterion.to(device)

In [61]:
from sklearn.metrics import f1_score


def binary_accuracy(preds, y):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
    """
    #round predictions to the closest integer
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float() #convert into float for division 
    acc = correct.sum() / len(correct)
    return acc


def binary_f1(preds, y):
    rounded_preds = torch.round(torch.sigmoid(preds))
    f1 = f1_score(y.cpu(), rounded_preds.cpu(), average="macro")

    return f1


In [62]:
def train(model, iterator, optimizer, criterion):

    epoch_loss = 0
    epoch_acc = 0

    model.train()

    for batch in iterator:

        optimizer.zero_grad()

        response, response_lengths = batch.r
        knowledge, knowledge_lengths = batch.k
        history, history_lengths = batch.h

        predictions = model(response, response_lengths, knowledge, knowledge_lengths, history, history_lengths).squeeze(1)

        loss = criterion(predictions, batch.l)
        acc = binary_accuracy(predictions, batch.l)

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [63]:
def evaluate(model, iterator, criterion):

    epoch_loss = 0
    epoch_acc = 0
    epoch_f1 = 0

    model.eval()

    with torch.no_grad():

        for batch in iterator:
            response, response_lengths = batch.r
            knowledge, knowledge_lengths = batch.k
            history, history_lengths = batch.h

            predictions = model(response, response_lengths, knowledge, knowledge_lengths, history, history_lengths).squeeze(1)

            loss = criterion(predictions, batch.l)
            acc = binary_accuracy(predictions, batch.l)
            f1 = binary_f1(predictions, batch.l)

            epoch_loss += loss.item()
            epoch_acc += acc.item()
            epoch_f1 += f1.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator), epoch_f1 / len(iterator)

In [64]:
import time

def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [65]:
N_EPOCHS = 5
path = PROJECT_ROOT + "/" + MODEL_CONFIG + "_model.pt"
best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    start_time = time.time()

    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc, valid_f1 = evaluate(model, valid_iterator, criterion)

    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), path)

    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}% |')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}% | Val. F1: {valid_f1:.3f}')

Epoch: 01 | Epoch Time: 1m 29s
	Train Loss: 0.504 | Train Acc: 74.72% |
	 Val. Loss: 0.398 |  Val. Acc: 82.29% | Val. F1: 0.777
Epoch: 02 | Epoch Time: 1m 30s
	Train Loss: 0.367 | Train Acc: 83.82% |
	 Val. Loss: 0.375 |  Val. Acc: 83.20% | Val. F1: 0.790
Epoch: 03 | Epoch Time: 1m 31s
	Train Loss: 0.307 | Train Acc: 86.93% |
	 Val. Loss: 0.372 |  Val. Acc: 84.10% | Val. F1: 0.801
Epoch: 04 | Epoch Time: 1m 31s
	Train Loss: 0.264 | Train Acc: 89.08% |
	 Val. Loss: 0.354 |  Val. Acc: 84.14% | Val. F1: 0.805
Epoch: 05 | Epoch Time: 1m 32s
	Train Loss: 0.226 | Train Acc: 90.70% |
	 Val. Loss: 0.380 |  Val. Acc: 83.67% | Val. F1: 0.801


In [66]:
model.load_state_dict(torch.load(path, map_location=device))

test_loss, test_acc, test_f1 = evaluate(model, test_iterator, criterion)

print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}% | Test F1: {test_f1:.2f}')

Test Loss: 0.350 | Test Acc: 84.90% | Test F1: 0.81


In [67]:
import spacy
nlp = spacy.load('en_core_web_sm')

def predict_hallucination(model, knowledge, response):
    model.eval()

    tokenized_r = [tok.text for tok in nlp.tokenizer(response)]
    indexed_r = [RESPONSE.vocab.stoi[t] for t in tokenized_r]
    length_r = [len(indexed_r)]
    tensor_r = torch.LongTensor(indexed_r).to(device)
    tensor_r = tensor_r.unsqueeze(1)
    length_tensor_r = torch.LongTensor(length_r)

    tokenized_k = [tok.text for tok in nlp.tokenizer(knowledge)]
    indexed_k = [KNOWLEDGE.vocab.stoi[t] for t in tokenized_k]
    length_k = [len(indexed_k)]
    tensor_k = torch.LongTensor(indexed_k).to(device)
    tensor_k = tensor_k.unsqueeze(1)
    length_tensor_k = torch.LongTensor(length_k)

    prediction = torch.sigmoid(model(tensor_r, length_tensor_r, tensor_k, length_tensor_k))

    return prediction.item()


In [68]:
predict_hallucination(model, "", "I love dogs")

TypeError: ignored

In [None]:
predict_hallucination(model, "", "Dogs are animals.")

In [None]:
predict_hallucination(model, "", "I was walking my dog last week.")

In [None]:
predict_hallucination(model, "", "Dogs need to be walked daily.")

In [None]:
test_data[2].r

In [None]:
predict_hallucination(model, "", "Dylan's Candy Bar is a candy supplier.")

In [None]:
predict_hallucination(model, "", "Dylan's Candy Bar is my favorite great brand of candy.")

In [None]:
print(test_data[2].h)