In [1]:
from transformers import RobertaForMaskedLM, RobertaTokenizer
import torch
from tqdm import tqdm

In [2]:
# Load the masked-LM weights and the matching tokenizer from the same checkpoint.
checkpoint = 'roberta-base'
tokenizer = RobertaTokenizer.from_pretrained(checkpoint)
model = RobertaForMaskedLM.from_pretrained(checkpoint)

In [3]:
import os
import pandas as pd
from random import choice

import nltk

In [4]:
# Root folder of the raw data; each entry in it is treated as one intent name.
path_to_intents = os.path.join('data', 'raw')

# os.listdir() returns entries in arbitrary order. `intents` is later zipped
# positionally with `questions` and `entities`, so pin a deterministic order.
intents = sorted(os.listdir(path_to_intents))


def get_path(x):
    """Return the CSV path for intent ``x``: data/raw/<x>/<x>.csv."""
    return os.path.join(path_to_intents, x, x + ".csv")

In [5]:
# `analyze` is imported (and get_data() is called) with `helpers/` as the working
# directory. Remember where we started and always go back there, so a failure in
# the import or in get_data() does not leave the kernel stranded inside
# `helpers/` (which would make re-running this cell fail on os.chdir).
_notebook_dir = os.getcwd()
os.chdir('helpers')
try:
    from analyze import questions, entities, get_data
    data = get_data()
finally:
    os.chdir(_notebook_dir)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\dhruv\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [6]:
from collections import defaultdict

# Per-intent containers, all keyed by intent name:
#   text         - prompt strings fed to the tokenizer
#   encoded_text - token-id tensors (filled in a later cell)
#   ans          - reference token lists used for BLEU
#   response     - decoded model outputs (filled in a later cell)
#   scores       - BLEU scores (filled in a later cell)
text, encoded_text = defaultdict(list), defaultdict(list)
ans, response = defaultdict(list), defaultdict(list)
scores = defaultdict(list)

for prompts, intent, entity in zip(questions, intents, entities):
    # One randomly chosen question prompt is shared by every row of this intent.
    question = choice(prompts)
    frame = data[intent]['df']

    # Prompt = "<s> " + utterance + ". " + question
    text[intent].extend(
        "<s> " + utterance.strip() + '. ' + question.strip()
        for utterance in frame['text']
    )

    # Reference = whitespace tokens of the entity column value, followed by "</s>"
    ans[intent].extend(
        (str(value) + " </s>").split()
        for value in frame[entity]
    )

In [7]:
# Tokenize every prompt into a tensor of token ids with a leading batch
# dimension of 1. The prompts already start with a literal "<s>", so the
# tokenizer is told not to add special tokens itself.
#
# The list for each intent is rebuilt from scratch (instead of appended to) so
# that re-running this cell does not duplicate entries.
for intent in intents:
    encoded_text[intent] = [
        torch.tensor([tokenizer.encode(row, add_special_tokens=False)])
        for row in text[intent]
    ]

In [8]:
# Inspect the first encoded BookRestaurant prompt (token ids, batch dimension of 1).
encoded_text['BookRestaurant'][0]

tensor([[   0, 6298,   20, 2367,  953, 1437, 2391,   11, 2808,   13, 5996,    4,
           38,   40, 1040,   10, 2103,   23]])

In [9]:
# Inspect the first BookRestaurant prompt string: "<s> " + utterance + ". " + question.
text['BookRestaurant'][0]

'<s> book The Middle East  restaurant in IN for noon. I will book a table at'

In [10]:
# Inspect the first BookRestaurant reference: whitespace tokens of the entity value plus "</s>".
ans['BookRestaurant'][0]

['The', 'Middle', 'East', 'in', 'IN', '</s>']

In [11]:
# Run every encoded prompt through the model and score the decoded output
# against the reference tokens with sentence-level BLEU.
#
# NOTE(review): the hypothesis is produced by decode(...).split(), so special
# tokens can stay glued to neighbouring words (e.g. "<s>book"), while the
# reference is split from the raw entity string. That mismatch presumably
# depresses the BLEU scores - confirm whether it is intended.
with torch.no_grad():
    for intent in intents:
        # Start from empty lists so that re-running this cell neither piles up
        # duplicates nor scores stale responses left over from a previous run.
        response[intent] = []
        scores[intent] = []

        # Wrap the list (not the enumerate object) so tqdm knows the total.
        for i, row in enumerate(tqdm(encoded_text[intent], desc=intent)):
            out = model(row)

            # The argmax token ids of the first pass are fed through the model
            # a second time; the argmax of that second pass is what gets decoded.
            first_pass_ids = torch.argmax(out[0][0], dim=1).view(1, -1)
            actual_out = model(first_pass_ids)
            hypothesis = tokenizer.decode(torch.argmax(actual_out[0][0], dim=1).tolist()).split()
            response[intent].append(hypothesis)

            if not hypothesis:
                # Nothing was generated: no n-gram can match the reference.
                score = 0.0
            else:
                # Uniform weights over n-gram orders 1..min(4, len(hypothesis)).
                # For hypotheses of 4+ tokens this equals NLTK's default
                # (0.25, 0.25, 0.25, 0.25); shorter ones use a lower max order.
                max_order = min(4, len(hypothesis))
                weights = tuple(1 / max_order for _ in range(max_order))
                score = nltk.translate.bleu_score.sentence_bleu(
                    [ans[intent][i]], hypothesis, weights=weights
                )

            scores[intent].append(score)
            

The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
2042it [05:21,  6.35it/s]
2073it [06:13,  5.55it/s]
2100it [05:17,  6.61it/s]
2100it [05:17,  6.62it/s]
2056it [04:56,  6.92it/s]
2054it [05:02,  6.79it/s]
2059it [05:08,  6.68it/s]


In [15]:
# Show the decoded model output for the first BookRestaurant example as a single string.
" ".join(response['BookRestaurant'][0])

'<s>book The Middle East Mediterranean restaurant in NYC for lunch</s>I will book a table</s>'

In [14]:
# Report the mean sentence-level BLEU score per intent.
for intent in intents:
    intent_scores = scores[intent]
    # An intent with no scored rows would otherwise raise ZeroDivisionError;
    # report NaN for it instead.
    avg_score = sum(intent_scores) / len(intent_scores) if intent_scores else float('nan')
    print(f"For {intent}, avg. BLEU score is {avg_score}")

For AddToPlaylist, avg. BLEU score is 0.03314444953707598
For BookRestaurant, avg. BLEU score is 0.024356911937837487
For GetWeather, avg. BLEU score is 0.015514249211315917
For PlayMusic, avg. BLEU score is 0.016610345225562685
For RateBook, avg. BLEU score is 0.011978870598112389
For SearchCreativeWork, avg. BLEU score is 0.08233009173333247
For SearchScreeningEvent, avg. BLEU score is 0.0811461706991225


In [None]:
# NOTE(review): `sample_sequence` is not defined or imported in any visible cell,
# and this cell has no execution count - on a fresh kernel it would presumably
# raise NameError. Confirm where the helper lives or remove this cell.
_out = sample_sequence(model, 20, encoded_text['BookRestaurant'][0][0], top_p = 0.9)

In [None]:
# First BookRestaurant example with the leading batch dimension indexed away.
encoded_text['BookRestaurant'][0][0]

In [None]:
# Display the tokenizer object's repr (unexecuted scratch cell).
tokenizer

In [None]:
# Decode the sampled ids in `_out` back to text; requires the cell that assigns
# `_out` to have run first.
tokenizer.decode(_out.squeeze(0).tolist())

In [None]:
# Scratch check of list -> tuple conversion; looks like leftover experimentation
# for the `weights` tuple used in BLEU scoring - TODO confirm and remove.
tuple([1, 2])