In [1]:
from datasets import load_dataset
from pipelines import pipeline
from nlgeval import compute_metrics
from tqdm import tqdm
import json 

In [2]:
train_dataset, valid_dataset = load_dataset('squad', split=['train', 'validation'])

Reusing dataset squad (C:\Users\lauts\.cache\huggingface\datasets\squad\plain_text\1.0.0\d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453)
100%|██████████| 2/2 [00:00<00:00, 57.10it/s]


In [3]:
nlp = pipeline("question-generation")

In [4]:
hyp = 'eval_test/hyp.txt'
ref = 'eval_test/ref1.txt'
ctx = 'eval_test/ref2.txt'
res = 'eval_test/res.txt'

dev_mode = True
squad_size = 100 if dev_mode else len(train_dataset)

#corpus = [text, text2,text3, text4]
c_t = None #Current Title
c_q = [] #Current QG set
cqc = "" #Current concatenated questions
ccc = "" #Current context

h_q = [] #Lines of predicted question (concated by each topic)
r_q = [] #Lines of actual question (concated by each topic)
c_c = [] #Lines of context (for each topic)

In [5]:
def wq(ta, xt=hyp):
    with open(xt, 'w', encoding='utf-8') as f:
        for t in tqdm(ta):
            nt = nlp(t)
            f.writelines([' '.join(nt), '\n'])

def wc(ta, xt=ref):
    with open(xt, 'w', encoding='utf-8') as f:
        f.writelines('\n'.join(ta))


In [6]:
print("QG for {0} records: ".format(squad_size))

for i in tqdm(range(0, squad_size)):
    t_d = train_dataset[i]
    #tdt = t_d["title"]

    #Fill in first context
    if i == 0:
        ccc = t_d["context"]
        #c_t = tdt

    #Force write result when it reaches the end
    if i == squad_size:
        ccc = ""

    #print(len(ccc), len(t_d["context"]))

    #Skip if no context swap
    if t_d["context"] == ccc:
        cqc = cqc + "{} ".format(t_d["question"])
        if i < squad_size - 1:
            continue
    
    #Title has been swapped. Retrieve predicted questions
    c_q = nlp(ccc)

    h_q.append(' '.join(cq["question"] for cq in c_q))
    r_q.append(cqc)
    c_c.append(ccc)
    
    #Swap context
    ccc = t_d["context"]
    #Clear question segment
    cqc = ""


print("Distinct context found: {0}".format(len(hyp)))
print("Writing {0}...".format(hyp))
wc(h_q, xt=hyp)
print("Writing {0}...".format(ref))
wc(r_q, xt=ref)
print("Writing {0}...".format(ctx))
wc(c_c, xt=ctx)

QG for 100 records: 


100%|██████████| 100/100 [00:40<00:00,  2.44it/s]

Distinct context found: 17
Writing eval_test/hyp.txt...
Writing eval_test/ref1.txt...
Writing eval_test/ref2.txt...





In [7]:
metrics_dict = compute_metrics(hypothesis=hyp, references=[ref, ctx], no_skipthoughts=True, no_glove=True)

Bleu_1: 0.630316
Bleu_2: 0.455492
Bleu_3: 0.337768
Bleu_4: 0.257436
METEOR: 0.231376
ROUGE_L: 0.347077
CIDEr: 0.145509


In [8]:
print("Writing result to {0}...".format(res))
      
json_res = json.dumps(metrics_dict, indent = 4) 
with open(res, 'w', encoding='utf-8') as f:
    f.writelines(json_res)


Writing result to eval_test/res.txt...
