In [None]:
!pip install -q -U transformers[sentencepiece] rouge git+https://github.com/deepset-ai/haystack.git grpcio-tools==1.34.1 spacy

In [None]:
import re
import os
import time
import spacy
import nltk
import json
import pandas as pd
import pandas as pd 
from tqdm import tqdm
from rouge import Rouge
from pprint import pprint
from typing import List
from haystack import Document
from haystack.reader import TransformersReader
from haystack.pipeline import ExtractiveQAPipeline 
from haystack.retriever.dense import DensePassageRetriever 
from haystack.document_store.faiss import FAISSDocumentStore
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

In [None]:
!spacy download en_core_web_md 
!spacy link en_core_web_md en

Collecting en-core-web-md==3.1.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.1.0/en_core_web_md-3.1.0-py3-none-any.whl (45.4 MB)
[K     |████████████████████████████████| 45.4 MB 16 kB/s 
Installing collected packages: en-core-web-md
Successfully installed en-core-web-md-3.1.0
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_md')
[38;5;3m⚠ As of spaCy v3.0, model symlinks are not supported anymore. You can
load trained pipeline packages using their full names or from a directory
path.[0m


In [None]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
from spacy.lang.en import English
# nlp = spacy.load('en_core_web_md')
nlp = English() 
nlp.add_pipe("sentencizer")

<spacy.pipeline.sentencizer.Sentencizer at 0x7fd5a7068460>

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Answers Bleu and Rouge

In [None]:
class report_generator():
    def __init__(self, 
                 retriever_top_ks: list, 
                 reader_top_ks: list, 
                 embed_titles: list, 
                 reader_models: list,
                 context_window_sizes: list,
                 text_datasets: list,
                 qa_datasets: list,
                 max_seq_len_passages: list,
                 max_seq_len_queries: list,
                 doc_strides: list,
                 max_seq_lens: list):
        
        self.retriever_top_ks = retriever_top_ks
        self.reader_top_ks = reader_top_ks
        self.embed_titles = embed_titles
        self.doc_strides = doc_strides
        self.reader_models = reader_models
        self.context_window_sizes = context_window_sizes
        self.text_datasets = text_datasets
        self.qa_datasets = qa_datasets
        self.max_seq_len_passages = max_seq_len_passages
        self.max_seq_len_queries = max_seq_len_queries
        self.max_seq_lens = max_seq_lens

    def __load_dataset(self, qa_dataset, text_dataset):
        with open(qa_dataset, "r") as f:
            self.qa = json.loads(f.read())['data']
            df = pd.read_csv(text_dataset, index_col=0)
            df = df.reset_index()

        titles = list(df["title"].values)
        texts  = list(df["text"].values)
        self.documents: List[Document] = []
        
        for title, text in zip(titles, texts):
            self.documents.append(
                Document(
                    text=text,
                    meta={
                        "name": title or ""
                    }
                )
            )
  
    def __init_doc_store(self, similarity="dot_product"):
        self.document_store = FAISSDocumentStore(similarity=similarity,
                                                faiss_index_factory_str="Flat",
                                                return_embedding=True
                                                )

    def __init_retriever(self, embed_title, 
                         query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
                         passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base"):
        
        self.retriever = DensePassageRetriever(document_store=self.document_store,
                                            query_embedding_model=query_embedding_model,
                                            passage_embedding_model=passage_embedding_model,
                                            use_gpu=True,
                                            embed_title=embed_title
                                            )
    def __update_embedding(self):
        self.document_store.delete_documents()
        self.document_store.write_documents(self.documents)
        self.document_store.update_embeddings(
            retriever=self.retriever
            )

    def __init_reader(self, model_name, window_size, seq_len, stride):
        self.reader = TransformersReader(model_name_or_path=model_name, 
                                        context_window_size=window_size,
                                        max_seq_len=seq_len,
                                        doc_stride=stride,
                                        use_gpu=0
                                        )
        
    def __compute_metrics(self, reader, retriever, qa, retriever_top_k, reader_top_k):
        bleu_scores = []
        rouge1_scores = []
        rouge2_scores = []
        rougel_scores = []
        context_detection = []
        context_accuracy = []

        rouge = Rouge()
        smoothie = SmoothingFunction().method4

        for data in tqdm(qa):
            true_context = data['paragraphs'][0]['context']
            # true_context = true_context.replace('\n', ' ')

            for q_a in data['paragraphs'][0]['qas']:
                question = q_a['question']
                reference_list = set([answer['text'] for answer in q_a['answers']])
                reference = " ".join(reference_list)
                reference_sents = nlp(reference)
                reference_sents = list(reference_sents.sents)
                reference_sents = [sent.text.lstrip().rstrip() for sent in reference_sents]

                pipe = ExtractiveQAPipeline(reader, retriever)

                preds = pipe.run(
                    query=question,
                    params={"Retriever": {"top_k": retriever_top_k}, 
                            "Reader": {"top_k": reader_top_k}}
                    )

                candidate_sent_list = []

                for pred in preds['answers']:
                    pred_answer = pred['answer']

                    if pred_answer is not None:
                        offset_start = pred['offset_start']
                        offset_end = pred['offset_end']
                        meta_name = pred['meta']['vector_id']

                        pred_all_context_sents= []

                        for pred in preds['documents']:
                            pred_all_context_sents += list(nlp(pred.to_dict()['text']).sents)

                            if pred.to_dict()['meta']['vector_id'] == meta_name:
                                pred_context = pred.to_dict()['text']
                                pred_context_sents = nlp(pred_context)
                                pred_context_sents = list(pred_context_sents.sents)
                                pred_context_sents = [sent.text for sent in pred_context_sents]
                                # pred_context = " ".join(pred_context_sents)

                        pred_all_context_sents = [re.sub(r'\n+', ' ', sent.text).strip() for sent in pred_all_context_sents]

                        doc = nlp(pred_answer)
                        pred_answer_sents = list(doc.sents)
                        pred_answer_sents = [sent.text for sent in pred_answer_sents] 

                        for pred_context_sent in pred_context_sents:
                            start_index = 0
                            end_index = len(pred_answer) 

                            for pred_answer_sent in pred_answer_sents:
                                right_reduction = len(pred_answer_sent) - len(pred_answer_sent.rstrip())
                                left_reduction = len(pred_answer_sent) - len(pred_answer_sent.lstrip())
                                end_index -= len(pred_answer_sent) + 0 if pred_context_sents[-1] == pred_answer_sent else 1

                                context_offset_start = pred_context.find(pred_context_sent)
                                context_offset_end = pred_context.find(pred_context_sent) + len(pred_context_sent)

                                if  context_offset_start - left_reduction <= offset_start + start_index and context_offset_end + right_reduction >= offset_end - end_index:
                                    candidate_sent_list.append(pred_context_sent)
                                
                                start_index += len(pred_answer_sent) + 0 if pred_context_sents[-1] == pred_answer_sent else 1

                        for reference_sent in reference_sents:
                            context_truth = 0

                            if reference_sent in pred_all_context_sents:
                                context_truth = 1
                
                            context_accuracy.append(context_truth)

                candidate_sent_set = set(candidate_sent_list)
                candidate = " ".join(candidate_sent_set)
                token_reference = nltk.word_tokenize(reference)
                token_candidate = nltk.word_tokenize(candidate)

                bleu_score = sentence_bleu(token_reference, 
                                            token_candidate, 
                                            smoothing_function=smoothie, 
                                            weights=(1, 0, 0, 0))
                rouge_score = rouge.get_scores(candidate, reference)

                bleu_scores.append(bleu_score)
                rouge1_scores.append(rouge_score[0]['rouge-1']['f'])
                rouge2_scores.append(rouge_score[0]['rouge-2']['f'])
                rougel_scores.append(rouge_score[0]['rouge-l']['f'])

        ctx_acc = context_accuracy.count(1)/len(context_accuracy)
        bleu_ave = sum(bleu_scores)/len(bleu_scores)
        rouge1_ave = sum(rouge1_scores)/len(rouge1_scores)
        rouge2_ave = sum(rouge2_scores)/len(rouge2_scores)
        rougel_ave = sum(rougel_scores)/len(rougel_scores)
        # Pick the last question in dataset as a sample
        return bleu_ave, rouge1_ave, rouge2_ave, rougel_ave, ctx_acc, question, reference_list, true_context, candidate_sent_set, pred_all_context_sents

    def get_report(self):
        logs = []
        metric = [] 

        self.__init_doc_store()

        for qa_dataset in self.qa_datasets:
            for text_dataset in self.text_datasets:
                self.__load_dataset(qa_dataset, text_dataset)
                self.__init_doc_store()

                for embed_title in self.embed_titles:
                    self.__init_retriever(embed_title)
                    tic = time.time()
                    self.__update_embedding()
                    toc = time.time()
                    document_embedding_time = toc - tic
                    
                    for reader_model in self.reader_models:
                        for context_window_size in self.context_window_sizes:
                            for doc_stride in self.doc_strides:
                                for max_seq_len in self.max_seq_lens:
                                    self.__init_reader(reader_model, context_window_size, max_seq_len, doc_stride)

                                    for retriever_top_k in self.retriever_top_ks:
                                        for reader_top_k in self.reader_top_ks:
                                            for max_seq_len_passage in self.max_seq_len_passages:
                                                for max_seq_len_query in self.max_seq_len_queries:
                                                    tic = time.time()
                                                    bleu_ave, rouge1_ave, rouge2_ave, rougel_ave, ctx_acc, question, reference_list, true_context, candidate_sent_set, pred_all_context_sents = self.__compute_metrics(self.reader, self.retriever, self.qa, retriever_top_k, reader_top_k)
                                                    toc = time.time()
                                                    inference_time = toc - tic

                                                    cmd = "nvidia-smi -q -x | grep \<fb_memory_usage\> -A 3 | grep used | sed -n 's:.*<used>\(.*\)</used>.*:\1:p'"
                                                    vram = os.system(cmd)

                                                    metric.append([reader_model, qa_dataset, embed_title, context_window_size, max_seq_len, doc_stride,
                                                                   max_seq_len_passage, max_seq_len_query, retriever_top_k, reader_top_k, rouge1_ave,
                                                                   rouge2_ave, rougel_ave, bleu_ave, ctx_acc, vram, document_embedding_time, inference_time])
                                                    
                                                    log = {'Question': question,
                                                           'Reference answers': reference_list,
                                                           'Reference context': true_context,
                                                           'Predicted answers': candidate_sent_set,
                                                           'Retrieved context': pred_all_context_sents
                                                           }

                                                    logs.append(log)

        columns = ['QA model', 'Dataset', 'embed_title', 'context_window_size', 
                   'max_seq_len', 'doc_stride', 'max_seq_len_passage', 'max_seq_len_query',
                   'Retriever top-k', 'QA top-k', 'Rouge-1', 
                   'Rouge-2', 'Rouge-l', 'Bleu', 
                   'Answers percent in retrieved documents', 'VRAM',
                   'document embedding time', 'inference time'  
                   ]

        df = pd.DataFrame(metric, columns=columns)

        return df, logs

In [None]:
report_generator = report_generator(max_seq_lens = [256], 
                                    max_seq_len_passages = [256],   
                                    max_seq_len_queries = [64],
                                    embed_titles = [True, False], 
                                    context_window_sizes = [150, 175],
                                    doc_strides = [100, 128],
                                    retriever_top_ks = [3, 5, 7],
                                    reader_top_ks = [3, 5, 7], 
                                    reader_models = ['drive/MyDrive/bert_basefi_qafi',
                                                     'drive/MyDrive/squeeze-bertfi_qafi',
                                                     'ktrapeznikov/albert-xlarge-v2-squad-v2',
                                                     'deepset/roberta-base-squad2',
                                                     'deepset/minilm-uncased-squad2',
                                                     'ahotrod/albert_xxlargev1_squad2_512'],
                                    text_datasets = ['drive/MyDrive/titleText-threeSentences.csv',
                                                     'drive/MyDrive/titleText-paragraphs.csv'],
                                    qa_datasets = ['drive/MyDrive/qa-SQUAD.json'])

In [None]:
df, logs = report_generator.get_report()

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/493 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/438M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/492 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/438M [00:00<?, ?B/s]

Updating Embedding:   0%|          | 0/16341 [00:00<?, ? docs/s]

Create embeddings:   0%|          | 0/10000 [00:00<?, ? Docs/s]

Documents Processed:  61%|██████    | 10000/16341 [05:24<03:25, 30.78 docs/s]

Create embeddings:   0%|          | 0/6352 [00:00<?, ? Docs/s]

Documents Processed: 20000 docs [08:50, 37.70 docs/s]
100%|██████████| 4/4 [00:02<00:00,  1.35it/s]
100%|██████████| 4/4 [00:02<00:00,  1.51it/s]
100%|██████████| 4/4 [00:02<00:00,  1.49it/s]
100%|██████████| 4/4 [00:03<00:00,  1.02it/s]
100%|██████████| 4/4 [00:04<00:00,  1.00s/it]
100%|██████████| 4/4 [00:04<00:00,  1.02s/it]
100%|██████████| 4/4 [00:05<00:00,  1.28s/it]
100%|██████████| 4/4 [00:05<00:00,  1.39s/it]
100%|██████████| 4/4 [00:05<00:00,  1.37s/it]
100%|██████████| 4/4 [00:02<00:00,  1.54it/s]
100%|██████████| 4/4 [00:02<00:00,  1.51it/s]
100%|██████████| 4/4 [00:02<00:00,  1.46it/s]
100%|██████████| 4/4 [00:03<00:00,  1.01it/s]
100%|██████████| 4/4 [00:04<00:00,  1.02s/it]
100%|██████████| 4/4 [00:04<00:00,  1.04s/it]
100%|██████████| 4/4 [00:05<00:00,  1.28s/it]
100%|██████████| 4/4 [00:05<00:00,  1.33s/it]
100%|██████████| 4/4 [00:05<00:00,  1.45s/it]
100%|██████████| 4/4 [00:02<00:00,  1.49it/s]
100%|██████████| 4/4 [00:02<00:00,  1.48it/s]
100%|██████████| 4/4 [00:0

Downloading:   0%|          | 0.00/717 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/235M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/760k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/156 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/58.0 [00:00<?, ?B/s]

100%|██████████| 4/4 [00:19<00:00,  5.00s/it]
100%|██████████| 4/4 [00:20<00:00,  5.00s/it]
100%|██████████| 4/4 [00:20<00:00,  5.02s/it]
100%|██████████| 4/4 [00:33<00:00,  8.48s/it]
100%|██████████| 4/4 [00:34<00:00,  8.51s/it]
100%|██████████| 4/4 [00:34<00:00,  8.54s/it]
100%|██████████| 4/4 [00:46<00:00, 11.51s/it]
100%|██████████| 4/4 [00:45<00:00, 11.44s/it]
100%|██████████| 4/4 [00:45<00:00, 11.49s/it]
100%|██████████| 4/4 [00:20<00:00,  5.00s/it]
100%|██████████| 4/4 [00:20<00:00,  5.01s/it]
100%|██████████| 4/4 [00:20<00:00,  5.02s/it]
100%|██████████| 4/4 [00:34<00:00,  8.56s/it]
100%|██████████| 4/4 [00:34<00:00,  8.59s/it]
100%|██████████| 4/4 [00:34<00:00,  8.61s/it]
100%|██████████| 4/4 [00:46<00:00, 11.52s/it]
100%|██████████| 4/4 [00:46<00:00, 11.65s/it]
100%|██████████| 4/4 [00:46<00:00, 11.59s/it]
100%|██████████| 4/4 [00:20<00:00,  5.01s/it]
100%|██████████| 4/4 [00:20<00:00,  5.00s/it]
100%|██████████| 4/4 [00:20<00:00,  5.03s/it]
100%|██████████| 4/4 [00:33<00:00,

Downloading:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/496M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/772 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/79.0 [00:00<?, ?B/s]

100%|██████████| 4/4 [00:02<00:00,  1.39it/s]
100%|██████████| 4/4 [00:02<00:00,  1.45it/s]
100%|██████████| 4/4 [00:02<00:00,  1.39it/s]
100%|██████████| 4/4 [00:04<00:00,  1.02s/it]
100%|██████████| 4/4 [00:04<00:00,  1.05s/it]
100%|██████████| 4/4 [00:04<00:00,  1.09s/it]
100%|██████████| 4/4 [00:05<00:00,  1.32s/it]
100%|██████████| 4/4 [00:05<00:00,  1.37s/it]
100%|██████████| 4/4 [00:05<00:00,  1.49s/it]
100%|██████████| 4/4 [00:02<00:00,  1.38it/s]
100%|██████████| 4/4 [00:02<00:00,  1.42it/s]
100%|██████████| 4/4 [00:02<00:00,  1.39it/s]
100%|██████████| 4/4 [00:04<00:00,  1.02s/it]
100%|██████████| 4/4 [00:04<00:00,  1.06s/it]
100%|██████████| 4/4 [00:04<00:00,  1.08s/it]
100%|██████████| 4/4 [00:05<00:00,  1.34s/it]
100%|██████████| 4/4 [00:05<00:00,  1.38s/it]
100%|██████████| 4/4 [00:05<00:00,  1.41s/it]
100%|██████████| 4/4 [00:03<00:00,  1.32it/s]
100%|██████████| 4/4 [00:02<00:00,  1.43it/s]
100%|██████████| 4/4 [00:02<00:00,  1.36it/s]
100%|██████████| 4/4 [00:04<00:00,

Downloading:   0%|          | 0.00/477 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/133M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/107 [00:00<?, ?B/s]

100%|██████████| 4/4 [00:02<00:00,  1.73it/s]
100%|██████████| 4/4 [00:02<00:00,  1.65it/s]
100%|██████████| 4/4 [00:02<00:00,  1.39it/s]
100%|██████████| 4/4 [00:03<00:00,  1.26it/s]
100%|██████████| 4/4 [00:03<00:00,  1.29it/s]
100%|██████████| 4/4 [00:03<00:00,  1.26it/s]
100%|██████████| 4/4 [00:03<00:00,  1.09it/s]
100%|██████████| 4/4 [00:03<00:00,  1.05it/s]
100%|██████████| 4/4 [00:03<00:00,  1.03it/s]
100%|██████████| 4/4 [00:01<00:00,  2.03it/s]
100%|██████████| 4/4 [00:02<00:00,  1.90it/s]
100%|██████████| 4/4 [00:02<00:00,  1.81it/s]
100%|██████████| 4/4 [00:02<00:00,  1.36it/s]
100%|██████████| 4/4 [00:03<00:00,  1.17it/s]
100%|██████████| 4/4 [00:03<00:00,  1.30it/s]
100%|██████████| 4/4 [00:03<00:00,  1.10it/s]
100%|██████████| 4/4 [00:03<00:00,  1.04it/s]
100%|██████████| 4/4 [00:03<00:00,  1.01it/s]
100%|██████████| 4/4 [00:01<00:00,  2.03it/s]
100%|██████████| 4/4 [00:02<00:00,  1.90it/s]
100%|██████████| 4/4 [00:02<00:00,  1.80it/s]
100%|██████████| 4/4 [00:02<00:00,

Downloading:   0%|          | 0.00/715 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/890M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/760k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/156 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

100%|██████████| 4/4 [00:36<00:00,  9.20s/it]
100%|██████████| 4/4 [00:36<00:00,  9.18s/it]
100%|██████████| 4/4 [00:36<00:00,  9.21s/it]
100%|██████████| 4/4 [01:02<00:00, 15.63s/it]
100%|██████████| 4/4 [01:02<00:00, 15.63s/it]
100%|██████████| 4/4 [01:02<00:00, 15.68s/it]
100%|██████████| 4/4 [01:24<00:00, 21.13s/it]
100%|██████████| 4/4 [01:25<00:00, 21.33s/it]
100%|██████████| 4/4 [01:24<00:00, 21.22s/it]
100%|██████████| 4/4 [00:36<00:00,  9.17s/it]
100%|██████████| 4/4 [00:36<00:00,  9.18s/it]
100%|██████████| 4/4 [00:36<00:00,  9.23s/it]
100%|██████████| 4/4 [01:03<00:00, 15.87s/it]
100%|██████████| 4/4 [01:03<00:00, 15.92s/it]
100%|██████████| 4/4 [01:03<00:00, 15.93s/it]
100%|██████████| 4/4 [01:25<00:00, 21.46s/it]
100%|██████████| 4/4 [01:26<00:00, 21.57s/it]
100%|██████████| 4/4 [01:26<00:00, 21.52s/it]
100%|██████████| 4/4 [00:36<00:00,  9.24s/it]
100%|██████████| 4/4 [00:36<00:00,  9.24s/it]
100%|██████████| 4/4 [00:37<00:00,  9.26s/it]
100%|██████████| 4/4 [01:02<00:00,

Create embeddings:   0%|          | 0/10000 [00:00<?, ? Docs/s]

Documents Processed:  61%|██████    | 10000/16341 [07:21<04:33, 23.16 docs/s]

Create embeddings:   0%|          | 0/6352 [00:00<?, ? Docs/s]

Documents Processed: 20000 docs [10:57, 30.42 docs/s]
100%|██████████| 4/4 [00:02<00:00,  1.44it/s]
100%|██████████| 4/4 [00:02<00:00,  1.60it/s]
100%|██████████| 4/4 [00:02<00:00,  1.41it/s]
100%|██████████| 4/4 [00:03<00:00,  1.19it/s]
100%|██████████| 4/4 [00:03<00:00,  1.15it/s]
100%|██████████| 4/4 [00:03<00:00,  1.12it/s]
100%|██████████| 4/4 [00:04<00:00,  1.14s/it]
100%|██████████| 4/4 [00:04<00:00,  1.16s/it]
100%|██████████| 4/4 [00:04<00:00,  1.18s/it]
100%|██████████| 4/4 [00:02<00:00,  1.63it/s]
100%|██████████| 4/4 [00:02<00:00,  1.60it/s]
100%|██████████| 4/4 [00:02<00:00,  1.59it/s]
100%|██████████| 4/4 [00:03<00:00,  1.16it/s]
100%|██████████| 4/4 [00:03<00:00,  1.06it/s]
100%|██████████| 4/4 [00:03<00:00,  1.12it/s]
100%|██████████| 4/4 [00:04<00:00,  1.14s/it]
100%|██████████| 4/4 [00:04<00:00,  1.16s/it]
100%|██████████| 4/4 [00:04<00:00,  1.18s/it]
100%|██████████| 4/4 [00:02<00:00,  1.63it/s]
100%|██████████| 4/4 [00:02<00:00,  1.60it/s]
100%|██████████| 4/4 [00:0

Create embeddings:   0%|          | 0/10000 [00:00<?, ? Docs/s]

Documents Processed:  50%|█████     | 10000/19943 [06:14<05:55, 27.97 docs/s]

Create embeddings:   0%|          | 0/9952 [00:00<?, ? Docs/s]

Documents Processed: 20000 docs [11:51, 28.11 docs/s]
100%|██████████| 4/4 [00:02<00:00,  1.34it/s]
100%|██████████| 4/4 [00:03<00:00,  1.31it/s]
100%|██████████| 4/4 [00:02<00:00,  1.48it/s]
100%|██████████| 4/4 [00:03<00:00,  1.13it/s]
100%|██████████| 4/4 [00:03<00:00,  1.10it/s]
100%|██████████| 4/4 [00:03<00:00,  1.07it/s]
100%|██████████| 4/4 [00:04<00:00,  1.15s/it]
100%|██████████| 4/4 [00:04<00:00,  1.17s/it]
100%|██████████| 4/4 [00:04<00:00,  1.20s/it]
100%|██████████| 4/4 [00:02<00:00,  1.47it/s]
100%|██████████| 4/4 [00:02<00:00,  1.43it/s]
100%|██████████| 4/4 [00:02<00:00,  1.42it/s]
100%|██████████| 4/4 [00:03<00:00,  1.10it/s]
100%|██████████| 4/4 [00:04<00:00,  1.06s/it]
100%|██████████| 4/4 [00:03<00:00,  1.03it/s]
100%|██████████| 4/4 [00:04<00:00,  1.18s/it]
100%|██████████| 4/4 [00:04<00:00,  1.20s/it]
100%|██████████| 4/4 [00:04<00:00,  1.23s/it]
100%|██████████| 4/4 [00:02<00:00,  1.55it/s]
100%|██████████| 4/4 [00:02<00:00,  1.54it/s]
100%|██████████| 4/4 [00:0

Create embeddings:   0%|          | 0/10000 [00:00<?, ? Docs/s]

Documents Processed:  50%|█████     | 10000/19943 [06:15<06:03, 27.37 docs/s]

Create embeddings:   0%|          | 0/9952 [00:00<?, ? Docs/s]

Documents Processed: 20000 docs [11:56, 27.90 docs/s]
100%|██████████| 4/4 [00:02<00:00,  1.54it/s]
100%|██████████| 4/4 [00:02<00:00,  1.65it/s]
100%|██████████| 4/4 [00:02<00:00,  1.61it/s]
100%|██████████| 4/4 [00:03<00:00,  1.24it/s]
100%|██████████| 4/4 [00:03<00:00,  1.27it/s]
100%|██████████| 4/4 [00:03<00:00,  1.24it/s]
100%|██████████| 4/4 [00:04<00:00,  1.10s/it]
100%|██████████| 4/4 [00:04<00:00,  1.01s/it]
100%|██████████| 4/4 [00:04<00:00,  1.03s/it]
100%|██████████| 4/4 [00:02<00:00,  1.76it/s]
100%|██████████| 4/4 [00:02<00:00,  1.74it/s]
100%|██████████| 4/4 [00:02<00:00,  1.71it/s]
100%|██████████| 4/4 [00:03<00:00,  1.30it/s]
100%|██████████| 4/4 [00:03<00:00,  1.26it/s]
100%|██████████| 4/4 [00:03<00:00,  1.22it/s]
100%|██████████| 4/4 [00:03<00:00,  1.01it/s]
100%|██████████| 4/4 [00:04<00:00,  1.02s/it]
100%|██████████| 4/4 [00:04<00:00,  1.14s/it]
100%|██████████| 4/4 [00:02<00:00,  1.75it/s]
100%|██████████| 4/4 [00:02<00:00,  1.74it/s]
100%|██████████| 4/4 [00:0

In [None]:
df.head()

Unnamed: 0,QA model,Dataset,embed_title,context_window_size,max_seq_len,doc_stride,max_seq_len_passage,max_seq_len_query,Retriever top-k,QA top-k,Rouge-1,Rouge-2,Rouge-l,Bleu,Answers percent in retrieved documents,VRAM,document embedding time,inference time
0,drive/MyDrive/bert_basefi_qafi,drive/MyDrive/qa-SQUAD.json,True,150,256,100,256,64,3,3,0.251098,0.159368,0.243856,0.079877,0.147059,0,584.795759,2.969653
1,drive/MyDrive/bert_basefi_qafi,drive/MyDrive/qa-SQUAD.json,True,150,256,100,256,64,3,5,0.285494,0.187559,0.275389,0.05682,0.147059,0,584.795759,2.666598
2,drive/MyDrive/bert_basefi_qafi,drive/MyDrive/qa-SQUAD.json,True,150,256,100,256,64,3,7,0.282064,0.183417,0.270337,0.04917,0.147059,0,584.795759,2.699238
3,drive/MyDrive/bert_basefi_qafi,drive/MyDrive/qa-SQUAD.json,True,150,256,100,256,64,5,3,0.282972,0.1823,0.263041,0.076722,0.147059,0,584.795759,3.913378
4,drive/MyDrive/bert_basefi_qafi,drive/MyDrive/qa-SQUAD.json,True,150,256,100,256,64,5,5,0.319277,0.215881,0.306076,0.050894,0.147059,0,584.795759,4.027034


In [None]:
df.to_csv('drive/MyDrive/dpr-qa-report.csv', index=False)

In [None]:
def serialize_sets(obj):
    if isinstance(obj, set):
        return list(obj)

    return obj

In [None]:
with open('drive/MyDrive/dpr-qa-sample.json', 'w') as f:
    json.dump(logs, f, default=serialize_sets)