In [1]:
import nltk
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /home/jovyan/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /home/jovyan/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [2]:
fname = "train_fold_one"
basepath = "./-paraphrase-multilingual-MiniLM-L12-v2_3015"

## Load data

In [3]:
import pandas as pd

def loadData(fname, basepath):
    sample = pd.read_csv(f"{basepath}/{fname}.csv")
    sample['Document'] = sample.apply(lambda row: ': '.join(row.astype(str)), axis=1)
    return sample

## Question Generation

In [4]:
from qa_generation import QAGenerator
from prompt_factory import PromptFactory
from tqdm.notebook import tqdm

template = '''
        Gedraag je als een {role} die online informatie zoekt.
        Patiënten stellen meestal vragen als:
{{
    "qa_list": [
        {{
            "question": "Hoe kan ik een verstopte voedingssonde doorspoelen?",
            "answer": "Om een verstopte voedingssonde door te spoelen, kunt u de volgende stappen proberen: 1. Sluit de spuit rechtstreeks aan op de sonde (niet op het voedingssysteem). Als er een verstopping is in het voedingssysteem, kunt u dit vervangen. 2. Neem een spuit van 10 cc en spuit met lichte druk lauwwarm water door de voedingssonde. Herhaal dit zo nodig nog een keer. 3. Als het oplossen van de verstopping niet lukt, laat dan lauwwarm water 30 minuten inwerken en herhaal de bovenstaande procedure nogmaals. 4. Als u de verstopping kunt zien, kunt u proberen om de voedingssonde op die plek zachtjes te kneden. Als de verstopping dan loskomt, kunt u het doorspuiten met lauwwarm water. Belangrijk: Gebruik nooit een voerdraad of koolzuurhoudend bronwater of frisdranken, omdat dit kan leiden tot perforatie. Daarnaast wordt het afraden om natriumbicarbonaat te gebruiken als medicatie de oorzaak is van de verstopping, omdat dit de verstopping groter kan maken."
        }}
    ]
}}


        in het json formaat: 
        {format}

Document Informatie:
        {document}

        Belangrijke Opmerking: Bij het genereren van vragen, gebruik specifieke termen en benamingen uit het document in plaats van algemene termen zoals 'dit onderzoek' of 'die procedure'. Verwijs direct naar de procedure of het document met de exacte naam om nauwkeurigheid en duidelijkheid in de vragen te waarborgen. Vermijd algemeenheden en zorg ervoor dat elke vraag direct gerelateerd is aan de verstrekte documentinformatie.


        Stel {n} vragen die beantwoord kunnen worden op basis van deze paragraaf in het formaat:
        {format}

        Zorg ervoor dat elke vraag en antwoord paar in een geldig JSON-formaat is. Dit betekent dat vragen en antwoorden tussen dubbele aanhalingstekens moeten staan, en de algemene structuur moet overeenkomen met het vereiste JSON-schema.
        '''

roles = [
    'Patiënt',
    'Nieuwe Ouder',
    'Oudere Patiënt',
    'Persoon die een Tweede Mening Zoekt',
    'Reiziger die Medisch Advies Na Reizen Zoekt',
    'Zorgverlener die Informatie Zoekt']

prompt_factory = PromptFactory(prompt=template,roles=roles)

num_questions_per_doc = 5

QAGenerator = QAGenerator()
# Generate the data
import pandas as pd
from tqdm import tqdm


def generateQuestions(df):
    data = []
    for index, row in tqdm(df.iterrows(), total=df.shape[0],desc="Generating QAs"):
        doc = row["Document"]
        # Do something with col1 and col2
        try:
            for i in range(2):
                qa_list = QAGenerator.generate_qas(prompt_factory.generate_prompt(doc, num_questions_per_doc))
                for qa in qa_list:
                    new_row = row.to_dict()
                    new_row['Question'] = qa.question
                    new_row['Answer'] = qa.answer
                    data.append(new_row)

        except Exception as e:
                print(f"Failed to generate QA for doc: {doc[:100]}. Error: {e}")
                continue
    if(len(data) == 0):
       return pd.DataFrame()
    result = pd.DataFrame(data)
    result = result.dropna(subset=['Document', 'Question', 'Answer'])
    result = result[(result['Document'] != '') & (result['Question'] != '') & (result['Answer'] != '')]
    return result


In [5]:
from fact_qa import FactQAGenerator
from prompt_factory import PromptFactory
import langchain

template = '''
Gedraag je als iemand die online informatie zoekt en stelt vragen gebaseerd op specifieke feiten zoals locatie, telefoonnummer, naam, enzovoort.

Patiënten of gebruikers kunnen bijvoorbeeld vragen stellen als:
{{
    "qa_list": [
        {{
            "question": "Wat is het telefoonnummer van de kliniek voor noodgevallen?",
            "answer": "Het telefoonnummer van de kliniek voor noodgevallen is 012-345-6789."
        }},
        {{
            "question": "Waar is de hoofdingang van het ziekenhuis gelegen?",
            "answer": "De hoofdingang van het ziekenhuis is gelegen aan de Eerste Gezondheidsstraat 123 in Amsterdam."
        }}
        // meer vragen en antwoorden gebaseerd op specifieke feiten
    ]
}}

in het json formaat:
{format}

Document Informatie:
{document}

Belangrijke Opmerking: Bij het genereren van vragen, gebruik specifieke termen en benamingen uit het document in plaats van algemene termen. Verwijs direct naar de specifieke feit (zoals locatie, telefoonnummer, naam) om nauwkeurigheid en duidelijkheid in de vragen te waarborgen. Vermijd algemeenheden.

Maak vragen die beantwoord kunnen worden met de volgende informatie: {fact}

Stel {n} vragen die beantwoord kunnen worden op basis van deze paragraaf in het formaat:
{format}

Zorg ervoor dat elke vraag en antwoord paar in een geldig JSON-formaat is. Dit betekent dat vragen en antwoorden tussen dubbele aanhalingstekens moeten staan, en de algemene structuur moet overeenkomen met het vereiste JSON-schema.
        '''

base_prompt = langchain.PromptTemplate(
            template=template,
            input_variables=['fact', 'n', 'document', 'format']
        )

num_questions_per_doc = 5

FactQAGenerator = FactQAGenerator()

def generateFactQuestions(df):
    data = []
    # Generate the data
    for index, row in tqdm(df.iterrows(), total=df.shape[0],desc="Generating Fact QAs"):
        doc = row['Document']
        # Assuming phone numbers are separated by commas
        facts = row['Context Entities']
        try:
            for i in range(2):
                for entity in extract_desired_entities(facts)[:5]:
                    fact = entity["value"]
                    prompt = base_prompt.partial(
                        n=num_questions_per_doc,
                        document=doc)
                    try:
                        qa_list = FactQAGenerator.generate_question_for_fact(prompt, fact)
                        for qa in qa_list:
                            new_row = row.to_dict()
                            new_row['Question'] = qa.question
                            new_row['Answer'] = qa.answer
                            new_row['Fact'] = entity
                            print(entity)
                            data.append(new_row)
                    except:
                        pass
        except Exception as e:
                print(f"Failed to generate QA for document: {doc[:20]}. Error: {e:20}")

    if(len(data) == 0):
        return pd.DataFrame()

    result = pd.DataFrame(data)
    result = result.dropna(subset=['Document', 'Question', 'Answer'])
    result = result[(result['Document'] != '') & (result['Question'] != '') & (result['Answer'] != '')]
    return result

In [6]:
def update_df_with_lambda(df, column_name, lambda_function):
     # Check if the column does not exist in the DataFrame, if not, initialize it with NA
    if column_name not in df.columns:
        df[column_name] = pd.NA
    
    # Apply the lambda function only to rows where the column value is NA
    df[column_name] = df.apply(lambda row: lambda_function(row) if pd.isna(row[column_name]) else row[column_name], axis=1)
    return df

In [7]:
from answer_context_filter import calculate_bleu_score, calculate_rouge_score

def calculate_scores(df):
    # Calculate BLEU and ROUGE scores
    #df['Question-Context BLEU'] = df.apply(lambda row: calculate_bleu_score(row['Document'], row['Question']), axis=1)
    #df['Answer-Context BLEU'] = df.apply(lambda row: calculate_bleu_score(row['Document'], row['Answer']), axis=1)

    #df['Question-Context ROUGE'] = df.apply(lambda row: calculate_rouge_score(row['Document'], row['Question']), axis=1)
    #df['Answer-Context ROUGE'] = df.apply(lambda row: calculate_rouge_score(row['Document'], row['Answer']), axis=1)
    #df['Question-Context ROUGE-L F1'] = df['Question-Context ROUGE'].apply(lambda x: x[0]['rouge-l']['f'] if x else None)
    #df['Answer-Context ROUGE-L F1'] = df['Answer-Context ROUGE'].apply(lambda x: x[0]['rouge-l']['f'] if x else None)

    df = update_df_with_lambda(
        df, 'Question-Context ROUGE',
        lambda row: calculate_rouge_score(row['Document'], row['Question']) if pd.isna(row.get('Question-Context ROUGE')) else row['Question-Context ROUGE']
    )

    # Update 'Answer-Context ROUGE' only if it is NA
    df = update_df_with_lambda(
        df, 'Answer-Context ROUGE',
        lambda row: calculate_rouge_score(row['Document'], row['Answer']) if pd.isna(row.get('Answer-Context ROUGE')) else row['Answer-Context ROUGE']
    )

    # Update 'Question-Context ROUGE-L F1' based on the 'Question-Context ROUGE' column, only if it is NA
    df = update_df_with_lambda(
        df, 'Question-Context ROUGE-L F1',
        lambda row: row['Question-Context ROUGE'][0]['rouge-l']['f'] if pd.isna(row.get('Question-Context ROUGE-L F1')) and row['Question-Context ROUGE'] else None
    )

    # Update 'Answer-Context ROUGE-L F1' based on the 'Answer-Context ROUGE' column, only if it is NA
    df = update_df_with_lambda(
        df, 'Answer-Context ROUGE-L F1',
        lambda row: row['Answer-Context ROUGE'][0]['rouge-l']['f'] if pd.isna(row.get('Answer-Context ROUGE-L F1')) and row['Answer-Context ROUGE'] else None
    )

    return df


def rougeFilter(df):
    df = calculate_scores(df)
    df = df[df['Answer-Context ROUGE-L F1'] > 0.1]
    return df

## Vector filter

In [8]:
from vector_filter import precompute_embeddings
from vector_filter import filter_dataframe

def vectorFilter(df):
    df["Embedding"] = precompute_embeddings(df, 'Question')
    df = filter_dataframe(df, threshold=0.9)
    return df

## Entity Filter

In [9]:
def contains_all_elements(list1, list2):
    """
    Check if list1 contains all elements of list2.
    Each element is a dictionary with 'entityType' and 'value' keys.
    """
    # Convert each dictionary in the lists to a tuple (entityType, value) for easy comparison
    #set1 = {tuple(d.items()) for d in list1}
    #set2 = {tuple(d.items()) for d in list2}
    set1 = {d['value'] for d in list1 if 'value' in d}
    set2 = {d['value'] for d in list2 if 'value' in d}

    # Check if every element in set2 is also in set1
    return set2.issubset(set1)

def check_entity_match(row):
    return contains_all_elements(row['Context Entities'], row['Answer Entities'])

# Apply the function to each row to create the new column


In [10]:
from extract import extract_all_entities

def extractEntities(df):
    df['Context Entities'] = df['Document'].apply(extract_all_entities)
    df['Question Entities'] = df['Question'].apply(extract_all_entities)
    df['Answer Entities'] = df['Answer'].apply(extract_all_entities)
    df['Entity Match'] = df.apply(check_entity_match, axis=1)
    """  
    df = update_df_with_lambda(df, 'Context Entities', 
                               lambda row: extract_all_entities(row['Document']) 
                                if row.get('Context Entities') is None else row['Context Entities'])

    # Update or create 'Question Entities' column by applying entity extraction function if the value is NA
    df = update_df_with_lambda(df, 'Question Entities', 
                               lambda row: extract_all_entities(row['Question']) 
                               if row.get('Question Entities')is None else row['Question Entities'])

    # Update or create 'Answer Entities' column by applying entity extraction function if the value is NA
    df = update_df_with_lambda(df, 'Answer Entities', 
                               lambda row: extract_all_entities(row['Answer']) 
                               if row.get('Answer Entities') is None else row['Answer Entities'])

    # Update or create 'Entity Match' column by applying the check entity match function if the value is NA
    df = update_df_with_lambda(df, 'Entity Match', 
                               lambda row: check_entity_match(row) 
                               if row.get('Entity Match') is None else row['Entity Match'])
    """
    return df



  return torch._C._cuda_getDeviceCount() > 0
2024-01-18 14:20:21.527565: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-01-18 14:20:21.656910: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-01-18 14:20:22.378435: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/lib:/usr/local/nvidia/lib:/usr/

2024-01-18 14:20:37,966 SequenceTagger predicts: Dictionary with 20 tags: <unk>, O, S-ORG, S-MISC, B-PER, E-PER, S-PER, S-LOC, B-MISC, E-MISC, B-ORG, E-ORG, I-ORG, I-PER, B-LOC, I-LOC, E-LOC, I-MISC, <START>, <STOP>


In [11]:
def contains_desired_entity(entities_list):
    desired_types = {'Phone', 'Email'}
    return any(entity['entityType'] in desired_types for entity in entities_list)


def filterFactQuestions(df):
    df = extractEntities(df)
    return df[~((df['Question'].str.split().str.len() < 30) & df['Answer Entities'].apply(contains_desired_entity))]

def extract_desired_entities(entities_list):
    # Define the desired entity types
    desired_types = {'Phone', 'Email'}
    
    # Extract entities that match the desired types
    matching_entities = [entity for entity in entities_list if entity['entityType'] in desired_types]
    
    return matching_entities

In [12]:
def filterFactQuestionCorrectness(df):
    df['Answer Entities'] = df['Answer'].apply(extract_all_entities)
    # Apply a filter condition to each row
    #filtered_df = df[df.apply(lambda row: row["Fact"] in row["Answer Entities"], axis=1)]
    filtered_df = df[df.apply(lambda row: any( row["Fact"]['value'] == entity['value'] for entity in row["Answer Entities"]) if isinstance(row["Answer Entities"], list) else False, axis=1)]
    return filtered_df

#### LLM Scoring

In [13]:
from llm_filter import estimate_relevance

def llmScore(df):
    return update_df_with_lambda(df, 'LLM Score', lambda row: estimate_relevance(row['Question'], row['Document']))

# Run

In [14]:
import os

sample = loadData(fname, basepath)

df = pd.DataFrame(columns = sample.columns)



for i in tqdm(range(5), desc="Processing"):
    raw_file_path = f"{basepath}/output_raw_{i}_{fname}.csv"

    if os.path.exists(raw_file_path):
        # Load the file into a DataFrame if it exists
        print(f"Skipping generation: {i}")
        new_questions_df = pd.read_csv(raw_file_path)
    else:
        df_gen = sample[~sample['Document'].isin(df['Document'])]
        new_questions_df = generateQuestions(df_gen)
        if df_gen.empty:
            break
    new_questions_df.to_csv(raw_file_path)
    df = pd.concat([df, new_questions_df], ignore_index=True, sort=False)
    df = rougeFilter(df)
    df = vectorFilter(df)
    df = filterFactQuestions(df)
    df = llmScore(df)


## take top one for each doc
idx = df.groupby('Document')['LLM Score'].idxmax()
df = df.loc[idx]

df.to_csv(f"{basepath}/output_full_{fname}.csv")
df.to_csv(f"{basepath}/output_{fname}.csv", columns = ['topic_id', 'filename', 'header', 'section_text', 'Question', 'Answer'])


Processing:   0%|          | 0/5 [00:00<?, ?it/s]
Generating QAs:   0%|          | 0/200 [00:00<?, ?it/s][A
Generating QAs:   0%|          | 1/200 [00:13<45:55, 13.85s/it][A
Generating QAs:   1%|          | 2/200 [00:27<44:32, 13.50s/it][A
Generating QAs:   2%|▏         | 3/200 [00:39<42:37, 12.98s/it][A
Generating QAs:   2%|▏         | 4/200 [00:49<38:07, 11.67s/it][A
Generating QAs:   2%|▎         | 5/200 [00:57<33:50, 10.41s/it][A
Generating QAs:   3%|▎         | 6/200 [01:08<34:48, 10.76s/it][A
Generating QAs:   4%|▎         | 7/200 [01:17<32:46, 10.19s/it][A

Failed to generate QA for doc: 31.0: Acupunctuur  (Nocepta): Nacontrole: In overleg met u wordt een afspraak voor de volgende behan. Error: 'list' object has no attribute 'qa_list'



Generating QAs:   4%|▍         | 8/200 [01:31<35:43, 11.17s/it][A
Generating QAs:   4%|▍         | 9/200 [01:55<48:28, 15.23s/it][A
Generating QAs:   5%|▌         | 10/200 [02:16<53:52, 17.01s/it][A
Generating QAs:   6%|▌         | 11/200 [02:29<49:55, 15.85s/it][A
Generating QAs:   6%|▌         | 12/200 [02:48<53:13, 16.99s/it][A
Generating QAs:   6%|▋         | 13/200 [02:58<45:25, 14.57s/it][A
Generating QAs:   7%|▋         | 14/200 [03:19<51:52, 16.74s/it][A
Generating QAs:   8%|▊         | 15/200 [03:36<51:49, 16.81s/it][A
Generating QAs:   8%|▊         | 16/200 [03:57<55:38, 18.14s/it][A
Generating QAs:   8%|▊         | 17/200 [04:12<52:00, 17.05s/it][A
Generating QAs:   9%|▉         | 18/200 [04:26<48:59, 16.15s/it][A
Generating QAs:  10%|▉         | 19/200 [04:39<45:52, 15.21s/it][A
Generating QAs:  10%|█         | 20/200 [04:50<41:58, 13.99s/it][A
Generating QAs:  10%|█         | 21/200 [04:59<37:27, 12.56s/it][A
Generating QAs:  11%|█         | 22/200 [05:11<36

Failed to generate QA for doc: 0.0: Cystolithotripsie: Wanneer neemt u contact op?: U neemt contact op bij: 
- Koorts hoger dan 38,. Error: 'list' object has no attribute 'qa_list'



Generating QAs:  16%|█▌        | 32/200 [07:56<48:10, 17.20s/it][A
Generating QAs:  16%|█▋        | 33/200 [08:18<51:41, 18.57s/it][A
Generating QAs:  17%|█▋        | 34/200 [08:35<50:02, 18.09s/it][A
Generating QAs:  18%|█▊        | 35/200 [08:50<47:01, 17.10s/it][A
Generating QAs:  18%|█▊        | 36/200 [09:11<50:37, 18.52s/it][A
Generating QAs:  18%|█▊        | 37/200 [09:24<45:45, 16.85s/it][A
Generating QAs:  19%|█▉        | 38/200 [09:33<38:37, 14.31s/it][A
Generating QAs:  20%|█▉        | 39/200 [09:56<45:19, 16.89s/it][A
Generating QAs:  20%|██        | 40/200 [10:09<42:32, 15.95s/it][A
Generating QAs:  20%|██        | 41/200 [10:25<41:48, 15.78s/it][A
Generating QAs:  21%|██        | 42/200 [10:43<43:03, 16.35s/it][A
Generating QAs:  22%|██▏       | 43/200 [10:53<38:18, 14.64s/it][A
Generating QAs:  22%|██▏       | 44/200 [11:13<41:49, 16.09s/it][A
Generating QAs:  22%|██▎       | 45/200 [11:33<44:45, 17.33s/it][A

Failed to generate QA for doc: 0.0: Polikliniek kindergeneeskunde: Bereikbaarheid van de polikliniek: De polikliniek kindergeneesku. Error: 'list' object has no attribute 'qa_list'



Generating QAs:  23%|██▎       | 46/200 [11:45<40:48, 15.90s/it][A
Generating QAs:  24%|██▎       | 47/200 [12:02<40:44, 15.97s/it][A
Generating QAs:  24%|██▍       | 48/200 [12:11<35:26, 13.99s/it][A
Generating QAs:  24%|██▍       | 49/200 [12:29<38:35, 15.33s/it][A
Generating QAs:  25%|██▌       | 50/200 [12:45<38:28, 15.39s/it][A
Generating QAs:  26%|██▌       | 51/200 [12:59<37:10, 14.97s/it][A
Generating QAs:  26%|██▌       | 52/200 [13:10<34:14, 13.88s/it][A
Generating QAs:  26%|██▋       | 53/200 [13:22<32:10, 13.13s/it][A
Generating QAs:  27%|██▋       | 54/200 [13:31<29:03, 11.94s/it][A
Generating QAs:  28%|██▊       | 55/200 [13:39<26:21, 10.91s/it][A
Generating QAs:  28%|██▊       | 56/200 [13:48<24:52, 10.37s/it][A
Generating QAs:  28%|██▊       | 57/200 [14:03<27:50, 11.68s/it][A
Generating QAs:  29%|██▉       | 58/200 [14:13<26:35, 11.23s/it][A
Generating QAs:  30%|██▉       | 59/200 [14:24<26:18, 11.20s/it][A
Generating QAs:  30%|███       | 60/200 [14:38<

Failed to generate QA for doc: 4.0: Fistulografie: 23: Foto’s van fistelgang (Fistulografie) Uw behandelend specialist heeft u voor. Error: 'list' object has no attribute 'qa_list'



Generating QAs:  60%|█████▉    | 119/200 [29:52<18:44, 13.88s/it][A
Generating QAs:  60%|██████    | 120/200 [30:02<16:45, 12.56s/it][A
Generating QAs:  60%|██████    | 121/200 [30:13<15:48, 12.01s/it][A
Generating QAs:  61%|██████    | 122/200 [30:21<14:10, 10.91s/it][A
Generating QAs:  62%|██████▏   | 123/200 [30:34<14:57, 11.65s/it][A
Generating QAs:  62%|██████▏   | 124/200 [30:46<14:37, 11.54s/it][A
Generating QAs:  62%|██████▎   | 125/200 [30:54<13:23, 10.72s/it][A
Generating QAs:  63%|██████▎   | 126/200 [31:25<20:39, 16.76s/it][A
Generating QAs:  64%|██████▎   | 127/200 [31:26<14:21, 11.81s/it][A

Failed to generate QA for doc: 7.0: Operatief verwijderen van de baarmoeder: Gevolgen van een baarmoederverwijdering: - Geen menstr. Error: Error code: 400 - {'error': {'message': "This model's maximum context length is 4097 tokens, however you requested 4133 tokens (2085 in your prompt; 2048 for the completion). Please reduce your prompt; or completion length.", 'type': 'invalid_request_error', 'param': None, 'code': None}}



Generating QAs:  64%|██████▍   | 128/200 [31:46<17:18, 14.42s/it][A
Generating QAs:  64%|██████▍   | 129/200 [32:01<17:07, 14.47s/it][A
Generating QAs:  65%|██████▌   | 130/200 [32:30<22:03, 18.91s/it][A
Generating QAs:  66%|██████▌   | 131/200 [32:40<18:31, 16.10s/it][A
Generating QAs:  66%|██████▌   | 132/200 [32:56<18:25, 16.25s/it][A
Generating QAs:  66%|██████▋   | 133/200 [33:07<16:26, 14.72s/it][A
Generating QAs:  67%|██████▋   | 134/200 [33:20<15:35, 14.18s/it][A
Generating QAs:  68%|██████▊   | 135/200 [33:28<13:23, 12.36s/it][A
Generating QAs:  68%|██████▊   | 136/200 [33:37<12:08, 11.39s/it][A
Generating QAs:  68%|██████▊   | 137/200 [33:47<11:21, 10.81s/it][A
Generating QAs:  69%|██████▉   | 138/200 [34:01<12:06, 11.72s/it][A
Generating QAs:  70%|██████▉   | 139/200 [34:11<11:37, 11.43s/it][A
Generating QAs:  70%|███████   | 140/200 [34:24<11:51, 11.86s/it][A
Generating QAs:  70%|███████   | 141/200 [34:49<15:24, 15.67s/it][A
Generating QAs:  71%|███████   | 

Failed to generate QA for doc: 48.0: RS-virus: Tot slot: Deze folder betreft een algemene voorlichting en is bedoeld als extra info. Error: 'list' object has no attribute 'qa_list'



Generating QAs:  92%|█████████▎| 185/200 [46:23<02:51, 11.41s/it][A
Generating QAs:  93%|█████████▎| 186/200 [46:57<04:15, 18.23s/it][A
Generating QAs:  94%|█████████▎| 187/200 [47:09<03:32, 16.33s/it][A
Generating QAs:  94%|█████████▍| 188/200 [47:28<03:25, 17.11s/it][A
Generating QAs:  94%|█████████▍| 189/200 [47:41<02:53, 15.77s/it][A
Generating QAs:  95%|█████████▌| 190/200 [48:03<02:58, 17.81s/it][A
Generating QAs:  96%|█████████▌| 191/200 [48:23<02:45, 18.34s/it][A
Generating QAs:  96%|█████████▌| 192/200 [48:41<02:25, 18.24s/it][A
Generating QAs:  96%|█████████▋| 193/200 [49:00<02:10, 18.67s/it][A
Generating QAs:  97%|█████████▋| 194/200 [49:15<01:44, 17.45s/it][A
Generating QAs:  98%|█████████▊| 195/200 [49:31<01:25, 17.12s/it][A
Generating QAs:  98%|█████████▊| 196/200 [49:44<01:02, 15.66s/it][A
Generating QAs:  98%|█████████▊| 197/200 [49:52<00:40, 13.41s/it][A
Generating QAs:  99%|█████████▉| 198/200 [50:07<00:27, 13.85s/it][A
Generating QAs: 100%|█████████▉| 

Failed to generate QA for doc: 0.0: Cystolithotripsie: Wanneer neemt u contact op?: U neemt contact op bij: 
- Koorts hoger dan 38,. Error: 'list' object has no attribute 'qa_list'



Generating QAs:  23%|██▎       | 5/22 [01:26<04:28, 15.82s/it][A
Generating QAs:  27%|██▋       | 6/22 [01:44<04:26, 16.65s/it][A
Generating QAs:  32%|███▏      | 7/22 [01:56<03:46, 15.11s/it][A
Generating QAs:  36%|███▋      | 8/22 [02:12<03:34, 15.31s/it][A
Generating QAs:  41%|████      | 9/22 [02:26<03:13, 14.89s/it][A
Generating QAs:  45%|████▌     | 10/22 [02:38<02:49, 14.13s/it][A
Generating QAs:  50%|█████     | 11/22 [02:48<02:22, 12.94s/it][A
Generating QAs:  55%|█████▍    | 12/22 [02:58<01:58, 11.87s/it][A
Generating QAs:  59%|█████▉    | 13/22 [03:13<01:55, 12.83s/it][A

Failed to generate QA for doc: 0.0: CT urografie: Vragen: Mocht u na het lezen van deze informatie nog vragen hebben, stel deze ger. Error: 'list' object has no attribute 'qa_list'



Generating QAs:  64%|██████▎   | 14/22 [03:23<01:35, 11.98s/it][A
Generating QAs:  68%|██████▊   | 15/22 [03:38<01:30, 12.95s/it][A
Generating QAs:  73%|███████▎  | 16/22 [03:51<01:18, 13.10s/it][A
Generating QAs:  77%|███████▋  | 17/22 [03:52<00:46,  9.27s/it][A

Failed to generate QA for doc: 7.0: Operatief verwijderen van de baarmoeder: Gevolgen van een baarmoederverwijdering: - Geen menstr. Error: Error code: 400 - {'error': {'message': "This model's maximum context length is 4097 tokens, however you requested 4135 tokens (2087 in your prompt; 2048 for the completion). Please reduce your prompt; or completion length.", 'type': 'invalid_request_error', 'param': None, 'code': None}}



Generating QAs:  82%|████████▏ | 18/22 [04:09<00:46, 11.62s/it][A
Generating QAs:  86%|████████▋ | 19/22 [04:18<00:32, 10.93s/it][A
Generating QAs:  91%|█████████ | 20/22 [04:37<00:26, 13.47s/it][A
Generating QAs:  95%|█████████▌| 21/22 [04:56<00:15, 15.06s/it][A

Failed to generate QA for doc: 52.0: Privacy en persoonsgegevens (privacyverklaring): Het gebruik van persoonsgegevens door ZGT: ZG. Error: 'list' object has no attribute 'qa_list'



Generating QAs: 100%|██████████| 22/22 [05:18<00:00, 14.48s/it][A
Processing:  40%|████      | 2/5 [1:23:59<1:50:37, 2212.56s/it]
Generating QAs:   0%|          | 0/13 [00:00<?, ?it/s][A
Generating QAs:   8%|▊         | 1/13 [00:29<05:50, 29.19s/it][A
Generating QAs:  15%|█▌        | 2/13 [00:39<03:20, 18.27s/it][A
Generating QAs:  23%|██▎       | 3/13 [00:57<02:58, 17.83s/it][A
Generating QAs:  31%|███       | 4/13 [01:10<02:24, 16.00s/it][A
Generating QAs:  38%|███▊      | 5/13 [01:23<02:01, 15.15s/it][A
Generating QAs:  46%|████▌     | 6/13 [01:37<01:41, 14.45s/it][A
Generating QAs:  54%|█████▍    | 7/13 [01:48<01:21, 13.55s/it][A
Generating QAs:  62%|██████▏   | 8/13 [02:05<01:12, 14.46s/it][A
Generating QAs:  69%|██████▉   | 9/13 [02:05<00:40, 10.02s/it][A

Failed to generate QA for doc: 7.0: Operatief verwijderen van de baarmoeder: Gevolgen van een baarmoederverwijdering: - Geen menstr. Error: Error code: 400 - {'error': {'message': "This model's maximum context length is 4097 tokens, however you requested 4135 tokens (2087 in your prompt; 2048 for the completion). Please reduce your prompt; or completion length.", 'type': 'invalid_request_error', 'param': None, 'code': None}}



Generating QAs:  77%|███████▋  | 10/13 [02:15<00:30, 10.13s/it][A
Generating QAs:  85%|████████▍ | 11/13 [02:25<00:19,  9.89s/it][A
Generating QAs:  92%|█████████▏| 12/13 [02:35<00:10, 10.16s/it][A
Generating QAs: 100%|██████████| 13/13 [02:46<00:00, 12.84s/it][A
Processing:  60%|██████    | 3/5 [1:34:27<49:37, 1488.84s/it]  
Generating QAs:   0%|          | 0/9 [00:00<?, ?it/s][A
Generating QAs:  11%|█         | 1/9 [00:19<02:38, 19.78s/it][A
Generating QAs:  22%|██▏       | 2/9 [00:28<01:33, 13.31s/it][A
Generating QAs:  33%|███▎      | 3/9 [00:40<01:16, 12.81s/it][A
Generating QAs:  44%|████▍     | 4/9 [00:49<00:56, 11.26s/it][A
Generating QAs:  56%|█████▌    | 5/9 [01:02<00:46, 11.72s/it][A
Generating QAs:  67%|██████▋   | 6/9 [01:02<00:23,  7.83s/it][A

Failed to generate QA for doc: 7.0: Operatief verwijderen van de baarmoeder: Gevolgen van een baarmoederverwijdering: - Geen menstr. Error: Error code: 400 - {'error': {'message': "This model's maximum context length is 4097 tokens, however you requested 4127 tokens (2079 in your prompt; 2048 for the completion). Please reduce your prompt; or completion length.", 'type': 'invalid_request_error', 'param': None, 'code': None}}



Generating QAs:  78%|███████▊  | 7/9 [01:12<00:16,  8.42s/it][A
Generating QAs:  89%|████████▉ | 8/9 [01:37<00:13, 13.78s/it][A
Generating QAs: 100%|██████████| 9/9 [01:50<00:00, 12.27s/it][A
Processing:  80%|████████  | 4/5 [1:43:51<18:43, 1123.75s/it]
Generating QAs:   0%|          | 0/8 [00:00<?, ?it/s][A
Generating QAs:  12%|█▎        | 1/8 [00:19<02:15, 19.36s/it][A
Generating QAs:  25%|██▌       | 2/8 [00:33<01:38, 16.47s/it][A
Generating QAs:  38%|███▊      | 3/8 [01:02<01:50, 22.03s/it][A
Generating QAs:  50%|█████     | 4/8 [01:15<01:14, 18.54s/it][A
Generating QAs:  62%|██████▎   | 5/8 [01:41<01:03, 21.20s/it][A
Generating QAs:  75%|███████▌  | 6/8 [01:41<00:28, 14.08s/it][A

Failed to generate QA for doc: 7.0: Operatief verwijderen van de baarmoeder: Gevolgen van een baarmoederverwijdering: - Geen menstr. Error: Error code: 400 - {'error': {'message': "This model's maximum context length is 4097 tokens, however you requested 4126 tokens (2078 in your prompt; 2048 for the completion). Please reduce your prompt; or completion length.", 'type': 'invalid_request_error', 'param': None, 'code': None}}



Generating QAs:  88%|████████▊ | 7/8 [01:54<00:13, 13.68s/it][A
Generating QAs: 100%|██████████| 8/8 [02:08<00:00, 16.12s/it][A
Processing: 100%|██████████| 5/5 [1:53:23<00:00, 1360.78s/it]


## Export Q&A Pairs

In [15]:
df.to_csv(f"{basepath}/output_full_{fname}.csv")
df.to_csv(f"{basepath}/output_{fname}.csv", columns = ['topic_id', 'filename', 'header', 'section_text', 'Question', 'Answer'])

In [16]:
df

Unnamed: 0,topic_id,filename,header,section_text,Document,Question,Answer,Question-Context ROUGE,Answer-Context ROUGE,Question-Context ROUGE-L F1,Answer-Context ROUGE-L F1,Embedding,Context Entities,Question Entities,Answer Entities,Entity Match,LLM Score
29,0.0,(Langdurig) gebroken vliezen,Contact opnemen,Wij vragen u contact op te nemen als: \n- Uw t...,0.0: (Langdurig) gebroken vliezen: Contact opn...,Moet ik contact opnemen als mijn temperatuur h...,"Ja, u moet contact opnemen als uw temperatuur ...","[{'rouge-1': {'r': 0.1206896551724138, 'p': 0....","[{'rouge-1': {'r': 0.1896551724137931, 'p': 0....",0.162162,0.213333,"[-0.004962361079971868, -0.041561738727993854,...",[],[],[],True,100.0
156,0.0,Aambeien (heamorroiden),Vragen,Deze folder is niet bedoeld als vervanging van...,0.0: Aambeien (heamorroiden): Vragen: Deze fol...,Waar kan ik terecht voor vragen over aambeien ...,U kunt op werkdagen bellen met het secretariaa...,"[{'rouge-1': {'r': 0.02531645569620253, 'p': 0...","[{'rouge-1': {'r': 0.189873417721519, 'p': 0.7...",0.044444,0.300000,"[-0.007706894348600569, -0.030702567838503225,...","[{'entityType': 'Phone', 'value': '+3188708523...",[],[],True,100.0
135,0.0,Anale fissuur,Vragen,Deze brochure is niet bedoeld als vervanging v...,0.0: Anale fissuur: Vragen: Deze brochure is n...,Kan deze brochure een mondelinge informatie ve...,"Nee, deze brochure is niet bedoeld als vervang...","[{'rouge-1': {'r': 0.045454545454545456, 'p': ...","[{'rouge-1': {'r': 0.13636363636363635, 'p': 0...",0.082192,0.233766,"[-0.021418902809556963, -0.008482350689619944,...","[{'entityType': 'Phone', 'value': '+3188708523...",[],[],True,100.0
557,0.0,Anale fissuur,Vragen,Deze brochure is niet bedoeld als vervanging v...,0.0: Anale fissuur: Vragen: Deze brochure is n...,Wanneer is het mogelijk om meer informatie te ...,U kunt meer informatie over de proctologiepoli...,"[{'rouge-1': {'r': 0.10126582278481013, 'p': 0...","[{'rouge-1': {'r': 0.08860759493670886, 'p': 0...",0.175824,0.155556,"[0.008837624754946205, -0.014453528011422483, ...","[{'entityType': 'Phone', 'value': '+3188708523...",[],[],True,100.0
177,0.0,"Angiografie, dotterbehandeling en_of stentplaa...",Neem contact op indien,"- U koorts krijgt (38,5°C of hoger). \n- De li...","0.0: Angiografie, dotterbehandeling en_of sten...",Wat moet ik doen als mijn lies rood verkleurt ...,Als uw lies rood verkleurt en erg warm aanvoel...,"[{'rouge-1': {'r': 0.13432835820895522, 'p': 0...","[{'rouge-1': {'r': 0.29850746268656714, 'p': 0...",0.209302,0.371134,"[-0.016909901298582258, -0.029433875324452668,...","[{'entityType': 'Phone', 'value': '+3188708524...",[],[],True,100.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
359,7.0,Serotiniteit,Wat is serotiniteit?,Als de bevalling twee weken na de uitgerekende...,7.0: Serotiniteit: Wat is serotiniteit?: Als d...,Wat is serotiniteit?,Serotiniteit is de medische term voor overdrag...,"[{'rouge-1': {'r': 0.05263157894736842, 'p': 0...","[{'rouge-1': {'r': 0.34210526315789475, 'p': 0...",0.097561,0.440678,"[0.019936117249290446, -0.024929900769007817, ...",[],[],[],True,100.0
17,8.0,"Borstvorming bij mannen _ gynaecomastie, behan...",Borstvorming bij mannen,Gynaecomastie Deze folder geeft u een globaal ...,"8.0: Borstvorming bij mannen _ gynaecomastie, ...",Wat is gynaecomastie?,Gynaecomastie is borstvorming bij mannen. Het ...,"[{'rouge-1': {'r': 0.021739130434782608, 'p': ...","[{'rouge-1': {'r': 0.1956521739130435, 'p': 0....",0.040816,0.262295,"[-0.023004339812470277, -0.028726620355583557,...",[],[],[],True,100.0
370,8.0,"LAR-Syndroom, vervolgbehandeling",Vervolgbehandeling Low Anterior Resectie-,Syndroom; “LAR-Syndroom” Na een endeldarmopera...,"8.0: LAR-Syndroom, vervolgbehandeling: Vervolg...",Wat is het LAR-Syndroom?,Het LAR-Syndroom is een verzameling van klacht...,"[{'rouge-1': {'r': 0.028985507246376812, 'p': ...","[{'rouge-1': {'r': 0.13043478260869565, 'p': 0...",0.054795,0.148148,"[0.00635609825801561, -0.012207640401298138, 0...","[{'entityType': 'MISC', 'value': 'LAR-Syndroom...","[{'entityType': 'MISC', 'value': 'LAR-Syndroom'}]","[{'entityType': 'MISC', 'value': 'LAR-Syndroom'}]",True,100.0
366,8.0,Verstuikte enkel,Verstuikte enkel,(enkelbanddistorsie) Deze folder geeft u een g...,8.0: Verstuikte enkel: Verstuikte enkel: (enke...,Wat is een verstuikte enkel?,"Een verstuikte enkel, ook wel enkelbanddistors...","[{'rouge-1': {'r': 0.08333333333333333, 'p': 0...","[{'rouge-1': {'r': 0.16666666666666666, 'p': 0...",0.146341,0.181818,"[0.00675508180116059, -0.021176334045558038, 0...",[],[],[],True,100.0


# Fact Q&A

In [17]:
sample = loadData(fname, basepath)

sample['Context Entities'] = sample['Document'].apply(extract_all_entities)

new_questions_df = generateFactQuestions(sample)
new_questions_df.to_csv(f"{basepath}/output_fact_raw_{fname}.csv")
df = vectorFilter(new_questions_df)
df = llmScore(df)
df = filterFactQuestionCorrectness(df)

## take top one for each doc
df['Fact Value'] = df['Fact'].apply(lambda x: x.get('value'))
idx = df.groupby(['Document', 'Fact Value'])['LLM Score'].idxmax()
df = df.loc[idx]

Generating Fact QAs:   0%|          | 0/200 [00:00<?, ?it/s]

{'entityType': 'Email', 'value': 'info@heupafwijkingen.nl'}
{'entityType': 'Email', 'value': 'info@heupafwijkingen.nl'}
{'entityType': 'Email', 'value': 'info@heupafwijkingen.nl'}
{'entityType': 'Email', 'value': 'info@heupafwijkingen.nl'}
{'entityType': 'Email', 'value': 'info@heupafwijkingen.nl'}


Generating Fact QAs:   4%|▍         | 9/200 [00:09<03:22,  1.06s/it]

{'entityType': 'Email', 'value': 'info@heupafwijkingen.nl'}
{'entityType': 'Email', 'value': 'info@heupafwijkingen.nl'}
{'entityType': 'Email', 'value': 'info@heupafwijkingen.nl'}
{'entityType': 'Email', 'value': 'info@heupafwijkingen.nl'}
{'entityType': 'Email', 'value': 'info@heupafwijkingen.nl'}
{'entityType': 'Phone', 'value': '+31887083220'}
{'entityType': 'Phone', 'value': '+31887083220'}
{'entityType': 'Phone', 'value': '+31887083220'}
{'entityType': 'Phone', 'value': '+31887083220'}
{'entityType': 'Phone', 'value': '+31887083220'}
{'entityType': 'Email', 'value': 'dietisten@zgt.nl'}
{'entityType': 'Email', 'value': 'dietisten@zgt.nl'}
{'entityType': 'Email', 'value': 'dietisten@zgt.nl'}
{'entityType': 'Email', 'value': 'dietisten@zgt.nl'}
{'entityType': 'Email', 'value': 'dietisten@zgt.nl'}
{'entityType': 'Phone', 'value': '+31887083220'}
{'entityType': 'Phone', 'value': '+31887083220'}
{'entityType': 'Phone', 'value': '+31887083220'}
{'entityType': 'Phone', 'value': '+31887083

Generating Fact QAs:   8%|▊         | 17/200 [00:27<05:19,  1.75s/it]

{'entityType': 'Email', 'value': 'dietisten@zgt.nl'}
{'entityType': 'Email', 'value': 'dietisten@zgt.nl'}
{'entityType': 'Email', 'value': 'dietisten@zgt.nl'}
{'entityType': 'Email', 'value': 'dietisten@zgt.nl'}
{'entityType': 'Email', 'value': 'dietisten@zgt.nl'}
{'entityType': 'Phone', 'value': '+31887083300'}
{'entityType': 'Phone', 'value': '+31887083300'}
{'entityType': 'Phone', 'value': '+31887083300'}
{'entityType': 'Phone', 'value': '+31887083300'}
{'entityType': 'Phone', 'value': '+31887083300'}
{'entityType': 'Email', 'value': 'cardiologie@zgt.nl'}
{'entityType': 'Email', 'value': 'cardiologie@zgt.nl'}
{'entityType': 'Email', 'value': 'cardiologie@zgt.nl'}
{'entityType': 'Email', 'value': 'cardiologie@zgt.nl'}
{'entityType': 'Email', 'value': 'cardiologie@zgt.nl'}
{'entityType': 'Phone', 'value': '+31887083300'}
{'entityType': 'Phone', 'value': '+31887083300'}
{'entityType': 'Phone', 'value': '+31887083300'}
{'entityType': 'Phone', 'value': '+31887083300'}
{'entityType': 'Pho

Generating Fact QAs:  14%|█▎        | 27/200 [00:49<05:41,  1.98s/it]

{'entityType': 'Email', 'value': 'cardiologie@zgt.nl'}
{'entityType': 'Email', 'value': 'cardiologie@zgt.nl'}
{'entityType': 'Email', 'value': 'cardiologie@zgt.nl'}
{'entityType': 'Email', 'value': 'cardiologie@zgt.nl'}
{'entityType': 'Email', 'value': 'cardiologie@zgt.nl'}
{'entityType': 'Phone', 'value': '+31887087075'}
{'entityType': 'Phone', 'value': '+31887087075'}
{'entityType': 'Phone', 'value': '+31887087075'}
{'entityType': 'Phone', 'value': '+31887087075'}
{'entityType': 'Phone', 'value': '+31887087075'}


Generating Fact QAs:  14%|█▍        | 29/200 [00:58<06:32,  2.29s/it]

{'entityType': 'Phone', 'value': '+31887087075'}
{'entityType': 'Phone', 'value': '+31887087075'}
{'entityType': 'Phone', 'value': '+31887087075'}
{'entityType': 'Phone', 'value': '+31887087075'}
{'entityType': 'Phone', 'value': '+31887087075'}
{'entityType': 'Phone', 'value': '+31887083390'}
{'entityType': 'Phone', 'value': '+31887083390'}
{'entityType': 'Phone', 'value': '+31887083390'}
{'entityType': 'Phone', 'value': '+31887083390'}
{'entityType': 'Phone', 'value': '+31887083390'}
{'entityType': 'Phone', 'value': '+31887085513'}
{'entityType': 'Phone', 'value': '+31887085513'}
{'entityType': 'Phone', 'value': '+31887085513'}
{'entityType': 'Phone', 'value': '+31887085513'}
{'entityType': 'Phone', 'value': '+31887085513'}
{'entityType': 'Phone', 'value': '+31887083390'}
{'entityType': 'Phone', 'value': '+31887083390'}
{'entityType': 'Phone', 'value': '+31887083390'}
{'entityType': 'Phone', 'value': '+31887083390'}
{'entityType': 'Phone', 'value': '+31887083390'}


Generating Fact QAs:  15%|█▌        | 30/200 [01:14<09:36,  3.39s/it]

{'entityType': 'Phone', 'value': '+31887085513'}
{'entityType': 'Phone', 'value': '+31887085513'}
{'entityType': 'Phone', 'value': '+31887085513'}
{'entityType': 'Phone', 'value': '+31887085513'}
{'entityType': 'Phone', 'value': '+31887085513'}
{'entityType': 'Phone', 'value': '+31887083350'}
{'entityType': 'Phone', 'value': '+31887083350'}
{'entityType': 'Phone', 'value': '+31887083350'}
{'entityType': 'Phone', 'value': '+31887083350'}
{'entityType': 'Phone', 'value': '+31887083350'}


Generating Fact QAs:  16%|█▋        | 33/200 [01:22<08:56,  3.21s/it]

{'entityType': 'Phone', 'value': '+31887083350'}
{'entityType': 'Phone', 'value': '+31887083350'}
{'entityType': 'Phone', 'value': '+31887083350'}
{'entityType': 'Phone', 'value': '+31887083350'}
{'entityType': 'Phone', 'value': '+31887083350'}
{'entityType': 'Phone', 'value': '+31887083300'}
{'entityType': 'Phone', 'value': '+31887083300'}
{'entityType': 'Phone', 'value': '+31887083300'}
{'entityType': 'Phone', 'value': '+31887083300'}
{'entityType': 'Phone', 'value': '+31887083300'}
{'entityType': 'Email', 'value': 'hartrevalidatie@zgt.nl'}
{'entityType': 'Email', 'value': 'hartrevalidatie@zgt.nl'}
{'entityType': 'Email', 'value': 'hartrevalidatie@zgt.nl'}
{'entityType': 'Email', 'value': 'hartrevalidatie@zgt.nl'}
{'entityType': 'Email', 'value': 'hartrevalidatie@zgt.nl'}
{'entityType': 'Phone', 'value': '+31887083300'}
{'entityType': 'Phone', 'value': '+31887083300'}
{'entityType': 'Phone', 'value': '+31887083300'}
{'entityType': 'Phone', 'value': '+31887083300'}
{'entityType': 'Pho

Generating Fact QAs:  18%|█▊        | 35/200 [01:41<12:13,  4.45s/it]

{'entityType': 'Phone', 'value': '+31243010350'}
{'entityType': 'Phone', 'value': '+31243010350'}
{'entityType': 'Phone', 'value': '+31243010350'}
{'entityType': 'Phone', 'value': '+31243010350'}
{'entityType': 'Phone', 'value': '+31243010350'}
{'entityType': 'Email', 'value': 'secretariaat@freya.nl'}
{'entityType': 'Email', 'value': 'secretariaat@freya.nl'}
{'entityType': 'Email', 'value': 'secretariaat@freya.nl'}
{'entityType': 'Email', 'value': 'secretariaat@freya.nl'}
{'entityType': 'Email', 'value': 'secretariaat@freya.nl'}
{'entityType': 'Phone', 'value': '+31243010350'}
{'entityType': 'Phone', 'value': '+31243010350'}
{'entityType': 'Phone', 'value': '+31243010350'}
{'entityType': 'Phone', 'value': '+31243010350'}
{'entityType': 'Phone', 'value': '+31243010350'}


Generating Fact QAs:  18%|█▊        | 36/200 [01:53<14:47,  5.41s/it]

{'entityType': 'Email', 'value': 'secretariaat@freya.nl'}
{'entityType': 'Email', 'value': 'secretariaat@freya.nl'}
{'entityType': 'Email', 'value': 'secretariaat@freya.nl'}
{'entityType': 'Email', 'value': 'secretariaat@freya.nl'}
{'entityType': 'Email', 'value': 'secretariaat@freya.nl'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}


Generating Fact QAs:  19%|█▉        | 38/200 [02:01<13:21,  4.95s/it]

{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Email', 'value': 'mammapoli@zgt.nl'}
{'entityType': 'Email', 'value': 'mammapoli@zgt.nl'}
{'entityType': 'Email', 'value': 'mammapoli@zgt.nl'}
{'entityType': 'Email', 'value': 'mammapoli@zgt.nl'}
{'entityType': 'Email', 'value': 'mammapoli@zgt.nl'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}


Generating Fact QAs:  20%|██        | 40/200 [02:15<14:44,  5.53s/it]

{'entityType': 'Email', 'value': 'mammapoli@zgt.nl'}
{'entityType': 'Email', 'value': 'mammapoli@zgt.nl'}
{'entityType': 'Email', 'value': 'mammapoli@zgt.nl'}
{'entityType': 'Email', 'value': 'mammapoli@zgt.nl'}
{'entityType': 'Email', 'value': 'mammapoli@zgt.nl'}
{'entityType': 'Phone', 'value': '+31887083130'}
{'entityType': 'Phone', 'value': '+31887083130'}
{'entityType': 'Phone', 'value': '+31887083130'}
{'entityType': 'Phone', 'value': '+31887083130'}
{'entityType': 'Phone', 'value': '+31887083130'}


Generating Fact QAs:  20%|██        | 41/200 [02:24<16:04,  6.07s/it]

{'entityType': 'Phone', 'value': '+31887083130'}
{'entityType': 'Phone', 'value': '+31887083130'}
{'entityType': 'Phone', 'value': '+31887083130'}
{'entityType': 'Phone', 'value': '+31887083130'}
{'entityType': 'Phone', 'value': '+31887083130'}
{'entityType': 'Phone', 'value': '+31887083310'}
{'entityType': 'Phone', 'value': '+31887083310'}
{'entityType': 'Phone', 'value': '+31887083310'}
{'entityType': 'Phone', 'value': '+31887083310'}
{'entityType': 'Phone', 'value': '+31887083310'}
{'entityType': 'Phone', 'value': '+31887085200'}
{'entityType': 'Phone', 'value': '+31887085200'}
{'entityType': 'Phone', 'value': '+31887085200'}
{'entityType': 'Phone', 'value': '+31887085200'}
{'entityType': 'Phone', 'value': '+31887085200'}
{'entityType': 'Phone', 'value': '+31887083210'}
{'entityType': 'Phone', 'value': '+31887083210'}
{'entityType': 'Phone', 'value': '+31887083210'}
{'entityType': 'Phone', 'value': '+31887083210'}
{'entityType': 'Phone', 'value': '+31887083210'}
{'entityType': 'Phon

Generating Fact QAs:  22%|██▏       | 44/200 [03:10<25:57,  9.99s/it]

{'entityType': 'Phone', 'value': '+31887085311'}
{'entityType': 'Phone', 'value': '+31887085311'}
{'entityType': 'Phone', 'value': '+31887085311'}
{'entityType': 'Phone', 'value': '+31887085311'}
{'entityType': 'Phone', 'value': '+31887085311'}
{'entityType': 'Email', 'value': 'kinderartsen@zgt.nl'}
{'entityType': 'Email', 'value': 'kinderartsen@zgt.nl'}
{'entityType': 'Email', 'value': 'kinderartsen@zgt.nl'}
{'entityType': 'Email', 'value': 'kinderartsen@zgt.nl'}
{'entityType': 'Email', 'value': 'kinderartsen@zgt.nl'}


Generating Fact QAs:  22%|██▎       | 45/200 [03:19<25:12,  9.76s/it]

{'entityType': 'Email', 'value': 'kinderartsen@zgt.nl'}
{'entityType': 'Email', 'value': 'kinderartsen@zgt.nl'}
{'entityType': 'Email', 'value': 'kinderartsen@zgt.nl'}
{'entityType': 'Email', 'value': 'kinderartsen@zgt.nl'}
{'entityType': 'Email', 'value': 'kinderartsen@zgt.nl'}
{'entityType': 'Phone', 'value': '+31887083700'}
{'entityType': 'Phone', 'value': '+31887083700'}
{'entityType': 'Phone', 'value': '+31887083700'}
{'entityType': 'Phone', 'value': '+31887083700'}
{'entityType': 'Phone', 'value': '+31887083700'}


Generating Fact QAs:  23%|██▎       | 46/200 [03:28<24:44,  9.64s/it]

{'entityType': 'Phone', 'value': '+31887083700'}
{'entityType': 'Phone', 'value': '+31887083700'}
{'entityType': 'Phone', 'value': '+31887083700'}
{'entityType': 'Phone', 'value': '+31887083700'}
{'entityType': 'Phone', 'value': '+31887083700'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}


Generating Fact QAs:  24%|██▎       | 47/200 [03:35<23:03,  9.04s/it]

{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phon

Generating Fact QAs:  24%|██▍       | 48/200 [04:11<38:48, 15.32s/it]

{'entityType': 'Phone', 'value': '+31887085243'}
{'entityType': 'Phone', 'value': '+31887085243'}
{'entityType': 'Phone', 'value': '+31887085243'}
{'entityType': 'Phone', 'value': '+31887085243'}
{'entityType': 'Phone', 'value': '+31887085243'}
{'entityType': 'Phone', 'value': '+31887084320'}
{'entityType': 'Phone', 'value': '+31887084320'}
{'entityType': 'Phone', 'value': '+31887084320'}
{'entityType': 'Phone', 'value': '+31887084320'}
{'entityType': 'Phone', 'value': '+31887084320'}
{'entityType': 'Phone', 'value': '+31887083313'}
{'entityType': 'Phone', 'value': '+31887083313'}
{'entityType': 'Phone', 'value': '+31887083313'}
{'entityType': 'Phone', 'value': '+31887083313'}
{'entityType': 'Phone', 'value': '+31887083313'}
{'entityType': 'Phone', 'value': '+31887084320'}
{'entityType': 'Phone', 'value': '+31887084320'}
{'entityType': 'Phone', 'value': '+31887084320'}
{'entityType': 'Phone', 'value': '+31887084320'}
{'entityType': 'Phone', 'value': '+31887084320'}


Generating Fact QAs:  24%|██▍       | 49/200 [04:26<38:12, 15.18s/it]

{'entityType': 'Phone', 'value': '+31887083313'}
{'entityType': 'Phone', 'value': '+31887083313'}
{'entityType': 'Phone', 'value': '+31887083313'}
{'entityType': 'Phone', 'value': '+31887083313'}
{'entityType': 'Phone', 'value': '+31887083313'}
{'entityType': 'Phone', 'value': '+31887083667'}
{'entityType': 'Phone', 'value': '+31887083667'}
{'entityType': 'Phone', 'value': '+31887083667'}
{'entityType': 'Phone', 'value': '+31887083667'}
{'entityType': 'Phone', 'value': '+31887083667'}
{'entityType': 'Phone', 'value': '+31887085513'}
{'entityType': 'Phone', 'value': '+31887085513'}
{'entityType': 'Phone', 'value': '+31887085513'}
{'entityType': 'Phone', 'value': '+31887085513'}
{'entityType': 'Phone', 'value': '+31887085513'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887083667'}
{'entityType': 'Phone', 'value': '+31887083667'}
{'entityType': 'Phone', 'value': '+31887083667'}
{'entityType': 'Phon

Generating Fact QAs:  26%|██▌       | 51/200 [04:48<33:16, 13.40s/it]

{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Email', 'value': 'mammapoli@zgt.nl'}
{'entityType': 'Email', 'value': 'mammapoli@zgt.nl'}
{'entityType': 'Email', 'value': 'mammapoli@zgt.nl'}
{'entityType': 'Email', 'value': 'mammapoli@zgt.nl'}
{'entityType': 'Email', 'value': 'mammapoli@zgt.nl'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}


Generating Fact QAs:  26%|██▌       | 52/200 [05:06<36:10, 14.66s/it]

{'entityType': 'Email', 'value': 'mammapoli@zgt.nl'}
{'entityType': 'Email', 'value': 'mammapoli@zgt.nl'}
{'entityType': 'Email', 'value': 'mammapoli@zgt.nl'}
{'entityType': 'Email', 'value': 'mammapoli@zgt.nl'}
{'entityType': 'Email', 'value': 'mammapoli@zgt.nl'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887084450'}
{'entityType': 'Phone', 'value': '+31887084450'}
{'entityType': 'Phone', 'value': '+31887084450'}
{'entityType': 'Phone', 'value': '+31887084450'}
{'entityType': 'Phone', 'value': '+31887084450'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}


Generating Fact QAs:  26%|██▋       | 53/200 [05:21<36:01, 14.70s/it]

{'entityType': 'Phone', 'value': '+31887084450'}
{'entityType': 'Phone', 'value': '+31887084450'}
{'entityType': 'Phone', 'value': '+31887084450'}
{'entityType': 'Phone', 'value': '+31887084450'}
{'entityType': 'Phone', 'value': '+31887084450'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}


Generating Fact QAs:  27%|██▋       | 54/200 [05:29<31:05, 12.77s/it]

{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}


Generating Fact QAs:  28%|██▊       | 55/200 [05:36<27:08, 11.23s/it]

{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887083350'}
{'entityType': 'Phone', 'value': '+31887083350'}
{'entityType': 'Phone', 'value': '+31887083350'}
{'entityType': 'Phone', 'value': '+31887083350'}
{'entityType': 'Phone', 'value': '+31887083350'}


Generating Fact QAs:  28%|██▊       | 56/200 [05:42<23:45,  9.90s/it]

{'entityType': 'Phone', 'value': '+31887083350'}
{'entityType': 'Phone', 'value': '+31887083350'}
{'entityType': 'Phone', 'value': '+31887083350'}
{'entityType': 'Phone', 'value': '+31887083350'}
{'entityType': 'Phone', 'value': '+31887083350'}
{'entityType': 'Phone', 'value': '+31887085243'}
{'entityType': 'Phone', 'value': '+31887085243'}
{'entityType': 'Phone', 'value': '+31887085243'}
{'entityType': 'Phone', 'value': '+31887085243'}
{'entityType': 'Phone', 'value': '+31887085243'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887085243'}
{'entityType': 'Phone', 'value': '+31887085243'}
{'entityType': 'Phone', 'value': '+31887085243'}
{'entityType': 'Phone', 'value': '+31887085243'}
{'entityType': 'Phone', 'value': '+31887085243'}


Generating Fact QAs:  28%|██▊       | 57/200 [06:02<30:08, 12.65s/it]

{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}


Generating Fact QAs:  30%|██▉       | 59/200 [06:10<21:01,  8.95s/it]

{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887085231'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}


Generating Fact QAs:  30%|███       | 60/200 [06:17<19:40,  8.43s/it]

{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887087070'}
{'entityType': 'Phone', 'value': '+31887087070'}
{'entityType': 'Phone', 'value': '+31887087070'}
{'entityType': 'Phone', 'value': '+31887087070'}
{'entityType': 'Phone', 'value': '+31887087070'}


Generating Fact QAs:  32%|███▏      | 64/200 [06:25<10:34,  4.66s/it]

{'entityType': 'Phone', 'value': '+31887087070'}
{'entityType': 'Phone', 'value': '+31887087070'}
{'entityType': 'Phone', 'value': '+31887087070'}
{'entityType': 'Phone', 'value': '+31887087070'}
{'entityType': 'Phone', 'value': '+31887087070'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}


Generating Fact QAs:  33%|███▎      | 66/200 [06:34<10:23,  4.66s/it]

{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083120'}
{'entityType': 'Phone', 'value': '+31887083700'}
{'entityType': 'Phone', 'value': '+31887083700'}
{'entityType': 'Phone', 'value': '+31887083700'}
{'entityType': 'Phone', 'value': '+31887083700'}
{'entityType': 'Phone', 'value': '+31887083700'}


Generating Fact QAs:  34%|███▎      | 67/200 [06:43<11:48,  5.32s/it]

{'entityType': 'Phone', 'value': '+31887083700'}
{'entityType': 'Phone', 'value': '+31887083700'}
{'entityType': 'Phone', 'value': '+31887083700'}
{'entityType': 'Phone', 'value': '+31887083700'}
{'entityType': 'Phone', 'value': '+31887083700'}
{'entityType': 'Email', 'value': 'secretariaat@schisis-team.nl'}
{'entityType': 'Email', 'value': 'secretariaat@schisis-team.nl'}
{'entityType': 'Email', 'value': 'secretariaat@schisis-team.nl'}
{'entityType': 'Email', 'value': 'secretariaat@schisis-team.nl'}
{'entityType': 'Email', 'value': 'secretariaat@schisis-team.nl'}


Generating Fact QAs:  34%|███▍      | 68/200 [06:51<12:50,  5.83s/it]

{'entityType': 'Email', 'value': 'secretariaat@schisis-team.nl'}
{'entityType': 'Email', 'value': 'secretariaat@schisis-team.nl'}
{'entityType': 'Email', 'value': 'secretariaat@schisis-team.nl'}
{'entityType': 'Email', 'value': 'secretariaat@schisis-team.nl'}
{'entityType': 'Email', 'value': 'secretariaat@schisis-team.nl'}
{'entityType': 'Phone', 'value': '+31887083110'}
{'entityType': 'Phone', 'value': '+31887083110'}
{'entityType': 'Phone', 'value': '+31887083110'}
{'entityType': 'Phone', 'value': '+31887083110'}
{'entityType': 'Phone', 'value': '+31887083110'}


Generating Fact QAs:  34%|███▍      | 69/200 [06:59<13:40,  6.26s/it]

{'entityType': 'Phone', 'value': '+31887083110'}
{'entityType': 'Phone', 'value': '+31887083110'}
{'entityType': 'Phone', 'value': '+31887083110'}
{'entityType': 'Phone', 'value': '+31887083110'}
{'entityType': 'Phone', 'value': '+31887083110'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Email', 'value': 'mammapoli@zgt.nl'}
{'entityType': 'Email', 'value': 'mammapoli@zgt.nl'}
{'entityType': 'Email', 'value': 'mammapoli@zgt.nl'}
{'entityType': 'Email', 'value': 'mammapoli@zgt.nl'}
{'entityType': 'Email', 'value': 'mammapoli@zgt.nl'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}
{'entityType': 'Phone', 'value': '+31887085232'}


Generating Fact QAs:  35%|███▌      | 70/200 [07:14<18:28,  8.53s/it]

{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887085243'}
{'entityType': 'Phone', 'value': '+31887085243'}
{'entityType': 'Phone', 'value': '+31887085243'}
{'entityType': 'Phone', 'value': '+31887085243'}
{'entityType': 'Phone', 'value': '+31887085243'}


Generating Fact QAs:  36%|███▌      | 71/200 [07:38<26:41, 12.41s/it]

{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887087878'}
{'entityType': 'Phone', 'value': '+31887083700'}
{'entityType': 'Phone', 'value': '+31887083700'}
{'entityType': 'Phone', 'value': '+31887083700'}
{'entityType': 'Phone', 'value': '+31887083700'}
{'entityType': 'Phone', 'value': '+31887083700'}


Generating Fact QAs:  36%|███▌      | 72/200 [07:46<24:04, 11.29s/it]

{'entityType': 'Phone', 'value': '+31887083700'}
{'entityType': 'Phone', 'value': '+31887083700'}
{'entityType': 'Phone', 'value': '+31887083700'}
{'entityType': 'Phone', 'value': '+31887083700'}
{'entityType': 'Phone', 'value': '+31887083700'}
{'entityType': 'Phone', 'value': '+31887083400'}
{'entityType': 'Phone', 'value': '+31887083400'}
{'entityType': 'Phone', 'value': '+31887083400'}
{'entityType': 'Phone', 'value': '+31887083400'}
{'entityType': 'Phone', 'value': '+31887083400'}


Generating Fact QAs:  36%|███▋      | 73/200 [07:53<21:14, 10.03s/it]

{'entityType': 'Phone', 'value': '+31887083400'}
{'entityType': 'Phone', 'value': '+31887083400'}
{'entityType': 'Phone', 'value': '+31887083400'}
{'entityType': 'Phone', 'value': '+31887083400'}
{'entityType': 'Phone', 'value': '+31887083400'}
{'entityType': 'Phone', 'value': '+31887083620'}
{'entityType': 'Phone', 'value': '+31887083620'}
{'entityType': 'Phone', 'value': '+31887083620'}
{'entityType': 'Phone', 'value': '+31887083620'}
{'entityType': 'Phone', 'value': '+31887083620'}


Generating Fact QAs:  58%|█████▊    | 116/200 [08:01<01:02,  1.35it/s]

{'entityType': 'Phone', 'value': '+31887083620'}
{'entityType': 'Phone', 'value': '+31887083620'}
{'entityType': 'Phone', 'value': '+31887083620'}
{'entityType': 'Phone', 'value': '+31887083620'}
{'entityType': 'Phone', 'value': '+31887083620'}
{'entityType': 'Phone', 'value': '+31887087010'}
{'entityType': 'Phone', 'value': '+31887087010'}
{'entityType': 'Phone', 'value': '+31887087010'}
{'entityType': 'Phone', 'value': '+31887087010'}
{'entityType': 'Phone', 'value': '+31887087010'}


Generating Fact QAs:  68%|██████▊   | 136/200 [08:09<00:38,  1.66it/s]

{'entityType': 'Phone', 'value': '+31887087010'}
{'entityType': 'Phone', 'value': '+31887087010'}
{'entityType': 'Phone', 'value': '+31887087010'}
{'entityType': 'Phone', 'value': '+31887087010'}
{'entityType': 'Phone', 'value': '+31887087010'}
{'entityType': 'Phone', 'value': '+31887083220'}
{'entityType': 'Phone', 'value': '+31887083220'}
{'entityType': 'Phone', 'value': '+31887083220'}
{'entityType': 'Phone', 'value': '+31887083220'}
{'entityType': 'Phone', 'value': '+31887083220'}
{'entityType': 'Email', 'value': 'dietisten@zgt.nl'}
{'entityType': 'Email', 'value': 'dietisten@zgt.nl'}
{'entityType': 'Email', 'value': 'dietisten@zgt.nl'}
{'entityType': 'Email', 'value': 'dietisten@zgt.nl'}
{'entityType': 'Email', 'value': 'dietisten@zgt.nl'}
{'entityType': 'Phone', 'value': '+31887083220'}
{'entityType': 'Phone', 'value': '+31887083220'}
{'entityType': 'Phone', 'value': '+31887083220'}
{'entityType': 'Phone', 'value': '+31887083220'}
{'entityType': 'Phone', 'value': '+31887083220'}


Generating Fact QAs: 100%|██████████| 200/200 [08:23<00:00,  2.52s/it]

{'entityType': 'Email', 'value': 'dietisten@zgt.nl'}
{'entityType': 'Email', 'value': 'dietisten@zgt.nl'}
{'entityType': 'Email', 'value': 'dietisten@zgt.nl'}
{'entityType': 'Email', 'value': 'dietisten@zgt.nl'}
{'entityType': 'Email', 'value': 'dietisten@zgt.nl'}





In [18]:

df.to_csv(f"{basepath}/output_full_fact_{fname}.csv")
df.to_csv(f"{basepath}/output_fact_{fname}.csv", columns = ['topic_id', 'filename', 'header', 'section_text', 'Fact', 'Question', 'Answer'])