# Test env

In [1]:

#imports
import os
import streamlit as st
from PIL import Image
from loguru import logger
# local imports
from ingest.ingester import Ingester
from query.querier import Querier
from summarize.summarizer import Summarizer
import settings
import utils as ut
from query.querier import EnumMode
from ingest.ingester import IngestionMode
from datetime import datetime
import kamervragenEvaluation



In [2]:
SYSTEM_PROMPT= """
### OBJECTIVE ###
Je bent een assistent voor de rijksoverheid. Jouw taak is om vragen te beantwoorden in het Nederlands. Zorg ervoor dat je alleen antwoord geeft op basis van de beschikbare context en dat je daar ook naar verwijst in je antwoord.

### AUDIENCE ###
De doelgroep van jouw antwoorden zijn ambtenaren. Geef alle relevante informatie uit de context, antwoord in het Nederlands leg in maximaal 100 woorden zoveel mogelijk uit.

### GUARDRAILS ###
Indien de context onvoldoende informatie bevat om de vraag te beantwoorden, verzin dan geen informatie maar geef aan dat er onvoldoende informatie beschikbaar is.

### INSTRUCTIONS ###
- Beantwoord de vraag altijd in het Nederlands, zelfs als de context in het Engels is gesteld.
- Vermijd het herhalen van de vraag in het antwoord en het herhalen van de instructies. Voer de instructies uit en geef een concreet antwoord op de gestelde vraag.
- Geef een stapsgewijze redenering bij het beantwoorden van de vraag en refereer naar specifieke zinnen uit de context die hebben bijgedragen aan het antwoord.
- Houd je antwoord nauw verbonden met de context en vermijd het toevoegen van informatie die niet expliciet in de context wordt vermeld.

- Voor meer informatie over de context, zeg het bestandsnaam die gevonden is in de source_document. Mits deze beschikbaar is.
### QUESTION ### \n
"""

In [3]:
# Commented the settings that arent used as parameters in the functions

# DOC_DIR = "./docs"
# CHUNK_DIR = "./chunks"
# VECDB_DIR = "./vector_stores"
# EVAL_DIR = "./evaluate"
# EVAL_APP_HEADER = "Evaluation"
# EVAL_APP_INFO = "./info/evaluation_explanation.txt"
# EVAL_FILE_NAME = "eval.json"
# CHAIN_VERBOSITY = False
LLM_TYPE = "local_llm"
LLM_MODEL_TYPE = "gemma2"
# API_URL = "http://127.0.0.1:11434"
AZUREOPENAI_API_VERSION = "2023-08-01-preview"
EMBEDDINGS_PROVIDER = "local_embeddings"
EMBEDDINGS_MODEL = "textgain/allnli-GroNLP-bert-base-dutch-cased"
TEXT_SPLITTER_METHOD = "NLTKTextSplitter"
# CHAIN_NAME = "conversationalretrievalchain"
# CHAIN_TYPE = "stuff"
# SEARCH_TYPE = "similarity"
# SCORE_THRESHOLD = 0.5
VECDB_TYPE = "chromadb"
CHUNK_SIZE = 1024
# CHUNK_K = 4
CHUNK_OVERLAP = 256
# RETRIEVAL_METHOD = "regular"


folderSelected = "kamerVragen"
my_folder_path_selected, my_vectordb_folder_path_selected = ut.create_vectordb_name(folderSelected)

CONCAT_FILES = True
question_sample_CSV = "question_sample.csv"
VALIDATIONLAPS = 10



In [4]:
SPLITTING_METHODS = [IngestionMode.question_answer,IngestionMode.token_small,IngestionMode.token_medium,IngestionMode.token_large]
CONTEXT_PRESENT= [True, False]
EMBEDDINGS_MODELS = ["GroNLP/bert-base-dutch-cased","textgain/allnli-GroNLP-bert-base-dutch-cased", "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2","dunzhang/stella_en_400M_v5", "actualdata/jina-embeddings-v3"]

In [5]:
querier = None
ingester = None

def init(LLM_TYPE=LLM_TYPE, LLM_MODEL_TYPE=LLM_MODEL_TYPE, EMBEDDINGS_MODEL=EMBEDDINGS_MODEL, EMBEDDINGS_PROVIDER=EMBEDDINGS_PROVIDER, AZUREOPENAI_API_VERSION=AZUREOPENAI_API_VERSION, TEXT_SPLITTER_METHOD=TEXT_SPLITTER_METHOD, CHUNK_SIZE=CHUNK_SIZE, CHUNK_OVERLAP=CHUNK_OVERLAP, VECDB_TYPE=VECDB_TYPE, vectordb_folder= my_vectordb_folder_path_selected, content_folder=my_folder_path_selected):
  # Init
  querier = Querier(
    llm_type=LLM_TYPE, 
    llm_model_type=LLM_MODEL_TYPE, 
    embeddings_model=EMBEDDINGS_MODEL, 
    embeddings_provider=EMBEDDINGS_PROVIDER, 
    azureopenai_api_version=AZUREOPENAI_API_VERSION
    )

  ingester = Ingester(
    collection_name=folderSelected, 
    content_folder=content_folder, 
    vectordb_folder=vectordb_folder,
    embeddings_model=EMBEDDINGS_MODEL,
    text_splitter_method=TEXT_SPLITTER_METHOD,
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    vecdb_type=VECDB_TYPE
    )
  return [querier,ingester]
  
# querier,ingester = init()


In [6]:
def ingest(mode=IngestionMode.question_answer_per_page, forceRebuild=True, addedMetaDataURLCSV="docs/metadata.csv", addContext=True):
  """Ingest the documents in the folder"""
  ingester.ingest(mode=mode, forceRebuild=forceRebuild, addedMetaDataURLCSV=addedMetaDataURLCSV, addContext=addContext)

In [7]:
def chain(vectorDBPATH = my_vectordb_folder_path_selected):
  """Build the RAG Chain"""
  querier.make_chain(folderSelected, vectorDBPATH)

### Create sample questions

If no sample is provided the tests will not run.

In [8]:

querier,ingester = init(EMBEDDINGS_MODEL=EMBEDDINGS_MODEL, vectordb_folder=my_vectordb_folder_path_selected)
kamervragenEvaluation.create_evaluation_sample_questions(my_folder_path_selected,ingester=ingester, destinationCSV=question_sample_CSV)

  embeddings = HuggingFaceEmbeddings(
  from tqdm.autonotebook import tqdm, trange
[32m2024-10-03 15:37:30.443[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: textgain/allnli-GroNLP-bert-base-dutch-cased[0m
[32m2024-10-03 15:37:30.444[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:37:30.444[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:37:30.444[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:37:30.446[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m52[0m - [1mRetrieved gemma2[0m


CSV file already exists


## ingest and evaluate

Ingest the questions and evaluate the performance of the model on the test set.

In [9]:
from error import write_to_error_log


current_item = 0
total_items = VALIDATIONLAPS * len(SPLITTING_METHODS) * len(CONTEXT_PRESENT) * len(EMBEDDINGS_MODELS)
for embeddingModel in EMBEDDINGS_MODELS:
  for splittingMethod in SPLITTING_METHODS:
    for context in CONTEXT_PRESENT:
      for i in range(VALIDATIONLAPS):
        current_item += 1
        try:
          chunk_size = CHUNK_SIZE
          if splittingMethod == IngestionMode.token_small:
            chunk_size = 128
          elif splittingMethod == IngestionMode.token_medium:
            chunk_size = 512
          elif splittingMethod == IngestionMode.token_large:
            chunk_size = 1024
            
          # Setup
          my_folder_path_selected, my_vectordb_folder_path_selected = ut.create_vectordb_name(folderSelected, chunk_size=chunk_size, chunk_overlap=0, splitting_method=splittingMethod, embeddings_model=embeddingModel, added_context=context)
          querier,ingester = init(EMBEDDINGS_MODEL=embeddingModel, vectordb_folder=my_vectordb_folder_path_selected)
          # Ingestion
          ingest(mode=splittingMethod, addContext=context, addedMetaDataURLCSV="docs/metadata.csv")
          # RAG CHAIN
          chain(vectorDBPATH=my_vectordb_folder_path_selected)
          
          # Evaluation
          kamervragenEvaluation.evaluate_with_sample_questions(
            question_sample_CSV,querier=querier, 
            toCSV=True, 
            ingestionMode=splittingMethod, 
            addedMetaDataURLCSV="docs/metadata.csv", 
            addContext=context,
            embeddings_model=embeddingModel,
            text_splitter_method=TEXT_SPLITTER_METHOD,
            embeddings_provider=EMBEDDINGS_PROVIDER,
            database=VECDB_TYPE,
            concatFiles=CONCAT_FILES,
            )
        except Exception as e:
          print(e)
          # Write error to file
          write_to_error_log("error_log_test.txt", e, f"splittingMethod={splittingMethod}, context={context}, embeddingModel={embeddingModel}")
          continue
        
        
        print(f"done with {current_item} of {total_items}")

No sentence-transformers model found with name GroNLP/bert-base-dutch-cased. Creating a new one with mean pooling.
Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:37:31.284[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:37:31.284[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:37:31.284[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:37:31.284[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url h

expected string or bytes-like object, got 'list'


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:37:33.257[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:37:33.257[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:37:33.257[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:37:33.257[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:37:33.258[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

expected string or bytes-like object, got 'list'


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:37:34.832[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:37:34.832[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:37:34.833[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:37:34.833[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:37:34.833[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

expected string or bytes-like object, got 'list'


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:37:36.525[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:37:36.526[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:37:36.526[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:37:36.526[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:37:36.527[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

expected string or bytes-like object, got 'list'


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:37:37.945[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:37:37.946[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:37:37.946[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:37:37.946[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:37:37.946[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

expected string or bytes-like object, got 'list'


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:37:39.794[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:37:39.795[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:37:39.795[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:37:39.796[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:37:39.796[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

expected string or bytes-like object, got 'list'


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:37:41.409[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:37:41.410[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:37:41.410[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:37:41.410[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:37:41.410[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

expected string or bytes-like object, got 'list'


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:37:42.837[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:37:42.838[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:37:42.838[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:37:42.838[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:37:42.838[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

expected string or bytes-like object, got 'list'


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:37:44.805[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:37:44.806[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:37:44.806[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:37:44.806[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:37:44.806[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

expected string or bytes-like object, got 'list'


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:37:47.064[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:37:47.065[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:37:47.065[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:37:47.066[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:37:47.068[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

expected string or bytes-like object, got 'list'


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:37:48.782[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:37:48.783[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:37:48.783[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:37:48.783[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:37:48.783[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

Correct count: 9
Total count: 30
Percentage: 30.0%
Average precision: 0.3
writing to csv
done writing to csv
done with 11 of 400


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:37:54.795[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:37:54.797[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:37:54.798[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:37:54.798[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:37:54.799[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

Correct count: 9
Total count: 30
Percentage: 30.0%
Average precision: 0.3
writing to csv
done writing to csv
done with 12 of 400


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:37:59.063[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:37:59.063[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:37:59.063[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:37:59.064[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:37:59.064[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

Correct count: 9
Total count: 30
Percentage: 30.0%
Average precision: 0.3
writing to csv
done writing to csv
done with 13 of 400


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:38:02.817[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:38:02.818[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:38:02.818[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:38:02.818[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:38:02.819[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

Correct count: 9
Total count: 30
Percentage: 30.0%
Average precision: 0.3
writing to csv
done writing to csv
done with 14 of 400


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:38:07.339[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:38:07.340[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:38:07.340[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:38:07.341[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:38:07.342[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

Correct count: 9
Total count: 30
Percentage: 30.0%
Average precision: 0.3
writing to csv
done writing to csv
done with 15 of 400


No sentence-transformers model found with name GroNLP/bert-base-dutch-cased. Creating a new one with mean pooling.
Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:38:11.873[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:38:11.873[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:38:11.874[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:38:11.874[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url h

Correct count: 9
Total count: 30
Percentage: 30.0%
Average precision: 0.3
writing to csv
done writing to csv
done with 16 of 400


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:38:15.456[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:38:15.457[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:38:15.457[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:38:15.457[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:38:15.457[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

Correct count: 9
Total count: 30
Percentage: 30.0%
Average precision: 0.3
writing to csv
done writing to csv
done with 17 of 400


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:38:19.096[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:38:19.097[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:38:19.097[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:38:19.097[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:38:19.098[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

Correct count: 9
Total count: 30
Percentage: 30.0%
Average precision: 0.3
writing to csv
done writing to csv
done with 18 of 400


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:38:23.153[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:38:23.154[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:38:23.154[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:38:23.154[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:38:23.154[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

Correct count: 9
Total count: 30
Percentage: 30.0%
Average precision: 0.3
writing to csv
done writing to csv
done with 19 of 400


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:38:26.649[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:38:26.652[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:38:26.659[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:38:26.660[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:38:26.660[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

Correct count: 9
Total count: 30
Percentage: 30.0%
Average precision: 0.3
writing to csv
done writing to csv
done with 20 of 400


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:38:30.604[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:38:30.605[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:38:30.605[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:38:30.605[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:38:30.606[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

expected string or bytes-like object, got 'list'


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:38:32.087[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:38:32.087[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:38:32.087[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:38:32.088[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:38:32.088[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

expected string or bytes-like object, got 'list'


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:38:33.974[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:38:33.974[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:38:33.974[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:38:33.975[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:38:33.983[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

expected string or bytes-like object, got 'list'


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:38:35.604[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:38:35.605[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:38:35.605[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:38:35.605[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:38:35.606[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

expected string or bytes-like object, got 'list'


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:38:37.805[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:38:37.805[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:38:37.805[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:38:37.806[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:38:37.806[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

expected string or bytes-like object, got 'list'


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:38:39.170[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:38:39.171[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:38:39.171[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:38:39.171[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:38:39.171[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

expected string or bytes-like object, got 'list'


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:38:41.166[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:38:41.167[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:38:41.168[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:38:41.168[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:38:41.169[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

expected string or bytes-like object, got 'list'


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:38:43.592[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:38:43.593[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:38:43.593[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:38:43.593[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:38:43.594[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

expected string or bytes-like object, got 'list'


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:38:45.356[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:38:45.356[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:38:45.356[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:38:45.357[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:38:45.357[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

expected string or bytes-like object, got 'list'


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:38:47.275[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:38:47.276[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:38:47.276[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:38:47.276[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:38:47.277[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

expected string or bytes-like object, got 'list'


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:38:48.764[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:38:48.765[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:38:48.765[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:38:48.765[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:38:48.765[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

Correct count: 28
Total count: 30
Percentage: 93.33333333333333%
Average precision: 0.9333333333333333
writing to csv
done writing to csv
done with 31 of 400


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:39:17.978[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:39:17.980[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:39:17.980[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:39:17.981[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:39:18.005[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

Correct count: 28
Total count: 30
Percentage: 93.33333333333333%
Average precision: 0.9333333333333333
writing to csv
done writing to csv
done with 32 of 400


Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-10-03 15:39:38.878[0m | [1mINFO    [0m | [36mutils[0m:[36mgetEmbeddings[0m:[36m126[0m - [1mLoaded local embeddings: GroNLP/bert-base-dutch-cased[0m
[32m2024-10-03 15:39:38.879[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m38[0m - [1mUse Local LLM[0m
[32m2024-10-03 15:39:38.879[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m39[0m - [1mRetrieving gemma2[0m
[32m2024-10-03 15:39:38.879[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m__init__[0m:[36m41[0m - [1mUsing local api url http://127.0.0.1:11434[0m
[32m2024-10-03 15:39:38.880[0m | [1mINFO    [0m | [36mllm_class.llm_class[0m:[36m_

### Test and evaluate larger models on the same data
The larger models selected are  "BAAI/bge-multilingual-gemma2", "Alibaba-NLP/gte-Qwen2-7B-instruct","Alibaba-NLP/gte-multilingual-base"

In [None]:
LongTimeEmbeddingsModels = ["BAAI/bge-multilingual-gemma2", "Alibaba-NLP/gte-Qwen2-7B-instruct","Alibaba-NLP/gte-multilingual-base"]

current_item = 0
total_items = VALIDATIONLAPS * len(SPLITTING_METHODS) * len(CONTEXT_PRESENT) * len(EMBEDDINGS_MODELS)
for embeddingModel in EMBEDDINGS_MODELS:
  for splittingMethod in SPLITTING_METHODS:
    for context in CONTEXT_PRESENT:
      for i in range(VALIDATIONLAPS):
        current_item += 1
        try:
          chunk_size = CHUNK_SIZE
          if splittingMethod == IngestionMode.token_small:
            chunk_size = 128
          elif splittingMethod == IngestionMode.token_medium:
            chunk_size = 512
          elif splittingMethod == IngestionMode.token_large:
            chunk_size = 1024
            
          # Setup
          my_folder_path_selected, my_vectordb_folder_path_selected = ut.create_vectordb_name(folderSelected, chunk_size=chunk_size, chunk_overlap=0, splitting_method=splittingMethod, embeddings_model=embeddingModel, added_context=context)
          querier,ingester = init(EMBEDDINGS_MODEL=embeddingModel, vectordb_folder=my_vectordb_folder_path_selected)
          # Ingestion
          ingest(mode=splittingMethod, addContext=context, addedMetaDataURLCSV="docs/metadata.csv")
          # RAG CHAIN
          chain(vectorDBPATH=my_vectordb_folder_path_selected)
          
          # Evaluation
          kamervragenEvaluation.evaluate_with_sample_questions(
            question_sample_CSV,querier=querier, 
            toCSV=True, 
            ingestionMode=splittingMethod, 
            addedMetaDataURLCSV="docs/metadata.csv", 
            addContext=context,
            embeddings_model=embeddingModel,
            text_splitter_method=TEXT_SPLITTER_METHOD,
            embeddings_provider=EMBEDDINGS_PROVIDER,
            database=VECDB_TYPE,
            concatFiles=CONCAT_FILES,
            )
        except Exception as e:
          print(e)
          # Write error to file
          write_to_error_log(e, f"splittingMethod={splittingMethod}, context={context}, embeddingModel={embeddingModel}")
          continue
        
        
        
        print(f"done with {current_item} of {total_items}")

# Re-test items

This uses the existing database that has already been ingested

In [None]:
current_item = 0
total_items = VALIDATIONLAPS * len(SPLITTING_METHODS) * len(CONTEXT_PRESENT) * len(EMBEDDINGS_MODELS)
for embeddingModel in EMBEDDINGS_MODELS:
  for splittingMethod in SPLITTING_METHODS:
    for context in CONTEXT_PRESENT:
        for i in range(VALIDATIONLAPS):
          current_item += 1
          try:
            chunk_size = CHUNK_SIZE
            if splittingMethod == IngestionMode.token_small:
              chunk_size = 128
            elif splittingMethod == IngestionMode.token_medium:
              chunk_size = 512
            elif splittingMethod == IngestionMode.token_large:
              chunk_size = 1024
              
            # Setup
            my_folder_path_selected, my_vectordb_folder_path_selected = ut.create_vectordb_name(folderSelected, chunk_size=chunk_size, chunk_overlap=0, splitting_method=splittingMethod, embeddings_model=embeddingModel, added_context=context)
            querier,ingester = init(EMBEDDINGS_MODEL=embeddingModel, vectordb_folder=my_vectordb_folder_path_selected)
            # RAG CHAIN
            chain(vectorDBPATH=my_vectordb_folder_path_selected)
            
            # Evaluation
            kamervragenEvaluation.evaluate_with_sample_questions(
              question_sample_CSV,querier=querier, 
              toCSV=True, 
              ingestionMode=splittingMethod, 
              addedMetaDataURLCSV="docs/metadata.csv", 
              addContext=context,
              embeddings_model=embeddingModel,
              text_splitter_method=TEXT_SPLITTER_METHOD,
              embeddings_provider=EMBEDDINGS_PROVIDER,
              database=VECDB_TYPE,
              concatFiles=CONCAT_FILES,
              )
          except Exception as e:
            print(e)
            # Write error to file
            write_to_error_log(e, f"splittingMethod={splittingMethod}, context={context}, embeddingModel={embeddingModel}")
            continue
          
          
          
          print(f"done with {current_item} of {total_items}")

## Manual input

In [18]:
chunk_size = 128
splittingMethod = IngestionMode.token_small
embeddingModel = "GroNLP/bert-base-dutch-cased"
context = True

In [None]:
my_folder_path_selected, my_vectordb_folder_path_selected = ut.create_vectordb_name(folderSelected, chunk_size=chunk_size, chunk_overlap=0, splitting_method=splittingMethod, embeddings_model=embeddingModel, added_context=context)
querier,ingester = init(EMBEDDINGS_MODEL=embeddingModel, vectordb_folder=my_vectordb_folder_path_selected)
chain(vectorDBPATH=my_vectordb_folder_path_selected)


In [None]:
querier.get_documents_with_scores("""Waar heeft u uw uitspraken in de Kamer op gebaseerd dat de kans reëel is
dat er in 2025 helemaal niet meer op tong gevist kan worden als je niet zou
kiezen voor 60 procent reductie van het tongquotum en dat Wageningen
Marine Research dit gezegd zou hebben?""")