In [1]:
import sys

# Add the parent directory to the module search path so the imports in the
# following cells can resolve (presumably the project packages live one level up).
sys.path.append("../")

In [2]:
from elasticsearch import Elasticsearch
from qdrant_client import QdrantClient
from cache.cache import Cache


# Qdrant client pointed at a local instance (localhost:6333).
qdrant_client = QdrantClient(host="localhost", port=6333)
# Elasticsearch client pointed at a local node (http://localhost:9200).
es_client = Elasticsearch(
    hosts=["http://localhost:9200"],
)
# Cache instance that is handed to the vectorizer, repositories, reranker and
# generator created in later cells.
cache = Cache()

In [3]:
from common.names import RERANKER_MODEL
from evaluation.ragas_evaulator import RAGASEvaluator
from vectorizer.hf_vectorizer import HFVectorizer

# Vectorizer built on the sdadas/mmlw-retrieval-roberta-large model.
vectorizer = HFVectorizer("sdadas/mmlw-retrieval-roberta-large", cache)
# RAGAS evaluator constructed from the RERANKER_MODEL constant, the cache,
# a relative path to the Bielik-11B-v2.2-Instruct-q4 model directory, and the
# vectorizer created above.
ragas = RAGASEvaluator(
    RERANKER_MODEL, cache, "../../models/Bielik-11B-v2.2-Instruct-q4", vectorizer
)

  from tqdm.autonotebook import tqdm, trange
  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'
Vectorizer with model sdadas/mmlw-retrieval-roberta-large initialized


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/jakubkusiowski/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [4]:
from common.names import (
    OPENAI_EMBEDDING_MODEL_NAMES,
    PASSAGE_PREFIX_MAP,
    QUERY_PREFIX_MAP,
)
from repository.es_repository import ESRepository
from repository.qdrant_openai_repository import QdrantOpenAIRepository
from repository.qdrant_repository import QdrantRepository
from qdrant_client.models import Distance

from rerankers.hf_reranker import HFReranker
from retrievers.es_retriever import ESRetriever
from retrievers.hybrid_retriever import HybridRetriever
from retrievers.qdrant_retriever import QdrantRetriever
from retrievers.retriever import Retriever


def get_best_poquad_retriever() -> tuple[Retriever, str]:
    """Build the hybrid PoQuAD retriever configuration used in this notebook.

    An ``ESRepository`` and a Qdrant repository are combined in a
    ``HybridRetriever``, which also receives an ``HFReranker``.

    Returns:
        A ``(retriever, label)`` pair. ``label`` is the dash-joined
        configuration (ES index, embedding model, distance, dataset key,
        alpha, reranker model). It is derived from the same variables that
        configure the retriever, so the label cannot drift from the actual
        setup.
    """
    dataset_key = "clarin-pl-poquad-100000"
    es_index = "morfologik_index"
    qdrant_model = "intfloat/multilingual-e5-large"
    reranker_model = "sdadas/polish-reranker-large-ranknet"
    distance = Distance.COSINE
    # Passed straight to HybridRetriever; presumably the weight between the two
    # repositories' scores — TODO confirm against HybridRetriever.
    alpha = 0.5

    es_repository = ESRepository(es_client, es_index, cache)
    passage_prefix = PASSAGE_PREFIX_MAP[qdrant_model]
    query_prefix = QUERY_PREFIX_MAP[qdrant_model]
    qdrant_repository = QdrantRepository.get_repository(
        qdrant_client,
        qdrant_model,
        distance,
        cache,
        passage_prefix,
        query_prefix,
    )
    reranker = HFReranker(reranker_model, cache)

    retriever = HybridRetriever(
        es_repository, qdrant_repository, dataset_key, alpha, reranker
    )

    # Use ``.value`` explicitly so the label contains the enum's string value
    # ("Cosine") regardless of how the enum formats itself.
    label = "-".join(
        [
            es_index,
            qdrant_model,
            distance.value,
            dataset_key,
            str(alpha),
            reranker_model,
        ]
    )

    return retriever, label

In [5]:
from common.names import DATASET_SEED
from dataset.polqa_dataset_getter import PolqaDatasetGetter
from dataset.poquad_dataset_getter import PoquadDatasetGetter


poquad_dataset_getter = PoquadDatasetGetter()
polqa_dataset_getter = PolqaDatasetGetter()

# Ask each getter for 500 test entries with DATASET_SEED (presumably a seeded
# random sample — confirm in the getter), then keep only the first 100 of them.
poquad_dataset = poquad_dataset_getter.get_random_n_test(500, DATASET_SEED)[:100]
polqa_dataset = polqa_dataset_getter.get_random_n_test(500, DATASET_SEED)[:100]

In [6]:
# NOTE(review): this cell redefines get_best_poquad_retriever, which is already
# defined in an earlier cell; the later definition wins. Consider keeping one.
def get_best_poquad_retriever() -> tuple[Retriever, str]:
    """Build the hybrid PoQuAD retriever configuration used in this notebook.

    An ``ESRepository`` and a Qdrant repository are combined in a
    ``HybridRetriever``, which also receives an ``HFReranker``.

    Returns:
        A ``(retriever, label)`` pair. ``label`` is the dash-joined
        configuration (ES index, embedding model, distance, dataset key,
        alpha, reranker model). It is derived from the same variables that
        configure the retriever, so the label cannot drift from the actual
        setup.
    """
    dataset_key = "clarin-pl-poquad-100000"
    es_index = "morfologik_index"
    qdrant_model = "intfloat/multilingual-e5-large"
    reranker_model = "sdadas/polish-reranker-large-ranknet"
    distance = Distance.COSINE
    # Passed straight to HybridRetriever; presumably the weight between the two
    # repositories' scores — TODO confirm against HybridRetriever.
    alpha = 0.5

    es_repository = ESRepository(es_client, es_index, cache)
    passage_prefix = PASSAGE_PREFIX_MAP[qdrant_model]
    query_prefix = QUERY_PREFIX_MAP[qdrant_model]
    qdrant_repository = QdrantRepository.get_repository(
        qdrant_client,
        qdrant_model,
        distance,
        cache,
        passage_prefix,
        query_prefix,
    )
    reranker = HFReranker(reranker_model, cache)

    retriever = HybridRetriever(
        es_repository, qdrant_repository, dataset_key, alpha, reranker
    )

    # Use ``.value`` explicitly so the label contains the enum's string value
    # ("Cosine") regardless of how the enum formats itself.
    label = "-".join(
        [
            es_index,
            qdrant_model,
            distance.value,
            dataset_key,
            str(alpha),
            reranker_model,
        ]
    )

    return retriever, label

In [7]:
# Keep only the retriever; the second tuple element (the configuration label)
# is not needed in this notebook section.
retriever = get_best_poquad_retriever()[0]

Vectorizer with model intfloat/multilingual-e5-large initialized
Qdrant collection intfloat-multilingual-e5-large-Cosine repository initialized
Vectorizer with model sdadas/polish-reranker-large-ranknet initialized


In [8]:
from common.names import INST_MODEL_PATHS
from generators.instruction_generator import InstructionGenerator


# Answer generator built from the first entry of INST_MODEL_PATHS and the cache.
generator = InstructionGenerator(INST_MODEL_PATHS[0], cache)


In [11]:
# Hallucination score per PoQuAD entry, in dataset order. Scores are appended
# as they are produced, so a partial run (e.g. interrupted by hand) still
# leaves the results collected so far in this list.
hal_scores = []
# Number of top passages kept from each retrieval result before generation.
n = 5


for entry in poquad_dataset:
    result = retriever.get_relevant_passages(entry.question)
    # Truncate in place: the same ``result`` object is later passed to the evaluator.
    result.passages = result.passages[:n]
    # Each item in ``result.passages`` is unpacked as a pair; only the first
    # element (the passage) is passed to the generator.
    answer = generator.generate_answer(
        entry.question, [passage for (passage, _) in result.passages]
    )

    hal_score = ragas.hallucination(result, answer)
    hal_scores.append(hal_score)
    print(hal_score)

0.7914844552675883
0.8376137057940166
0.8166127138667636
0.8467580530378553
0.8273213863372803
0.8301357507705688
0.85299488041136


KeyboardInterrupt: 

In [None]:
from common.dataset_entry import DatasetEntry

# Questions seen so far, and the entries whose question was already seen.
unique_questions = set()
repetetive_polqa_qestion_entries: list[DatasetEntry] = []

for entry in polqa_dataset:
    # First occurrence: remember the question and move on.
    if entry.question not in unique_questions:
        unique_questions.add(entry.question)
        continue
    # Any later occurrence of the same question is recorded as a repeat.
    repetetive_polqa_qestion_entries.append(entry)

In [None]:
# First line: number of distinct questions; second line: number of repeated entries.
print(len(unique_questions), len(repetetive_polqa_qestion_entries), sep="\n")

100
0


In [None]:
from elasticsearch import Elasticsearch
from qdrant_client import QdrantClient
from cache.cache import Cache


# NOTE(review): this cell rebinds qdrant_client, es_client and cache, which are
# already created with the same arguments in an earlier cell. Objects built
# before this point keep their references to the earlier instances.
qdrant_client = QdrantClient(host="localhost", port=6333)
es_client = Elasticsearch(
    hosts=["http://localhost:9200"],
)
cache = Cache()

In [None]:
from elasticsearch import ConflictError, NotFoundError
from common.names import DISTANCES, INDEX_NAMES, MODEL_NAMES, OPENAI_EMBEDDING_MODEL_NAMES
from common.utils import replace_slash_with_dash
from qdrant_client import models

# WARNING: destructive. Issues a match_all delete-by-query against every index
# listed in INDEX_NAMES, i.e. removes the documents while leaving the indices
# themselves in place. Run only when the indices are meant to be emptied.
for index_name in INDEX_NAMES:
    es_client.delete_by_query(
        index=index_name,
        body={"query": {"match_all": {}}},
        conflicts="proceed",  # Ignore version conflicts
    )
   

In [None]:
from common.names import QUERY_PREFIX_MAP
from repository.qdrant_repository import QdrantRepository
from qdrant_client.models import Distance

from retrievers.qdrant_retriever import QdrantRetriever


# Qdrant repository for intfloat/multilingual-e5-large with cosine distance.
# NOTE(review): the fifth argument (the passage prefix in
# get_best_poquad_retriever's call) is "" here, whereas that function passes
# PASSAGE_PREFIX_MAP[model] — confirm the empty prefix is intentional.
qdrant_repository = QdrantRepository.get_repository(
    qdrant_client,
    "intfloat/multilingual-e5-large",
    Distance.COSINE,
    cache,
    "",
    QUERY_PREFIX_MAP["intfloat/multilingual-e5-large"],
)

# Retriever over that repository for the "ipipan-polqa-100000" dataset key.
qdrant_retriever = QdrantRetriever(qdrant_repository, "ipipan-polqa-100000")

Vectorizer with model intfloat/multilingual-e5-large initialized
Qdrant collection intfloat-multilingual-e5-large-Cosine repository initialized


In [None]:
from typing import Dict
from evaluation.retriever_evaluator import PoquadRetrieverEvaluator

# Module-level evaluator instance; run_poquad_evaluation below reads it as a global.
poquad_evaluator = PoquadRetrieverEvaluator()

def _mean(values):
    """Return the arithmetic mean of ``values``, or ``nan`` when it is empty."""
    return sum(values) / len(values) if values else float("nan")


def run_poquad_evaluation(dataset, repository, retriever, dataset_key):
    """Score ``retriever`` on ``dataset`` and return the mean of each metric.

    For every entry the retriever is queried with ``entry.question`` and the
    result is scored against ``entry.passage_id`` using the module-level
    ``poquad_evaluator`` (nDCG, MRR, recall, accuracy). Per-entry scores and
    running means are printed while the loop runs.

    The loop stops at the first entry for which the repository reports zero
    relevant documents; entries scored before that point still count.

    Args:
        dataset: Iterable of entries exposing ``passage_id`` and ``question``.
        repository: Object providing ``count_relevant_documents(ids, dataset_key)``.
        retriever: Object providing ``get_relevant_passages(query)``.
        dataset_key: Dataset identifier forwarded to ``count_relevant_documents``.

    Returns:
        Dict with the keys ``"ndcg"``, ``"mrr"``, ``"recall"`` and
        ``"accuracy"``, each holding the mean over the scored entries. Every
        value is ``nan`` when no entry was scored (empty dataset, or the loop
        stopped on the very first entry) instead of raising
        ``ZeroDivisionError``.
    """
    # One list of per-entry scores per metric; insertion order fixes the key
    # order of the returned dict.
    per_metric: dict[str, list[float]] = {
        "ndcg": [],
        "mrr": [],
        "recall": [],
        "accuracy": [],
    }

    for entry in dataset:

        passage_id = entry.passage_id
        print(passage_id)
        query = entry.question
        result = retriever.get_relevant_passages(query)
        relevant_passages_count = repository.count_relevant_documents(
            [passage_id], dataset_key
        )

        print(relevant_passages_count)

        # Stop on the first entry with no relevant documents in the repository.
        if relevant_passages_count == 0:
            print("#\n#\n#\n#")
            print("ERROR NO RELEVANT PASSAGES")
            print("#\n#\n#\n#")
            break

        ndcg = poquad_evaluator.calculate_ndcg(result, passage_id)
        mrr = poquad_evaluator.calculate_mrr(result, passage_id)
        recall = poquad_evaluator.calculate_recall(
            result, passage_id, relevant_passages_count
        )
        accuracy = poquad_evaluator.calculate_accuracy(result, passage_id)

        per_metric["ndcg"].append(ndcg)
        per_metric["mrr"].append(mrr)
        per_metric["recall"].append(recall)
        per_metric["accuracy"].append(accuracy)

        # First line: this entry's scores; second line: running means so far.
        print(f"ndcg: {ndcg}, mrr: {mrr}, recall: {recall}, accuracy: {accuracy}")
        print(
            f"ndcg: {_mean(per_metric['ndcg'])}, mrr: {_mean(per_metric['mrr'])}, recall: {_mean(per_metric['recall'])}, accuracy: {_mean(per_metric['accuracy'])}"
        )

    scores: dict[str, float] = {
        name: _mean(values) for name, values in per_metric.items()
    }

    return scores

In [None]:
# Evaluate the Qdrant retriever on the PolQA sample; the returned score dict is
# shown as the cell output.
run_poquad_evaluation(
    polqa_dataset, qdrant_repository, qdrant_retriever, "ipipan-polqa-100000"
)

86da700fb3dc64aea5580790e490a4a024084aca519f38334798e0bd8f9bc264
[FieldCondition(key='dataset_key', match=MatchValue(value='ipipan-polqa-100000'), range=None, geo_bounding_box=None, geo_radius=None, geo_polygon=None, values_count=None), FieldCondition(key='id', match=MatchAny(any=['86da700fb3dc64aea5580790e490a4a024084aca519f38334798e0bd8f9bc264']), range=None, geo_bounding_box=None, geo_radius=None, geo_polygon=None, values_count=None)]
count=1
1
ndcg: 0.5, mrr: 0.3333333333333333, recall: 1.0, accuracy: 0
ndcg: 0.5, mrr: 0.3333333333333333, recall: 1.0, accuracy: 0.0
5a2cd2f7fe9cfcdb285c48a000542304edbd34e0349d8dd937c6b79fb809745d
[FieldCondition(key='dataset_key', match=MatchValue(value='ipipan-polqa-100000'), range=None, geo_bounding_box=None, geo_radius=None, geo_polygon=None, values_count=None), FieldCondition(key='id', match=MatchAny(any=['5a2cd2f7fe9cfcdb285c48a000542304edbd34e0349d8dd937c6b79fb809745d']), range=None, geo_bounding_box=None, geo_radius=None, geo_polygon=None, va

{'ndcg': 0.6171980889708193,
 'mrr': 0.5051102872923049,
 'recall': 0.9692982456140351,
 'accuracy': 0.29239766081871343}