In [1]:
import sys

sys.path.append("../")

In [2]:
from elasticsearch import Elasticsearch
from qdrant_client import QdrantClient
from cache.cache import Cache


qdrant_client = QdrantClient(host="localhost", port=6333)
es_client = Elasticsearch(
    hosts=["http://localhost:9200"],
)
cache = Cache()

  qdrant_client = QdrantClient(host="localhost", port=6333)


In [3]:
from common.names import RERANKER_MODEL
from evaluation.ragas_evaulator import RAGASEvaluator
from vectorizer.hf_vectorizer import HFVectorizer

vectorizer = HFVectorizer("sdadas/mmlw-retrieval-roberta-large", cache)
ragas = RAGASEvaluator(
    RERANKER_MODEL, cache, "../../models/PLLuM-12B-instruct-q4", vectorizer
)

Vectorizer with model sdadas/mmlw-retrieval-roberta-large initialized


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/jakubkusiowski/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [4]:
from common.names import (
    OPENAI_EMBEDDING_MODEL_NAMES,
    PASSAGE_PREFIX_MAP,
    QUERY_PREFIX_MAP,
)
from repository.es_repository import ESRepository
from repository.qdrant_openai_repository import QdrantOpenAIRepository
from repository.qdrant_repository import QdrantRepository
from qdrant_client.models import Distance

from rerankers.hf_reranker import HFReranker
from retrievers.es_retriever import ESRetriever
from retrievers.hybrid_retriever import HybridRetriever
from retrievers.qdrant_retriever import QdrantRetriever
from retrievers.retriever import Retriever


def get_best_poquad_retriever() -> tuple[Retriever, str]:
    dataset_key = "clarin-pl-poquad-100000"
    es_index = "morfologik_index"
    qdrant_model = "intfloat/multilingual-e5-large"
    reranker_model = "sdadas/polish-reranker-large-ranknet"
    alpha = 0.5

    es_repository = ESRepository(es_client, es_index, cache)
    passage_prefix = PASSAGE_PREFIX_MAP[qdrant_model]
    query_prefix = QUERY_PREFIX_MAP[qdrant_model]
    qdrant_repository = QdrantRepository.get_repository(
        qdrant_client,
        qdrant_model,
        Distance.COSINE,
        cache,
        passage_prefix,
        query_prefix,
    )
    reranker = HFReranker(reranker_model, cache)

    retriever = HybridRetriever(
        es_repository, qdrant_repository, dataset_key, alpha, reranker
    )

    return (
        retriever,
        "morfologik_index-intfloat/multilingual-e5-large-Cosine-clarin-pl-poquad-100000-0.5-sdadas/polish-reranker-large-ranknet",
    )

In [5]:
from common.names import DATASET_SEED
from dataset.polqa_dataset_getter import PolqaDatasetGetter
from dataset.poquad_dataset_getter import PoquadDatasetGetter


poquad_dataset_getter = PoquadDatasetGetter()
polqa_dataset_getter = PolqaDatasetGetter()

poquad_dataset = poquad_dataset_getter.get_random_n_test(500, DATASET_SEED)[:100]
polqa_dataset = polqa_dataset_getter.get_random_n_test(500, DATASET_SEED)[:100]

In [6]:
def get_best_poquad_retriever() -> tuple[Retriever, str]:
    dataset_key = "clarin-pl-poquad-100000"
    es_index = "morfologik_index"
    qdrant_model = "intfloat/multilingual-e5-large"
    reranker_model = "sdadas/polish-reranker-large-ranknet"
    alpha = 0.5

    es_repository = ESRepository(es_client, es_index, cache)
    passage_prefix = PASSAGE_PREFIX_MAP[qdrant_model]
    query_prefix = QUERY_PREFIX_MAP[qdrant_model]
    qdrant_repository = QdrantRepository.get_repository(
        qdrant_client,
        qdrant_model,
        Distance.COSINE,
        cache,
        passage_prefix,
        query_prefix,
    )
    reranker = HFReranker(reranker_model, cache)

    retriever = HybridRetriever(
        es_repository, qdrant_repository, dataset_key, alpha, reranker
    )

    return (
        retriever,
        "morfologik_index-intfloat/multilingual-e5-large-Cosine-clarin-pl-poquad-100000-0.5-sdadas/polish-reranker-large-ranknet",
    )

In [7]:
retriever = get_best_poquad_retriever()[0]

Vectorizer with model intfloat/multilingual-e5-large initialized
Qdrant collection intfloat-multilingual-e5-large-Cosine repository initialized
Vectorizer with model sdadas/polish-reranker-large-ranknet initialized


In [8]:
from common.names import INST_MODEL_PATHS
from generators.instruction_generator import InstructionGenerator


generator = InstructionGenerator(INST_MODEL_PATHS[2], cache)


In [9]:
hal_scores = []
n = 5


for entry in poquad_dataset:    
    result = retriever.get_relevant_passages(entry.question)
    result.passages = result.passages[:n]
    answer = generator.generate_answer(entry.question, [passage for (passage, _) in result.passages])

    print(ragas.hallucination(result, answer))

0.4601368308067322
0.4438478893703885
0.4763565182685852
0.4587802052497864
0.45490841600630016
0.8104492955737643
0.837481395403544
0.8668285886446635
0.46236984332402553
0.46544650660620795
0.4430265784263611
0.7744162744945949
0.8389997508790757
0.8638077603446113
0.4741881489753723
0.47250867419772674
0.44280656311247085
0.8847644554244147
0.7704950345887078
0.4457202778922187
0.882605336772071
0.45013348923789126
0.4548708928955925
0.868443931473626
0.44043015374077693
0.8271593477990892
0.47055681149164835
0.8161473764313593
0.8949701309204102
0.8842248267597622
0.4330896510018243
0.8582213971349928
0.4263785243034363
0.789284230603112
0.4637789501084222
0.45333315134048463
0.8223413427670797
0.5042341179317898
0.8248651888635422
0.4539534409840902
0.826536614365048
0.8719880024592083
0.8820703201823764
0.8492149617936876
0.44347844653659396
0.8340788496865166
0.8598861204253302
0.8565022548039755
0.421261465549469
0.9045980241563584
0.44942700465520224
0.894346695476108
0.448619

In [10]:
from common.dataset_entry import DatasetEntry

unique_questions = set()
repetetive_polqa_qestion_entries: list[DatasetEntry] = []

for entry in polqa_dataset:
    if entry.question in unique_questions:
        repetetive_polqa_qestion_entries.append(entry)
    else:
        unique_questions.add(entry.question)

In [11]:
print(len(unique_questions))
print(len(repetetive_polqa_qestion_entries))

100
0


In [12]:
from elasticsearch import Elasticsearch
from qdrant_client import QdrantClient
from cache.cache import Cache


qdrant_client = QdrantClient(host="localhost", port=6333)
es_client = Elasticsearch(
    hosts=["http://localhost:9200"],
)
cache = Cache()

  qdrant_client = QdrantClient(host="localhost", port=6333)


In [13]:
from elasticsearch import ConflictError, NotFoundError
from common.names import DISTANCES, INDEX_NAMES, MODEL_NAMES, OPENAI_EMBEDDING_MODEL_NAMES
from common.utils import replace_slash_with_dash
from qdrant_client import models

for model in INDEX_NAMES:
    i = 0
    es_client.delete_by_query(
        index=model,
        body={"query": {"match_all": {}}},
        conflicts="proceed"  # Ignore version conflicts
    )
   

In [14]:
from common.names import QUERY_PREFIX_MAP
from repository.qdrant_repository import QdrantRepository
from qdrant_client.models import Distance

from retrievers.qdrant_retriever import QdrantRetriever


qdrant_repository = QdrantRepository.get_repository(
    qdrant_client,
    "intfloat/multilingual-e5-large",
    Distance.COSINE,
    cache,
    "",
    QUERY_PREFIX_MAP["intfloat/multilingual-e5-large"],
)

qdrant_retriever = QdrantRetriever(qdrant_repository, "ipipan-polqa-100000")

Vectorizer with model intfloat/multilingual-e5-large initialized
Qdrant collection intfloat-multilingual-e5-large-Cosine repository initialized


In [None]:
from typing import Dict
from repository.repository import Repository
from evaluation.retriever_evaluator import RetrieverEvaluator
retriever_evaluator = RetrieverEvaluator()

def run_polqa_evaluation(
    dataset: list[DatasetEntry],
    repository: Repository,
    retriever: Retriever,
    dataset_key: str,
):
    scores: Dict[str, float] = {}

    ndcgs = []
    mrrs = []
    recalls = []
    accuracies = []

    for entry in dataset:
        passage_id = entry.passage_id
        query = entry.question

        result = retriever.get_relevant_passages(query)
        relevant_passages_count = repository.count_relevant_documents(
            passage_id, dataset_key
        )

        if relevant_passages_count == 0:
            print(f"ERROR NO RELEVANT PASSAGES - passage id {passage_id}")
            break

        ndcg = retriever_evaluator.calculate_ndcg(result, passage_id)
        mrr = retriever_evaluator.calculate_mrr(result, passage_id)
        recall = retriever_evaluator.calculate_recall(
            result, passage_id, relevant_passages_count
        )
        accuracy = retriever_evaluator.calculate_accuracy(result, passage_id)
        if recall < 0:
            break
        if recall > 1:
            print(dataset_key, passage_id, recall, relevant_passages_count)
            for passage, _ in result.passages:
                print(passage)
            break

        ndcgs.append(ndcg)
        mrrs.append(mrr)
        recalls.append(recall)
        accuracies.append(accuracy)

    scores["ndcg"] = sum(ndcgs) / len(ndcgs)
    scores["mrr"] = sum(mrrs) / len(mrrs)
    scores["recall"] = sum(recalls) / len(recalls)
    scores["accuracy"] = sum(accuracies) / len(accuracies)

    return scores

In [19]:
run_poquad_evaluation(
    polqa_dataset, qdrant_repository, qdrant_retriever, "ipipan-polqa-100000"
)

NameError: name 'retriever_evaluator' is not defined