In [1]:
import sys

# Add the parent directory (relative to the kernel's working directory) to the
# module search path; presumably this is what makes the project packages
# imported in the cells below (cache, common, repository, ...) resolvable.
sys.path.append("../")

In [3]:
from elasticsearch import Elasticsearch
from qdrant_client import QdrantClient
from cache.cache import Cache


# Clients for services expected on localhost: Qdrant on port 6333 and
# Elasticsearch over HTTP on port 9200.
qdrant_client = QdrantClient(host="localhost", port=6333)
es_client = Elasticsearch(
    hosts=["http://localhost:9200"],
)
# Single Cache instance that is handed to every repository, reranker and
# generator constructed later in this notebook.
cache = Cache()

In [2]:
from common.names import (
    OPENAI_EMBEDDING_MODEL_NAMES,
    PASSAGE_PREFIX_MAP,
    QUERY_PREFIX_MAP,
)
from repository.es_repository import ESRepository
from repository.qdrant_openai_repository import QdrantOpenAIRepository
from repository.qdrant_repository import QdrantRepository
from qdrant_client.models import Distance

from rerankers.hf_reranker import HFReranker
from retrievers.es_retriever import ESRetriever
from retrievers.hybrid_retriever import HybridRetriever
from retrievers.qdrant_retriever import QdrantRetriever
from retrievers.retriever import Retriever


def get_best_poquad_retriever() -> tuple[Retriever, str]:
    """Build the PoQuAD hybrid retriever (Elasticsearch + Qdrant) with reranking.

    Combines the "morfologik_index" Elasticsearch repository with a cosine
    Qdrant repository for "intfloat/multilingual-e5-large" (alpha 0.5) and
    reranks with "sdadas/polish-reranker-large-ranknet".

    Returns:
        A ``(retriever, label)`` pair; the label identifies this configuration
        in result tables and file names.
    """
    embedding_model = "intfloat/multilingual-e5-large"

    lexical_repository = ESRepository(es_client, "morfologik_index", cache)
    dense_repository = QdrantRepository.get_repository(
        qdrant_client,
        embedding_model,
        Distance.COSINE,
        cache,
        PASSAGE_PREFIX_MAP[embedding_model],
        QUERY_PREFIX_MAP[embedding_model],
    )
    hybrid_retriever = HybridRetriever(
        lexical_repository,
        dense_repository,
        "clarin-pl-poquad-100000",
        0.5,
        HFReranker("sdadas/polish-reranker-large-ranknet", cache),
    )

    label = "morfologik_index-intfloat/multilingual-e5-large-Cosine-clarin-pl-poquad-100000-0.5-sdadas/polish-reranker-large-ranknet"
    return hybrid_retriever, label


def get_50p_poquad_retriever() -> tuple[Retriever, str]:
    """Build the PoQuAD dense-only retriever backed by Qdrant (Euclidean distance).

    Uses the "sdadas/mmlw-retrieval-roberta-large" embedding model with its
    passage/query prefixes and the "clarin-pl-poquad-1000" dataset key.

    Returns:
        A ``(retriever, label)`` pair; the label identifies this configuration
        in result tables and file names.
    """
    embedding_model = "sdadas/mmlw-retrieval-roberta-large"

    dense_repository = QdrantRepository.get_repository(
        qdrant_client,
        embedding_model,
        Distance.EUCLID,
        cache,
        PASSAGE_PREFIX_MAP[embedding_model],
        QUERY_PREFIX_MAP[embedding_model],
    )
    dense_retriever = QdrantRetriever(dense_repository, "clarin-pl-poquad-1000")

    label = "sdadas/mmlw-retrieval-roberta-large-Euclid-clarin-pl-poquad-1000"
    return dense_retriever, label


def get_worst_poquad_retriever() -> tuple[Retriever, str]:
    """Build the PoQuAD Elasticsearch-only retriever on "basic_index".

    Returns:
        A ``(retriever, label)`` pair for the "clarin-pl-poquad-500" dataset key.
    """
    lexical_repository = ESRepository(es_client, "basic_index", cache)
    lexical_retriever = ESRetriever(lexical_repository, "clarin-pl-poquad-500")
    return lexical_retriever, "basic_index-clarin-pl-poquad-500"


def get_best_poquad_openai_retriever() -> tuple[Retriever, str]:
    """Build the PoQuAD Qdrant retriever using OpenAI embeddings (cosine).

    The embedding model is the first entry of OPENAI_EMBEDDING_MODEL_NAMES; the
    returned label hard-codes "text-embedding-3-large" for it.

    Returns:
        A ``(retriever, label)`` pair for the "clarin-pl-poquad-2000" dataset key.
    """
    embedding_model = OPENAI_EMBEDDING_MODEL_NAMES[0]
    openai_repository = QdrantOpenAIRepository.get_repository(
        qdrant_client,
        embedding_model,
        Distance.COSINE,
        cache,
    )
    openai_retriever = QdrantRetriever(openai_repository, "clarin-pl-poquad-2000")
    return openai_retriever, "text-embedding-3-large-Cosine-clarin-pl-poquad-2000"


def get_worst_poquad_openai_retriever() -> tuple[Retriever, str]:
    """Build the PoQuAD Qdrant retriever using OpenAI embeddings on the 500 key.

    The embedding model is the first entry of OPENAI_EMBEDDING_MODEL_NAMES; the
    returned label hard-codes "text-embedding-3-large" for it.

    Returns:
        A ``(retriever, label)`` pair for the "clarin-pl-poquad-500" dataset key.
    """
    embedding_model = OPENAI_EMBEDDING_MODEL_NAMES[0]
    openai_repository = QdrantOpenAIRepository.get_repository(
        qdrant_client,
        embedding_model,
        Distance.COSINE,
        cache,
    )
    openai_retriever = QdrantRetriever(openai_repository, "clarin-pl-poquad-500")
    return openai_retriever, "text-embedding-3-large-Cosine-clarin-pl-poquad-500"


def get_best_polqa_retriever() -> tuple[Retriever, str]:
    """Build the PolQA hybrid retriever (Elasticsearch + Qdrant) with reranking.

    Combines the "morfologik_index" Elasticsearch repository with a cosine
    Qdrant repository for "sdadas/mmlw-retrieval-roberta-large" and reranks
    with "sdadas/polish-reranker-large-ranknet".

    Returns:
        A ``(retriever, label)`` pair. The label is assembled from the values
        that are actually passed to the retriever, so it cannot drift from the
        configuration.
    """
    dataset_key = "ipipan-polqa-1000"
    es_index = "morfologik_index"
    qdrant_model = "sdadas/mmlw-retrieval-roberta-large"
    reranker_model = "sdadas/polish-reranker-large-ranknet"
    # NOTE(review): the previous hard-coded label said "0.5" while the retriever
    # was built with 0.75. The label now reports the value in use; confirm that
    # 0.75 (and not 0.5) is the intended alpha for this configuration.
    alpha = 0.75

    es_repository = ESRepository(es_client, es_index, cache)
    passage_prefix = PASSAGE_PREFIX_MAP[qdrant_model]
    query_prefix = QUERY_PREFIX_MAP[qdrant_model]
    qdrant_repository = QdrantRepository.get_repository(
        qdrant_client,
        qdrant_model,
        Distance.COSINE,
        cache,
        passage_prefix,
        query_prefix,
    )
    reranker = HFReranker(reranker_model, cache)

    retriever = HybridRetriever(
        es_repository, qdrant_repository, dataset_key, alpha, reranker
    )

    # Same layout as the other hybrid labels:
    # <es index>-<embedding model>-<distance>-<dataset key>-<alpha>-<reranker>
    retriever_name = (
        f"{es_index}-{qdrant_model}-Cosine-{dataset_key}-{alpha}-{reranker_model}"
    )
    return (retriever, retriever_name)


def get_50p_polqa_retriever() -> tuple[Retriever, str]:
    """Build the PolQA hybrid retriever (Elasticsearch + Qdrant) without reranking.

    Combines the "morfologik_index" Elasticsearch repository with a cosine
    Qdrant repository for "sdadas/mmlw-retrieval-roberta-large" using alpha 0.75.

    Returns:
        A ``(retriever, label)`` pair for the "ipipan-polqa-1000" dataset key.
    """
    embedding_model = "sdadas/mmlw-retrieval-roberta-large"

    lexical_repository = ESRepository(es_client, "morfologik_index", cache)
    dense_repository = QdrantRepository.get_repository(
        qdrant_client,
        embedding_model,
        Distance.COSINE,
        cache,
        PASSAGE_PREFIX_MAP[embedding_model],
        QUERY_PREFIX_MAP[embedding_model],
    )
    hybrid_retriever = HybridRetriever(
        lexical_repository, dense_repository, "ipipan-polqa-1000", 0.75
    )

    label = "morfologik_index-sdadas/mmlw-retrieval-roberta-large-Cosine-ipipan-polqa-1000-0.75"
    return hybrid_retriever, label


def get_worst_polqa_retriever() -> tuple[Retriever, str]:
    """Build the PolQA Elasticsearch-only retriever on "basic_index".

    Returns:
        A ``(retriever, label)`` pair for the "ipipan-polqa-500" dataset key.
    """
    lexical_repository = ESRepository(es_client, "basic_index", cache)
    lexical_retriever = ESRetriever(lexical_repository, "ipipan-polqa-500")
    return lexical_retriever, "basic_index-ipipan-polqa-500"


def get_best_polqa_openai_retriever() -> tuple[Retriever, str]:
    """Build the PolQA Qdrant retriever using OpenAI embeddings (Euclidean).

    The embedding model is the first entry of OPENAI_EMBEDDING_MODEL_NAMES; the
    returned label hard-codes "text-embedding-3-large" for it.

    Returns:
        A ``(retriever, label)`` pair for the "ipipan-polqa-2000" dataset key.
    """
    embedding_model = OPENAI_EMBEDDING_MODEL_NAMES[0]
    openai_repository = QdrantOpenAIRepository.get_repository(
        qdrant_client,
        embedding_model,
        Distance.EUCLID,
        cache,
    )
    openai_retriever = QdrantRetriever(openai_repository, "ipipan-polqa-2000")
    return openai_retriever, "text-embedding-3-large-Euclid-ipipan-polqa-2000"


def get_worst_polqa_openai_retriever() -> tuple[Retriever, str]:
    """Build the PolQA Qdrant retriever using OpenAI embeddings on the 500 key.

    The embedding model is the first entry of OPENAI_EMBEDDING_MODEL_NAMES; the
    returned label hard-codes "text-embedding-3-large" for it.

    Returns:
        A ``(retriever, label)`` pair for the "ipipan-polqa-500" dataset key.
    """
    embedding_model = OPENAI_EMBEDDING_MODEL_NAMES[0]
    openai_repository = QdrantOpenAIRepository.get_repository(
        qdrant_client,
        embedding_model,
        Distance.COSINE,
        cache,
    )
    openai_retriever = QdrantRetriever(openai_repository, "ipipan-polqa-500")
    return openai_retriever, "text-embedding-3-large-Cosine-ipipan-polqa-500"

In [4]:
# Retriever factories grouped by dataset and embedding backend. The lists hold
# the functions themselves (not their results); each one returns a
# (retriever, label) pair when it is called by the evaluation code below.

# PoQuAD: hybrid + reranker, Qdrant-only, Elasticsearch-only.
poquad_retriever_functions = [
    get_best_poquad_retriever,
    get_50p_poquad_retriever,
    get_worst_poquad_retriever,
]

# PoQuAD with OpenAI embeddings stored in Qdrant.
poquad_openai_retriever_functions = [
    get_best_poquad_openai_retriever,
    get_worst_poquad_openai_retriever,
]

# PolQA: hybrid + reranker, hybrid without reranker, Elasticsearch-only.
polqa_retriever_functions = [
    get_best_polqa_retriever,
    get_50p_polqa_retriever,
    get_worst_polqa_retriever,
]

# PolQA with OpenAI embeddings stored in Qdrant.
polqa_openai_retriever_functions = [
    get_best_polqa_openai_retriever,
    get_worst_polqa_openai_retriever,
]

In [5]:
from common.names import DATASET_SEED
from dataset.polqa_dataset_getter import PolqaDatasetGetter
from dataset.poquad_dataset_getter import PoquadDatasetGetter


# One getter per dataset.
poquad_dataset_getter = PoquadDatasetGetter()
polqa_dataset_getter = PolqaDatasetGetter()

# Ask each getter for 500 test items with DATASET_SEED and keep only the first
# 100 of them. Presumably get_random_n_test draws a seeded random sample, so the
# 100-item slice is a prefix of that 500-item draw — confirm against the getter.
poquad_dataset = poquad_dataset_getter.get_random_n_test(500, DATASET_SEED)[:100]
polqa_dataset = polqa_dataset_getter.get_random_n_test(500, DATASET_SEED)[:100]

In [6]:
# Top-n cut-offs used by the evaluation: a question counts as a hit at n when
# the correct passage id is among the first n retrieved passages.
ns = [1, 5]

In [11]:
def evaluate_retrievers_top_n(retriever_functions, dataset, ns):
    """Record, per retriever and per cut-off n, whether each question was a top-n hit.

    Args:
        retriever_functions: Iterable of zero-argument callables, each returning
            a ``(retriever, retriever_name)`` pair.
        dataset: Iterable of items exposing ``question`` and ``passage_id``.
        ns: Iterable of top-n cut-offs.

    Returns:
        ``{retriever_name: {n: [bool, ...]}}`` where each list holds one flag
        per dataset item, in dataset order. A flag is True when the item's
        ``passage_id`` is among the ids of the first ``n`` retrieved passages.
    """
    results = {}
    for get_retriever in retriever_functions:
        retriever, retriever_name = get_retriever()
        # Dict keys keep the order of ns and collapse duplicate cut-offs.
        hits_by_n = {n: [] for n in ns}
        if hits_by_n:
            for item in dataset:
                # Retrieve once per question; the ranking does not depend on n,
                # so every cut-off is served by slicing the same id list.
                retriever_result = retriever.get_relevant_passages(item.question)
                ranked_ids = [passage.id for (passage, _) in retriever_result.passages]
                for n, hits in hits_by_n.items():
                    hits.append(item.passage_id in ranked_ids[:n])
        results[retriever_name] = hits_by_n
    return results

In [12]:
# Run the top-n evaluation for every retriever group. Both PoQuAD groups share
# poquad_dataset and both PolQA groups share polqa_dataset. Each call builds its
# retrievers through the factory functions, so this cell loads every model.
retriever_evaluation_results = {
    "poquad": evaluate_retrievers_top_n(poquad_retriever_functions, poquad_dataset, ns),
    "poquad_openai": evaluate_retrievers_top_n(poquad_openai_retriever_functions, poquad_dataset, ns),
    "polqa": evaluate_retrievers_top_n(polqa_retriever_functions, polqa_dataset, ns),
    "polqa_openai": evaluate_retrievers_top_n(polqa_openai_retriever_functions, polqa_dataset, ns),
}

Vectorizer with model intfloat/multilingual-e5-large initialized
Qdrant collection intfloat-multilingual-e5-large-Cosine repository initialized
Vectorizer with model sdadas/polish-reranker-large-ranknet initialized
Vectorizer with model sdadas/polish-reranker-large-ranknet initialized
Vectorizer with model sdadas/mmlw-retrieval-roberta-large initialized
Qdrant collection sdadas-mmlw-retrieval-roberta-large-Euclid repository initialized
Vectorizer with model sdadas/mmlw-retrieval-roberta-large initialized
Qdrant collection sdadas-mmlw-retrieval-roberta-large-Euclid repository initialized
Vectorizer with model text-embedding-3-large initialized
Qdrant openai collection text-embedding-3-large-Cosine repository initialized
Vectorizer with model text-embedding-3-large initialized
Qdrant openai collection text-embedding-3-large-Cosine repository initialized
Vectorizer with model text-embedding-3-large initialized
Qdrant openai collection text-embedding-3-large-Cosine repository initialized
V

In [13]:
# save results as csv
import csv


def save_retriever_scores_to_csv(results, filename):
    """Write aggregated hit rates to a CSV file.

    Args:
        results: Nested mapping ``{dataset_name: {retriever_name: {n: hits}}}``
            where ``hits`` is a sequence of booleans (one per question).
        filename: Destination path; an existing file is overwritten.

    The file has the columns ``dataset, retriever, n, hit_rate``. The hit rate
    is written with a decimal comma (``0,5``), so the csv writer quotes that
    field. An empty ``hits`` sequence yields an empty ``hit_rate`` cell instead
    of aborting the export with a ZeroDivisionError.
    """
    # newline="" is required by the csv module so that it controls line endings
    # itself; without it every row is followed by a blank line on Windows.
    with open(filename, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["dataset", "retriever", "n", "hit_rate"])

        for dataset_name, dataset_results in results.items():
            for retriever_name, retriever_results in dataset_results.items():
                for n, hits in retriever_results.items():
                    hit_rate = sum(hits) / len(hits) if hits else ""
                    writer.writerow(
                        [
                            dataset_name,
                            retriever_name,
                            str(n),
                            str(hit_rate).replace(".", ","),
                        ]
                    )

In [14]:
# Persist the aggregated hit rates; the path is relative to the kernel's working directory.
save_retriever_scores_to_csv(retriever_evaluation_results, "../../output/retriever_evaluation_scores.csv")

In [17]:
def generate_detailed_question_results(retriever_functions, dataset, ns, dataset_name):
    """Build a per-question table showing which retrievers found the correct passage.

    Args:
        retriever_functions: Iterable of zero-argument callables, each returning
            a ``(retriever, retriever_name)`` pair.
        dataset: Iterable of items exposing ``question`` and ``passage_id``.
        ns: Sequence of top-n cut-offs (iterated more than once).
        dataset_name: Prefix used for the generated question ids.

    Returns:
        A list of rows. The first row is the header: ``question_id``,
        ``question_text``, ``correct_passage_id`` followed by one
        ``<retriever_name>_n<n>`` column per retriever/cut-off pair. Every
        other row describes one question, with "TRUE"/"FALSE" in the
        retriever columns.
    """
    # Load every retriever exactly once. The header only needs the names, which
    # come from this same call, so no second instantiation of the (heavy)
    # models is required.
    retrievers = []
    for get_retriever in retriever_functions:
        print("Loading retriever...")
        retriever, retriever_name = get_retriever()
        retrievers.append((retriever, retriever_name))
        print(f"Loaded: {retriever_name}")

    header = ["question_id", "question_text", "correct_passage_id"]
    for _, retriever_name in retrievers:
        for n in ns:
            header.append(f"{retriever_name}_n{n}")

    results = [header]

    for i, item in enumerate(dataset):
        question_id = f"{dataset_name}_q{i+1}"
        question_text = item.question
        correct_passage_id = item.passage_id

        row = [question_id, question_text, correct_passage_id]

        for retriever, retriever_name in retrievers:
            # One retrieval per question and retriever; each cut-off is a slice
            # of the same ranking.
            retriever_result = retriever.get_relevant_passages(question_text)
            ranked_ids = [passage.id for (passage, _) in retriever_result.passages]

            for n in ns:
                found = correct_passage_id in ranked_ids[:n]
                row.append(str(found).upper())

        results.append(row)

        if (i + 1) % 10 == 0:
            print(f"Processed {i + 1} questions for {dataset_name}")

    return results


def save_detailed_results_to_csv(results, filename):
    """Write the question-level result rows to ``filename`` as UTF-8 CSV.

    Args:
        results: Iterable of rows (header row included), written in order.
        filename: Destination path; an existing file is overwritten.
    """
    with open(filename, "w", newline="", encoding="utf-8") as csv_file:
        csv.writer(csv_file).writerows(results)

In [19]:
# Generate detailed (per-question) results for each dataset/retriever group and
# write one CSV per group. Every generate_detailed_question_results call
# instantiates its retrievers through the factory functions, so this cell is
# slow to run.
print("Generating detailed question-level results...")

# PoQuAD dataset
print("Processing PoQuAD dataset...")
poquad_detailed = generate_detailed_question_results(
    poquad_retriever_functions, poquad_dataset, ns, "poquad"
)
save_detailed_results_to_csv(poquad_detailed, "../../output/poquad_detailed_results.csv")

# PoQuAD questions with the OpenAI-embedding retrievers
print("Processing PoQuAD OpenAI dataset...")
poquad_openai_detailed = generate_detailed_question_results(
    poquad_openai_retriever_functions, poquad_dataset, ns, "poquad_openai"
)
save_detailed_results_to_csv(poquad_openai_detailed, "../../output/poquad_openai_detailed_results.csv")

# PolQA dataset
print("Processing PolQA dataset...")
polqa_detailed = generate_detailed_question_results(
    polqa_retriever_functions, polqa_dataset, ns, "polqa"
)
save_detailed_results_to_csv(polqa_detailed, "../../output/polqa_detailed_results.csv")

# PolQA questions with the OpenAI-embedding retrievers
print("Processing PolQA OpenAI dataset...")
polqa_openai_detailed = generate_detailed_question_results(
    polqa_openai_retriever_functions, polqa_dataset, ns, "polqa_openai"
)
save_detailed_results_to_csv(polqa_openai_detailed, "../../output/polqa_openai_detailed_results.csv")

print("All detailed results saved to CSV files!")

Generating detailed question-level results...
Processing PoQuAD dataset...
Vectorizer with model intfloat/multilingual-e5-large initialized
Qdrant collection intfloat-multilingual-e5-large-Cosine repository initialized
Vectorizer with model intfloat/multilingual-e5-large initialized
Qdrant collection intfloat-multilingual-e5-large-Cosine repository initialized


KeyboardInterrupt: 

In [None]:
# Import the generators and model names from notebook 02
from common.names import INST_MODEL_PATHS, QA_MODEL_NAMES
from generators.instruction_generator import InstructionGenerator
from generators.openai_generator import OpenAIGenerator
from generators.question_answering_generator import QuestionAnsweringGenerator


def generate_manual_evaluation_file(retriever, retriever_name, generator, generator_name, generator_type, dataset, dataset_name, n):
    """Collect generated answers for manual grading.

    For every dataset item the top ``n`` retrieved passages are passed to
    ``generator.generate_answer`` and one row is produced containing the
    question, its id, whether the correct passage was among those ``n``
    passages ("TRUE"/"FALSE"), the generated answer, the reference answer(s)
    and an empty ``result`` cell to be filled in by hand.

    Returns:
        ``(metadata_header, header, results)``: the ``# KEY: value`` comment
        lines (ending with an empty line), the CSV column names and the rows.
    """
    metadata_header = [
        f"# RETRIEVER: {retriever_name}",
        f"# GENERATOR: {generator_name}",
        f"# TYPE: {generator_type}",
        f"# DATASET: {dataset_name}",
        f"# TOP_N: {n}",
        "",  # blank separator line before the CSV part
    ]
    header = ["question", "question_id", "hasCorrectPassages", "answer", "correct_answer", "result"]

    results = []
    for position, item in enumerate(dataset, start=1):
        question_text = item.question
        expected_passage_id = item.passage_id
        reference_answers = item.answers

        retrieval = retriever.get_relevant_passages(question_text)
        top_n_passages = [passage for (passage, _) in retrieval.passages][:n]
        hit_flag = str(expected_passage_id in [passage.id for passage in top_n_passages]).upper()

        answer = generator.generate_answer(question_text, top_n_passages)

        # Several reference answers are shown side by side, separated by " | ".
        correct_answer_text = (
            " | ".join(reference_answers)
            if isinstance(reference_answers, list)
            else str(reference_answers)
        )

        # The trailing empty string is the 'result' cell for the human grader.
        results.append(
            [question_text, f"{dataset_name}_q{position}", hit_flag, answer, correct_answer_text, ""]
        )

        if position % 10 == 0:
            print(f"Processed {position} questions for {generator_name} on {dataset_name}")

    return metadata_header, header, results


def save_manual_evaluation_file(metadata, header, results, filename):
    """Write a manual-evaluation file: metadata lines first, then the CSV table.

    Args:
        metadata: Iterable of text lines written verbatim, each followed by "\\n".
        header: CSV column names.
        results: Iterable of CSV rows.
        filename: Destination path; an existing file is overwritten.
    """
    with open(filename, "w", newline="", encoding="utf-8") as out:
        # Plain-text preamble, written outside the csv writer so it is never quoted.
        out.writelines(line + "\n" for line in metadata)

        table = csv.writer(out)
        table.writerow(header)
        table.writerows(results)


def create_safe_filename(retriever_name, generator_name, generator_type, dataset_name, n):
    """Create a file name for a manual-evaluation file from its configuration.

    "/" and "-" in the retriever and generator names are replaced by "_". The
    retriever part is limited to 50 characters and the generator part to 30.
    A name that fits its limit is used as is. A longer name is cut and ends
    with "_<8 hex digits>" derived from the full name, so two long names that
    share a prefix no longer map to the same file (plain truncation made the
    second file overwrite the first).

    Returns:
        ``manual_eval_<dataset>_<retriever>_<generator>_<type>_n<n>.csv``
    """
    import hashlib  # local import: this cell does not otherwise depend on it

    def _safe_component(name, limit):
        # Sanitise one name and keep it within `limit` characters without losing uniqueness.
        cleaned = name.replace("/", "_").replace("-", "_")
        if len(cleaned) <= limit:
            return cleaned
        digest = hashlib.sha256(name.encode("utf-8")).hexdigest()[:8]
        return f"{cleaned[: limit - len(digest) - 1]}_{digest}"

    safe_retriever = _safe_component(retriever_name, 50)
    safe_generator = _safe_component(generator_name, 30)

    return f"manual_eval_{dataset_name}_{safe_retriever}_{safe_generator}_{generator_type}_n{n}.csv"

In [21]:
# Create the output directory for manual evaluation files (including missing
# parents). exist_ok=True makes the cell safe to re-run.
import os
os.makedirs("../../output/manual_eval", exist_ok=True)

In [None]:
# Generate manual evaluation files for all combinations used in notebook 02
print("Generating manual evaluation files for all combinations...")

# We'll use n=5 for manual evaluation as it includes more context
manual_eval_n = 5
manual_eval_dir = "../../output/manual_eval"


def _local_generator_specs():
    """List the locally hosted generators as (log label, name, type, factory) tuples.

    Each factory is a zero-argument callable, so a model is only instantiated
    right before it is used and once per retriever, as in the inline loops this
    replaces.
    """
    specs = []
    for qa_model_name in QA_MODEL_NAMES:
        # Bind the current name as a default argument to avoid late binding.
        specs.append(
            ("QA model", qa_model_name, "QA",
             lambda name=qa_model_name: QuestionAnsweringGenerator(name, cache))
        )
    for inst_model_path in INST_MODEL_PATHS:
        specs.append(
            ("Instruction model", inst_model_path, "INST",
             lambda path=inst_model_path: InstructionGenerator(path, cache))
        )
    return specs


def _run_manual_evaluation(section_title, retriever_functions, generator_specs, dataset, dataset_name, n, output_dir):
    """Write one manual-evaluation file per (retriever, generator) combination.

    Args:
        section_title: Text shown in the "=== Processing ... dataset ===" banner.
        retriever_functions: Callables returning ``(retriever, retriever_name)``.
        generator_specs: Tuples ``(log label, generator name, generator type, factory)``.
        dataset: Items to evaluate.
        dataset_name: Name used in question ids, metadata and file names.
        n: Number of top passages handed to the generator.
        output_dir: Directory receiving the files (must already exist).
    """
    print(f"\n=== Processing {section_title} dataset ===")
    for retriever_func in retriever_functions:
        retriever, retriever_name = retriever_func()
        print(f"Processing retriever: {retriever_name}")

        for log_label, generator_name, generator_type, make_generator in generator_specs:
            print(f"  Generating with {log_label}: {generator_name}")
            generator = make_generator()

            metadata, header, results = generate_manual_evaluation_file(
                retriever, retriever_name, generator, generator_name, generator_type,
                dataset, dataset_name, n
            )

            filename = create_safe_filename(retriever_name, generator_name, generator_type, dataset_name, n)
            save_manual_evaluation_file(metadata, header, results, f"{output_dir}/{filename}")


local_generator_specs = _local_generator_specs()
openai_generator_specs = [
    ("OpenAI model", "gpt-4o-mini", "INST", lambda: OpenAIGenerator(cache)),
]

# PoQuAD dataset combinations
_run_manual_evaluation(
    "PoQuAD", poquad_retriever_functions, local_generator_specs,
    poquad_dataset, "poquad", manual_eval_n, manual_eval_dir,
)

# PoQuAD OpenAI combinations
_run_manual_evaluation(
    "PoQuAD OpenAI", poquad_openai_retriever_functions, openai_generator_specs,
    poquad_dataset, "poquad_openai", manual_eval_n, manual_eval_dir,
)

# PolQA dataset combinations
_run_manual_evaluation(
    "PolQA", polqa_retriever_functions, local_generator_specs,
    polqa_dataset, "polqa", manual_eval_n, manual_eval_dir,
)

# PolQA OpenAI combinations
_run_manual_evaluation(
    "PolQA OpenAI", polqa_openai_retriever_functions, openai_generator_specs,
    polqa_dataset, "polqa_openai", manual_eval_n, manual_eval_dir,
)

print("\n✅ All manual evaluation files generated successfully!")
print("Files saved in: ../../output/manual_eval/")
print("Each file contains:")
print("- Metadata header with retriever, generator, type, dataset, and n info")
print("- CSV with columns: question, question_id, hasCorrectPassages, answer, correct_answer, result")
print("- Empty 'result' column for your manual evaluation")

Generating manual evaluation files for all combinations...

=== Processing PoQuAD dataset ===
Vectorizer with model intfloat/multilingual-e5-large initialized
Qdrant collection intfloat-multilingual-e5-large-Cosine repository initialized
Vectorizer with model intfloat/multilingual-e5-large initialized
Qdrant collection intfloat-multilingual-e5-large-Cosine repository initialized
Vectorizer with model sdadas/polish-reranker-large-ranknet initialized
Processing retriever: morfologik_index-intfloat/multilingual-e5-large-Cosine-clarin-pl-poquad-100000-0.5-sdadas/polish-reranker-large-ranknet
  Generating with QA model: radlab/polish-qa-v2
Vectorizer with model sdadas/polish-reranker-large-ranknet initialized
Processing retriever: morfologik_index-intfloat/multilingual-e5-large-Cosine-clarin-pl-poquad-100000-0.5-sdadas/polish-reranker-large-ranknet
  Generating with QA model: radlab/polish-qa-v2


Device set to use mps


Processed 10 questions for radlab/polish-qa-v2 on poquad
Processed 20 questions for radlab/polish-qa-v2 on poquad
Processed 30 questions for radlab/polish-qa-v2 on poquad
Processed 40 questions for radlab/polish-qa-v2 on poquad
Processed 50 questions for radlab/polish-qa-v2 on poquad
Processed 60 questions for radlab/polish-qa-v2 on poquad
Processed 70 questions for radlab/polish-qa-v2 on poquad
Processed 80 questions for radlab/polish-qa-v2 on poquad
Processed 90 questions for radlab/polish-qa-v2 on poquad
Processed 100 questions for radlab/polish-qa-v2 on poquad
  Generating with QA model: timpal0l/mdeberta-v3-base-squad2
Processed 80 questions for radlab/polish-qa-v2 on poquad
Processed 90 questions for radlab/polish-qa-v2 on poquad
Processed 100 questions for radlab/polish-qa-v2 on poquad
  Generating with QA model: timpal0l/mdeberta-v3-base-squad2


Device set to use mps


Processed 10 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
Processed 20 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
Processed 30 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
Processed 40 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
Processed 50 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
Processed 60 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
Processed 70 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
Processed 80 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
Processed 90 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
Processed 100 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
  Generating with Instruction model: ../../models/Bielik-11B-v2.2-Instruct-q4
Processed 10 questions for ../../models/Bielik-11B-v2.2-Instruct-q4 on poquad
Processed 20 questions for ../../models/Bielik-11B-v2.2-Instruct-q4 on poquad
Processed 30 questions for ../../models/Bielik-11B-v2.2-Instruct-

Device set to use mps


Processed 10 questions for radlab/polish-qa-v2 on poquad
Processed 20 questions for radlab/polish-qa-v2 on poquad
Processed 30 questions for radlab/polish-qa-v2 on poquad
Processed 40 questions for radlab/polish-qa-v2 on poquad
Processed 50 questions for radlab/polish-qa-v2 on poquad
Processed 60 questions for radlab/polish-qa-v2 on poquad
Processed 70 questions for radlab/polish-qa-v2 on poquad
Processed 80 questions for radlab/polish-qa-v2 on poquad
Processed 90 questions for radlab/polish-qa-v2 on poquad
Processed 100 questions for radlab/polish-qa-v2 on poquad
  Generating with QA model: timpal0l/mdeberta-v3-base-squad2


Device set to use mps


Processed 10 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
Processed 20 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
Processed 30 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
Processed 40 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
Processed 50 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
Processed 60 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
Processed 70 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
Processed 80 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
Processed 90 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
Processed 100 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
  Generating with Instruction model: ../../models/Bielik-11B-v2.2-Instruct-q4
Processed 10 questions for ../../models/Bielik-11B-v2.2-Instruct-q4 on poquad
Processed 20 questions for ../../models/Bielik-11B-v2.2-Instruct-q4 on poquad
Processed 30 questions for ../../models/Bielik-11B-v2.2-Instruct-

Device set to use mps


Processed 10 questions for radlab/polish-qa-v2 on poquad
Processed 20 questions for radlab/polish-qa-v2 on poquad
Processed 30 questions for radlab/polish-qa-v2 on poquad
Processed 40 questions for radlab/polish-qa-v2 on poquad
Processed 50 questions for radlab/polish-qa-v2 on poquad
Processed 60 questions for radlab/polish-qa-v2 on poquad
Processed 70 questions for radlab/polish-qa-v2 on poquad
Processed 80 questions for radlab/polish-qa-v2 on poquad
Processed 90 questions for radlab/polish-qa-v2 on poquad
Processed 100 questions for radlab/polish-qa-v2 on poquad
  Generating with QA model: timpal0l/mdeberta-v3-base-squad2


Device set to use mps


Processed 10 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
Processed 20 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
Processed 30 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
Processed 40 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
Processed 50 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
Processed 60 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
Processed 70 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
Processed 80 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
Processed 90 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
Processed 100 questions for timpal0l/mdeberta-v3-base-squad2 on poquad
  Generating with Instruction model: ../../models/Bielik-11B-v2.2-Instruct-q4
Processed 10 questions for ../../models/Bielik-11B-v2.2-Instruct-q4 on poquad
Processed 20 questions for ../../models/Bielik-11B-v2.2-Instruct-q4 on poquad
Processed 30 questions for ../../models/Bielik-11B-v2.2-Instruct-

Device set to use mps


Processed 10 questions for radlab/polish-qa-v2 on polqa
Processed 20 questions for radlab/polish-qa-v2 on polqa
Processed 30 questions for radlab/polish-qa-v2 on polqa
Processed 40 questions for radlab/polish-qa-v2 on polqa
Processed 50 questions for radlab/polish-qa-v2 on polqa
Processed 60 questions for radlab/polish-qa-v2 on polqa
Processed 70 questions for radlab/polish-qa-v2 on polqa
Processed 80 questions for radlab/polish-qa-v2 on polqa
Processed 90 questions for radlab/polish-qa-v2 on polqa
Processed 100 questions for radlab/polish-qa-v2 on polqa
  Generating with QA model: timpal0l/mdeberta-v3-base-squad2
Processed 80 questions for radlab/polish-qa-v2 on polqa
Processed 90 questions for radlab/polish-qa-v2 on polqa
Processed 100 questions for radlab/polish-qa-v2 on polqa
  Generating with QA model: timpal0l/mdeberta-v3-base-squad2


Device set to use mps


Processed 10 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
Processed 20 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
Processed 30 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
Processed 40 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
Processed 50 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
Processed 60 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
Processed 70 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
Processed 80 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
Processed 90 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
Processed 100 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
  Generating with Instruction model: ../../models/Bielik-11B-v2.2-Instruct-q4
Processed 10 questions for ../../models/Bielik-11B-v2.2-Instruct-q4 on polqa
Processed 20 questions for ../../models/Bielik-11B-v2.2-Instruct-q4 on polqa
Processed 30 questions for ../../models/Bielik-11B-v2.2-Instruct-q4 on polqa


Device set to use mps


Processed 10 questions for radlab/polish-qa-v2 on polqa
Processed 20 questions for radlab/polish-qa-v2 on polqa
Processed 30 questions for radlab/polish-qa-v2 on polqa
Processed 40 questions for radlab/polish-qa-v2 on polqa
Processed 50 questions for radlab/polish-qa-v2 on polqa
Processed 60 questions for radlab/polish-qa-v2 on polqa
Processed 70 questions for radlab/polish-qa-v2 on polqa
Processed 80 questions for radlab/polish-qa-v2 on polqa
Processed 90 questions for radlab/polish-qa-v2 on polqa
Processed 100 questions for radlab/polish-qa-v2 on polqa
  Generating with QA model: timpal0l/mdeberta-v3-base-squad2


Device set to use mps


Processed 10 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
Processed 20 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
Processed 30 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
Processed 40 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
Processed 50 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
Processed 60 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
Processed 70 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
Processed 80 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
Processed 90 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
Processed 100 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
  Generating with Instruction model: ../../models/Bielik-11B-v2.2-Instruct-q4
Processed 10 questions for ../../models/Bielik-11B-v2.2-Instruct-q4 on polqa
Processed 20 questions for ../../models/Bielik-11B-v2.2-Instruct-q4 on polqa
Processed 30 questions for ../../models/Bielik-11B-v2.2-Instruct-q4 on polqa


Device set to use mps


Processed 10 questions for radlab/polish-qa-v2 on polqa
Processed 20 questions for radlab/polish-qa-v2 on polqa
Processed 30 questions for radlab/polish-qa-v2 on polqa
Processed 40 questions for radlab/polish-qa-v2 on polqa
Processed 50 questions for radlab/polish-qa-v2 on polqa
Processed 60 questions for radlab/polish-qa-v2 on polqa
Processed 70 questions for radlab/polish-qa-v2 on polqa
Processed 40 questions for radlab/polish-qa-v2 on polqa
Processed 50 questions for radlab/polish-qa-v2 on polqa
Processed 60 questions for radlab/polish-qa-v2 on polqa
Processed 70 questions for radlab/polish-qa-v2 on polqa
Processed 80 questions for radlab/polish-qa-v2 on polqa
Processed 90 questions for radlab/polish-qa-v2 on polqa
Processed 100 questions for radlab/polish-qa-v2 on polqa
  Generating with QA model: timpal0l/mdeberta-v3-base-squad2
Processed 80 questions for radlab/polish-qa-v2 on polqa
Processed 90 questions for radlab/polish-qa-v2 on polqa
Processed 100 questions for radlab/polish-

Device set to use mps


Processed 10 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
Processed 20 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
Processed 30 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
Processed 40 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
Processed 50 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
Processed 60 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
Processed 70 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
Processed 80 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
Processed 90 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
Processed 100 questions for timpal0l/mdeberta-v3-base-squad2 on polqa
  Generating with Instruction model: ../../models/Bielik-11B-v2.2-Instruct-q4
Processed 10 questions for ../../models/Bielik-11B-v2.2-Instruct-q4 on polqa
Processed 20 questions for ../../models/Bielik-11B-v2.2-Instruct-q4 on polqa
Processed 30 questions for ../../models/Bielik-11B-v2.2-Instruct-q4 on polqa


KeyboardInterrupt: 