### Defining the Dataset

In [1]:
# Toy corpus: one sentence about a cat plus four near-identical "city" sentences.
# Each entry is a dict with the raw text and a 1-based integer id assigned in
# listing order.
documents = [
    {"text": text, "id": doc_id}
    for doc_id, text in enumerate(
        (
            "A cat is an animal.",
            "The city of new york is big.",
            "The city of chicago is big.",
            "The city of gentofte is big.",
            "The city of copenhagen is big.",
        ),
        start=1,
    )
]

### Defining the Models

In [2]:
from models.builers.retriever import Retriever
from models.tfidf import TFIDF
from models.dpr import DPR
from models.dpr_crossencoder import DPRCrossencoder

# Retrievers under comparison, keyed by the label that is printed next to each
# model's results. All three are built from the same `documents` corpus.
# The annotation uses `dict[str, Retriever]` (comma): `dict[str: Retriever]`
# would subscript `dict` with a slice object rather than a key/value type pair.
# NOTE(review): `n=25` is only passed to the cross-encoder; presumably the size
# of the candidate pool that gets re-ranked — confirm against DPRCrossencoder.
models: dict[str, Retriever] = {
    "TF-IDF": TFIDF(documents=documents),
    "DPR": DPR(documents=documents),
    "Crossencoder": DPRCrossencoder(documents=documents, n=25),
}

  from .autonotebook import tqdm as notebook_tqdm


### Performing the Experiment

In [5]:
from data.document import Document

# A deliberately mixed query: it mentions "new york" as well as cats/animals,
# so it overlaps lexically with more than one document in the corpus.
query = "The city of new york might be cool but not as cool as a cat, cats are a pretty cool animal. Animals are not all cats."
# Number of documents requested from each retriever (the corpus holds 5).
k = 5

# Send the same query to every retriever and print the returned texts in the
# order the retriever produced them, preceded by the retriever's label.
for model_type, model in models.items():
    print(model_type)
    result: list[Document] = model.Lookup(query=query, k=k)
    print([document.GetText() for document in result])

TF-IDF
['A cat is an animal.', 'The city of new york is big.', 'The city of chicago is big.', 'The city of gentofte is big.', 'The city of copenhagen is big.']
DPR
['The city of chicago is big.', 'The city of copenhagen is big.', 'The city of new york is big.', 'A cat is an animal.', 'The city of gentofte is big.']
Crossencoder
['The city of new york is big.', 'A cat is an animal.', 'The city of gentofte is big.', 'The city of copenhagen is big.', 'The city of chicago is big.']
