# This notebook shows the different ways `rank_eval` provides to create `Qrels` and `Run` objects

In [1]:
from rank_eval import Qrels, Run

## 1. Query-by-Query (iterative)

In [2]:
# Start from an empty Qrels and register the queries one at a time
qrels = Qrels()
judgements_by_query = [
    # (query id, judged documents, relevance judgements -- integers only)
    ("q_1", ["doc_12", "doc_25"], [5, 3]),
    ("q_2", ["doc_11", "doc_2"], [6, 1]),
]
for query_id, judged_docs, relevance in judgements_by_query:
    # One `add` call per query
    qrels.add(q_id=query_id, doc_ids=judged_docs, scores=relevance)
print(qrels.qrels)

{q_1: {doc_12: 5, doc_25: 3}, q_2: {doc_11: 6, doc_2: 1}}


In [3]:
# Start from an empty Run and register the queries one at a time
run = Run()
rankings_by_query = [
    # (query id, retrieved documents, retrieval scores -- floats only)
    (
        "q_1",
        ["doc_12", "doc_23", "doc_25", "doc_36", "doc_32", "doc_35"],
        [0.9, 0.8, 0.7, 0.6, 0.5, 0.4],
    ),
    (
        "q_2",
        ["doc_12", "doc_11", "doc_25", "doc_36", "doc_2", "doc_35"],
        [0.9, 0.8, 0.7, 0.6, 0.5, 0.4],
    ),
]
for query_id, retrieved_docs, retrieval_scores in rankings_by_query:
    # One `add` call per query
    run.add(q_id=query_id, doc_ids=retrieved_docs, scores=retrieval_scores)
print(run.run)

{q_1: {doc_12: 0.9, doc_23: 0.8, doc_25: 0.7, doc_36: 0.6, doc_32: 0.5, doc_35: 0.4}, q_2: {doc_12: 0.9, doc_11: 0.8, doc_25: 0.7, doc_36: 0.6, doc_2: 0.5, doc_35: 0.4}}


## 2. Bulk import

In [4]:
# The three lists below are aligned by position: entry i of each list
# belongs to the query at position i of `query_ids`.
query_ids = ["q_1", "q_2"]
relevant_docs = [
    ["doc_12", "doc_25"],  # relevant documents of q_1
    ["doc_11", "doc_2"],  # relevant documents of q_2
]
relevance_judgements = [
    [5, 3],  # relevance judgements of q_1
    [6, 1],  # relevance judgements of q_2
]

# Fill an empty Qrels with all the queries in a single call
qrels = Qrels()
qrels.add_multi(
    q_ids=query_ids,
    doc_ids=relevant_docs,
    scores=relevance_judgements,
)
print(qrels.qrels)

{q_1: {doc_12: 5, doc_25: 3}, q_2: {doc_11: 6, doc_2: 1}}


In [5]:
# The three lists below are aligned by position: entry i of each list
# belongs to the query at position i of `query_ids`.
query_ids = ["q_1", "q_2"]
retrieved_docs = [
    ["doc_12", "doc_23", "doc_25", "doc_36", "doc_32", "doc_35"],  # q_1
    ["doc_12", "doc_11", "doc_25", "doc_36", "doc_2", "doc_35"],  # q_2
]
retrieval_scores = [
    [0.9, 0.8, 0.7, 0.6, 0.5, 0.4],  # scores of the documents retrieved for q_1
    [0.9, 0.8, 0.7, 0.6, 0.5, 0.4],  # scores of the documents retrieved for q_2
]

# Fill an empty Run with all the queries in a single call
run = Run()
run.add_multi(
    q_ids=query_ids,
    doc_ids=retrieved_docs,
    scores=retrieval_scores,
)
print(run.run)

{q_1: {doc_12: 0.9, doc_23: 0.8, doc_25: 0.7, doc_36: 0.6, doc_32: 0.5, doc_35: 0.4}, q_2: {doc_12: 0.9, doc_11: 0.8, doc_25: 0.7, doc_36: 0.6, doc_2: 0.5, doc_35: 0.4}}


## 3. From Python Dictionary

In [6]:
# Nested mapping: query id -> {document id -> relevance judgement}
qrels_dict = {
    "q_1": {"doc_12": 5, "doc_25": 3},
    "q_2": {"doc_11": 6, "doc_2": 1},
}

# Build the Qrels directly from the dictionary
qrels = Qrels.from_dict(qrels_dict)
print(qrels.qrels)

{q_1: {doc_12: 5, doc_25: 3}, q_2: {doc_11: 6, doc_2: 1}}


In [7]:
# Nested mapping: query id -> {document id -> retrieval score}
run_dict = {
    "q_1": {
        "doc_12": 0.9, "doc_23": 0.8, "doc_25": 0.7,
        "doc_36": 0.6, "doc_32": 0.5, "doc_35": 0.4,
    },
    "q_2": {
        "doc_12": 0.9, "doc_11": 0.8, "doc_25": 0.7,
        "doc_36": 0.6, "doc_2": 0.5, "doc_35": 0.4,
    },
}

# Build the Run directly from the dictionary
run = Run.from_dict(run_dict)
print(run.run)

{q_1: {doc_12: 0.9, doc_23: 0.8, doc_25: 0.7, doc_36: 0.6, doc_32: 0.5, doc_35: 0.4}, q_2: {doc_12: 0.9, doc_11: 0.8, doc_25: 0.7, doc_36: 0.6, doc_2: 0.5, doc_35: 0.4}}


## 4. From Pandas DataFrame

In [8]:
from pandas import DataFrame

In [9]:
# Long-format table: one row per (query, document, relevance judgement)
qrels_columns = {
    "q_id": ["q_1", "q_1", "q_2", "q_2"],
    "doc_id": ["doc_12", "doc_25", "doc_11", "doc_2"],
    "score": [5, 3, 6, 1],
}
qrels_df = DataFrame(qrels_columns)

# Name the columns holding the query ids, the document ids and the scores
qrels = Qrels.from_df(
    df=qrels_df, q_id_col="q_id", doc_id_col="doc_id", score_col="score"
)
print(qrels.qrels)

{q_1: {doc_12: 5, doc_25: 3}, q_2: {doc_11: 6, doc_2: 1}}


In [11]:
# Long-format table: one row per (query, document, retrieval score)
run_columns = {
    # Six retrieved documents for each of the two queries
    "q_id": ["q_1"] * 6 + ["q_2"] * 6,
    "doc_id": [
        *["doc_12", "doc_23", "doc_25", "doc_36", "doc_32", "doc_35"],  # q_1
        *["doc_12", "doc_11", "doc_25", "doc_36", "doc_2", "doc_35"],  # q_2
    ],
    "score": [
        *[0.9, 0.8, 0.7, 0.6, 0.5, 0.4],  # q_1
        *[0.9, 0.8, 0.7, 0.6, 0.5, 0.4],  # q_2
    ],
}
run_df = DataFrame(run_columns)

# Name the columns holding the query ids, the document ids and the scores
run = Run.from_df(
    df=run_df, q_id_col="q_id", doc_id_col="doc_id", score_col="score"
)
print(run.run)

{q_1: {doc_12: 0.9, doc_23: 0.8, doc_25: 0.7, doc_36: 0.6, doc_32: 0.5, doc_35: 0.4}, q_2: {doc_12: 0.9, doc_11: 0.8, doc_25: 0.7, doc_36: 0.6, doc_2: 0.5, doc_35: 0.4}}


## 5. From TREC-Style file

In [18]:
# Location of the example qrels file (relative path)
qrels_path = "examples/data/create_qrels_and_run/qrels.txt"
# Build the Qrels from the file
qrels = Qrels.from_file(qrels_path)
print(qrels.qrels)

{q_1: {doc_12: 5, doc_25: 3}, q_2: {doc_11: 6, doc_2: 1}}


In [19]:
# Location of the example run file (relative path)
run_path = "examples/data/create_qrels_and_run/run.txt"
# Build the Run from the file
run = Run.from_file(run_path)
print(run.run)

{q_1: {doc_12: 0.9, doc_23: 0.8, doc_25: 0.7, doc_36: 0.6, doc_32: 0.5, doc_35: 0.4}, q_2: {doc_12: 0.9, doc_11: 0.8, doc_25: 0.7, doc_36: 0.6, doc_2: 0.5, doc_35: 0.4}}
