# Evaluate application

> Define metrics and evaluate query models

## Example setup

Connect to the application and define a query model.

In [1]:
from vespa.application import Vespa
from vespa.query import QueryModel, RankProfile, OR

# Client for the Vespa application served at api.cord19.vespa.ai.
app = Vespa(url="https://api.cord19.vespa.ai")

# Query model used by the evaluations below: OR match phase, ranked with the
# "bm25" rank profile and list_features switched on.
query_model = QueryModel(
    match_phase=OR(),
    rank_profile=RankProfile(name="bm25", list_features=True),
)

## Labeled data

Define some labeled data. `pyvespa` expects labeled data to follow the format illustrated below. It is a list of dicts, where each dict represents a query and contains `query_id`, `query`, and a list of `relevant_docs`. Each relevant document contains a required `id` key and an optional `score` key.

In [2]:
# One dict per labeled query. Every entry of "relevant_docs" carries the
# required "id" key and, here, the optional "score" key as well.
labeled_data = [
    dict(
        query_id=0,
        query="Intrauterine virus infections and congenital heart disease",
        relevant_docs=[dict(id=0, score=1), dict(id=3, score=1)],
    ),
    dict(
        query_id=1,
        query="Clinical and immunologic studies in identical twins discordant for systemic lupus erythematosus",
        relevant_docs=[dict(id=1, score=1), dict(id=5, score=1)],
    ),
]

## Define metrics

In [3]:
from vespa.evaluation import MatchRatio, Recall, ReciprocalRank

# Metrics applied to every query: match ratio, plus recall and reciprocal
# rank, both configured with at=10.
eval_metrics = [
    MatchRatio(),
    Recall(at=10),
    ReciprocalRank(at=10),
]

## Evaluate in batch

In [4]:
# Evaluate all labeled queries in a single call; "id" is the field passed as
# id_field for matching documents against relevant_docs (presumably the
# document id field in the application schema -- confirm against the schema).
evaluation = app.evaluate(
    labeled_data=labeled_data, eval_metrics=eval_metrics, query_model=query_model, id_field="id"
)

# Display the result as the cell output.
evaluation

Unnamed: 0,query_id,match_ratio_retrieved_docs,match_ratio_docs_available,match_ratio_value,recall_10_value,reciprocal_rank_10_value
0,0,251862,309201,0.814558,0.0,0
1,1,275957,309201,0.892484,0.0,0


## Evaluate specific query

> For finer control, use the `evaluate_query` method, which evaluates a single query at a time.

In [5]:
from pandas import concat, DataFrame

# Evaluate each labeled query on its own, collecting one result per query.
per_query_results = [
    app.evaluate_query(
        eval_metrics=eval_metrics,
        query_model=query_model,
        query_id=labeled_query["query_id"],
        query=labeled_query["query"],
        id_field="id",
        relevant_docs=labeled_query["relevant_docs"],
        default_score=0,
    )
    for labeled_query in labeled_data
]

# Assemble the per-query records into a single table and display it.
evaluation = DataFrame.from_records(per_query_results)
evaluation

Unnamed: 0,query_id,match_ratio_retrieved_docs,match_ratio_docs_available,match_ratio_value,recall_10_value,reciprocal_rank_10_value
0,0,251862,309201,0.814558,0.0,0
1,1,275957,309201,0.892484,0.0,0
