# Using 🍬 SuiteEval

# Setup

Set up a new environment if you're working locally! Otherwise, you can just run all cells.

In [None]:
%pip install suiteval pyterrier-pisa seaborn

In [None]:
from os.path import join
import re

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pyterrier_pisa import PisaIndex

from suiteeval import DatasetContext, NanoBEIR

### Visualisation

In [None]:
def plot_optimal(results: pd.DataFrame):
    """Scatter-plot the best-scoring (k1, b) pair for each dataset.

    For every value in the ``dataset`` column, the row with the highest
    ``nDCG@10`` is selected and drawn at its (``k1``, ``b``) coordinates,
    coloured by score and annotated with the dataset name and score.

    Args:
        results: Frame with ``dataset``, ``k1``, ``b`` and ``nDCG@10``
            columns. Rows whose ``nDCG@10`` is missing are ignored.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # ``idxmax`` returns index *labels*, so the frame needs a unique index for
    # ``.loc`` to pick exactly one row per dataset. Dropping missing scores
    # first also keeps a dataset with no valid score from producing a NaN label.
    scored = results.dropna(subset=['nDCG@10']).reset_index(drop=True)
    idx = scored.groupby('dataset')['nDCG@10'].idxmax()
    best = scored.loc[idx, ['dataset', 'k1', 'b', 'nDCG@10']].reset_index(drop=True)

    fig, ax = plt.subplots(figsize=(8, 6))
    sc = ax.scatter(best['k1'], best['b'], c=best['nDCG@10'], s=70)

    # Annotate with dataset names and nDCG@10
    for _, row in best.iterrows():
        ax.annotate(f"{row['dataset']} ({row['nDCG@10']:.3f})",
                    (row['k1'], row['b']),
                    xytext=(4, 4), textcoords='offset points')

    ax.set_xlabel('k1')
    ax.set_ylabel('b')
    ax.set_title('Best (k1, b) per dataset by nDCG@10')
    fig.colorbar(sc, ax=ax, label='nDCG@10')
    ax.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

## A Simple Experiment

Let's do a sweep on NanoBEIR to see how different BM25 parameters perform under different domains and query geometries.

We will use PyTerrier-PISA to quickly index and execute many queries.

### Setting up our Pipelines

🍬 SuiteEval requires one or more generator functions to create new indices and pipelines dynamically. Our pipeline generator will index a corpus and then yield a BM25 model for each point in our parameter space.
 
We are going to explore values of k1 (0.1 to 2.9) and b (0.0 to 0.9), both in steps of 0.1.

In [None]:
# Parameter grid for the sweep. np.arange excludes the stop value, so k1 runs
# from 0.1 to 2.9 and b from 0.0 to 0.9, both in steps of 0.1.
K1_VALUES = np.arange(0.1, 3.0, 0.1)
B_VALUES = np.arange(0.0, 1.0, 0.1)

def bm25_sweep(context: DatasetContext):
    """Index the dataset's corpus once, then yield one BM25 retriever per (k1, b).

    Args:
        context: Supplies ``path`` (where the index is written) and
            ``get_corpus_iter()`` (the documents to index).

    Yields:
        ``(retriever, name)`` pairs, one for every combination of
        ``K1_VALUES`` and ``B_VALUES``. The name has the form
        ``"BM25(k1=<k1>, b=<b>)"`` with one decimal place.
    """
    index = PisaIndex(join(context.path, "index.pisa")) # context.path is a temporary directory
    index.index(context.get_corpus_iter()) # get_corpus_iter gives us a pyterrier compatible generator of {docno, text} records

    for k1 in K1_VALUES:
        for b in B_VALUES:
            # PisaIndex.bm25 takes its saturation parameter as `k1`; plain
            # Python floats are passed rather than NumPy scalars.
            # When we yield a model and a name, this name is passed to PyTerrier
            yield index.bm25(k1=float(k1), b=float(b)), f"BM25(k1={k1:.1f}, b={b:.1f})"

Now we can execute our pipeline. Under the hood we are creating indexes and models dynamically and running benchmarks across all datasets.

In [None]:
# Call NanoBEIR with our pipeline generator and keep the outcome in `results`.
# Later cells read its 'model', 'dataset' and 'nDCG@10' columns.
results = NanoBEIR(bm25_sweep)

Let's parse our values from the model names.

In [None]:
# One pattern captures both parameters from names such as "BM25(k1=1.2, b=0.4)":
# group 1 is k1, group 2 is b.
name_pattern = re.compile(r'BM25\(k1=([0-9.]+), b=([0-9.]+)\)')

# Match every model name once; a name that does not fit the pattern gives None.
matches = results['model'].map(name_pattern.match)
unparsed = results.loc[matches.isna(), 'model'].unique()
if len(unparsed) > 0:
    # Fail with the offending names instead of an AttributeError on None.
    raise ValueError(f"Cannot parse BM25 parameters from model names: {list(unparsed)}")

results['k1'] = matches.map(lambda m: float(m.group(1))).astype(float)
results['b'] = matches.map(lambda m: float(m.group(2))).astype(float)

## Exploring our Results

Let's first look at the optimal values for each dataset.

In [None]:
# Plot the best (k1, b) pair per dataset, ranked by nDCG@10.
plot_optimal(results)

Now let's choose a few datasets which have highly variable document lengths to see how the parameter b affects nDCG.