## TREC Evaluation

### Goal

- Use Trec tools to evaluate search results from a vector database.

### Prerequisites

- This evaluation example depends on the infra and data pipeline from the `azure_postgresql` directory of this repo
- In particular, this requires the Postgres implementation of a vector index of the product docs found in `data/text/product_docs_embeddings.json`

### Disclaimer

- The following code relies on data based on the [MS Marco dataset](https://microsoft.github.io/msmarco/). However, the evaluation dataset is entirely fabricated for use of running a TREC Evaluation. The schema representation is correct, but the data is meaningless and TREC Evaluation results are not meaningful. To evaluate your own vector db's, either use the MS Marco dataset or find a dataset in your domain that can properly evaluate the indexing of your embedding data.

In [82]:
import os
from dotenv import load_dotenv
import openai

load_dotenv()

pg_host  = os.getenv("POSTGRESQL_HOST")
if pg_host is None or pg_host == "":
    print("POSTGRESQL_HOST environment variable not set.")
    exit()

pg_user  = os.getenv("POSTGRESQL_USERNAME")
if pg_user is None or pg_user == "":
    print("POSTGRESQL_USERNAME environment variable not set.")
    exit()

pg_password  = os.getenv("POSTGRESQL_PASSWORD")
if pg_password is None or pg_password == "":
    print("POSTGRESQL_PASSWORD environment variable not set.")
    exit()

db_name  = os.getenv("POSTGRESQL_DATABASE")
if db_name is None or db_name == "":
    print("POSTGRESQL_DATABASE environment variable not set.")
    exit()

aoai_endpoint  = os.getenv("AOAI_ENDPOINT")
if aoai_endpoint is None or aoai_endpoint == "":
    print("AOAI_ENDPOINT environment variable not set.")
    exit()

aoai_api_version  = os.getenv("AOAI_API_VERSION")
if aoai_api_version is None or aoai_api_version == "":
    print("AOAI_API_VERSION environment variable not set.")
    exit()

aoai_embedding_deployed_model  = os.getenv("AOAI_EMBEDDING_DEPLOYED_MODEL")
if aoai_embedding_deployed_model is None or aoai_embedding_deployed_model == "":
    print("AOAI_EMBEDDING_DEPLOYED_MODEL environment variable not set.")
    exit()

azure_openai_key  = os.getenv("AZURE_OPENAI_KEY")
if azure_openai_key is None or azure_openai_key == "":
    print("AZURE_OPENAI_KEY environment variable not set.")
    exit()

text_table_name = 'text_sample'
doc_table_name = 'doc_sample'
image_table_name = 'image_sample'

openai.api_type = "azure"
openai.api_key = azure_openai_key
openai.api_base = aoai_endpoint
openai.api_version = aoai_api_version

postgresql_params = {
    "host": pg_host,
    "port": "5432", 
    "dbname": db_name,
    "user": pg_user,
    "password": pg_password
}

In [83]:
import pandas as pd

queries_list = [
  {
    "query": "Which Azure resources are relevant for web hosting services?",
    "qid": 1,
    "relevant_docs": [1, 87, 44],
    "doc_relevance": [2, 1, 1]
  },
  {
    "query": "Which Azure resources are relevant for NoSQL databases?",
    "qid": 2,
    "relevant_docs": [6, 70, 52],
    "doc_relevance": [2, 1, 1]
  },
  {
    "query": "Which Azure resources are relevant for Security Analysis?",
    "qid": 2,
    "relevant_docs": [60, 34, 43],
    "doc_relevance": [1, 1, 1]
  }
]

qrel_df = pd.DataFrame(queries_list)

qrel_df = qrel_df[["qid", "relevant_docs", "doc_relevance"]].explode(["relevant_docs", "doc_relevance"]).reset_index(drop=True)

qrel_df["q0"] = "q0"

qrel_df.columns = ["query", "docid", "rel", "q0"]

qrel_df


Unnamed: 0,query,docid,rel,q0
0,1,1,2,q0
1,1,87,1,q0
2,1,44,1,q0
3,2,6,2,q0
4,2,70,1,q0
5,2,52,1,q0
6,2,60,1,q0
7,2,34,1,q0
8,2,43,1,q0


In [84]:
from psycopg2 import connect
from openai import AzureOpenAI
import pandas as pd

azure_oai_client = AzureOpenAI(
  api_key = azure_openai_key,  
  api_version = aoai_api_version,
  azure_endpoint = aoai_endpoint
)

def get_query_result_df(query):

  query_vector = azure_oai_client.embeddings.create(input = [query], model=aoai_embedding_deployed_model).data[0].embedding

  with connect(**postgresql_params) as connection:

      query_sql = f"""
      SELECT 
        id as docid
        ,'q0' as q0
        ,(content_vector <=> '{query_vector}') AS score
        ,rank() over (order by (content_vector <=> '{query_vector}') asc) as rank
        ,'pgvector' as system
      FROM {text_table_name}
      ORDER BY ((content_vector <=> '{query_vector}')) 
      LIMIT 10;"""

      df = pd.read_sql(query_sql,connection)

  return df

# query = 'Which Azure resources are relevant for web hosting services?'
# get_query_result_df(query)

dfs = []

for query in queries_list:

  temp = get_query_result_df(query.get("query"))

  temp["query"] = query.get("qid")
   
  dfs.append(temp)

run_df = pd.concat(dfs)
   
run_df

  df = pd.read_sql(query_sql,connection)
  df = pd.read_sql(query_sql,connection)
  df = pd.read_sql(query_sql,connection)


Unnamed: 0,docid,q0,score,rank,system,query
0,1,q0,0.149864,1,pgvector,1
1,87,q0,0.161002,2,pgvector,1
2,8,q0,0.171716,3,pgvector,1
3,98,q0,0.184495,4,pgvector,1
4,44,q0,0.187565,5,pgvector,1
5,88,q0,0.18776,6,pgvector,1
6,77,q0,0.188419,7,pgvector,1
7,43,q0,0.189274,8,pgvector,1
8,58,q0,0.189416,9,pgvector,1
9,56,q0,0.189592,10,pgvector,1


In [85]:
from evaluate import load

exact_match_metric = load("exact_match")
trec_eval = load("trec_eval")

## Evaluating Search Results from Text Data

### Understanding TREC data formats

- Trec tools have 3 main data formats
  - TrecRun, the rank of documents for a given query given by a system (search response data to be evaluated)
  - TrecQrel, the relevance of documents to a given query (ground truth)
  - TrecRes, the results of a Trec evaluation
- read more here: https://github.com/joaopalotti/trectools#file-formats

In [86]:
import pandas as pd

# Get TrecRun data
# run = pd.read_csv("./data/trec_run_data.csv")
run_df["query"] = run_df["query"].astype(str)
run_df["docid"] = run_df["docid"].astype(str)
run = run_df.to_dict(orient="list")

# Get TrecQrel data
# qrel = pd.read_csv("./data/trec_qrel_data.csv")
qrel_df["query"] = qrel_df["query"].astype(str)
qrel_df["docid"] = qrel_df["docid"].astype(str)
qrel = qrel_df.to_dict(orient="list")

results = trec_eval.compute(references=[qrel], predictions=[run])
print(f"Rprec: {results['Rprec']}")
print(f"recip_rank: {results['recip_rank']}")
print(f"map: {results['map']}")

Rprec: 0.0
recip_rank: 0.1388888888888889
map: 0.22089496617360393


  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
