In [1]:
import os
import json

from langchain_chroma import Chroma
from langchain_core.documents import Document

from src.model.model import MultiModalEmbeddingModel

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
THRESHOLD = 0.4
embedding_model = MultiModalEmbeddingModel("nomic-ai/nomic-embed-text-v1.5", "nomic-ai/nomic-embed-vision-v1.5")

<All keys matched successfully>
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


---

# Exact Match - Precision/Recall/F1

In [3]:
vector_store = Chroma(
    embedding_function=embedding_model,
    collection_name="nomic-embed-evaluate",
)

In [4]:
data_path = os.path.join(os.getcwd(), "dataset.json")

with open(data_path, "r") as f:
    data = json.load(f)

In [5]:
valid_chunks = [(k, elem) for k, elem in data.items() if elem["query"] != "" and elem["content"] != ""]
valid_chunks = dict(valid_chunks)

In [6]:
hashes = [k for k, _ in valid_chunks.items()]
documents = [Document(page_content=v["content"], metadata={"hash": k}) for k, v in valid_chunks.items()]

In [7]:
vector_store.add_documents(documents, ids=hashes)
print("Documents added to vector store.")

Documents added to vector store.


In [8]:
tp, fp, fn = 0, 0, 0
queries = [(k, v["query"]) for k, v in valid_chunks.items()]

for k, v in queries:
    results = vector_store.similarity_search_with_relevance_scores(v, k=1)
    
    if len(results) == 0:
        fn += 1
    else:
        doc = results[0][0]
        score = results[0][1]
        if doc.metadata["hash"] == k and score >= THRESHOLD:
            tp += 1
        else:
            fp += 1

In [9]:
precision = tp / (tp + fp) if (tp + fp) > 0 else 0
recall = tp / (tp + fn) if (tp + fn) > 0 else 0
f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

print(f"TP: {tp}, FP: {fp}, FN: {fn}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1_score:.4f}")

TP: 24, FP: 134, FN: 0
Precision: 0.1519
Recall: 1.0000
F1 Score: 0.2637


---

# MRR - Mean Reciprocal Rank

In [10]:
del vector_store
vector_store = Chroma(
    embedding_function=embedding_model,
    collection_name="nomic-embed-evaluate",
)

In [11]:
vector_store.add_documents(documents, ids=hashes)
print("Documents added to vector store.")

Documents added to vector store.


In [12]:
ranks = []

for k, v in queries:
    results = vector_store.similarity_search_with_relevance_scores(v, k=4)
    results = sorted(results, key=lambda x: x[1], reverse=True)
    
    hashes = [doc.metadata["hash"] for doc, _ in results]
    rank = hashes.index(k) + 1 if k in hashes else 0
    rank = 1 / rank if rank != 0 else 0
    ranks.append(rank)

In [13]:
mrr = sum(ranks) / len(ranks)
print(f"Mean Reciprocal Rank (MRR): {mrr:.4f}")

Mean Reciprocal Rank (MRR): 0.2157
