In [None]:
# Install the locally built fair-forge source distribution from ../dist into the kernel's environment (-q keeps pip quiet).
%pip install ../dist/alquimia_fair_forge-0.0.1.tar.gz -q

In [None]:
from fair_forge.metrics import Context
from pydantic import SecretStr
from helpers.retriever import LakeFSRetriever
from elasticsearch import Elasticsearch, helpers
import os

In [None]:
# LakeFS connection settings, read from the environment.
# Any variable that is not set resolves to None.
lakefs_host = os.getenv("lakefs_host")
lakefs_username = os.getenv("lakefs_username")
lakefs_password = os.getenv("lakefs_password")
lakefs_repository = os.getenv("lakefs_repository")
lakefs_branch = os.getenv("lakefs_branch")
lakefs_dataset = os.getenv("lakefs_dataset")

In [3]:
def _require_env(name: str) -> str:
    """Return the value of environment variable `name`.

    Raises:
        RuntimeError: if the variable is not set. Failing here avoids silently
            building `SecretStr(None)`, a `(None, None)` auth pair, or an index
            literally named "None-...".
    """
    value = os.environ.get(name)
    if value is None:
        raise RuntimeError(f"Required environment variable '{name}' is not set.")
    return value


# Credentials for the judge model; wrapped in SecretStr so the value is masked in reprs/outputs.
JUDGE_API_KEY = SecretStr(_require_env("judge_api_key"))

# Elasticsearch endpoint and basic-auth credentials as [user, password].
ELASTIC_URL = _require_env("ELASTIC_URL")
ELASTIC_AUTH = [_require_env("ELASTIC_AUTH_USER"), _require_env("ELASTIC_AUTH_PASSWORD")]

# Each run writes to its own index, named after the run.
RUN_NAME = _require_env("run_name")
# This notebook stores Context metrics, so the index carries a "-context" suffix.
# The previous "-bias" suffix did not match the Context metric or the context mapping
# below; because the index is dropped and recreated, it could also wipe an index
# written by a bias run sharing the same RUN_NAME.
index_name = f"{RUN_NAME}-context"

In [None]:
# LakeFS settings handed to Context.run as keyword arguments (same names, same values).
lakefs_settings = {
    "lakefs_host": lakefs_host,
    "lakefs_username": lakefs_username,
    "lakefs_password": lakefs_password,
    "lakefs_repository": lakefs_repository,
    "lakefs_branch": lakefs_branch,
    "lakefs_dataset": lakefs_dataset,
}

# Run the Context metric with the LakeFS retriever; the result is iterated
# further down, where each item is serialized with `model_dump()`.
metrics = Context.run(LakeFSRetriever, judge_api_key=JUDGE_API_KEY, **lakefs_settings)

In [None]:
# Elasticsearch client using HTTP basic auth; the [user, password] list is converted to a tuple.
es = Elasticsearch(ELASTIC_URL, basic_auth=tuple(ELASTIC_AUTH))

In [None]:
def recreate_index(index_name: str, mapping: dict, client: "Elasticsearch | None" = None) -> None:
    """Drop `index_name` if it already exists, then create it with `mapping`.

    WARNING: this is destructive. Every document already stored in an existing
    index of that name is lost.

    Args:
        index_name: Name of the index to (re)create.
        mapping: Index definition sent as the body of the create-index request.
        client: Elasticsearch client to operate on. When omitted, the
            notebook-level `es` client is used, so existing calls keep working.
    """
    # Resolve the client at call time so the function no longer depends solely on the global.
    indices = (es if client is None else client).indices

    if indices.exists(index=index_name):
        indices.delete(index=index_name)
        print(f"Index '{index_name}' deleted.")

    indices.create(index=index_name, body=mapping)
    print(f"Index '{index_name}' created.")

In [None]:
# Elasticsearch field type for every field of a stored context metric document.
context_field_types = {
    "session_id": "keyword",
    "context": "text",
    "context_insight": "text",
    "context_awareness": "float",
    "context_thinkings": "text",
    "qa_id": "keyword",
    "assistant_id": "keyword",
}

# Expand the table into the index mapping: {"field": {"type": "<type>"}}.
mapping_contextualizer = {
    "mappings": {
        "properties": {
            field: {"type": es_type}
            for field, es_type in context_field_types.items()
        }
    }
}

# Start from a clean index for this run (any existing index of that name is dropped).
recreate_index(index_name, mapping_contextualizer)

In [None]:
# One bulk action per metric: target index plus the metric serialized via `model_dump()`.
docs = [
    {"_index": index_name, "_source": metric.model_dump()}
    for metric in metrics
]

# Send all actions through the bulk helper, then report how many were submitted.
helpers.bulk(es, docs)
print(f"Indexed {len(docs)} documents.")