In [None]:
# Install the notebook's dependencies.
# %pip (instead of !pip) installs into the environment of the running kernel,
# whereas !pip uses whichever `pip` happens to be first on the shell PATH.
%pip install -r requirements.txt -q

In [None]:
import os
from helpers.dataset import load_dataset,Conversation
from helpers.fair_forge import FairForge
from elasticsearch import Elasticsearch, helpers
from helpers.guardian import Guardian
from pydantic import BaseModel

In [None]:
# NOTE(review): a class with the same name and fields is defined again in a
# later cell; that later definition replaces this one at run time.
class BiasBatch(BaseModel):
    """Bias-detection result for one interaction of a conversation.

    Instances are created in ``BiasAnalyzer.process`` and later expanded by
    ``flatten_guard_metrics`` into one record per entry of ``risks``.
    """
    # Copied from the conversation thread's ``session_id``.
    session_id: str
    # Value returned by ``Guardian.detect``; each entry is later read for its
    # ``is_risk``, ``risk_type`` and ``probability`` attributes.
    risks: list
    # Copied from the interaction's ``qa_id``.
    qa_id: str
    # Copied from the conversation thread's ``assistant_id``.
    assistant_id: str

In [None]:
# --- Elasticsearch connection (read from the environment) --------------------
ELASTIC_URL = os.getenv('ELASTIC_URL')
# [user, password]; turned into a tuple when the client is created.
ELASTIC_AUTH = [
    os.getenv(variable)
    for variable in ('ELASTIC_AUTH_USER', 'ELASTIC_AUTH_PASSWORD')
]

# --- Guardian service (read from the environment) ----------------------------
GUARDIAN_URL = os.getenv("GUARDIAN_URL")
GUARDIAN_MODEL_NAME = os.getenv("GUARDIAN_MODEL_NAME")
GUARDIAN_API_KEY = os.getenv("GUARDIAN_API_KEY")
GUARDIAN_MODEL = os.getenv("GUARDIAN_MODEL")

# --- Run parameters ----------------------------------------------------------
# Dataset name falls back to "asb"; the target index is "<dataset>-bias".
dataset = os.getenv("dataset", "asb")
bias_index = dataset + "-bias"
# Values handed to the Guardian constructor in BiasAnalyzer.process.
guardian_temperature = 0.01
max_tokens = 5

In [None]:
# Elasticsearch client used by recreate_index and by the bulk-indexing cell.
# ELASTIC_AUTH is a list, so it is converted to the tuple form for basic_auth.
es = Elasticsearch(hosts=ELASTIC_URL, basic_auth=tuple(ELASTIC_AUTH))

In [None]:
def recreate_index(index_name: str, mapping: dict):
    """Drop ``index_name`` if it already exists, then create it from ``mapping``.

    Uses the module-level ``es`` client. Any data already stored in the index
    is lost, because an existing index is deleted before being re-created.

    Args:
        index_name: Name of the Elasticsearch index to (re)create.
        mapping: Request body passed to the index-creation call.
    """
    index_api = es.indices
    already_there = index_api.exists(index=index_name)
    if already_there:
        index_api.delete(index=index_name)
        print(f"Index '{index_name}' deleted.")
    index_api.create(index=index_name, body=mapping)
    print(f"Index '{index_name}' created.")

In [None]:
# Abort early when the Guardian service is not fully configured.
# NOTE(review): GUARDIAN_MODEL_NAME is also passed to Guardian but is not
# validated here - confirm whether it is meant to be optional.
if any(setting is None for setting in (GUARDIAN_URL, GUARDIAN_API_KEY, GUARDIAN_MODEL)):
    raise Exception("Guardian variables are missing")

In [None]:
# NOTE(review): this repeats, field for field, the BiasBatch class already
# defined in an earlier cell. Being the later definition, this is the one in
# effect when BiasAnalyzer runs; one of the two copies can be removed.
class BiasBatch(BaseModel):
    """Bias-detection result for one interaction of a conversation.

    Instances are created in ``BiasAnalyzer.process`` and later expanded by
    ``flatten_guard_metrics`` into one record per entry of ``risks``.
    """
    # Copied from the conversation thread's ``session_id``.
    session_id: str
    # Value returned by ``Guardian.detect``; each entry is later read for its
    # ``is_risk``, ``risk_type`` and ``probability`` attributes.
    risks: list
    # Copied from the interaction's ``qa_id``.
    qa_id: str
    # Copied from the conversation thread's ``assistant_id``.
    assistant_id: str

In [None]:
class BiasAnalyzer(FairForge):
    """FairForge analyzer that runs Guardian risk detection on every interaction."""

    def process(self, thread: Conversation):
        """Detect risks for each interaction of ``thread`` and record the results.

        A new ``Guardian`` is built for each call. For every element of
        ``thread.conversation`` one ``BiasBatch`` is appended to
        ``self.metrics``.

        Args:
            thread: Conversation providing ``conversation``, ``context``,
                ``session_id`` and ``assistant_id``.
        """
        detector = Guardian(
            GUARDIAN_URL,
            GUARDIAN_MODEL_NAME,
            GUARDIAN_API_KEY,
            GUARDIAN_MODEL,
            guardian_temperature,
            max_tokens,
        )
        for interaction in thread.conversation:
            # Run detection first, then wrap its outcome with the identifiers.
            detected_risks = detector.detect(interaction, thread.context)
            record = BiasBatch(
                risks=detected_risks,
                session_id=thread.session_id,
                qa_id=interaction.qa_id,
                assistant_id=thread.assistant_id,
            )
            self.metrics.append(record)

In [None]:
# Instantiate the analyzer and run it. `pipeline` is not defined on
# BiasAnalyzer itself, so it comes from the FairForge base class.
# NOTE(review): presumably pipeline() loads the dataset, calls process() for
# each conversation and returns the accumulated self.metrics - confirm in
# helpers.fair_forge.
bias = BiasAnalyzer()
metrics = bias.pipeline()

In [None]:
def flatten_guard_metrics(metrics):
    """Expand each metric into one flat dict per risk it contains.

    Args:
        metrics: Iterable of objects exposing ``session_id``, ``qa_id``,
            ``assistant_id`` and an iterable ``risks`` whose items expose
            ``is_risk``, ``risk_type`` and ``probability``.

    Returns:
        A list of dicts, in input order, one per (metric, risk) pair. A metric
        with no risks contributes no entries.
    """
    return [
        {
            "session_id": entry.session_id,
            "qa_id": entry.qa_id,
            "assistant_id": entry.assistant_id,
            "bias_guard_is_risk": finding.is_risk,
            "bias_guard_type": finding.risk_type,
            "bias_guard_probability": finding.probability,
        }
        for entry in metrics
        for finding in entry.risks
    ]
# One flat dict per (metric, risk) pair; these become the documents that are
# bulk-indexed at the end of the notebook.
flattened = flatten_guard_metrics(metrics)

In [None]:
# Index definition for the bias documents; the field names match the keys
# produced by flatten_guard_metrics.
# NOTE(review): bias_guard_type is mapped as analyzed "text" - confirm that
# exact-match filtering or aggregation on the risk type is not needed.
mapping_bias = {
    "mappings": {
        "properties": {
            # Identifier of the conversation session.
            "session_id": {"type": "keyword"},
            # Guardian verdict for one risk.
            "bias_guard_is_risk": {"type": "boolean"},
            "bias_guard_type": {"type": "text"},
            "bias_guard_probability": {"type": "float"},
            # Remaining identifiers.
            "assistant_id": {"type": "keyword"},
            "qa_id": {"type": "keyword"},
        },
    },
}

In [None]:
# Destructive: if the bias index already exists it is deleted (together with
# its documents) before being created again with mapping_bias.
recreate_index(bias_index, mapping_bias)

In [None]:
# Wrap every flattened record in a bulk action that targets the bias index.
docs = [
    {"_index": bias_index, "_source": record}
    for record in flattened
]

# Send all actions through the bulk helper, then report how many were built.
helpers.bulk(es, docs)
print(f"Indexed {len(docs)} documents.")