In [1]:
from trustifai import Trustifai, MetricContext

# Confidence for LLM response

_Works only with LLMs that support logprobs._

In [2]:
# Create the engine from its YAML configuration file.
# NOTE(review): the path is relative, so it presumably resolves against the
# notebook's working directory — confirm before running from elsewhere.
trust_engine = Trustifai("config_file.yaml")

In [15]:
# Ask a simple factual question. The result is a dict holding the generated
# text under 'response' and the confidence information under 'metadata'
# (see the output of the next cell).
response = trust_engine.generate("Who is CEO of Microsoft?")

In [16]:
# Show the full result: generated text plus overall and per-sentence
# confidence metadata.
response

{'response': 'The CEO of Microsoft is Satya Nadella. He has held the position since February 2014.',
 'metadata': {'confidence_score': 0.93,
  'confidence_label': 'High Confidence',
  'confidence_details': {'explanation': 'Model is highly confident in its response based on logprobs.',
   'avg_logprob': -0.06,
   'variance': 0.02,
   'token_count': 21,
   'sentence_analysis': [{'text': 'The CEO of Microsoft is Satya Nadella.',
     'score': 0.94,
     'label': 'High Confidence',
     'token_count': 10},
    {'text': ' He has held the position since February 2014',
     'score': 0.95,
     'label': 'High Confidence',
     'token_count': 10}]},
  'logprobs_available': True}}

In [3]:
# Ask about a future year to demonstrate a low-confidence result
# (the saved output below is labelled 'Low Confidence').
# NOTE(review): visualize=True presumably adds the 'html' entry under
# metadata['confidence_details'] that a later cell renders — confirm.
response = trust_engine.generate("what was the major means of transport in the year 2150? Why it got failed?", visualize=True)

In [4]:
# Show the full result for the low-confidence example.
response

{'response': 'There is no historical record for the year 2150, as it is in the future. If you are referring to a fictional work, such as a science fiction novel, movie, or series, please specify the context or title so I can provide an accurate answer.\n\nIf you meant a different year or are asking about predictions for transportation in the year 2150, let me know!',
 'metadata': {'confidence_score': 0.6,
  'confidence_label': 'Low Confidence',
  'confidence_details': {'explanation': 'Model is uncertain about its output.',
   'avg_logprob': -0.36,
   'variance': 0.14,
   'token_count': 78,
   'sentence_analysis': [{'text': 'There is no historical record for the year 2150, as it is in the future.',
     'score': 0.81,
     'label': 'Medium Confidence',
     'token_count': 19},
    {'text': ' If you are referring to a fictional work, such as a science fiction novel, movie, or series, please specify the context or title so I can provide an accurate answer',
     'score': 0.69,
     'label

In [None]:
# `display` and `HTML` are public API of `IPython.display`. Importing `display`
# from `IPython.core.display` is deprecated (since IPython 7.14) and may fail
# on newer releases.
from IPython.display import HTML, display

In [6]:
# Render the HTML stored alongside the confidence details of the last response.
# NOTE(review): the 'html' entry is presumably only present when generate()
# was called with visualize=True — confirm.
confidence_details = response["metadata"]["confidence_details"]
display(HTML(confidence_details["html"]))

# Trust Score for RAG response

In [None]:
from langchain_core.documents import Document

# Question that the answer and the documents below relate to.
query = "What is Acme Corp's policy on remote work?"

# Answer to be scored. It contradicts the documents below (remote-first vs.
# hybrid, $2000 vs. $500 stipend). The trailing spaces before each newline are
# part of the string value and are kept explicit here.
answer = (
    "Acme Corp operates on a fully remote-first policy with no office requirements. \n"
    "Employees can work from anywhere in the world and the company has closed all \n"
    "physical office locations as of January 2024. They provide a $2000 annual \n"
    "stipend for coworking spaces and have implemented a 4-day work week.\n"
)

# Source documents the answer is checked against, each tagged with its source file.
documents = [
    Document(page_content=text, metadata={"source": source})
    for text, source in (
        (
            "Acme Corp announced a hybrid work model in 2023, requiring employees to be in office 3 days per week.",
            "hr_policy_2023.pdf",
        ),
        (
            "The company provides home office stipends of up to $500 for remote setup.",
            "benefits_guide.pdf",
        ),
    )
]

# Other document shapes are supported as well (LangChain/LlamaIndex document
# objects, lists, dictionaries, etc.), for example a plain list of strings:
#   documents = ["New Delhi is the capital of India. It houses the parliament.",
#                "The Eiffel Tower is the most famous monument in Paris, France.",
#                "India's capital city, New Delhi, is known for the Red Fort."]

# Engine for this section, built from the same configuration file as before.
trust_engine = Trustifai("config_file.yaml")

# Bundle the query, the answer and the source documents for scoring.
metric_context = MetricContext(query=query, answer=answer, documents=documents)

In [19]:
# Score the answer against the supplied documents. The result is a dict with
# an overall 'score' and 'label' plus per-metric 'details'
# (see the output of the next cell).
trust_score = trust_engine.get_trust_score(metric_context)

In [20]:
# Show the overall trust score together with the per-metric breakdown.
trust_score

{'score': 0.53,
 'label': 'UNRELIABLE',
 'details': {'evidence_coverage': {'score': 0.0,
   'label': 'Likely Hallucinated Answer',
   'details': {'explanation': 'Many claims lack support from source documents.',
    'strategy': 'LLM',
    'total_sentences': 3,
    'supported_sentences': 0,
    'unsupported_sentences': ['Acme Corp operates on a fully remote-first policy with no office requirements.',
     'Employees can work from anywhere in the world and the company has closed all \nphysical office locations as of January 2024.',
     'They provide a $2000 annual \nstipend for coworking spaces and have implemented a 4-day work week.'],
    'failed_checks': 0}},
  'semantic_drift': {'score': 0.91,
   'label': 'Strong Alignment',
   'details': {'explanation': 'Answer semantically aligned with source documents.',
    'total_documents': 2}},
  'consistency': {'score': 0.86,
   'label': 'Stable Consistency',
   'details': {'explanation': 'Model produces highly consistent responses.',
    's

In [13]:
# Build a reasoning graph from the trust-score result; it is passed to
# trust_engine.visualize() in the following cells.
rg = trust_engine.build_reasoning_graph(trust_score)

In [15]:
# Visualise the reasoning graph with the default renderer; the saved cell
# output below shows a pyvis Network object.
trust_engine.visualize(rg)

<class 'pyvis.network.Network'> |N|=7 |E|=6

In [11]:
# Request the Mermaid rendering of the reasoning graph and print it so the
# flowchart source appears as plain text.
mermaid_diagram = trust_engine.visualize(rg, "mermaid")
print(mermaid_diagram)

```mermaid
flowchart TD
   evidence_coverage["<b>Evidence Coverage</b><br/>Score: 0.00<br/>Likely Hallucinated Answer"]
   semantic_drift["<b>Semantic Drift</b><br/>Score: 0.91<br/>Strong Alignment"]
   consistency["<b>Consistency</b><br/>Score: 0.80<br/>Fragile Consistency"]
   source_diversity["<b>Source Diversity</b><br/>Score: 0.85<br/>High Trust"]
   trust_aggregation{"<b>Trust Score</b><br/>Score: 0.52"}
   final_decision("<b>Decision: UNRELIABLE</b>")
    evidence_coverage --> trust_aggregation
    semantic_drift --> trust_aggregation
    consistency --> trust_aggregation
    source_diversity --> trust_aggregation
    trust_aggregation --> final_decision
    style evidence_coverage fill:#ff6b6b,color:#000000
    style semantic_drift fill:#2ecc71,color:#000000
    style consistency fill:#f39c12,color:#000000
    style source_diversity fill:#2ecc71,color:#000000
    style trust_aggregation fill:#ff6b6b,color:#000000
    style final_decision fill:#ff6b6b,color:#000000
```
