# **Trustworthy Retrieval-Augmented Generation with the Trustworthy Language Model**

# **Setup**

## Install the necessary packages

In [None]:
# Install/upgrade the Cleanlab Studio client, LlamaIndex, and the HuggingFace embeddings integration.
%pip install -U cleanlab-studio llama-index llama-index-embeddings-huggingface

In [None]:
# Install the LlamaIndex web reader integration (provides llama_index.readers.web, imported further down).
%pip install llama-index llama-index-readers-web

In [4]:
import os
from getpass import getpass

from cleanlab_studio import Studio

# Never hardcode the API key in the notebook. Read it from the
# CLEANLAB_STUDIO_API_KEY environment variable, and fall back to an interactive
# prompt so the secret is neither saved in the file nor echoed in an output.
api_key = os.environ.get("CLEANLAB_STUDIO_API_KEY") or getpass("Cleanlab Studio API key: ")
studio = Studio(api_key)

# **Integrate TLM with LlamaIndex**

In [5]:
# Create the TLM client from the Studio session. TLMWrapper (defined below)
# calls this module-level `tlm` object, so this cell must run before any query.
tlm = studio.TLM()

In [6]:
from typing import Any, Dict
import json

# Import LlamaIndex dependencies
from llama_index.core.base.llms.types import (
    CompletionResponse,
    CompletionResponseGen,
    LLMMetadata,
)
from llama_index.core.llms.callbacks import llm_completion_callback
from llama_index.core.llms.custom import CustomLLM
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader


class TLMWrapper(CustomLLM):
    """LlamaIndex custom LLM that delegates every completion to the module-level ``tlm``.

    The text of each completion is the JSON serialization of whatever
    ``tlm.prompt`` returns, so downstream code can ``json.loads`` it to recover
    the individual fields (``display_response`` reads ``response`` and
    ``trustworthiness_score``).
    """

    # Values reported to LlamaIndex through the ``metadata`` property.
    context_window: int = 16000
    num_output: int = 256
    model_name: str = "TLM"

    @property
    def metadata(self) -> LLMMetadata:
        """Get LLM metadata."""
        return LLMMetadata(
            context_window=self.context_window,
            num_output=self.num_output,
            model_name=self.model_name,
        )

    def _prompt_tlm_as_json(self, prompt: str) -> str:
        """Send ``prompt`` to TLM and return its result serialized as a JSON string.

        Shared by ``complete`` and ``stream_complete`` so both produce exactly
        the same payload.
        """
        # The result mixes value types (downstream code reads a text answer and
        # rounds a numeric score), hence Dict[str, Any] rather than Dict[str, str].
        response: Dict[str, Any] = tlm.prompt(prompt)
        return json.dumps(response)

    @llm_completion_callback()
    def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
        """Return a single completion whose text is the JSON-encoded TLM result.

        Args:
            prompt: The prompt forwarded to ``tlm.prompt``.
            **kwargs: Accepted for interface compatibility; not forwarded to TLM.
        """
        return CompletionResponse(text=self._prompt_tlm_as_json(prompt))

    @llm_completion_callback()
    def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
        """Yield the JSON-encoded TLM result incrementally.

        The full result is obtained up front and then replayed one character at
        a time: each yielded item carries the text accumulated so far and the
        newly added character as ``delta``.

        Args:
            prompt: The prompt forwarded to ``tlm.prompt``.
            **kwargs: Accepted for interface compatibility; not forwarded to TLM.
        """
        output = self._prompt_tlm_as_json(prompt)

        # Stream the output; iterating a str yields single characters.
        output_str = ""
        for token in output:
            output_str += token
            yield CompletionResponse(text=output_str, delta=token)

# **Build a RAG pipeline with TLM**

In [7]:
# Register the TLM-backed wrapper as the LLM on LlamaIndex's global Settings object.
Settings.llm = TLMWrapper()

# **Specify Embedding Model**

In [None]:
# Set the embedding model on the global Settings object to the HuggingFace BAAI/bge-small-en-v1.5 model.
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# **Load Data and Create Index + Query Engine**

In [11]:
from llama_index.core import SummaryIndex
from llama_index.readers.web import SimpleWebPageReader
from IPython.display import Markdown, display
import os

In [18]:
# Fetch the essay page that serves as the knowledge base for the RAG pipeline.
essay_urls = ["https://paulgraham.com/worked.html"]
web_reader = SimpleWebPageReader(html_to_text=True)
documents = web_reader.load_data(essay_urls)

In [None]:
# Inspect the first loaded document to confirm the page was fetched.
documents[0]

In [20]:
# Build a vector index over the documents loaded above.
# To index local files instead, load them with SimpleDirectoryReader("data").load_data().
index = VectorStoreIndex.from_documents(documents)

In [21]:
# Create a query engine over the index using the default options.
query_engine = index.as_query_engine()

# **Answering queries with our RAG system**

In [23]:

def display_response(response):
    """Print the answer and trustworthiness score from a TLM-based RAG response.

    ``response.response`` is expected to be the JSON string produced by the
    TLM wrapper, i.e. an object with a ``response`` entry and a
    ``trustworthiness_score`` entry.

    If the text is missing, is not valid JSON, or is not such an object (for
    example a plain-text message produced without calling the LLM), the raw
    text is printed and the score is reported as ``N/A`` instead of raising.

    Args:
        response: Any object exposing the response text as ``.response``.
    """
    response_str = response.response

    try:
        output_dict = json.loads(response_str)
    except (TypeError, ValueError):
        # TypeError: response_str is None / not a string.
        # ValueError: not valid JSON (json.JSONDecodeError is a subclass).
        output_dict = None

    if not isinstance(output_dict, dict) or "response" not in output_dict:
        print(f"Response: {response_str}")
        print("Trustworthiness score: N/A")
        return

    print(f"Response: {output_dict['response']}")

    score = output_dict.get("trustworthiness_score")
    if isinstance(score, (int, float)):
        print(f"Trustworthiness score: {round(score, 2)}")
    else:
        # Score absent or null: report that rather than failing in round().
        print("Trustworthiness score: N/A")

# **Questions**

In [24]:
# Ask about the author's early years and print the answer with its trustworthiness score.
question = "What did the author do growing up?"
response = query_engine.query(question)
display_response(response)

Response: Growing up, the author worked on writing and programming. They wrote short stories and tried programming on an IBM 1401 computer. They later got a microcomputer and started programming on it, writing simple games and a word processor.
Trustworthiness score: 0.91


In [26]:
# Ask a question about mirroring essays and print the answer with its trustworthiness score.
question = "Can I mirror an essay on my site??"
response = query_engine.query(question)
display_response(response)

Response: Yes, you can mirror an essay on your site. The author mentions that anyone can publish anything on the web, so you have the freedom to publish and share essays online.
Trustworthiness score: 0.89
