### Install the required packages

In [1]:
!pip install trulens-eval

Collecting langchain-community<0.1,>=0.0.38 (from langchain>=0.1.14->trulens-eval)
  Using cached langchain_community-0.0.38-py3-none-any.whl.metadata (8.7 kB)
Using cached langchain_community-0.0.38-py3-none-any.whl (2.0 MB)
Installing collected packages: langchain-community
  Attempting uninstall: langchain-community
    Found existing installation: langchain-community 0.0.37
    Uninstalling langchain-community-0.0.37:
      Successfully uninstalled langchain-community-0.0.37
Successfully installed langchain-community-0.0.38


In [2]:
!pip install python-dotenv



In [None]:
!pip install openai

In [None]:
!pip install numpy

In [14]:
import os
from dotenv import load_dotenv

In [22]:
import numpy as np

### Load TruLens Library Modules

In [15]:
from trulens_eval import Tru
from trulens_eval.tru_custom_app import instrument
from trulens_eval import Feedback, Select
from trulens_eval.feedback import Groundedness

### Load .env

In [16]:
# Load from environment
load_dotenv('.env', override=True)
os.environ["OPENAI_API_KEY"] = os.getenv('OPENAI_API_KEY')

### Load FAISS Vector Database

In [10]:
DB_FAISS_PATH = '../vectorstore/db_faiss'

In [54]:
from langchain_community.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.retrievers.document_compressors import EmbeddingsFilter
from langchain_community.chat_models import ChatOllama
from langchain.retrievers import ContextualCompressionRetriever

In [52]:
llm = ChatOllama(model="llama2")

In [6]:
def load_embedding_model():
    return HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1", model_kwargs={'device': 'cpu'})

In [7]:
embedding_model = load_embedding_model()



In [37]:
# Load the FAISS vector store
db = FAISS.load_local(DB_FAISS_PATH, embedding_model, allow_dangerous_deserialization=True)
faiss_retriever = db.as_retriever(search_kwargs={'k': 5})

### Ensemble Retriever

In [33]:
import pickle


def load_bm25_retriever():
    with open('../bm25_retriever.pkl', 'rb') as f:
        bm25_retriever = pickle.load(f)
    return bm25_retriever

In [35]:
bm25_retriever = load_bm25_retriever()

In [36]:
# Create an ensemble retriever with the BM25 and FAISS retrievers
ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, faiss_retriever], weights=[0.5, 0.5])

### Contextual Compression

In [53]:
compressor = LLMChainExtractor.from_llm(llm)

In [62]:
compression_retriever = ContextualCompressionRetriever(base_compressor=compressor, base_retriever=faiss_retriever)

### Evaluating the base retrieval (FAISS Retriever) of the sysem using Truelens-eval

In [12]:
tru = Tru()

🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of Tru` to prevent this.


In [17]:
from openai import OpenAI
oai_client = OpenAI()

In [79]:
class BaseRetrieval:
    @instrument
    def retrieve(self, query: str) -> list:
        """
        Retrieve relevant text from vector store.
        """
        results = db.similarity_search(query, k=5)
        print(results)
        return results[0].page_content
    
    @instrument
    def generate_completion(self, query: str, context_str: list) -> str:
        """
        Generate answer from context.
        """
        completion = oai_client.chat.completions.create(
        model="gpt-4-turbo",
        temperature=0,
        messages=
        [
            {"role": "user",
            "content": 
            f"We have provided context information below. \n"
            f"---------------------\n"
            f"{context_str}"
            f"\n---------------------\n"
            f"Given this information, please answer the question: {query}"
            }
        ]
        ).choices[0].message.content
        return completion

    @instrument
    def query(self, query: str) -> str:
        context_str = self.retrieve(query)
        completion = self.generate_completion(query, context_str)
        return completion

In [80]:
rag = BaseRetrieval()

In [20]:
from trulens_eval.feedback.provider.openai import OpenAI

provider = OpenAI()
grounded = Groundedness(groundedness_provider=provider)

[nltk_data] Downloading package punkt to
[nltk_data]     /home/adeptschneiderthedev/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [23]:
# Define a groundedness feedback function
f_groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons, name = "Groundedness")
    .on(Select.RecordCalls.retrieve.rets.collect())
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)

# Question/answer relevance between overall question and answer.
f_answer_relevance = (
    Feedback(provider.relevance_with_cot_reasons, name = "Answer Relevance")
    .on(Select.RecordCalls.retrieve.args.query)
    .on_output()
)

# Question/statement relevance between question and each context chunk.
f_context_relevance = (
    Feedback(provider.context_relevance_with_cot_reasons, name = "Context Relevance")
    .on(Select.RecordCalls.retrieve.args.query)
    .on(Select.RecordCalls.retrieve.rets.collect())
    .aggregate(np.mean)
)

✅ In Groundedness, input source will be set to __record__.app.retrieve.rets.collect() .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Answer Relevance, input prompt will be set to __record__.app.retrieve.args.query .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input question will be set to __record__.app.retrieve.args.query .
✅ In Context Relevance, input context will be set to __record__.app.retrieve.rets.collect() .


### Construct the App

In [81]:
from trulens_eval import TruCustomApp
tru_rag = TruCustomApp(rag,
    app_id = 'Retrieval Pipeline Testing v1',
    feedbacks = [f_groundedness, f_answer_relevance, f_context_relevance])

In [82]:
queries = [
    "Can the Conference of the Parties of the WHO FCTC assist countries in securing financial resources for implementation?",
    "What should be the minimum size of health warnings and messages on tobacco products, and where should they be placed?",
    "I opened a company to produce sensors in Kuala Lumpur. Based on the law in the file, how should I register for sales tax, and what are my obligations?",
    "I opened a company to produce sensors in Kuala Lumpur. During product I paid sales tax on my inputs. Based on the law in the file, what are conditions to be eligible for a refund of the sales tax?",
    "What specific indicators and targets are outlined in Canada's Cybersecurity Strategy?",
    "What measures is the government of Canada taking in response to data security challenges posed by the emergence of novel technologies?",
    "What are the API requirements that apply to the Consent building block?",
    "What additional building blocks are essential to support the functionality of the consent building block?",
    "What are the key findings of the CyberPeace Institute's analysis of cyber threats affecting NGOs in International Geneva?",
    "What are the key lessons learnt from the case studies examined in the report?"
]

In [83]:
def tru_rag_retrieval_pipeline(query):
    with tru_rag as recording:
        rag.query(query)
    tru.get_leaderboard(app_ids=["Retrieval Pipeline Testing v1"])

In [87]:
for query in queries:
    tru_rag_retrieval_pipeline(query)

In [42]:
tru.run_dashboard()

Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.
Dashboard already running at path:   Network URL: http://192.168.0.105:8501



<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

### Evaluating the ensemble retrieval system (BM25Retriever + FAISS Retriever) sysem using Truelens-eval

In [40]:
class EnsembleRetrieval:
    @instrument
    def retrieve(self, query: str) -> list:
        """
        Retrieve relevant text from vector store.
        """
        results = ensemble_retriever.get_relevant_documents(query)
        return results[0].page_content
    
    @instrument
    def generate_completion(self, query: str, context_str: list) -> str:
        """
        Generate answer from context.
        """
        completion = oai_client.chat.completions.create(
        model="gpt-4-turbo",
        temperature=0,
        messages=
        [
            {"role": "user",
            "content": 
            f"We have provided context information below. \n"
            f"---------------------\n"
            f"{context_str}"
            f"\n---------------------\n"
            f"Given this information, please answer the question: {query}"
            }
        ]
        ).choices[0].message.content
        return completion

    @instrument
    def query(self, query: str) -> str:
        context_str = self.retrieve(query)
        completion = self.generate_completion(query, context_str)
        return completion

In [41]:
ensemble_retrieval_rag = EnsembleRetrieval()

In [44]:
from trulens_eval import TruCustomApp
ensemble_retrieval_tru_rag = TruCustomApp(ensemble_retrieval_rag,
    app_id = 'Retrieval Pipeline Testing v2 (Ensemble Retrieval)',
    feedbacks = [f_groundedness, f_answer_relevance, f_context_relevance])

In [47]:
def tru_rag_ensemble_retrieval_pipeline(query):
    with ensemble_retrieval_tru_rag as recording:
        ensemble_retrieval_rag.query(query)
    tru.get_leaderboard(app_ids=["Retrieval Pipeline Testing v1"])

In [48]:
for query in queries:
    tru_rag_ensemble_retrieval_pipeline(query)

  warn_deprecated(


Groundedness per statement in source:   0%|          | 0/5 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/2 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/21 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/18 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/15 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/20 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/15 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/22 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/6 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/7 [00:00<?, ?it/s]

In [50]:
tru.run_dashboard()

Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.
Dashboard already running at path:   Network URL: http://192.168.0.105:8501



<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

### Evaluating the Contextual Retrieval sysem using Truelens-eval

In [63]:
class ContextualCompressionRetrieval:
    @instrument
    def retrieve(self, query: str) -> list:
        """
        Retrieve relevant text from vector store.
        """
        results = compression_retriever.get_relevant_documents(query)
        return results[0].page_content
    
    @instrument
    def generate_completion(self, query: str, context_str: list) -> str:
        """
        Generate answer from context.
        """
        completion = oai_client.chat.completions.create(
        model="gpt-4-turbo",
        temperature=0,
        messages=
        [
            {"role": "user",
            "content": 
            f"We have provided context information below. \n"
            f"---------------------\n"
            f"{context_str}"
            f"\n---------------------\n"
            f"Given this information, please answer the question: {query}"
            }
        ]
        ).choices[0].message.content
        return completion

    @instrument
    def query(self, query: str) -> str:
        context_str = self.retrieve(query)
        completion = self.generate_completion(query, context_str)
        return completion

In [65]:
contextual_compression_retrieval_rag = ContextualCompressionRetrieval()

In [66]:
from trulens_eval import TruCustomApp
contextual_compression_retrieval_tru_rag = TruCustomApp(contextual_compression_retrieval_rag,
    app_id = 'Retrieval Pipeline Testing v3 (Contextual Retrieval)',
    feedbacks = [f_groundedness, f_answer_relevance, f_context_relevance])

In [70]:
def tru_rag_contextual_compression_retrieval_pipeline(query):
    with contextual_compression_retrieval_tru_rag as recording:
        contextual_compression_retrieval_rag.query(query)
    tru.get_leaderboard(app_ids=["Retrieval Pipeline Testing v3 (Contextual Retrieval)"])

In [72]:
for query in queries:
    tru_rag_contextual_compression_retrieval_pipeline(query)

In [None]:
tru.run_dashboard()