In [2]:
import os
import openai
import io
import pandas as pd
import llama_index 
from pypdf import PdfReader, PdfWriter

In [3]:
# Read the API key from the environment rather than hardcoding it in the notebook.
# Indexing os.environ raises KeyError if OPENAI_API_KEY is not set.
openai.api_key = os.environ["OPENAI_API_KEY"]

## Read context document

In [4]:
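# Uncomment the next line to download the source PDF (IPCC AR6 WGII, Chapter 3) if it is not already in the working directory.
# !curl https://www.ipcc.ch/report/ar6/wg2/downloads/report/IPCC_AR6_WGII_Chapter03.pdf --output IPCC_AR6_WGII_Chapter03.pdf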

In [5]:
def select_pdf_pages(file, pages):
    """Copy selected pages of a PDF into a new in-memory PDF.

    Args:
        file: Source PDF; handed directly to ``PdfReader``.
        pages: Iterable of indices into ``PdfReader(file).pages``. Pages are
            copied in the order given.

    Returns:
        io.BytesIO: Buffer containing the new PDF, rewound to position 0.
    """
    source = PdfReader(file)
    subset = PdfWriter()
    for page_index in pages:
        subset.add_page(source.pages[page_index])

    output = io.BytesIO()
    subset.write(output)
    # Rewind so callers can read the buffer from the start.
    output.seek(0)
    return output

# Keep only the first 30 pages (indices 0-29) of the chapter.
# The source file is opened in a `with` block so its handle is closed once the
# subset has been fully written into the in-memory buffer.
with open("IPCC_AR6_WGII_Chapter03.pdf", "rb") as source_pdf:
    new_pdf = select_pdf_pages(source_pdf, list(range(30)))

# Persist the subset so later cells can load it from disk by path.
with open("IPCC_AR6_WGII_Chapter03_subset.pdf", "wb") as f:
    f.write(new_pdf.getvalue())

In [6]:
# Initialize models for LlamaIndex
from llama_index.llms import OpenAI
from llama_index.embeddings import OpenAIEmbedding

# `OpenAI` here is the LlamaIndex LLM wrapper imported in this cell.
# A low temperature (0.1) is requested for the chat model.
llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)
# Constructed with no arguments, so the library's default embedding settings apply.
embed_model = OpenAIEmbedding()

### Load data, split into chunks

In [7]:
from llama_index import SimpleDirectoryReader

# Load the subset PDF written by the earlier page-selection cell.
# `load_data()` returns the document objects that the index builders consume.
documents = SimpleDirectoryReader(
    input_files=["IPCC_AR6_WGII_Chapter03_subset.pdf"]
).load_data()

# Debug aid: uncomment to check how many documents were loaded.
# print(len(documents))

## Index-building functions

In [32]:
from llama_index import ServiceContext, VectorStoreIndex
from llama_index.node_parser import SentenceWindowNodeParser

def build_basic_index(documents, llm, embed_model=None):
    """Build a plain vector-store index over ``documents``.

    Args:
        documents: Documents to index (as returned by a LlamaIndex reader).
        llm: LLM passed to the service context.
        embed_model: Embedding model passed to the service context. When
            ``None`` (the default) a fresh ``OpenAIEmbedding()`` is created
            per call.

    Returns:
        The ``VectorStoreIndex`` built from ``documents``.
    """
    # Use a None sentinel instead of `embed_model=OpenAIEmbedding()`: a default
    # expression is evaluated once, when the `def` runs, which would construct
    # an embedding client at cell-execution time and share it across all calls.
    if embed_model is None:
        embed_model = OpenAIEmbedding()

    basic_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
    basic_index = VectorStoreIndex.from_documents(
        documents=documents, service_context=basic_context
    )
    return basic_index

def build_sentence_window_index(documents, llm, embed_model=None, window_size=3):
    """Build a vector-store index whose nodes come from a sentence-window parser.

    Args:
        documents: Documents to index (as returned by a LlamaIndex reader).
        llm: LLM passed to the service context.
        embed_model: Embedding model passed to the service context. When
            ``None`` (the default) a fresh ``OpenAIEmbedding()`` is created
            per call.
        window_size: Forwarded to ``SentenceWindowNodeParser.from_defaults``;
            defaults to 3, the value previously hard-coded here.

    Returns:
        The ``VectorStoreIndex`` built from ``documents`` with the
        sentence-window node parser.
    """
    # Use a None sentinel instead of `embed_model=OpenAIEmbedding()`: a default
    # expression is evaluated once, when the `def` runs, which would construct
    # an embedding client at cell-execution time and share it across all calls.
    if embed_model is None:
        embed_model = OpenAIEmbedding()

    # The "window" metadata key must match the `target_metadata_key` given to
    # the MetadataReplacementPostProcessor used at query time.
    node_parser = SentenceWindowNodeParser.from_defaults(
        window_size=window_size,
        window_metadata_key="window",
        original_text_metadata_key="original-text"
    )
    sentence_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
        node_parser=node_parser
    )
    sentence_index = VectorStoreIndex.from_documents(
        documents=documents, service_context=sentence_context
    )
    return sentence_index

### Configure TruLens Evaluation framework

In [None]:
from trulens_eval.feedback import Groundedness, GroundTruthAgreement
from trulens_eval import Tru
from trulens_eval import OpenAI, Feedback, TruLlama
import numpy as np

# Initialize the main entry-point to TruLens, Tru
tru = Tru()

# NOTE: `OpenAI` below is the trulens_eval provider imported in this cell. It
# shadows the llama_index.llms.OpenAI class imported earlier in the notebook,
# so after this cell the bare name `OpenAI` no longer refers to the LlamaIndex
# LLM class.
openai_provider = OpenAI(model_engine="gpt-3.5-turbo-1106")

# Ground-truth question/answer pairs. The CSV must provide "Question" and
# "Answer" columns; each pair becomes a {"query", "response"} record.
qa_df = pd.read_csv("ipcc_test_questions.csv")
qa_set = [
    {"query": question, "response": answer}
    for question, answer in zip(qa_df["Question"], qa_df["Answer"])
]

# Initialize metrics to collect

# Answer relevance: evaluated on the app's input and its final output.
f_qa_relevance = Feedback(
    openai_provider.relevance_with_cot_reasons, name="Answer Relevance"
).on_input_output()

# Context relevance: evaluated on the app's input against the text of each
# selected source node, then averaged with np.mean.
# NOTE(review): this uses the same provider method as answer relevance.
# TruLens also ships a dedicated question/statement (context) relevance
# method -- confirm which one is intended for the installed version.
f_qs_relevance = Feedback(
    openai_provider.relevance_with_cot_reasons, name="Context Relevance"
).on_input().on(TruLlama.select_source_nodes().node.text).aggregate(np.mean)


# Groundedness: evaluated on the source-node text and the app's output,
# combined with the Groundedness object's own aggregator.
grounded = Groundedness(groundedness_provider=openai_provider)
f_groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons, name = "Groundedness")
    .on(TruLlama.select_source_nodes().node.text)
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)

# Ground truth agreement: evaluated on input/output using the reference
# answers in qa_set.
f_groundtruth = Feedback(
    GroundTruthAgreement(qa_set).agreement_measure, name="Answer Correctness"
).on_input_output()


# Feedback functions attached to every recorder built by get_trulens_recorder.
metrics = [f_qa_relevance, f_qs_relevance, f_groundedness, f_groundtruth]

def get_trulens_recorder(query_engine, app_id):
    """Wrap a query engine in a ``TruLlama`` recorder.

    Args:
        query_engine: The engine to wrap.
        app_id: Identifier passed to ``TruLlama`` for this app.

    Returns:
        A ``TruLlama`` instance configured with the module-level ``metrics``
        list as its feedbacks.
    """
    return TruLlama(query_engine, feedbacks=metrics, app_id=app_id)



## Run Evals for Basic Index

In [23]:
# Baseline pipeline: index the loaded documents, expose the index as a query
# engine with default settings, then wrap that engine in a recorder.
basic_query_index = build_basic_index(documents, llm, embed_model)
basic_query_engine = basic_query_index.as_query_engine()
basic_recorder = get_trulens_recorder(
    query_engine=basic_query_engine,
    app_id="Basic Query Engine",
)


In [24]:
# Send every test question through the basic engine while the recorder context
# is active (presumably so each call is captured and scored by the recorder's
# feedbacks -- confirm against the TruLens docs). Responses are not kept here.
with basic_recorder as recording:
    for q in qa_set:
        basic_query_engine.query(q['query'])

In [28]:
# Fetch recorded results from TruLens.
# NOTE(review): presumably an empty `app_ids` list selects every app -- confirm.
records, feedback = tru.get_records_and_feedback(app_ids=[])
# Uncomment to preview the first few records inline:
# records.head(5)
# Start (or reuse) the TruLens dashboard for browsing the results.
tru.run_dashboard()

Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.
Dashboard already running at path:   Network URL: http://192.168.0.24:8501



<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

## Run Evals for Sentence Window Index

In [33]:
from llama_index.indices.postprocessor import MetadataReplacementPostProcessor
# The post-processor targets the "window" metadata key, the same key the
# sentence-window node parser writes in build_sentence_window_index.
postproc = MetadataReplacementPostProcessor(target_metadata_key="window")

# Sentence-window pipeline: build the index, expose it as a query engine that
# applies the post-processor, then wrap that engine in a recorder.
sentence_window_index = build_sentence_window_index(documents, llm, embed_model)
sentence_query_engine = sentence_window_index.as_query_engine(node_postprocessors=[postproc])
sentence_recorder = get_trulens_recorder(
    query_engine=sentence_query_engine,
    app_id="Sentence Window Query Engine",
)

In [34]:
# Send every test question through the sentence-window engine while the
# recorder context is active (presumably so each call is captured and scored
# by the recorder's feedbacks -- confirm against the TruLens docs).
# Responses are not kept here.
with sentence_recorder as recording:
    for q in qa_set:
        sentence_query_engine.query(q['query'])

A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x1bb4c527610 is calling an instrumented method <function BaseQueryEngine.query at 0x000001BB1B3D6660>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x1bb22254610) using this function.
A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x1bb4c527610 is calling an instrumented method <function RetrieverQueryEngine.retrieve at 0x000001BB1BEF7740>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x1bb22254610) using this function.
A new object of type <class 'llama_index.indices.vector_store.retrievers.retriever.VectorIndexRetriever'> at 0x1bb4c527190 is calling an instrumented method <function BaseRetriever.retrieve at 0x000001BB1E1319E0>. The path of this call may be incorrect.
Guessing path of new object is app.retriever based on ot

## Run Evals for your Custom RAG App