In [None]:
%pip install llama_index

In [None]:
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
import sys
from dotenv import load_dotenv

In [None]:
# Read a local .env file into the process environment; presumably this is
# where the os.environ lookups in the following cells get their values.
load_dotenv()

In [None]:
import os

# Show where the kernel is running from.
path = os.getcwd()
print(f"Current Directory {path}")

# Show the directory one level above the working directory.
print(os.path.abspath(os.path.join(path, os.pardir)))

In [None]:
# Required setting: this lookup raises KeyError if the variable is not set.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

In [None]:
# MongoDB collection and database names, both required in the environment
# (a missing variable raises KeyError).
COLLECTION_NAME = os.environ["COLLECTION_NAME"]
DB_NAME = os.environ["DB_NAME"]

In [None]:
from mongodb_service import mongodb_setup

# mongodb_setup is a project-local helper; it is unpacked here into a
# database handle and a client (exact types not visible from this notebook).
db, mongo_client = mongodb_setup(
    DB_NAME=DB_NAME,
    COLLECTION_NAME=COLLECTION_NAME,
)

In [None]:
# Point the vector store at the database/collection configured in the
# environment. Without db_name/collection_name the store falls back to its
# library defaults ("default_db" / "default_collection") and would not see
# the collection that DB_NAME / COLLECTION_NAME refer to.
# NOTE(review): the Atlas search index name is left at the library default —
# confirm it matches the index created for this collection.
vector_store = MongoDBAtlasVectorSearch(
    mongodb_client=mongo_client,
    db_name=DB_NAME,
    collection_name=COLLECTION_NAME,
)

In [None]:
# Build an index on top of the existing vector store.
# NOTE(review): Settings.embed_model is only assigned in a later cell. If the
# index resolves its embedding model when it is constructed, queries will be
# embedded with the library default rather than BAAI/bge-small-en-v1.5 —
# confirm which model the stored vectors were created with.
sentence_index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

In [None]:
# NOTE(review): this repeats the install command from the first cell of the
# notebook; it is likely redundant.
%pip install llama_index

In [None]:
# Presumably needed by the SentenceTransformerRerank model that the next
# cell instantiates — confirm.
%pip install torch sentence-transformers

In [None]:
from llama_index.core.postprocessor import (
    MetadataReplacementPostProcessor,
    SentenceTransformerRerank,
)

# Post-processor keyed on the "window" metadata entry of each retrieved node.
postproc = MetadataReplacementPostProcessor(target_metadata_key="window")

# Reranker limited to the 2 best nodes, using the BAAI/bge-reranker-base model.
rerank = SentenceTransformerRerank(model="BAAI/bge-reranker-base", top_n=2)

In [None]:
# Embedding integrations for LlamaIndex. The HuggingFace package backs the
# HuggingFaceEmbedding import in the next cell.
%pip install llama-index-embeddings-openai
%pip install llama-index-embeddings-huggingface

In [None]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI

# Register the LLM and the embedding model as LlamaIndex global defaults.
llm = OpenAI(model="gpt-4-turbo", temperature=0.1)
Settings.llm = llm
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Query engine: fetch 6 candidates, then run them through the two
# post-processors in the listed order (postproc first, rerank second).
sentence_window_engine = sentence_index.as_query_engine(
    llm=llm,
    similarity_top_k=6,
    node_postprocessors=[postproc, rerank],
)

In [None]:
from trulens_eval import Tru

tru = Tru()
# NOTE: destructive — judging by its name this wipes whatever the TruLens
# database already holds, so every run of this cell starts from scratch.
tru.reset_database()

In [None]:
import nest_asyncio

# Patch asyncio so that event loops can be nested; presumably required
# because the notebook kernel already runs its own loop.
nest_asyncio.apply()

In [None]:
import trulens_eval

# Accessed through the module so it does not clash with the `OpenAI` name
# that an earlier cell imported from llama_index.llms.openai.
provider = trulens_eval.OpenAI()

Answer Relevance

In [None]:
from trulens_eval import Feedback

# "Answer Relevance" feedback, evaluated on the (input, output) pair of each
# recorded call.
f_qa_relevance = (
    Feedback(provider.relevance_with_cot_reasons, name="Answer Relevance")
    .on_input_output()
)

In [None]:
# Context relevance: set up the selector for the retrieved context.

from trulens_eval import TruLlama

# Selector for the text of the source nodes of a recorded query; the
# feedback definitions in the cells below pass it to `.on(...)`.
context_selection = TruLlama.select_source_nodes().node.text


In [None]:
import numpy as np

# "Context Relevance" feedback: applied to the input and each selected
# context text, then averaged with np.mean.
# NOTE(review): the next cell assigns f_qs_relevance again (using the
# *_with_cot_reasons variant), so this definition is replaced before use.
f_qs_relevance = (
    Feedback(provider.qs_relevance, name="Context Relevance")
    .on_input()
    .on(context_selection)
    .aggregate(np.mean)
)

In [None]:
import numpy as np

# "Context Relevance" feedback using the provider method that also returns
# chain-of-thought reasons; applied to the input and each selected context
# text, then averaged with np.mean.
f_qs_relevance = Feedback(
    provider.qs_relevance_with_cot_reasons,
    name="Context Relevance",
).on_input().on(context_selection).aggregate(np.mean)

In [None]:
from trulens_eval.feedback import Groundedness

# Groundedness helper backed by the same provider object as the other
# feedback functions in this notebook.
grounded = Groundedness(groundedness_provider=provider)

In [None]:
# "Groundedness" feedback: takes the selected context text first and the
# app output second, and combines per-statement results with the helper's
# own aggregator.
f_groundedness = (
    Feedback(
        grounded.groundedness_measure_with_cot_reasons,
        name="Groundedness",
    )
    .on(context_selection)
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)

In [None]:
from trulens_eval import FeedbackMode, TruLlama

# Wrap the query engine in a TruLens recorder registered as "App_1" and
# attach the three feedback functions defined above.
tru_recorder = TruLlama(
    sentence_window_engine,
    app_id="App_1",
    feedbacks=[f_qa_relevance, f_qs_relevance, f_groundedness],
)

In [None]:
# Load the evaluation questions, one per line. Surrounding whitespace
# (including the trailing newline) is stripped, and blank lines are skipped
# so that no empty question is later sent to the query engine.
with open('eval_questions.txt', 'r') as file:
    eval_questions = [line.strip() for line in file if line.strip()]

eval_questions

In [None]:
# Send every evaluation question through the engine inside the recorder
# context. The query response itself is not kept in this cell.
for question in eval_questions:
    with tru_recorder as recording:
        sentence_window_engine.query(question)

In [None]:
# Fetch the recorded calls and the feedback column names.
# NOTE(review): app_ids=[] presumably means "no filter / all apps" — confirm.
records, feedback = tru.get_records_and_feedback(app_ids=[])
records.head()

In [None]:
import pandas as pd

# Lift the column-width limit so long questions and answers are shown in full.
pd.options.display.max_colwidth = None

# Question, answer and every feedback column side by side.
records[["input", "output", *feedback]]

In [None]:
# Show the leaderboard table.
# NOTE(review): app_ids=[] presumably means "all apps" — confirm.
tru.get_leaderboard(app_ids=[])

In [None]:
# Minimum IPython / ipywidgets versions; presumably prerequisites for the
# dashboard launched in the next cell — confirm.
%pip install "ipython>=8.12.0" "ipywidgets>=8.0.6"

In [None]:
# Launch the TruLens dashboard to browse the recorded runs and their scores.
tru.run_dashboard()