### Trulens Evaluation 
- [LangChain Quickstart](https://www.trulens.org/getting_started/quickstarts/langchain_quickstart/#create-rag) 
- [Text to Text](https://www.trulens.org/getting_started/quickstarts/text2text_quickstart/)
- [Feedback Function Implementations](https://www.trulens.org/component_guides/evaluation/feedback_implementations/)  
    - [Classification-based](https://www.trulens.org/component_guides/evaluation/feedback_implementations/stock/)  
        - [context_relevance_with_cot_reasons](https://www.trulens.org/reference/trulens/providers/openai/#trulens.providers.openai.OpenAI.context_relevance_with_cot_reasons)  
        - [context_relevance](https://www.trulens.org/reference/trulens/providers/langchain/provider/#trulens.providers.langchain.provider.Langchain.context_relevance)  

In [9]:
from openai import OpenAI
from trulens.core import Feedback, TruSession
from trulens.providers.openai import OpenAI as fOpenAI
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import PydanticOutputParser
from typing import List
import json
import pandas as pd
import os
from dotenv import load_dotenv

# Load environment variables and initialize clients
load_dotenv()
OpenAI_key = os.getenv("OPENAI_API_KEY")  

client = OpenAI()
session = TruSession()
session.reset_database()

Updating app_name and app_version in apps table: 0it [00:00, ?it/s]
Updating app_id in records table: 0it [00:00, ?it/s]
Updating app_json in apps table: 0it [00:00, ?it/s]


In [2]:
prompt_path = "prompts/classification_prompts/"

def read_prompt(prompt_path):
    with open(prompt_path, 'r') as file:
        return file.read()

system_input_stage_1 = read_prompt(prompt_path + "classification_system_stage_1.txt")
human_input_stage_1 = read_prompt(prompt_path + "classification_human_stage_1.txt")
system_input_stage_2 = read_prompt(prompt_path + "classification_system_stage_2.txt")
human_input_stage_2 = read_prompt(prompt_path + "classification_human_stage_2.txt")
system_input_stage_3 = read_prompt(prompt_path + "classification_system_stage_3.txt")
human_input_stage_3 = read_prompt(prompt_path + "classification_human_stage_3.txt")

In [3]:
class answer_format(BaseModel):
    answer : str = Field(description="answer to the question")

llm = ChatOpenAI(model="gpt-4o", api_key=OpenAI_key, temperature=0, max_tokens=16384) # gpt-40 max tokens = 16384 

parser = PydanticOutputParser(pydantic_object=answer_format)
format_instructions = parser.get_format_instructions()


def llm_standalone(prompt):
    system_input_stage_1 = read_prompt(prompt_path + "classification_system_stage_1.txt")
    df = pd.read_csv('company_data.csv')

    stage_1_prompt = ChatPromptTemplate.from_messages([
        ("system", system_input_stage_1),
        ("user", prompt)
    ])

    stage_1_chain = stage_1_prompt | llm | parser    
    
    all_responses = []
    first_row = df.iloc[0]
    response = stage_1_chain.invoke({
        "company_name": first_row['company'],
        "title": first_row['title'],
        "content": first_row['content'], 
        "format_instructions": format_instructions
    })
    all_responses.append(response.answer)
    return all_responses

prompt_input = human_input_stage_1 
prompt_output = llm_standalone(prompt_input)

#### I got stuck here
- was trying to use ```.on( // plain text data // )```, for example ```Feedback(...).on_input().on( // text // ).on_output()``` to evaluate the relevance of the input and output based on the context.  
- failed to find a method
    -> implemented it with RAG in Classification notebook 

In [4]:
## Initialize Feedback Function(s)

from trulens_eval import Feedback, Select
from trulens_eval.feedback import OpenAI as fOpenAI

# Initialize OpenAI-based feedback function collection class:
fopenai = fOpenAI(model_engine = "gpt-4o")

# context relevance feedback function
f_qs_relevance = Feedback(
    fopenai.relevance_with_cot_reasons, name = "Context Relevance" 
).on_input_output()

  from trulens_eval import Feedback, Select


✅ In Context Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .


  from trulens_eval.feedback import OpenAI as fOpenAI


In [5]:
## Instrument the callable for logging with TruLens 
from trulens.apps.basic import TruBasicApp

tru_llm_standalone_recorder = TruBasicApp(
    llm_standalone, app_name="Happy Bot", feedbacks=[f_qs_relevance]
)

with tru_llm_standalone_recorder as recording:
    tru_llm_standalone_recorder.app(prompt_input)

In [6]:
session.get_records_and_feedback()[0]

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,ts,app_name,app_version,latency,total_tokens,total_cost,cost_currency
0,app_hash_0be59e2df0a8e350b033067e8e916dbe,"{""tru_class_info"": {""name"": ""TruBasicApp"", ""mo...",TruWrapperApp(trulens.apps.basic),record_hash_8c5972dc4d6e927525d882a4d13ce05d,Here are the definitions of categories and sub...,yes,-,{'record_id': 'record_hash_8c5972dc4d6e927525d...,"{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2024-11-29T07:31:50.515939"", ""...",2024-11-29T07:31:51.498128,Happy Bot,base,0.982039,5862,0.01473,USD


In [7]:
from trulens.dashboard import run_dashboard

run_dashboard(session)  # open a local streamlit app to explore

# stop_dashboard(session) # stop if needed

Starting dashboard ...


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at http://192.168.0.163:52475 .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

--- 
Testing Context Relevance Feedback Function 
- to use on(context) $\Longrightarrow$ BaseRetriever needed in the application $\Longleftrightarrow$ RAG  

In [8]:
from trulens.apps.langchain import TruChain
import numpy as np  

provider = OpenAI() 

context = TruChain.select_context(llm_standalone)

feedback = (
    Feedback(provider.context_relevance)
    .on_input()
    .on(context)
    )

ValueError: Cannot find any `BaseRetriever` in app.



---