In [None]:
import random
import re

from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter,CharacterTextSplitter
from langchain_community.document_loaders import UnstructuredFileLoader
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough,RunnableLambda
from langchain.prompts import ChatPromptTemplate
from langchain_community.chat_models import ChatOllama
import pandas as pd
from langchain.schema import Document
from sentence_transformers import SentenceTransformer
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    AutoModel,AutoConfig
)
from transformers.modeling_utils import load_sharded_checkpoint
import torch
from datasets import Dataset, load_dataset
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
from peft import PeftModel
from peft import AutoPeftModelForCausalLM
from transformers import pipeline
#from langchain.llms import HuggingFacePipeline
from langchain_community.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

  from tqdm.autonotebook import tqdm, trange
2024-11-28 15:23:03.148631: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1732836183.212287    5498 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1732836183.231999    5498 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-28 15:23:03.383210: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
def split_dialogue_Q_A(conversation):
    """Split a ``Q: ... / A: ...`` transcript into parallel question/answer lists.

    Every pair is expected to be a line starting with ``Q: `` followed by a
    line starting with ``A: ``. The answer runs up to the next newline or,
    for the final pair, up to the end of the text, so a transcript that does
    not end with a newline keeps its last pair.

    Args:
        conversation: Full transcript text.

    Returns:
        A ``(questionlist, answerlist)`` tuple of equally long lists of
        strings, in transcript order.
    """
    # DOTALL lets a question span several lines; the lazy answer group stops
    # at the first newline or at the end of the input, whichever comes first.
    pattern = re.compile(r'Q: (.*?)\nA: (.*?)(?:\n|\Z)', re.DOTALL)
    questionlist = []
    answerlist = []

    for question, answer in pattern.findall(conversation):
        questionlist.append(question)
        answerlist.append(answer)

    return questionlist, answerlist

In [None]:
# Run on the GPU when torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Default model location; a later cell reassigns MODEL_NAME before loading.
MODEL_NAME = "/content/drive/MyDrive/298B_Simpsons/llava7b"

# Persona instructions placed at the top of every training prompt.
# The surrounding newlines of the literal are removed by strip().
DEFAULT_SYSTEM_PROMPT = """
Forget you are an AI model. Now you are Dr. Sheldon Cooper from the TV show "The Big Bang Theory." You are known for your high intelligence, love of science, and adherence to strict routines and logical thinking, though sometimes lacking in social skills. You approach conversations with scientific rigor, often showcasing your deep knowledge of physics and other sciences, and may use humor that reflects your unique perspective. Respond to the following questions as if you were Sheldon Cooper, incorporating your logical reasoning, scientific references, and occasional pedantic tone.
""".strip()

In [None]:
# Name of the embedding model handed to HuggingFaceEmbeddings.
model_name = "moka-ai/m3e-base"

# Embedding function used by the Chroma store below; the target device is
# passed through model_kwargs.
embeddings = HuggingFaceEmbeddings(model_name=model_name,model_kwargs={'device':DEVICE})

  embeddings = HuggingFaceEmbeddings(model_name=model_name,model_kwargs={'device':DEVICE})


In [None]:
# Open the Chroma store persisted under "sheldon_DB", using the embedding
# function defined above.
ragdb = Chroma(persist_directory="sheldon_DB", embedding_function=embeddings)

# Retriever configured with k=3 search results per query.
retriever = ragdb.as_retriever(search_kwargs={'k': 3})

  ragdb = Chroma(persist_directory="sheldon_DB", embedding_function=embeddings)


In [None]:
def format_docs(docs):
    """Join the ``page_content`` of every document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        One string; empty when ``docs`` is empty.
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)

def generate_training_prompt(question, response, system_prompt = DEFAULT_SYSTEM_PROMPT, eos_token = "</s>"):
    """Build one supervised training example as a single prompt string.

    The retriever is queried with ``question`` and the retrieved documents
    are rendered by ``format_docs`` into the ``Context:`` section.

    Args:
        question: User question; also used as the retrieval query.
        response: Target answer placed after ``ASSISTANT:``.
        system_prompt: Text placed at the very top of the prompt.
        eos_token: End-of-sequence marker appended directly after the
            response. The previous literal ``<\\s>`` was an invalid escape
            sequence and not an end-of-sequence token.

    Returns:
        The prompt with surrounding whitespace stripped.
    """
    context = format_docs(retriever.invoke(question))
    # The four-space indentation inside the literal is part of the emitted
    # prompt and is intentionally left as it was.
    prompt = f"""{system_prompt}
    Context: {context}
    USER:
    {question}
    ASSISTANT:
    {response}{eos_token}
    """.strip()
    return prompt


def create_model_and_tokenizer(MODEL_NAME):
    """Load a 4-bit quantized causal language model together with its tokenizer.

    Args:
        MODEL_NAME: Identifier or path passed to both ``from_pretrained`` calls.

    Returns:
        ``(model, tokenizer)``. The tokenizer pads on the right and reuses
        its EOS token as the padding token.
    """
    # NF4 4-bit weights with float16 compute.
    quantization = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )
    load_kwargs = {
        "use_safetensors": True,
        "quantization_config": quantization,
        "trust_remote_code": True,
        "device_map": "auto",
    }
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **load_kwargs)

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    tokenizer.padding_side = "right"
    tokenizer.pad_token = tokenizer.eos_token

    return model, tokenizer

def generate_response(model, text: str):
    """Generate a continuation of ``text`` and return only the new part.

    Relies on the module-level ``tokenizer`` and ``DEVICE``.

    Args:
        model: Object exposing ``generate``.
        text: Prompt to continue.

    Returns:
        The decoded tokens produced after the prompt, special tokens removed.
    """
    encoded = tokenizer(text, return_tensors="pt").to(DEVICE)
    prompt_token_count = len(encoded["input_ids"][0])
    with torch.inference_mode():
        generated = model.generate(**encoded, max_new_tokens=256, temperature=0.0001)
    # Drop the echoed prompt tokens so only the completion is decoded.
    completion_ids = generated[0][prompt_token_count:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True)

In [None]:
# from transformers import AutoProcessor, AutoModelForPreTraining

# MODEL_NAME = "/content/drive/MyDrive/298B_Simpsons/llava7b"

# # Load model directly
# from transformers import AutoProcessor, AutoModelForPreTraining

# processor = AutoProcessor.from_pretrained("llava-hf/llava-1.5-7b-hf")
# model = AutoModelForPreTraining.from_pretrained("llava-hf/llava-1.5-7b-hf")


In [None]:
from transformers import AutoProcessor, AutoModelForPreTraining, BitsAndBytesConfig, AutoTokenizer
def custom_format(docs):
  """Keep only the text after the last "A:" of each document, one per line.

  Args:
    docs: Iterable of objects exposing a ``page_content`` string.

  Returns:
    The extracted pieces concatenated, each followed by a newline.
  """
  answer_lines = (document.page_content.split("A:")[-1] + "\n" for document in docs)
  return "".join(answer_lines)

def generate_training_prompt(question, response, system_prompt = DEFAULT_SYSTEM_PROMPT, eos_token = "</s>"):
    """Build one supervised training example as a single prompt string.

    The retriever is queried with ``question`` and the retrieved documents
    are rendered by ``format_docs`` into the ``Context:`` section. The user
    turn is prefixed with "Sheldon Cooper, ".

    Args:
        question: User question; also used as the retrieval query.
        response: Target answer placed directly after ``ASSISTANT:``.
        system_prompt: Text placed at the very top of the prompt.
        eos_token: End-of-sequence marker appended directly after the
            response. The previous literal ``<\\s>`` was an invalid escape
            sequence and not an end-of-sequence token.

    Returns:
        The prompt with surrounding whitespace stripped.
    """
    context = format_docs(retriever.invoke(question))
    # The four-space indentation inside the literal is part of the emitted
    # prompt and is intentionally left as it was.
    prompt = f"""{system_prompt}
    Context: {context}
    USER: Sheldon Cooper, {question}
    ASSISTANT:{response}{eos_token}
    """.strip()
    return prompt

def create_model_and_processor(MODEL_NAME):
    """Load the 4-bit quantized model plus its processor and tokenizer.

    Args:
        MODEL_NAME: Identifier or path handed to each ``from_pretrained`` call.

    Returns:
        ``(model, processor, tokenizer)``.
    """
    # NF4 4-bit weights with float16 compute.
    quantization = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )
    load_kwargs = {
        "use_safetensors": True,             # load weights from safetensors files
        "quantization_config": quantization,  # apply the 4-bit configuration above
        "trust_remote_code": True,           # allow custom code from the model repo
        "device_map": "auto",                # place the model on available devices
    }
    model = AutoModelForPreTraining.from_pretrained(MODEL_NAME, **load_kwargs)

    processor = AutoProcessor.from_pretrained(MODEL_NAME)

    # The tokenizer's padding token and padding side are left at their defaults.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

    return model, processor, tokenizer

# Example usage:
# MODEL_NAME is reassigned here, replacing the value set in the configuration
# cell; the model, processor and tokenizer loaded below become module-level
# names used by later cells.
MODEL_NAME = "llava-hf/llava-1.5-7b-hf"  # Replace with the actual path or model name for LLava 7b
model, processor, tokenizer = create_model_and_processor(MODEL_NAME)


Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

tokenizer_config.json:   0%|          | 0.00/1.45k [00:00<?, ?B/s]

Some kwargs in processor config are unused and will not have any effect: num_additional_image_tokens. 


Support for third party widgets will remain active for the duration of the session. To disable support:

In [None]:
file_path = 'Sheldon_Q&A.txt'

# A dedicated, seeded generator makes the train/test split identical on every
# run instead of depending on the global random state.
SPLIT_SEED = 42
TEST_FRACTION = 0.2
split_rng = random.Random(SPLIT_SEED)

with open(file_path, 'r', encoding='utf-8') as file:
    texts = file.read()
questionlist, answerlist = split_dialogue_Q_A(texts)

tmpList = []    # every example, "text" only
trainList = []  # training examples, "text" only
testList = []   # held-out examples, "text" plus the raw "question"
for j, (question, answer) in enumerate(zip(questionlist, answerlist)):
    if not isinstance(question, str):
        continue
    prompt_text = generate_training_prompt(question, answer, system_prompt=DEFAULT_SYSTEM_PROMPT)
    # Each list receives its own dict: adding "question" to a held-out row
    # must not change the row already stored in tmpList, otherwise the
    # columns of `data` would depend on which rows were sampled.
    tmpList.append({"text": prompt_text})
    if j % 10 == 0:
        print(j)
    if split_rng.uniform(0, 1) < TEST_FRACTION:
        testList.append({"text": prompt_text, "question": question})
    else:
        trainList.append({"text": prompt_text})

data = Dataset.from_list(tmpList)
train_data = Dataset.from_list(trainList)
test_data = Dataset.from_list(testList)

0
10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190


In [None]:
# Re-create the Chroma store handle and the k=3 retriever; this rebinds the
# `ragdb` and `retriever` names that an earlier cell already defined with the
# same arguments.
ragdb = Chroma(persist_directory="sheldon_DB", embedding_function=embeddings)
retriever = ragdb.as_retriever(search_kwargs={'k': 3})

In [None]:
from transformers import LlavaForConditionalGeneration
# Directory of the fine-tuned adapter checkpoint to load.
OUTPUT_DIR = "trained_model5/checkpoint-160"

# Attach the adapter stored in OUTPUT_DIR to the already loaded base model.
newmodel = PeftModel.from_pretrained(model, OUTPUT_DIR)
# newmodel=AutoModelForPreTraining.from_pretrained(OUTPUT_DIR,local_files_only=True)

# # newmodel=AutoModel.from_config(configs)

# Sampling-based text-generation pipeline around the adapted model.
# return_full_text=True keeps the prompt in the generated text; the chain's
# output parser later keeps only what follows the last 'ASSISTANT:' marker.
text_generation_pipeline = pipeline(
    task='text-generation',
    model=newmodel,
    tokenizer=tokenizer,
    do_sample=True,
    temperature=0.6,
    repetition_penalty=1.2,
    return_full_text=True,
    max_new_tokens=400,
    top_k=30,
    top_p= 0.8,
    no_repeat_ngram_size =2
)
# mistral_llm=HuggingFacePipeline(pipeline=text_generation_pipeline)

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'GitForCausalLM', 'GlmForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'GraniteForCausalLM', 'GraniteMoeForCausalLM', 'JambaForCausalLM', 'JetMoeForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'Mamba2ForCausalLM', 'MarianForCausalLM', 'MBartForCausa

In [None]:
def format_docs(docs):
    """Render retrieved documents as one string with a blank line between them.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The joined contents; empty when ``docs`` is empty.
    """
    pieces = []
    for document in docs:
        pieces.append(document.page_content)
    return "\n\n".join(pieces)

def convert_to_string(prompt_value):
    """Return the plain-string form of a prompt value.

    Args:
        prompt_value: Object exposing ``to_string()``.

    Returns:
        Whatever ``prompt_value.to_string()`` returns.
    """
    rendered = prompt_value.to_string()
    return rendered

# Inference prompt: persona instructions, auxiliary style hints, the retrieved
# context and the user question. The template ends with 'ASSISTANT:' so the
# generated text continues from that marker.
prompt_template='''Forget you are an AI model. Now you are Dr. Sheldon Cooper from the TV show "The Big Bang Theory." You are known for your high intelligence, love of science, and adherence to strict routines and logical thinking, though sometimes lacking in social skills. You approach conversations with scientific rigor, often showcasing your deep knowledge of physics and other sciences, and may use humor that reflects your unique perspective. Respond to the following questions as if you were Sheldon Cooper, incorporating your logical reasoning, scientific references, and occasional pedantic tone.
Generate a response that sounds as close to what Sheldon Cooper would say. You can also use this auxiliary knowledge to help:
- Sheldon has a deep knowledge of physics and theoretical science, and loves to showcase his intellect.
- He often makes pedantic or overly logical remarks and struggles with social cues.
- Common phrases include "Bazinga!" and references to his need for routine and structure.
- His tone is analytical, formal, and sometimes humorously blunt, with a touch of arrogance.
Context: {context}
USER: Sheldon Cooper, {question}
ASSISTANT:'''
# The two placeholders filled in by the chain are {context} and {question}.
prompt = PromptTemplate(input_variables=["context", "question"],
            template=prompt_template)

# llm_chain = LLMChain(llm=mistral_llm, prompt=prompt)

In [None]:
def outputParser(lst):
    """Extract the assistant reply from the pipeline output.

    Args:
        lst: Sequence whose first item is a mapping with a
            ``'generated_text'`` string.

    Returns:
        The text after the last ``'ASSISTANT:'`` marker, or the whole text
        when the marker does not occur.
    """
    first_generation = lst[0]
    full_text = first_generation['generated_text']
    _, _, reply = full_text.rpartition('ASSISTANT:')
    return reply
# RAG chain: the incoming question is passed through unchanged as "question"
# and, in parallel, sent to the retriever whose documents are rendered by
# format_docs as "context". Both fill the prompt template, which is converted
# to a plain string, run through the generation pipeline and finally reduced
# to the assistant reply by outputParser.
rag_chain = (
 {      "context": RunnablePassthrough() | retriever | format_docs,
        "question": RunnablePassthrough(),
        }
        | prompt
        | RunnableLambda(convert_to_string)
        | text_generation_pipeline
        | outputParser
)

In [None]:
# Conversation so far, as a list of {"role", "content"} messages.
chat_history = []


def chatbot_response(message):
    """Answer ``message`` through the RAG chain and return the updated history.

    The module-level ``chat_history`` is rebound to a new list containing the
    previous messages plus the new user and assistant turns.

    Args:
        message: The user's question.

    Returns:
        The full message history including the new exchange.
    """
    global chat_history
    reply = rag_chain.invoke(message)
    new_turns = [
        {"role": "user", "content": message},
        {"role": "assistant", "content": reply},
    ]
    chat_history = chat_history + new_turns
    return chat_history

In [None]:
import gradio as gr

# Gradio UI: a single text box feeds chatbot_response, and the message history
# it returns is rendered by a Chatbot component in "messages" format.
# NOTE(review): the avatar images are absolute Google Drive paths; they will
# only resolve where that drive is mounted.
iface = gr.Interface(
    fn=chatbot_response,
    inputs=[
        gr.Textbox(placeholder="Enter your question"),
    ],
    outputs=[
        gr.Chatbot(type="messages", avatar_images=("/content/drive/Shareddrives/298B/DATA298B/pic/avatar_user.png",
                                                   "/content/drive/Shareddrives/298B/DATA298B/pic/avatar_bot.png"),value=[])
    ],
    title="Sheldon Chat Robot",
    description="This is an example of a chatbot built using Gradio."
)

# Start the web app.
iface.launch()

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




In [None]:
# Smoke test: send one question through the full RAG chain and show the
# parsed reply.
# NOTE(review): the saved output for this cell is very short (' Hello! As Dr');
# worth checking the generation settings if that is unexpected.
rag_chain.invoke('who are you?')

' Hello! As Dr'

In [None]:
# from trulens_eval import TruChain, Tru

# tru = Tru()
# tru.reset_database()

  from trulens_eval import TruChain, Tru


🦑 Initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `TruSession` to prevent this.


Updating app_name and app_version in apps table: 0it [00:00, ?it/s]
Updating app_id in records table: 0it [00:00, ?it/s]
Updating app_json in apps table: 0it [00:00, ?it/s]


In [None]:
# from trulens.providers.openai import OpenAI
# from trulens_eval import Feedback
# import numpy as np
# from trulens.apps.langchain import TruChain
# from ipytree import Tree, Node

# provider = OpenAI(api_key=os.environ["OPENAI_API_KEY"])  # key redacted: never commit API keys; revoke the one that was stored here

# context = TruChain.select_context(rag_chain)


# # Groundedness
# f_groundedness = (
#     Feedback(provider.groundedness_measure_with_cot_reasons)
#     .on(context.collect())
#     .on_output()
#     .on_input()
# )

# # Relevance between the overall question and answer.
# f_answer_relevance = (
#     Feedback(provider.relevance)
#     .on_input_output()
# )
# # Question/statement relevance between question and each context chunk.
# f_context_relevance = (
#     Feedback(provider.context_relevance_with_cot_reasons)
#     .on_input()
#     .on(context)
#     .aggregate(np.mean)
# )

✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.first.steps__.context.middle[0].invoke.rets[:].page_content.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In groundedness_measure_with_cot_reasons, input criteria will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.first.steps__.context.middle[0].invoke.rets[:].page_content .


In [None]:
# tru_recorder = TruChain(rag_chain,
#     app_id='Lavva3_Chain',
#     feedbacks=[f_answer_relevance, f_context_relevance, f_groundedness])

# response, tru_record = tru_recorder.with_record(rag_chain.invoke, "Who is your best friend?")

# json_like = tru_record.layout_calls_as_app()

KeyboardInterrupt: 

In [None]:
# from ipytree import Tree, Node

# def display_call_stack(data):
#     tree = Tree()
#     tree.add_node(Node('Record ID: {}'.format(data['record_id'])))
#     tree.add_node(Node('App ID: {}'.format(data['app_id'])))
#     tree.add_node(Node('Cost: {}'.format(data['cost'])))
#     tree.add_node(Node('Performance: {}'.format(data['perf'])))
#     tree.add_node(Node('Timestamp: {}'.format(data['ts'])))
#     tree.add_node(Node('Tags: {}'.format(data['tags'])))
#     tree.add_node(Node('Main Input: {}'.format(data['main_input'])))
#     tree.add_node(Node('Main Output: {}'.format(data['main_output'])))
#     tree.add_node(Node('Main Error: {}'.format(data['main_error'])))

#     calls_node = Node('Calls')
#     tree.add_node(calls_node)

#     for call in data['calls']:
#         call_node = Node('Call')
#         calls_node.add_node(call_node)

#         for step in call['stack']:
#             step_node = Node('Step: {}'.format(step['path']))
#             call_node.add_node(step_node)
#             if 'expanded' in step:
#                 expanded_node = Node('Expanded')
#                 step_node.add_node(expanded_node)
#                 for expanded_step in step['expanded']:
#                     expanded_step_node = Node('Step: {}'.format(expanded_step['path']))
#                     expanded_node.add_node(expanded_step_node)

#     return tree

# # Usage
# tree = display_call_stack(json_like)

In [None]:
# counter = 0
# with tru_recorder as recording:
#     for x in testList:
#         test_question = x['question']
#         rag_chain.invoke(test_question)
#         counter +=1
#         if counter > 200:
#             break


# recs = recording.records
# display(recs)

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


KeyboardInterrupt: 

In [None]:
# records, feedback = tru.get_records_and_feedback(app_ids=["app_hash_49fc59b356d8e2b2020549c289b39b4b"])

# records.head()

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,...,groundedness_measure_with_cot_reasons_calls,relevance feedback cost in USD,context_relevance_with_cot_reasons feedback cost in USD,groundedness_measure_with_cot_reasons feedback cost in USD,app_name,app_version,latency,total_tokens,total_cost,cost_currency
0,app_hash_49fc59b356d8e2b2020549c289b39b4b,"{""tru_class_info"": {""name"": ""TruChain"", ""modul...",RunnableSequence(langchain_core.runnables.base),record_hash_75259d3a55b564cc889a426550e53484,""" what is your favorite video game, and why?""",""" As an AI language model, I don't have person...",-,"{""record_id"": ""record_hash_75259d3a55b564cc889...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-11-10T18:55:12.010735"", ""...",...,"[{'args': {'source': [""Q: what is your favori...",5.1e-05,0.000377,0.000192,Lavva3_Chain,base,5.591229,0,0.0,USD
1,app_hash_49fc59b356d8e2b2020549c289b39b4b,"{""tru_class_info"": {""name"": ""TruChain"", ""modul...",RunnableSequence(langchain_core.runnables.base),record_hash_29915dca21cb2078870342b347c55d0b,""" can you explain the concept of the \""scienti...",""" Sure! The \""Scientifiic Method\"" refers to a...",-,"{""record_id"": ""record_hash_29915dca21cb2078870...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-11-10T18:55:04.479396"", ""...",...,[{'args': {'source': ['Q: can you explain the...,5.7e-05,0.000417,0.001129,Lavva3_Chain,base,7.351659,0,0.0,USD
2,app_hash_49fc59b356d8e2b2020549c289b39b4b,"{""tru_class_info"": {""name"": ""TruChain"", ""modul...",RunnableSequence(langchain_core.runnables.base),record_hash_4a22b279d1b747989750b7b9fc6e654a,""" can you explain your research in the field o...",""" Certainly! In my line of research, dark mate...",-,"{""record_id"": ""record_hash_4a22b279d1b74798975...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-11-10T18:54:58.857457"", ""...",...,[{'args': {'source': ['Q: can you explain you...,5.4e-05,0.000424,0.00089,Lavva3_Chain,base,5.401978,0,0.0,USD
3,app_hash_49fc59b356d8e2b2020549c289b39b4b,"{""tru_class_info"": {""name"": ""TruChain"", ""modul...",RunnableSequence(langchain_core.runnables.base),record_hash_98b07c391102f2df59f490b789539110,""" what is your opinion on the use of social me...",""" As an AI language model, my opinions do not ...",-,"{""record_id"": ""record_hash_98b07c391102f2df59f...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-11-10T18:54:47.566372"", ""...",...,[{'args': {'source': ['Q: what is your opinion...,6.3e-05,0.000377,0.001668,Lavva3_Chain,base,11.136485,0,0.0,USD
4,app_hash_49fc59b356d8e2b2020549c289b39b4b,"{""tru_class_info"": {""name"": ""TruChain"", ""modul...",RunnableSequence(langchain_core.runnables.base),record_hash_8ea03685792dde9950ab0dc645779e17,""" what is your opinion on artificial intellige...",""" As an expert in theoretical physics, I have ...",-,"{""record_id"": ""record_hash_8ea03685792dde9950a...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-11-10T18:54:41.288229"", ""...",...,[{'args': {'source': ['Q: what is your opinio...,5.4e-05,0.000355,0.000858,Lavva3_Chain,base,6.110651,0,0.0,USD


In [None]:
# tru.get_leaderboard(app_ids=["app_hash_49fc59b356d8e2b2020549c289b39b4b"])

Unnamed: 0_level_0,Unnamed: 1_level_0,context_relevance_with_cot_reasons,groundedness_measure_with_cot_reasons,relevance,latency,total_cost
app_name,app_version,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Lavva3_Chain,base,0.641026,0.556079,0.74359,8.245453,0.0


In [None]:
# tru.run_dashboard()

Starting dashboard ...



  tru.run_dashboard()


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at http://192.168.50.235:48479 .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

In [None]:
from trulens_eval import TruChain, Tru, Feedback
from trulens.providers.openai import OpenAI
from trulens.apps.langchain import TruChain
import numpy as np
from ipytree import Tree, Node

import os

# Initialize TruLens
# NOTE: reset_database() clears previously recorded evaluation data.
tru = Tru()
tru.reset_database()

# Initialize provider with the OpenAI key taken from the environment.
# The key must never be written into the notebook; a missing variable raises
# KeyError here instead of failing later inside a feedback call.
provider = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

# Set up context: selector for the retrieved documents inside rag_chain.
context = TruChain.select_context(rag_chain)

# Define Feedbacks
#
# Each feedback is created exactly once. The nine output-only metrics were
# previously constructed twice in a row with identical arguments.

def _output_only_feedback(measure):
    """Wrap a provider measure in a Feedback that scores only the chain output."""
    return Feedback(measure).on_output()


f_correctness = _output_only_feedback(provider.correctness)
f_conciseness = _output_only_feedback(provider.conciseness)
f_maliciousness = _output_only_feedback(provider.maliciousness)
f_helpfulness = _output_only_feedback(provider.helpfulness)
f_harmfulness = _output_only_feedback(provider.harmfulness)
f_controversiality = _output_only_feedback(provider.controversiality)
f_misogyny = _output_only_feedback(provider.misogyny)
f_criminality = _output_only_feedback(provider.criminality)
f_insensitivity = _output_only_feedback(provider.insensitivity)

# Relevance between question and answer
f_answer_relevance = Feedback(provider.relevance).on_input_output()

# Context relevance for each context chunk, averaged over the chunks
f_context_relevance = (
    Feedback(provider.context_relevance_with_cot_reasons)
    .on_input()
    .on(context)
    .aggregate(np.mean)
)

# Groundedness of the output against the collected context
f_groundedness = (
    Feedback(provider.groundedness_measure_with_cot_reasons)
    .on(context.collect())
    .on_output()
    .on_input()
)

# Initialize TruLens Recorder with all feedbacks
# The recorder wraps rag_chain so that each recorded invocation is scored by
# every feedback listed below.
tru_recorder = TruChain(
    rag_chain,
    app_id='Lavva3_Chain',
    feedbacks=[
        f_correctness,
        f_conciseness,
        f_maliciousness,
        f_helpfulness,
        f_harmfulness,
        f_controversiality,
        f_misogyny,
        f_criminality,
        f_insensitivity,
        f_answer_relevance,
        f_context_relevance,
        f_groundedness
    ]
)

# Run and record a sample query; with_record returns both the chain's response
# and the record of that call.
response, tru_record = tru_recorder.with_record(rag_chain.invoke, "Who is your best friend?")

# Display call stack using IPyTree
# json_like is the record's call layout, consumed by display_call_stack below.
json_like = tru_record.layout_calls_as_app()

def display_call_stack(data):
    """Build an ipytree ``Tree`` summarising one recorded call layout.

    Args:
        data: Mapping indexed with the keys ``record_id``, ``app_id``,
            ``cost``, ``perf``, ``ts``, ``tags``, ``main_input``,
            ``main_output``, ``main_error`` and ``calls``. Every call has a
            ``stack`` of steps; a step has a ``path`` and optionally an
            ``expanded`` list of further steps.

    Returns:
        The populated tree: one node per summary field, followed by a
        'Calls' node holding one 'Call' node per recorded call.
    """
    summary_fields = (
        ('Record ID: {}', 'record_id'),
        ('App ID: {}', 'app_id'),
        ('Cost: {}', 'cost'),
        ('Performance: {}', 'perf'),
        ('Timestamp: {}', 'ts'),
        ('Tags: {}', 'tags'),
        ('Main Input: {}', 'main_input'),
        ('Main Output: {}', 'main_output'),
        ('Main Error: {}', 'main_error'),
    )

    tree = Tree()
    for template, key in summary_fields:
        tree.add_node(Node(template.format(data[key])))

    calls_node = Node('Calls')
    tree.add_node(calls_node)

    for call in data['calls']:
        call_node = Node('Call')
        calls_node.add_node(call_node)

        for step in call['stack']:
            step_node = Node('Step: {}'.format(step['path']))
            call_node.add_node(step_node)
            if 'expanded' not in step:
                continue
            # Nested steps hang off a dedicated 'Expanded' child.
            expanded_node = Node('Expanded')
            step_node.add_node(expanded_node)
            for sub_step in step['expanded']:
                expanded_node.add_node(Node('Step: {}'.format(sub_step['path'])))

    return tree

# Display the call stack
tree = display_call_stack(json_like)

# Test with a larger dataset.
# Upper bound on recorded questions; 201 matches the previous
# `counter > 200` break condition, which allowed 201 invocations.
MAX_EVAL_QUESTIONS = 201
with tru_recorder as recording:
    for x in testList[:MAX_EVAL_QUESTIONS]:
        rag_chain.invoke(x['question'])

# Display recorded evaluations
recs = recording.records
display(recs)

# Display leaderboard for evaluation metrics.
# The app id is read from the recorder instead of being pasted in as a
# hard-coded hash, so the lookup still matches if the app name or version
# ever changes.
tru.get_leaderboard(app_ids=[tru_recorder.app_id])


Updating app_name and app_version in apps table: 0it [00:00, ?it/s]
Updating app_id in records table: 0it [00:00, ?it/s]
Updating app_json in apps table: 0it [00:00, ?it/s]


✅ In correctness, input text will be set to __record__.main_output or `Select.RecordOutput` .
✅ In conciseness, input text will be set to __record__.main_output or `Select.RecordOutput` .
✅ In maliciousness, input text will be set to __record__.main_output or `Select.RecordOutput` .
✅ In helpfulness, input text will be set to __record__.main_output or `Select.RecordOutput` .
✅ In harmfulness, input text will be set to __record__.main_output or `Select.RecordOutput` .
✅ In controversiality, input text will be set to __record__.main_output or `Select.RecordOutput` .
✅ In misogyny, input text will be set to __record__.main_output or `Select.RecordOutput` .
✅ In criminality, input text will be set to __record__.main_output or `Select.RecordOutput` .
✅ In insensitivity, input text will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


[Record(record_id='record_hash_f0612998b560c383c138de171ea87eae', app_id='app_hash_49fc59b356d8e2b2020549c289b39b4b', cost=Cost(n_requests=16, n_successful_requests=120, n_completion_requests=16, n_classification_requests=0, n_classes=0, n_embedding_requests=0, n_embeddings=0, n_tokens=26591, n_stream_chunks=0, n_prompt_tokens=24952, n_completion_tokens=1639, n_cortex_guardrails_tokens=0, cost=0.004726199999999999, cost_currency='USD'), perf=Perf(start_time=datetime.datetime(2024, 11, 28, 16, 0, 6, 163646), end_time=datetime.datetime(2024, 11, 28, 16, 0, 13, 548266)), ts=datetime.datetime(2024, 11, 28, 16, 0, 13, 548422), tags='-', meta=None, main_input='Sheldon Cooper, you have always been a stickler for rules and order, but we have seen you grow as a person throughout the series. What experiences of yours have left a lasting impact on your character, challenging you to change your perspectives?', main_output=' Thank you for asking. As an individual who values logic and reason above a

Unnamed: 0_level_0,Unnamed: 1_level_0,conciseness,context_relevance_with_cot_reasons,controversiality,correctness,criminality,groundedness_measure_with_cot_reasons,harmfulness,helpfulness,insensitivity,maliciousness,misogyny,relevance,latency,total_cost
app_name,app_version,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Lavva3_Chain,base,0.383333,0.675,0.275,0.433333,0.0,0.723016,0.025,0.633333,0.033333,0.0,0.0,0.675,14.650875,0.015651


In [None]:
# Start the TruLens dashboard for browsing the recorded evaluations.
tru.run_dashboard()

Starting dashboard ...



  tru.run_dashboard()


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at http://192.168.50.235:53367 .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>