# Notebook for Q&A Offline Evaluation - Part 1

## Step 1: Create a set of possible questions for each Q&A (aka ground truth)

In [17]:
import os

from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq 
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.prompts import (
    PromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    ChatPromptTemplate,
)

from langchain_core.output_parsers import StrOutputParser
from langchain.chains import RetrievalQA
from langchain.prompts import (
    PromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    ChatPromptTemplate,
)

from langchain_groq import ChatGroq 

### loading the q&a vector database

In [19]:
# File paths and other configurations, all read from environment variables.
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")
LLM_MODEL = os.getenv("LLM_MODEL")
INDEX_NAME = os.getenv("INDEX_NAME")

# Fail fast with an actionable message: os.getenv returns None for an unset
# variable, and that None would otherwise be handed straight to the embedding
# model and the index loader below. Only the two variables this cell actually
# uses are checked here.
_missing_settings = [
    name
    for name, value in (
        ("EMBEDDING_MODEL", EMBEDDING_MODEL),
        ("INDEX_NAME", INDEX_NAME),
    )
    if value is None
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )

# Embedding model used when querying the stored index.
embed_model = FastEmbedEmbeddings(model_name=EMBEDDING_MODEL)

# NOTE(security): allow_dangerous_deserialization=True opts in to loading
# pickled data from disk. Only point INDEX_NAME at an index you created
# yourself or otherwise trust.
new_vector_store = FAISS.load_local(
    INDEX_NAME, embed_model, allow_dangerous_deserialization=True
)

# Access the document store.
# `_dict` is a private attribute of the docstore; later cells iterate it with
# `.items()` as (doc_id, document) pairs.
stored_docs = new_vector_store.docstore._dict

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/740 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.24k [00:00<?, ?B/s]

model_optimized.onnx:   0%|          | 0.00/218M [00:00<?, ?B/s]

### a prompt for emulating test questions

In [46]:
# Name of the Groq-hosted chat model, read from the environment.
LLM_MODEL = os.getenv("LLM_MODEL")
if LLM_MODEL is None:
    # os.getenv returns None when the variable is unset; stop here with a clear
    # message instead of passing None to the LLM client.
    raise RuntimeError("Missing required environment variable: LLM_MODEL")

# Initialize the Groq LLM
groq_llm = ChatGroq(
    model=LLM_MODEL,
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2
)

# Prompt asking the model to act as a prospective client and produce five
# questions for one FAQ record. `{question}` and `{text}` are filled in per
# record; the reply is requested as a bare JSON array of strings.
emulation_prompt = """You emulate a potential client who is considering hypnotherapy services.
Formulate 5 questions this client might ask based on a FAQ record.
The record should contain the answer to the questions, and the questions should be complete and not too short.
If possible, use as few words as possible from the record.

The record:

question: {question}
answer: {text}

Provide the output as a valid JSON array without using code blocks, extra characters like asterisks (*), or numbering. The format should be exactly:

["question1", "question2", ..., "question5"]"""


# The whole prompt is sent as a single system message.
emulation_system_prompt = SystemMessagePromptTemplate(
    prompt=PromptTemplate(input_variables=["question","text"], template=emulation_prompt)
)

messages = [emulation_system_prompt]

emulation_template = ChatPromptTemplate(
    input_variables=["question","text"], messages=messages
)
# Backward-compatible alias for the original (misspelled) variable name.
emulation_tamplate = emulation_template

# Reduces the chat model's message output to a plain string.
output_parser = StrOutputParser()

# prompt -> LLM -> string; invoke with {"question": ..., "text": ...}.
emulation_chain = emulation_template | groq_llm | output_parser


### testing the prompt

In [44]:
# Sample FAQ answer used to try the emulation chain on a single record.
answer = """Everyone is capable of change. Whether you will effect that change is largely down to you. Cognitive Hypnotherapy isn’t a miracle pill.
It’s a process of mental reprogramming. Our sessions will guide you to learn how to think, behave, and feel differently. 

Since I began practicing as a therapist, I’ve noticed some common traits among the clients who have made the most positive changes in their lives.
They are curious, willing to learn about themselves, and being prepared to put in the work. I can guide you. 
And I will support you always. But nobody can do it for you."""

# The chain expects both prompt variables: the FAQ question and its answer text.
sample_record = {
    "question": 'Q: Will hypnotherapy work for Me?',
    "text": answer,
}

# Print the raw string reply so the JSON-array formatting can be checked by eye.
reply = emulation_chain.invoke(sample_record)
print(reply)

["Can hypnotherapy work for me?", 
"What is the role of the therapist in cognitive hypnotherapy?", 
"What are the common traits of clients who have made positive changes through hypnotherapy?", 
"How does hypnotherapy lead to change?", 
"Is hypnotherapy a quick fix or a process of mental reprogramming?"]


### creating the ground truth dataset for testing the chatbot

In [48]:
import ast
import json
from tqdm import tqdm  # Import tqdm


def parse_question_list(reply):
    """Parse the LLM reply into a list of question strings.

    The prompt asks for a JSON array, so JSON is tried first. If that fails,
    the reply is parsed as a Python literal (which also accepts single-quoted
    strings), matching what this cell accepted originally.

    Args:
        reply: Raw string returned by the emulation chain.

    Returns:
        A list of question strings.

    Raises:
        ValueError: If the reply cannot be parsed, or does not contain a
            list/tuple made up only of strings.
    """
    text = reply.strip()
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        try:
            parsed = ast.literal_eval(text)
        except (ValueError, SyntaxError) as exc:
            raise ValueError(
                f"reply is neither valid JSON nor a Python literal: {text[:200]!r}"
            ) from exc

    if not isinstance(parsed, (list, tuple)) or not all(
        isinstance(item, str) for item in parsed
    ):
        raise ValueError(f"expected a list of strings, got: {text[:200]!r}")
    return list(parsed)


# Parallel lists: val_dict['questions'][i] holds the generated questions for
# the document whose id is val_dict['doc_id'][i].
val_dict = dict(doc_id=[], questions=[])

# Iterate over the documents with a progress bar
for doc_id, doc in tqdm(stored_docs.items(), desc="Processing Documents"):

    # The text before the first newline is passed to the prompt as the FAQ
    # question and the remainder as its answer.
    quest, answer = doc.page_content.split('\n', 1)
    reply = emulation_chain.invoke(dict(question=quest, text=answer))

    # Parse before appending anything, so a bad reply cannot leave the two
    # lists with different lengths; include the doc id to make the failing
    # record easy to find.
    try:
        questions = parse_question_list(reply)
    except ValueError as exc:
        raise ValueError(
            f"Could not parse generated questions for document {doc_id}: {exc}"
        ) from exc

    val_dict['doc_id'].append(doc_id)
    val_dict['questions'].append(questions)
   

Processing Documents: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:23<00:00,  1.35s/it]


In [56]:
import pandas as pd

# One row per source document; the 'questions' cell holds that document's
# whole list of generated questions.
df = pd.DataFrame.from_dict(val_dict)

# Explode the list column so every question gets its own row (doc_id is
# repeated), then renumber the rows from 0 since explode keeps the original
# row labels.
df_ground_truth = df.explode('questions').reset_index(drop=True)

df_ground_truth

Unnamed: 0,doc_id,questions
0,b76e60a1-3332-436d-ad4c-08aff5c83cad,What is hypnosis in your practice?
1,b76e60a1-3332-436d-ad4c-08aff5c83cad,How does hypnosis make positive suggestions to...
2,b76e60a1-3332-436d-ad4c-08aff5c83cad,Is hypnotherapy safe?
3,b76e60a1-3332-436d-ad4c-08aff5c83cad,Can you give examples of when a person might e...
4,b76e60a1-3332-436d-ad4c-08aff5c83cad,How does hypnotherapy compare to other tools a...
...,...,...
80,7e902e07-75a1-4def-aac0-e832e5e1df80,Can you tell me if your practice is governed b...
81,7e902e07-75a1-4def-aac0-e832e5e1df80,What measures do you have in place to ensure t...
82,7e902e07-75a1-4def-aac0-e832e5e1df80,Are there any medical conditions that would pr...
83,7e902e07-75a1-4def-aac0-e832e5e1df80,Could you explain why certain conditions such ...


In [57]:
# Write the ground-truth table to disk; index=False leaves the row index out
# of the CSV.
df_ground_truth.to_csv(path_or_buf='ground-truth-data.csv', index=False)