# Notebook for Q&A Offline Evaluation

## Step 1: Create a set of possible questions for each Q&A (a.k.a. ground truth)

In [4]:
import os

from langchain.chains import RetrievalQA
from langchain.document_loaders import DirectoryLoader
from langchain.prompts import (
    ChatPromptTemplate,
    PromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.text_splitter import MarkdownHeaderTextSplitter
from langchain_community.document_loaders import TextLoader

In [5]:
# Load the markdown sources and break them into per-heading chunks.

# Folder that holds the markdown files
DATA_PATH = '../../data/markdown_files'

# Collect every file matching "**/*.md" under DATA_PATH, loaded via TextLoader
loader = DirectoryLoader(DATA_PATH, glob="**/*.md", loader_cls=TextLoader)
documents = loader.load()

# Heading markers "#" .. "####", each labelled "Header <level>"
headers_to_split_on = [("#" * level, f"Header {level}") for level in range(1, 5)]
splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)

# Flatten the chunks of all files into a single list
split_documents = []
for document in documents:
    split_documents.extend(splitter.split_text(document.page_content))


In [7]:
# Display the first chunk to sanity-check the split.
split_documents[0]

Document(metadata={'Header 1': 'Frequently Asked Questions about Daydream Therapy (FAQs)', 'Header 2': 'FAQ', 'Header 4': 'What is hypnosis and is it safe?'}, page_content='During hypnosis I use this state within you to make positive suggestions to your unconscious mind, based on what you tell me you want to achieve. Your unconscious is in a much more receptive position when you are in this relaxed state. Therefore, it is an incredibly effective tool, to take you towards your solution. When combined with other tools and techniques, such as NLP, can make short work of long-standing problems.  \nAnd yes, it is a safe, and natural state that we all go in and out of many times during our day, perhaps without even knowing it. If you have ever been driving along a road and then ‘come to’ and wondered who was in control of the car for the last five minutes, or been ’miles away’ when someone is trying to talk to you – that’s trance, and we are all very good at it. It’s a little like day-dreami

In [None]:
from langchain_groq import ChatGroq 

In [None]:
# Question-generation setup: Groq LLM, prompt, and retrieval chain.

# The model name is read from the environment. os.getenv returns None when the
# variable is unset, so fail here with an actionable message rather than
# passing None on to ChatGroq.
LLM_MODEL = os.getenv("LLM_MODEL")
if not LLM_MODEL:
    raise ValueError("Set the LLM_MODEL environment variable to a Groq model name.")

# Initialize the Groq LLM
groq_llm = ChatGroq(
    model=LLM_MODEL,
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

# Prompt asking the model to produce five client-style questions for one FAQ
# record. It has two placeholders: {question} and {text}.
review_template = """You emulate a potential client who is considering hypnotherapy services.
Formulate 5 questions this client might ask based on a FAQ record.
The record should contain the answer to the questions, and the questions should be complete and not too short.
If possible, use as few words as possible from the record.

The record:

question: {question}
answer: {text}

Provide the output as a list without using code blocks:

["question1", "question2", ..., "question5"]"""


review_system_prompt = SystemMessagePromptTemplate(
    prompt=PromptTemplate(input_variables=["question", "text"], template=review_template)
)

messages = [review_system_prompt]

review_prompt = ChatPromptTemplate(
    input_variables=["question", "text"], messages=messages
)

# NOTE(review): `new_vector_store` is not defined in the visible part of this
# notebook; it has to be created before this cell is run.
#
# The prompt is handed to the chain at construction time (instead of being
# swapped in afterwards) together with document_variable_name="text", so the
# retrieved documents are placed into the template's {text} placeholder. The
# "stuff" chain otherwise fills a variable named "context", which this
# template does not have.
reviews_vector_chain = RetrievalQA.from_chain_type(
    llm=groq_llm,
    chain_type="stuff",
    retriever=new_vector_store.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": .5, "k": 3},
    ),
    chain_type_kwargs={
        "prompt": review_prompt,
        "document_variable_name": "text",
    },
)


## Step 2: Run the generated questions through the Q&A bot

## Step 3: Evaluate results