## Define a tool
We define a search tool using LangChain's `GoogleSerperAPIWrapper` (Google Serper API).

In [None]:
import os
from langchain.utilities import GoogleSerperAPIWrapper
# Never commit API keys to source. Take SERPER_API_KEY from the environment and
# prompt for it interactively only when it is missing. The key that was
# previously hard-coded in this cell should be treated as compromised and rotated.
if not os.environ.get("SERPER_API_KEY"):
    import getpass

    os.environ["SERPER_API_KEY"] = getpass.getpass("SERPER_API_KEY: ")
# The wrapper is built without arguments; it is expected to pick the key up
# from the SERPER_API_KEY environment variable set above.
search = GoogleSerperAPIWrapper()

In [None]:
import guidance
# Load the local Manticore-13B GGML weights through Guidance's LlamaCpp wrapper
# and register the resulting model as Guidance's default LLM.
llama = guidance.llms.LlamaCpp(
    model="/home/karajan/Downloads/Manticore-13B.ggmlv3.q4_0.bin",
    tokenizer="openaccess-ai-collective/manticore-13b-chat-pyg",
    # Delimiters placed around role names (presumably rendering as "<|role|>").
    before_role="<|",
    after_role="|>",
    n_gpu_layers=300,
    n_threads=12,
    caching=False,
)
guidance.llm = llama

In [None]:
from langchain.llms import LlamaCpp

# Settings for the LangChain LlamaCpp model used by the retrieval helpers.
model_type = "LlamaCpp"
model_path = "/home/karajan/Downloads/Manticore-13B.ggmlv3.q4_0.bin"
model_n_ctx = 1000
target_source_chunks = 4
n_gpu_layers = 500
use_mlock = 0
# Environment variables are always strings, so convert N_BATCH to an int and
# fall back to 512 when it is unset or empty.
n_batch = int(os.environ.get("N_BATCH") or 512)
callbacks = []
# Module-level slot for the most recent QA prompt: written by checkQuestion()
# and read back by qaChroma().
qa_prompt = ""
llm = LlamaCpp(
    model_path=model_path,
    n_ctx=model_n_ctx,
    callbacks=callbacks,
    verbose=False,
    n_gpu_layers=n_gpu_layers,
    use_mlock=use_mlock,
    top_p=0.9,
    n_batch=n_batch,
)


In [None]:
from langchain.text_splitter import CharacterTextSplitter, TokenTextSplitter, RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings
import os
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
import re 
from colorama import Fore, Style

# NOTE(review): "retriver" looks like a misspelling of "retriever"; nothing
# visible in this file reads it. Kept so the module-level name still exists.
retriver = ""

# Maps each supported embedding model name to the LangChain class that loads it.
EMBEDDINGS_MAP = {
    "hkunlp/instructor-xl": HuggingFaceInstructEmbeddings,
    "hkunlp/instructor-large": HuggingFaceInstructEmbeddings,
    "all-MiniLM-L6-v2": HuggingFaceEmbeddings,
    "sentence-t5-xxl": HuggingFaceEmbeddings,
}

# Name of the embedding model used by ingest_file(); must be a key of EMBEDDINGS_MAP.
EMBEDDINGS_MODEL = "all-MiniLM-L6-v2"

def clean_text(text):
    """Flatten *text* onto one line and strip punctuation.

    Newlines are turned into spaces, then every character that is neither a
    word character (letter, digit, underscore) nor whitespace is removed.
    """
    single_line = " ".join(text.split("\n"))
    return re.sub(r'[^\w\s]', '', single_line)

def load_unstructured_document(document: str) -> list[Document]:
    """Read a text file into a single LangChain ``Document``.

    Args:
        document: Path of the text file to read.

    Returns:
        A one-element list. Its document carries the whole file content as
        ``page_content`` and the file's base name under ``metadata["title"]``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(document, "r", encoding="utf-8") as file:
        text = file.read()
    title = os.path.basename(document)
    return [Document(page_content=text, metadata={"title": title})]

def split_documents(documents: list[Document], chunk_size: int = 250, chunk_overlap: int = 20) -> list[Document]:
    """Split *documents* into chunks with a ``RecursiveCharacterTextSplitter``.

    Args:
        documents: Documents to split.
        chunk_size: ``chunk_size`` passed to the splitter.
        chunk_overlap: ``chunk_overlap`` passed to the splitter.

    Returns:
        The chunked documents produced by the splitter.
    """
    return RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    ).split_documents(documents)



def ingest_file(file_path, *, chunk_size=250, chunk_overlap=100, k=4, device="cuda:0"):
    """Load a text file, chunk it, embed it into Chroma and return a retriever.

    Args:
        file_path: Path of the text file to ingest.
        chunk_size: Chunk size passed to ``split_documents``.
        chunk_overlap: Chunk overlap passed to ``split_documents``.
        k: Value of the ``"k"`` search kwarg given to the retriever.
        device: Device passed in ``model_kwargs`` when the selected embedding
            class is ``HuggingFaceInstructEmbeddings``; ignored otherwise.

    Returns:
        A ``(retriever, file_path)`` tuple.

    Raises:
        ValueError: If ``EMBEDDINGS_MODEL`` is not a key of ``EMBEDDINGS_MAP``.
    """
    # Load the file as a single document, then split it into chunks.
    documents = load_unstructured_document(file_path)
    documents = split_documents(documents, chunk_size=chunk_size, chunk_overlap=chunk_overlap)

    # Resolve the embedding class for the configured model name.
    EmbeddingsModel = EMBEDDINGS_MAP.get(EMBEDDINGS_MODEL)
    if EmbeddingsModel is None:
        raise ValueError(f"Invalid embeddings model: {EMBEDDINGS_MODEL}")

    # Only the instructor embeddings are given an explicit device.
    model_kwargs = {"device": device} if EmbeddingsModel is HuggingFaceInstructEmbeddings else {}
    embedding = EmbeddingsModel(model_name=EMBEDDINGS_MODEL, model_kwargs=model_kwargs)

    # Embed the chunks into a Chroma vector store and expose it as a retriever.
    vectordb = Chroma.from_documents(documents=documents, embedding=embedding)
    retriever = vectordb.as_retriever(search_kwargs={"k": k})

    print(file_path)
    print(retriever)

    return retriever, file_path


def checkQuestion(question: str, retriever, llm):
    """Retrieve the context for *question* and build the prompts derived from it.

    Runs a "stuff" ``RetrievalQA`` chain over *retriever*, joins the cleaned
    text of the returned source documents into one context string, prints the
    question-check prompt, and stores the QA prompt in the module-level
    ``qa_prompt``.

    Args:
        question: Question to look up; every "Action Input: " marker in it is
            removed first.
        retriever: Retriever handed to ``RetrievalQA``.
        llm: Language model handed to ``RetrievalQA``.

    Returns:
        The combined, cleaned context string, or the message
        "Issue in retrieving the answer." when the chain output has no
        ``'result'`` key.
    """
    global qa_prompt
    question = question.replace("Action Input: ", "")
    QUESTION_CHECK_PROMPT_TEMPLATE = """###Instruction: You are an AI assistant who uses document information to answer questions. You MUST answer with 'yes' or 'no'. Given the following pieces of context, determine if there are any elements related to the question in the context. To assist me in this task, you have access to a vector database context that contains various documents related to different topics.
Context:{context}
Don't forget you MUST answer with 'yes' or 'no'
Question: Is there any info related to ""{question}"" in the context? Yes or No?
### Response:
"""
    QUESTION_QA_PROMPT_TEMPLATE = """### Human: You are an helpful assistant that tries to answer questions concisely and precisely. Your answer must ONLY be based on the context information provided.
    Context:{context}
    Question: {question}
    ### Assistant:
"""
    qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True)
    # NOTE(review): only the chain's source documents are used below; the
    # generated answer itself is discarded. Confirm whether querying the
    # retriever directly would be enough here.
    answer_data = qa({"query": question})

    # A missing 'result' key is treated as a failed run: show the raw output
    # in bold red and return an error message.
    if 'result' not in answer_data:
        print(f"\033[1;31m{answer_data}\033[0m")
        return "Issue in retrieving the answer."

    # Combine all retrieved chunks into one cleaned context string.
    context = " ".join(clean_text(doc.page_content) for doc in answer_data['source_documents'])

    # Build both prompts; the QA prompt is stored globally for qaChroma().
    question_check_prompt = QUESTION_CHECK_PROMPT_TEMPLATE.format(context=context, question=question)
    qa_prompt = QUESTION_QA_PROMPT_TEMPLATE.format(context=context, question=question)

    # The question-check prompt is only displayed, never sent to the LLM.
    print(Fore.GREEN + Style.BRIGHT + question_check_prompt + Style.RESET_ALL)

    return context


In [None]:
# Build the retriever over the notes file; the second value returned is the
# path that was ingested.
retriever, file_patch = ingest_file("/home/karajan/Documents/notion.txt")

In [None]:
# Smoke test: print what checkQuestion returns for a sample question.
print(checkQuestion("What's the wifi code", retriever, llm))

## Let's test

# Build the agent with Guidance

In [None]:
# Clear the cache attached to Guidance's LLM (presumably so later runs are not
# served from previously cached generations).
guidance.llm.cache.clear()
print('Done.')

### Define prompt templates

In [None]:
# Option lists referenced by the `select` calls in the prompt templates:
# valid_answers is what may follow a thought, valid_tools is which tool may be picked.
valid_answers = ['Action', 'Final Answer']
valid_tools = ['Check Question', 'Chroma Search']

# Opening prompt: system instructions (tool list, output format, worked example),
# the user question, then an assistant turn that generates a first 'thought'
# and selects one of valid_answers.
# NOTE(review): the worked example uses "Google Search", which is not one of the
# tools listed in the instructions (Check Question, Chroma Search) -- confirm
# whether that is intended.
prompt_start_template = """{{#system~}}
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Answer the following questions as best you can. You have access to the following tools:

Check Question: A tool to validate if a question is answerable or not. The input is the question to validate.

Chroma Search: A wrapper around Chroma Search. Useful for when you need to answer questions about current events. The input is the question to search relevant information.

Strictly use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [Check Question, Chroma Search]
Action Input: the input to the action, should be a question.
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

For examples:
Question: How old is CEO of Microsoft wife?
Thought: First, I need to find who is the CEO of Microsoft.
Action: Google Search
Action Input: Who is the CEO of Microsoft?
Observation: Satya Nadella is the CEO of Microsoft.
Thought: Now, I should find out Satya Nadella's wife.
Action: Google Search
Action Input: Who is Satya Nadella's wife?
Observation: Satya Nadella's wife's name is Anupama Nadella.
Thought: Then, I need to check Anupama Nadella's age.
Action: Google Search
Action Input: How old is Anupama Nadella?
Observation: Anupama Nadella's age is 50.
Thought: I now know the final answer.
Final Answer: Anupama Nadella is 50 years old.
{{~/system}}

{{#user~}}
### Input:
Question:{{question}}
Start by checking your database to see if it contains relevant information to the question.
{{~/user}}

{{#assistant~}}
Question:{{question}}
{{gen 'thought' stop='\\n'}}
{{select 'answer' logprobs='logprobs' options=valid_answers}}:
{{~/assistant}}
 """

# Intermediate step: replay the history, select a tool from valid_tools,
# generate its input, call do_tool with both, generate an observation line
# into 'thought', then select the next 'answer' from valid_answers.
prompt_mid_template = """
{{#assistant~}}
{{history}}{{select 'tool_name' logprobs='logprobs' options=valid_tools}}
{{gen 'actInput' stop='\\n'}}
{{do_tool tool_name actInput}}
{{~/assistant}}

{{#assistant~}}
Observation:{{gen 'thought' stop='\\n'}}
{{~/assistant}}

{{#assistant~}}
{{select 'answer' logprobs='logprobs' options=valid_answers}}: 
{{~/assistant}}
"""

# Last step: same tool call as the intermediate template, but after selecting
# 'answer' it also generates the final text into 'fn', which the agent returns.
prompt_final_template = """
{{#assistant~}}
{{history}}{{select 'tool_name' options=valid_tools}}
{{gen 'actInput' stop='\\n'}}
{{do_tool tool_name actInput}}
{{~/assistant}}

{{#assistant~}}
{{gen 'thought' stop='\\n'}}
{{~/assistant}}

{{#assistant~}}
{{select 'answer' logprobs='logprobs' options=valid_answers}}: {{gen 'fn' stop='\\n'}}
{{~/assistant}}
"""

In [None]:
# Question put to the agent in the cells that follow.
question = "What is the code of the wifi?"

### Define tools (feel free to add more)

In [None]:
def searchGoogle(t):
    """Run the 'Check Question' tool on the agent's action input.

    Args:
        t: Action input produced by the agent for this tool call.

    Returns:
        The value returned by ``checkQuestion`` for that input. When *t* is
        blank or not a string, the module-level ``question`` is used instead.
    """
    # Use what the agent actually asked instead of always re-querying the
    # global question; checkQuestion strips the "Action Input: " marker itself.
    query = t.strip() if isinstance(t, str) else ""
    return checkQuestion(query or question, retriever, llm)

def qaChroma(key_word):
    """Send the current module-level ``qa_prompt`` to the LLM and return its reply.

    *key_word* is accepted for the tool interface but is not used; the prompt
    text comes entirely from ``qa_prompt``. Both the prompt (in bright green)
    and the reply are printed.
    """
    highlighted_prompt = Fore.GREEN + Style.BRIGHT + qa_prompt + Style.RESET_ALL
    print(highlighted_prompt)

    completion = llm(qa_prompt)
    print(completion)
    return completion

# Tool registry handed to the agent: tool name -> callable taking the action input.
dict_tools = {
    "Check Question": searchGoogle,
    "Chroma Search": qaChroma,
}

### Our agent with Guidance

In [None]:
class CustomAgentGuidance:
    """Agent loop that chains the Guidance prompt templates with a tool registry.

    Args:
        guidance: Callable that turns a template string into a runnable program.
        tools: Mapping of tool name to a callable taking the action input.
        num_iter: Upper bound on reasoning steps; ``num_iter - 1`` intermediate
            steps may run before the final one.
    """

    def __init__(self, guidance, tools, num_iter=3):
        self.guidance = guidance
        self.tools = tools
        self.num_iter = num_iter

    def do_tool(self, tool_name, actInput):
        """Look up *tool_name* in the registry and call it with *actInput*."""
        tool = self.tools[tool_name]
        return tool(actInput)

    def __call__(self, query):
        """Run the agent on *query* and return the text generated as ``'fn'``."""
        opening = self.guidance(prompt_start_template)
        step = opening(question=query, valid_answers=valid_answers)

        # Intermediate steps: keep calling tools until the model selects
        # 'Final Answer' or the step budget runs out.
        for _ in range(self.num_iter - 1):
            if step['answer'] == 'Final Answer':
                break
            transcript = str(step)
            middle = self.guidance(prompt_mid_template)
            step = middle(history=transcript, do_tool=self.do_tool, valid_answers=valid_answers, valid_tools=valid_tools)

        wants_final = step['answer'] == 'Final Answer'
        transcript = str(step)
        if wants_final:
            # The model already chose 'Final Answer': just generate its text.
            closing = self.guidance(transcript + "{{gen 'fn' stop='\\n'}}")
            outcome = closing()
        else:
            # Budget exhausted: run one more tool call and force 'Final Answer'.
            closing = self.guidance(prompt_final_template)
            outcome = closing(history=transcript, do_tool=self.do_tool, valid_answers=['Final Answer'], valid_tools=valid_tools)
        return outcome['fn']

### Okay, let's try it

In [None]:
# Build the agent with the default step budget and run it on the question.
custom_agent = CustomAgentGuidance(guidance, dict_tools)

final_answer = custom_agent(question)

In [None]:
# Clear the cache attached to Guidance's LLM before the second run
# (presumably so that run is not served from cached generations).
guidance.llm.cache.clear()
print('Done.')

In [None]:
# Second run: rebuild the agent and ask the same question again.
custom_agent = CustomAgentGuidance(guidance, dict_tools)

final_answer = custom_agent(question)