In [67]:
import os
import time

from dotenv import load_dotenv

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_openai import ChatOpenAI

from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.output_parsers import StrOutputParser

from langchain_community.tools.tavily_search import TavilySearchResults
from langchain.agents import load_tools
from langchain.tools import AIPluginTool
from langchain.agents import AgentExecutor, create_react_agent

from langgraph.graph import END, StateGraph

from pprint import pprint

In [3]:
embed_model = FastEmbedEmbeddings(model_name="BAAI/bge-base-en-v1.5")

  from .autonotebook import tqdm as notebook_tqdm
Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 10843.60it/s]


In [4]:
llm = ChatOpenAI(temperature=0)

In [5]:
urls = [
    "https://docs.kamino.finance/",
    "https://docs.kamino.finance/kamino-lend-litepaper",
    "https://docs.kamino.finance/products/overview",
]

docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]
print(f"len of documents :{len(docs_list)}")

len of documents :3


In [6]:
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=512, chunk_overlap=0
)
doc_splits = text_splitter.split_documents(docs_list)
print(f"length of document chunks generated :{len(doc_splits)}")

length of document chunks generated :8


In [41]:
doc_splits

[Document(page_content="Kamino Portal | Hub | Kamino DocsKamino DocsHubHubRiskAutomated LiquiditySearchCtrl\u2006+\u2006KKamino PortalKamino Lend LitepaperProductsOverviewMultiplyHow ToOpen a PositionManage a PositionManage RiskHow It WorksRisksBorrow/LendSupplying AssetskToken CollateralBorrowing AssetsPosition Risk & LiquidationsPosition RiskBorrow FactorsFeesLong/ShortLiquidityKamino PointsOverviewRates & BoostsSeasonsSeason 1Season 2PageReferralsAutomated LiquidityLiquidity VaultskTokensLiquidity AnalyticsLiquidity Vault RisksSecurity & RiskRisk FrameworkOracle SecurityLive Risk DashboardAuto-deleverageInterest Rate ModelAsset RiskAuditsBuild on KaminoSDK & Smart ContractsDeveloper DocumentationBug BountyBrand AssetsKMNOToken InfoStakingPowered by GitBookKamino PortalAutomated products powered by secure DeFi primitivesWhat is Kamino Finance?Kamino Finance was originally created to offer users the easiest possible way of providing liquidity and earning yield on-chain.The protocol's 

In [7]:
vectorstore = Chroma.from_documents(documents=doc_splits,
                                    embedding=embed_model,
                                    collection_name="local-rag")

In [8]:
retriever = vectorstore.as_retriever(search_kwargs={"k":2})

In [43]:
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an expert at routing a 
    user question to a vectorstore or web search. Use the vectorstore for questions on Kamino Portal, 
    Kamino Lend Litepaper, and Kamino Overview. You do not need to be stringent with the keywords 
    in the question related to these topics. Otherwise, use web-search. Give a binary choice 'web_search' 
    or 'vectorstore' based on the question. Return the a JSON with a single key 'datasource' and 
    no premable or explaination. Question to route: {question} <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["question"],
)
start = time.time()
question_router = prompt | llm | JsonOutputParser()
#
question = "llm agent memory"
print(question_router.invoke({"question": question}))
end = time.time()
print(f"The time required to generate response by Router Chain in seconds:{end - start}")

{'datasource': 'web_search'}
The time required to generate response by Router Chain in seconds:1.1396307945251465


In [44]:
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an assistant for question-answering tasks. 
    Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. 
    Use three sentences maximum and keep the answer concise <|eot_id|><|start_header_id|>user<|end_header_id|>
    Question: {question} 
    Context: {context} 
    Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["question", "document"],
)

# Post-processing
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# Chain
start = time.time()
rag_chain = prompt | llm | StrOutputParser()

In [45]:
#
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing relevance 
    of a retrieved document to a user question. If the document contains keywords related to the user question, 
    grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. \n
    Provide the binary score as a JSON with a single key 'score' and no premable or explaination.
     <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here is the retrieved document: \n\n {document} \n\n
    Here is the user question: {question} \n <|eot_id|><|start_header_id|>assistant<|end_header_id|>
    """,
    input_variables=["question", "document"],
)
start = time.time()
retrieval_grader = prompt | llm | JsonOutputParser()
question = "kamino"
docs = retriever.invoke(question)
doc_txt = docs[1].page_content
print(retrieval_grader.invoke({"question": question, "document": doc_txt}))
end = time.time()
print(f"The time required to generate response by the retrieval grader in seconds:{end - start}")

{'score': 'yes'}
The time required to generate response by the retrieval grader in seconds:0.8524599075317383


In [46]:
# Prompt
prompt = PromptTemplate(
    template=""" <|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing whether 
    an answer is grounded in / supported by a set of facts. Give a binary 'yes' or 'no' score to indicate 
    whether the answer is grounded in / supported by a set of facts. Provide the binary score as a JSON with a 
    single key 'score' and no preamble or explanation. <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here are the facts:
    \n ------- \n
    {documents} 
    \n ------- \n
    Here is the answer: {generation}  <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["generation", "documents"],
)
start = time.time()
generation = []
hallucination_grader = prompt | llm | JsonOutputParser()
hallucination_grader_response = hallucination_grader.invoke({"documents": docs, "generation": generation})
end = time.time()
print(f"The time required to generate response by the generation chain in seconds:{end - start}")
print(hallucination_grader_response)

The time required to generate response by the generation chain in seconds:0.9329128265380859
{'score': 'no'}


In [47]:
# Prompt
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing whether an 
    answer is useful to resolve a question. Give a binary score 'yes' or 'no' to indicate whether the answer is 
    useful to resolve a question. Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.
     <|eot_id|><|start_header_id|>user<|end_header_id|> Here is the answer:
    \n ------- \n
    {generation} 
    \n ------- \n
    Here is the question: {question} <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["generation", "question"],
)
start = time.time()
answer_grader = prompt | llm | JsonOutputParser()
answer_grader_response = answer_grader.invoke({"question": question,"generation": generation})
end = time.time()
print(f"The time required to generate response by the answer grader in seconds:{end - start}")
print(answer_grader_response)

The time required to generate response by the answer grader in seconds:0.8401024341583252
{'score': 'no'}


In [64]:
# Setup environment variables
URL = "https://blockchatstatic.blob.core.windows.net/api-configuration"

tools = load_tools(["requests_post"], allow_dangerous_tools=True)

# AIPluginTool only fetches and returns the openapi.yaml linked to in /.well-known/ai-plugin.json
# This may need some more work to avoid blowing up LLM context window
solana_search_tool = AIPluginTool.from_plugin_url(URL + "/.well-known/ai-plugin.json")
tools += [solana_search_tool]

In [65]:
# Load environment variables from .env file
load_dotenv()

# Access the environment variables
TAVILY_API_KEY = os.getenv('TAVILY_API_KEY')
web_search_tool = TavilySearchResults(k=3)
tools += [web_search_tool]

In [139]:
template = '''Answer the following questions as best you can. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: {query}
Thought:{agent_scratchpad}'''

prompt = PromptTemplate.from_template(template)

In [140]:
# Setup an agent to answer the question without further human feedback
agent = create_react_agent(
    llm, tools, prompt)

agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True, handle_parsing_errors=True)

In [141]:
from typing_extensions import TypedDict
from typing import List

### State

class GraphState(TypedDict):
    question : str
    generation : str
    web_search : str
    documents : List[str]

In [150]:
from langchain.schema import Document
def retrieve(state):
    """
    Retrieve documents from vectorstore

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, documents, that contains retrieved documents
    """
    print("---RETRIEVE---")
    question = state["question"]

    # Retrieval
    documents = retriever.invoke(question)
    return {"documents": documents, "question": question}
#
def generate(state):
    """
    Generate answer using RAG on retrieved documents

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, generation, that contains LLM generation
    """
    print("---GENERATE---")
    question = state["question"]
    documents = state["documents"]
    
    # RAG generation
    generation = rag_chain.invoke({"context": documents, "question": question})
    return {"documents": documents, "question": question, "generation": generation}
#
def grade_documents(state):
    """
    Determines whether the retrieved documents are relevant to the question
    If any document is not relevant, we will set a flag to run web search

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Filtered out irrelevant documents and updated web_search state
    """

    print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
    question = state["question"]
    documents = state["documents"]
    
    # Score each doc
    filtered_docs = []
    web_search = "No"
    for d in documents:
        score = retrieval_grader.invoke({"question": question, "document": d.page_content})
        grade = score['score']
        # Document relevant
        if grade.lower() == "yes":
            print("---GRADE: DOCUMENT RELEVANT---")
            filtered_docs.append(d)
        # Document not relevant
        else:
            print("---GRADE: DOCUMENT NOT RELEVANT---")
            # We do not include the document in filtered_docs
            # We set a flag to indicate that we want to run web search
            web_search = "Yes"
            continue
    return {"documents": filtered_docs, "question": question, "web_search": web_search}
#
def web_search(state):
    """
    Web search based based on the question

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Appended web results to documents
    """

    print("---WEB SEARCH---")
    question = state["question"]
    documents = state["documents"]

    # Web search
    docs = agent_executor.invoke({"query": question})
    web_results = docs["output"]
    web_results = Document(page_content=web_results)
    if documents is not None:
        documents.append(web_results)
    else:
        documents = [web_results]
    return {"documents": documents, "question": question}
#

In [151]:
def route_question(state):
    """
    Route question to web search or RAG.

    Args:
        state (dict): The current graph state

    Returns:
        str: Next node to call
    """

    print("---ROUTE QUESTION---")
    question = state["question"]
    print(question)
    source = question_router.invoke({"question": question})  
    print(source)
    print(source['datasource'])
    if source['datasource'] == 'web_search':
        print("---ROUTE QUESTION TO WEB SEARCH---")
        return "websearch"
    elif source['datasource'] == 'vectorstore':
        print("---ROUTE QUESTION TO RAG---")
        return "vectorstore"

In [152]:
def decide_to_generate(state):
    """
    Determines whether to generate an answer, or add web search

    Args:
        state (dict): The current graph state

    Returns:
        str: Binary decision for next node to call
    """

    print("---ASSESS GRADED DOCUMENTS---")
    question = state["question"]
    web_search = state["web_search"]
    filtered_documents = state["documents"]

    if web_search == "Yes":
        # All documents have been filtered check_relevance
        # We will re-generate a new query
        print("---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, INCLUDE WEB SEARCH---")
        return "websearch"
    else:
        # We have relevant documents, so generate answer
        print("---DECISION: GENERATE---")
        return "generate"

In [153]:
def grade_generation_v_documents_and_question(state):
    """
    Determines whether the generation is grounded in the document and answers question.

    Args:
        state (dict): The current graph state

    Returns:
        str: Decision for next node to call
    """

    print("---CHECK HALLUCINATIONS---")
    question = state["question"]
    documents = state["documents"]
    generation = state["generation"]

    score = hallucination_grader.invoke({"documents": documents, "generation": generation})
    grade = score['score']

    # Check hallucination
    if grade == "yes":
        print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
        # Check question-answering
        print("---GRADE GENERATION vs QUESTION---")
        score = answer_grader.invoke({"question": question,"generation": generation})
        grade = score['score']
        if grade == "yes":
            print("---DECISION: GENERATION ADDRESSES QUESTION---")
            return "useful"
        else:
            print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
            return "not useful"
    else:
        pprint("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
        return "not supported"

In [154]:
workflow = StateGraph(GraphState)

# Define the nodes
workflow.add_node("websearch", web_search) # web search
workflow.add_node("retrieve", retrieve) # retrieve
workflow.add_node("grade_documents", grade_documents) # grade documents
workflow.add_node("generate", generate) # generatae

In [155]:
workflow.set_conditional_entry_point(
    route_question,
    {
        "websearch": "websearch",
        "vectorstore": "retrieve",
    },
)

workflow.add_edge("retrieve", "grade_documents")
workflow.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {
        "websearch": "websearch",
        "generate": "generate",
    },
)
workflow.add_edge("websearch", "generate")
workflow.add_conditional_edges(
    "generate",
    grade_generation_v_documents_and_question,
    {
        "not supported": "generate",
        "useful": END,
        "not useful": "websearch",
    },
)

In [156]:
app = workflow.compile()

In [157]:
inputs = {"question": "What is prompt engineering?"}
for output in app.stream(inputs):
    for key, value in output.items():
        pprint(f"Finished running: {key}:")
pprint(value["generation"])

---ROUTE QUESTION---
What is prompt engineering?
{'datasource': 'web_search'}
web_search
---ROUTE QUESTION TO WEB SEARCH---
---WEB SEARCH---


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI should use tavily_search_results_json to search for information on prompt engineering.
Action: tavily_search_results_json
Action Input: "prompt engineering"[0m[38;5;200m[1;3m[{'url': 'https://www.coursera.org/articles/what-is-prompt-engineering', 'content': 'Examples of prompt engineering\nHere are a few examples of prompt engineering to give you a better understanding of what it is and how you might engineer a prompt with a text and image model.\n Learners are advised to conduct additional research to ensure that courses and other credentials pursued meet their personal, professional, and financial goals.\n$1 unlocks unlimited opportunities\nCoursera Footer\nPopular AI Content\nPopular Programs\nPopular Skills\nPopular Career Resources\nCoursera\nCommunity\nMore You can do the same

In [158]:
inputs = {"question": "What is Kamino Finance?"}
for output in app.stream(inputs):
    for key, value in output.items():
        pprint(f"Finished running: {key}:")
pprint(value["generation"])

---ROUTE QUESTION---
What is Kamino Finance?
{'datasource': 'web_search'}
web_search
---ROUTE QUESTION TO WEB SEARCH---
---WEB SEARCH---


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI should use tavily_search_results_json to search for information about Kamino Finance.
Action: tavily_search_results_json
Action Input: "Kamino Finance"[0m[38;5;200m[1;3m[{'url': 'https://web-kamino.finance/', 'content': 'Kamino Finance is a decentralized finance protocol that allows users to borrow, lend, and leverage their assets with competitive interest rates and flexible terms. Users can also boost their SOL yields, provide liquidity to DEXs, and access automated strategies and analytics with Kamino.'}, {'url': 'https://app.kamino.finance/', 'content': 'Kamino is a Solana DeFi protocol that unifies Lending, Liquidity, and Leverage into a single, secure DeFi product suite. You can use Kamino to borrow and lend crypto assets; leverage your SOL staking yield, provide leveraged liquidit

In [159]:
inputs = {"question": "How many SOL does 8fbqVvpK3Dj7fdP2c8JJhtD7Zy3n9qtwAeGfbkgPu625 have?"}
for output in app.stream(inputs):
    for key, value in output.items():
        pprint(f"Finished running: {key}:")
pprint(value["generation"])

---ROUTE QUESTION---
How many SOL does 8fbqVvpK3Dj7fdP2c8JJhtD7Zy3n9qtwAeGfbkgPu625 have?
{'datasource': 'vectorstore'}
vectorstore
---ROUTE QUESTION TO RAG---
---RETRIEVE---
'Finished running: retrieve:'
---CHECK DOCUMENT RELEVANCE TO QUESTION---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, INCLUDE WEB SEARCH---
'Finished running: grade_documents:'
---WEB SEARCH---


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI should use the Solana Labs API to get information about the SOL balance of a specific address.
Action: solana[0mInvalid Format: Missing 'Action Input:' after 'Action:'[32;1m[1;3mI need to provide an action input after calling the solana tool.
Action: solana
Action Input: [0m[33;1m[1;3mUsage Guide: This extension is for exploring Solana blockchain data, such as inspecting what tokens a wallet has or explaining what happened in a transaction. Use i

In [160]:
inputs = {"question": "What is Unified Liquidity Market in Kamino?"}
for output in app.stream(inputs):
    for key, value in output.items():
        pprint(f"Finished running: {key}:")
value["generation"]

---ROUTE QUESTION---
What is Unified Liquidity Market in Kamino?
{'datasource': 'vectorstore'}
vectorstore
---ROUTE QUESTION TO RAG---
---RETRIEVE---
'Finished running: retrieve:'
---CHECK DOCUMENT RELEVANCE TO QUESTION---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
---DECISION: GENERATE---
'Finished running: grade_documents:'
---GENERATE---
---CHECK HALLUCINATIONS---
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
'Finished running: generate:'


'The Unified Liquidity Market in Kamino is a single liquidity market that allows for higher leverage when lending/borrowing within a certain asset grouping. This market design aims to prevent fragmentation of liquidity and increase yields for lenders by avoiding multi-pool designs. The risk engine in K-Lend enables risk isolation even within this unified liquidity market.'

In [161]:
inputs = {"question": "Hello"}
for output in app.stream(inputs):
    for key, value in output.items():
        pprint(f"Finished running: {key}:")
value["generation"]

---ROUTE QUESTION---
Hello
{'datasource': 'vectorstore'}
vectorstore
---ROUTE QUESTION TO RAG---
---RETRIEVE---
'Finished running: retrieve:'
---CHECK DOCUMENT RELEVANCE TO QUESTION---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, INCLUDE WEB SEARCH---
'Finished running: grade_documents:'
---WEB SEARCH---


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThis is not a specific question, I should ask for more information or context.
Action: tavily_search_results_json
Action Input: "Hello"[0m[38;5;200m[1;3m[{'url': 'https://en.wikipedia.org/wiki/Hello', 'content': 'P.S. first cost of sender & receiver to manufacture is only $7.00.[12]\nBy 1889, central telephone exchange operators were known as \'hello-girls\' because of the association between the greeting and the telephone.[14][15]\nA 1918 fiction novel uses the spelling "Halloa" in the context of telephone conve

'Hello may have originated from an older spelling variant "hullo" and is commonly used as a greeting or expression of surprise.'