In [None]:
!pip install -qU langchain langchain-huggingface langchain_community langchainhub langchain-chroma bs4
from langchain.agents import AgentExecutor, create_react_agent
from langchain_huggingface import HuggingFaceEndpoint
from langchain_core.tools import tool
from transformers import pipeline
from langchain import hub
import getpass
import os

In [None]:
# Take the Hugging Face token from the environment and prompt for it only when
# it is missing, so the secret is never written into the notebook source.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass.getpass(
        "Hugging Face API token: "
    )

In [None]:
# Hugging Face Hub repository of the model used throughout the notebook.
repo_id = "mistralai/Mistral-7B-Instruct-v0.2"

# Single LLM handle reused by the SQL, graph and RAG chains below.
llm = HuggingFaceEndpoint(
    repo_id=repo_id,
    temperature=0.5,
    verbose=True,
)

# SQL + LangChain

Escolher database a ser utilizada:

In [None]:
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_community.utilities import SQLDatabase

# SQLite silently creates an empty database when the file does not exist, which
# would only surface later as confusing "no such table" errors, so fail early.
CHINOOK_DB_PATH = "chinook.db"
if not os.path.exists(CHINOOK_DB_PATH):
    raise FileNotFoundError(
        f"{CHINOOK_DB_PATH} not found; place the Chinook sample database in the "
        "working directory before running this cell."
    )

db = SQLDatabase.from_uri(f"sqlite:///{CHINOOK_DB_PATH}")
# Show which tables the chains below are allowed to query.
db.get_usable_table_names()

Criar chain que transforma pergunta em uma query SQL:

In [None]:
from langchain.chains import create_sql_query_chain

# Chain that turns a natural-language question into a SQL statement for `db`.
chain = create_sql_query_chain(llm=llm, db=db)

# Keep the generated SQL in `response`; the next cell runs it against the database.
response = chain.invoke({"question": "How many employees are there"})
response

'SELECT COUNT(*) FROM employees;'

Consultar a database com a query gerada:

In [None]:
# Run the SQL generated in the previous cell against the database and show the result.
db.run(response)

'[(8,)]'

Prompt utilizado pela chain:

In [None]:
# Display the first prompt registered on the chain to see the instructions sent to the LLM.
chain.get_prompts()[0].pretty_print()

You are a SQLite expert. Given an input question, first create a syntactically correct SQLite query to run, then look at the results of the query and return the answer to the input question.
Unless the user specifies in the question a specific number of examples to obtain, query for at most 5 results using the LIMIT clause as per SQLite. You can order the results to return the most informative data in the database.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.
Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use date('now') function to get the current date, if the question involves "today".

Use the following format:

Question: Question here
SQLQuery: SQL Query to run
SQLResult: Result

Criar tool para consultar a base de dados a partir da query gerada:

In [None]:
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool

# Step 1: question -> SQL text.
write_query = create_sql_query_chain(llm=llm, db=db)
# Step 2: SQL text -> query result from `db`.
execute_query = QuerySQLDataBaseTool(db=db)

# Pipe the generated SQL straight into the execution tool.
chain = write_query | execute_query
chain.invoke({"question": "How many employees are there"})

'[(8,)]'

Criar chain única para converter a pergunta para SQL, executar a query e devolver resposta em linguagem natural:

In [None]:
# Prompt that asks the LLM to phrase the final answer from the question,
# the SQL that was generated, and the result that SQL returned.
answer_prompt = PromptTemplate.from_template(
    "Given the following user question, corresponding SQL query, "
    "and SQL result, answer the user question.\n"
    "\n"
    "Question: {question}\n"
    "SQL Query: {query}\n"
    "SQL Result: {result}\n"
    "Answer: "
)

In [None]:
# Final step: fill the answer prompt, call the LLM, and return plain text.
answer = answer_prompt | llm | StrOutputParser()

# The input dict is enriched step by step before reaching `answer`:
#   1. "query"  <- SQL written from the question
#   2. "result" <- output of executing that SQL
chain = (
    RunnablePassthrough.assign(query=write_query)
    | RunnablePassthrough.assign(result=itemgetter("query") | execute_query)
    | answer
)
chain.invoke({"question": "How many employees are there"})

'8 employees.'

# GraphCypherQAChain

**Instalações e importações necessárias**

In [None]:
!pip -q install neo4j

In [None]:
from langchain_community.graphs import Neo4jGraph
from langchain.chains import GraphCypherQAChain
from langchain.prompts import PromptTemplate

Conexão com o grafo:

In [None]:
from langchain_community.graphs import Neo4jGraph
# Connection settings are read from the environment so that credentials are not
# stored in the notebook; the password is prompted for when NEO4J_PASSWORD is unset.
graph = Neo4jGraph(
    url=os.environ.get("NEO4J_URI", "bolt://44.193.17.131:7687"),
    username=os.environ.get("NEO4J_USERNAME", "neo4j"),
    password=os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password: "),
)

In [None]:
# Print the graph schema; the same schema is what fills the {schema} slot of the Cypher prompt.
print(graph.schema)

Criação do prompt e da chain para consultar o grafo:

In [None]:
# Prompt text used to generate Cypher. The {schema} and {question} placeholders
# match the input variables declared on the PromptTemplate built from this string.
CYPHER_GENERATION_TEMPLATE = """
You are an expert Neo4j Developer translating user questions into Cypher to answer questions about movies and provide recommendations.
Convert the user's question based on the schema.

Schema: {schema}
Question: {question}
"""

In [None]:
# Wrap the Cypher template in a PromptTemplate with its two expected inputs.
cypher_generation_prompt = PromptTemplate(
    input_variables=["schema", "question"],
    template=CYPHER_GENERATION_TEMPLATE,
)

In [None]:
# Question-answering chain over the graph, using the custom Cypher prompt.
# verbose=True prints the generated Cypher and the retrieved context on each call.
cypher_chain = GraphCypherQAChain.from_llm(
    llm=llm,
    verbose=True,
    cypher_prompt=cypher_generation_prompt,
    graph=graph,
)

**Exemplos de funcionamento**

In [None]:
# Ask a question that requires finding an actor for a given movie.
# NOTE(review): in the recorded output the generated Cypher already filters on
# a.name = 'Tom Hanks', so the answer appears to come from the model's prior
# knowledge rather than being looked up in the graph - worth confirming.
cypher_chain.invoke({"query": "Who voices the character 'Woody' in Toy Story?"})



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (a:Actor)-[:ACTED_IN]->(m:Movie)
WHERE m.title = 'Toy Story' AND a.name = 'Tom Hanks'
RETURN a.name AS actorName, m.title AS movieTitle
[0m
Full Context:
[32;1m[1;3m[{'actorName': 'Tom Hanks', 'movieTitle': 'Toy Story'}][0m

[1m> Finished chain.[0m


{'query': "Who voices the character 'Woody' in Toy Story?",
 'result': " Tom Hanks voices the character 'Woody' in Toy Story."}

In [None]:
# Ask an aggregation question; the recorded Cypher counts ACTED_IN relationships for the person.
cypher_chain.invoke({"query": "How many movies is 'Tom Hanks' in?"})



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3m
MATCH (p:Person)-[:ACTED_IN]->(m:Movie)
WHERE p.name = 'Tom Hanks'
RETURN count(*) as numMovies
[0m
Full Context:
[32;1m[1;3m[{'numMovies': 38}][0m

[1m> Finished chain.[0m


{'query': "How many movies is 'Tom Hanks' in?",
 'result': ' Tom Hanks is in 38 movies.'}

# RAG

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.runnables import RunnablePassthrough
from langchain_chroma import Chroma

In [None]:
import bs4
from langchain_community.document_loaders import WebBaseLoader

# Only parse the HTML elements carrying the post's title, header and content classes.
bs4_strainer = bs4.SoupStrainer(
    class_=("post-title", "post-header", "post-content"),
)

# Load the blog post, handing the strainer to BeautifulSoup via `parse_only`.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(parse_only=bs4_strainer),
)
docs = loader.load()

In [None]:
# Preview the first 500 characters of the loaded page to check the content was extracted.
print(docs[0].page_content[:500])



      LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng


Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.
Agent System Overview#
In


In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the loaded documents into overlapping chunks before indexing them.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)
splits = text_splitter.split_documents(docs)

In [None]:
# Inspect the first chunk produced by the splitter.
splits[0]

Document(page_content='LLM Powered Autonomous Agents\n    \nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n\n\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.\n\n\

In [None]:
# No model name is passed, so the library's default embedding model is used.
# `embeddings` is reused later by the FAISS store in the hybrid-search section.
embeddings = HuggingFaceEmbeddings()
# Embed every chunk and index it in a Chroma vector store.
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)

In [None]:
# Expose the vector store as a retriever so it can be piped into the RAG chain.
retriever = vectorstore.as_retriever()

In [None]:
# Fetch the "rlm/rag-prompt" prompt from the hub and display it.
prompt = hub.pull("rlm/rag-prompt")
prompt.pretty_print()


You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: [33;1m[1;3m{question}[0m 
Context: [33;1m[1;3m{context}[0m 
Answer:


In [None]:
def format_docs(docs):
    """Concatenate the `page_content` of each document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a `page_content` string attribute.

    Returns:
        A single string with all page contents joined by two newlines.
    """
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)

In [None]:
# RAG pipeline: the incoming question is passed through unchanged under "question",
# while "context" is filled by retrieving documents for it and joining their text.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Ask a question whose answer should come from the indexed blog post.
rag_chain.invoke("What is Task Decomposition?")

'\n\nTask decomposition is a process where a complex problem is broken down into smaller, manageable tasks. This can be done through language models with simple prompting, task-specific instructions, or human inputs. For example, a user might ask a language model to "Write a story outline for a novel." The language model would then decompose this task into smaller steps, such as "Create a list of main characters," "Determine the setting," etc. This tree structure allows for efficient and effective problem-solving.\n\nHowever, it\'s important to note that task decomposition can be challenging for language models, especially over long-term planning. They may struggle to adjust plans when faced with unexpected errors and may not be as robust as humans in learning from trial and error. Despite these challenges, task decomposition is a crucial aspect of problem-solving and is essential for the efficient execution of complex tasks.\n\nReference(s):\n- Yao, S., Li, Y., & Wang, Z. (2023). Tree

# Hybrid Search

In [None]:
!pip install --upgrade --quiet  rank_bm25 > /dev/null langchain-ai21 faiss-cpu

In [None]:
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever
from langchain_community.vectorstores import FAISS
import os

In [None]:
# Small corpus in which "right" appears with different meanings
# (direction, correctness, timing), plus one sentence without the word.
doc_list = [
    "To know the direction, you have to look right",
    "This is correct",
    "You are right",
    "Right after the meeting, we can have lunch",
    "Turn left at the next intersection",
]

In [None]:
# Keyword-based (BM25) retriever over the sample sentences, returning the top 2 matches.
bm25_retriever = BM25Retriever.from_texts(doc_list, k=2)

In [None]:
# Keyword search: the recorded results are sentences that literally contain "right".
bm25_retriever.invoke("right")

[Document(page_content='You are right'),
 Document(page_content='To know the direction, you have to look right')]

In [None]:
# Embedding-based retriever over the same sentences, reusing the `embeddings`
# model created in the RAG section; also limited to the top 2 matches.
faiss_vectorstore = FAISS.from_texts(texts=doc_list, embedding=embeddings)
faiss_retriever = faiss_vectorstore.as_retriever(search_kwargs=dict(k=2))

In [None]:
# Vector search: the recorded results include "This is correct", which does not contain the query word.
faiss_retriever.invoke("right")

[Document(page_content='This is correct'),
 Document(page_content='You are right')]

In [None]:
# Hybrid retriever: combine keyword (BM25) and vector (FAISS) results with equal weights.
ensemble_retriever = EnsembleRetriever(
    weights=[0.5, 0.5],
    retrievers=[bm25_retriever, faiss_retriever],
)

In [None]:
# Query both retrievers through the ensemble and display the merged results.
# NOTE(review): this rebinds `docs`, which earlier held the pages loaded by WebBaseLoader.
docs = ensemble_retriever.invoke("right")
docs

[Document(page_content='You are right'),
 Document(page_content='This is correct'),
 Document(page_content='To know the direction, you have to look right')]