In [1]:
# As variáveis globais permitem interagir com sistemas, externos. Estou usando as seguintes:

# O langsmith permite fazer o backtracking das execuções

from dotenv import load_dotenv

load_dotenv()

True

In [2]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings


USER_AGENT environment variable not set, consider setting it to identify your requests.


# 1. Inicio - Construção Geral 

In [3]:
# #### INDEXING ####

# # Load Documents
# loader = WebBaseLoader( # Método do LangChain para coletar dados de texto da internet
#     web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",), # Pagina WEB de onde foi retirado o texto
#     bs_kwargs=dict(
#         parse_only=bs4.SoupStrainer(
#             class_=("post-content", "post-title", "post-header")
#         )
#     ),
# )
# docs = loader.load() #Faz o load da página WEB selecionada

# # Split
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) # Separa o texto encontrado em chuncks de 1000 caracter com interacalamento de 200 tokens
# splits = text_splitter.split_documents(docs) # Realiza a tokenização

# # Embed
# vectorstore = Chroma.from_documents(documents=splits,  
#                                     embedding=OpenAIEmbeddings()) # Com os chuncks separados, faz o embedding dos vetores

# retriever = vectorstore.as_retriever() 

# #### RETRIEVAL and GENERATION ####

# # Prompt
# prompt = hub.pull("rlm/rag-prompt")

# # LLM
# llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0) #Chama a instancia do CHAT GPT

# # Post-processing
# def format_docs(docs):
#     return "\n\n".join(doc.page_content for doc in docs)

# # Chain
# rag_chain = ( #Maneira de construir a chamada da RAG
#     {"context": retriever | format_docs, "question": RunnablePassthrough()}
#     | prompt
#     | llm
#     | StrOutputParser()
# )

# # Question
# rag_chain.invoke("What is Task Decomposition?") # Nova chamada com nova query

In [4]:
# Construindo um exemplo de pergunta e de documento
question = "What kinds of pets do I like?"
document = "My favorite pet is a cat."

In [5]:
import tiktoken # Biblioteca que ajuda na tokenização dos textos inputados

def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Returns the number of tokens in a text string."""
    encoding = tiktoken.get_encoding(encoding_name) 
    num_tokens = len(encoding.encode(string))
    return num_tokens

num_tokens_from_string(question, "cl100k_base") # O CL100K é uma base padrão de tokenização de textos, bem conhecida

8

A chamada do chat aqui, para produção dos embeddings tem um custo associado.

In [6]:
from langchain_openai import OpenAIEmbeddings # Importando o modelo de Embedding que será usado

emdb = OpenAIEmbeddings()
query_result = emdb.embed_query(question) # Fazendo a construção do embedding da pergunta
document_result = emdb.embed_query(document) # Fazendo a construção do embedding do documento

len(query_result) # Tamanho do vetor de embedding produzido (Ele vai ser padrão, idependente do número de tokens. Obs: Ele já faz a tokenização por trás) O embedding tem representação semantica.

1536

Enfim, com os vetores de ambos (Pergunta e documento) produzidos podemos calcular sua similaridade via Cosine Similarity

In [7]:
import numpy as np

def cosine_similarity(vec1, vec2):
    dot_product = np.dot(vec1, vec2)
    norm_vec1 = np.linalg.norm(vec1)
    norm_vec2 = np.linalg.norm(vec2)
    return dot_product / (norm_vec1 * norm_vec2)

similarity = cosine_similarity(query_result, document_result)
print("Cosine Similarity:", similarity)

Cosine Similarity: 0.8807044730847652


# 2. Indexing

Podemos indexar outros documentos também. Vamos carregar alguns documentos pre indexados da lib do LangChain.

In [8]:
import bs4
from langchain_community.document_loaders import WebBaseLoader

# Carrega a página que desejamos usar dentro do método
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
blog_docs = loader.load()


In [10]:
## Split dos documentos

from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size = 300, # Tamanho de tokens baseados na separação base que o tiktoken faz
    chunk_overlap = 50) # Tamanho do overlap de textos na hora da construção dos chuncks de textos

# Faz os splits

splits = text_splitter.split_documents(blog_docs)

Constroe uma pre database de informações de dados. Isso será utilizado depois na hora do embedding.

In [11]:
splits[:5]

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='LLM Powered Autonomous Agents\n    \nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n\n\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refi

Por fim, fazemos a construção dos embedding dos documentos. Utilizar o FAISS, pois o Chroma estoura a memória da minha maquina. 

No FAISS é preciso salvar em disco os embeddings para que possa ser posteriormente utilizado como um retriver.

Salvo local, depois load e utilizo o método as_retriver()

In [12]:
# Aqui vamos vetorizar os documentos e realmente construir os indexes, que nada mais são que os vetores embeddados

from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

vectorstore = FAISS.from_documents(documents = splits, 
                                    embedding=OpenAIEmbeddings()) #Chamamos o modelo de embedding que queremos aplicar

vectorstore.save_local("Vecto_Database/")

In [13]:
vectorstore = FAISS.load_local(folder_path="Vecto_Database/", embeddings = OpenAIEmbeddings(), allow_dangerous_deserialization= True)

In [14]:
retriever = vectorstore.as_retriever()

# 3. Retrieval

In [15]:
retriver = vectorstore.as_retriever(search_kwargs = {'k': 5})

In [16]:
docs = retriver.get_relevant_documents('What is this topic about')

  docs = retriver.get_relevant_documents('What is this topic about')


Retorna uma lista com os vetores mais próximos a pergunta realizada. Já faz o embedding da pergunta e compara com os vizinhos mais próximos.

Trás os documentos mais próximos. Posso olhar os resultados dentro do Lang Smith

https://smith.langchain.com/o/c52de26c-1ebb-5bec-8f64-fca7b79f6e79/projects/p/2586179e-d4d5-4222-8739-8ee94a2f3022?timeModel=%7B%22duration%22%3A%227d%22%7D&peek=85624dc5-9143-444f-bdc6-704ab1183b04

In [17]:
docs

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='}\n]\nChallenges#\nAfter going through key ideas and demos of building LLM-centered agents, I start to see a couple common limitations:'),
 Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='},\n  {\n    "role": "user",\n    "content": "{{There are 10 levels in total. The main character is a plumber named Mario, who can walk and jump. It is a classical platform game just like Super Mario. The main character moves from left to right, trying to get to the destination, where there are many obstacles and attacks from enemies in the process.}}\\n\\nIs anything else unclear? If yes, only answer in the form:\\n{remaining unclear areas} remaining questions.\\n{Next question}\\nIf everything is sufficiently clear, only answer \\"Nothing more to clarify.\\"."\n  },\n  {\n    "role": "assistant",\n    "content": "Remaining unclear areas: 2 remaining

In [18]:
len(docs)

5

# 4. Generation

In [21]:
from langchain.prompts import ChatPromptTemplate # Nos ajuda a construir um tamplate padrão de chamada da nossa RAG


# Esse é o prompt padrão que vamos usar
tamplate = ''' Anwser the question based only on the following context:

{context}

Question: {question}'''

prompt = ChatPromptTemplate.from_template(tamplate) # Recebe uma docstring com os parâmetros padrõres recebidos
prompt

ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template=' Anwser the question based only on the following context:\n\n{context}\n\nQuestion: {question}'))])

In [22]:
# Precisamos também definir uma LLM

llm  = ChatOpenAI(model_name = 'gpt-3.5-turbo', temperature = 0) #Permite definir a criatividade do modelo, também permite escolher o modelo que quero usar

Construção de uma CHAIN, encadeamento de informações


Utiliza-se o caracter | para unificar o prompt + llm

In [26]:
# Criando a CHAIN - Expression language LLC language from LangChain

chain = prompt | llm

In [28]:
# Rodando a cadeia/chain criada

chain.invoke({"context":docs, "question": "What is the webpage about?"}) # Para chamar um modelo de llm normalmente utilizamos a função invoke. Passamos dentro os parametros que construimos dentro do from tamplate
# Recebe um dicionário como parametro da invoke!!

AIMessage(content='The webpage is about building LLM-centered agents and discussing the challenges and limitations associated with them.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 19, 'prompt_tokens': 1020, 'total_tokens': 1039}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-fdfea9ae-1814-429a-9643-462546f7e442-0', usage_metadata={'input_tokens': 1020, 'output_tokens': 19, 'total_tokens': 1039})

In [None]:
# Tudo que sobe aqui também vai para o LangSmith

In [29]:
# Dentro do modulo hub existem diversos prompts padrões que podem ser utlizados para acelerar o desenvolvimento da aplicação

from langchain import hub

prompt_hub_rag = hub.pull('rlm/rag-prompt') # Importa esse modelo padrão de prompt

  prompt = loads(json.dumps(prompt_object.manifest))


In [30]:
prompt_hub_rag

ChatPromptTemplate(input_variables=['context', 'question'], metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))])

In [31]:
# Posso então combinar esse modelo com outros métodos disponíveis dentro do LangChain para fazer a concatenação dessas informações.


from langchain_core.output_parsers import StrOutputParser # Parseia a string de saida da LLM em uma string
from langchain_core.runnables import RunnablePassthrough 

rag_chain = (
                {'context': retriver, 'question': RunnablePassthrough()} #Retriver é o "banco de dados vetorial", esta buscando os 5 casos mais prox
                | prompt
                | llm
                | StrOutputParser()
)

rag_chain.invoke('What is this webpage about?') # Utilizar o RunnablePassthrough permite passar a pergunta diretamente dentro da chamada da rag_Chain

# Utilizar essa combinação evita que nos tenhamos que fazer a busca de maneira manual, faz a busca automatica dentro do {'context': retriver, 'question': RunnablePassthrough()}

'The webpage is about building LLM-centered agents and providing instructions for writing code to implement the architecture.'

# 5.1 Query translation - Multi-Query

Reescrever as perguntas de maneira a tentar capturar a melhor resposta possivel

In [None]:
from langchain.prompts import ChatPromptTemplate

# Multi Query: Different Perspectives
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

generate_queries = (
    prompt_perspectives 
    | ChatOpenAI(temperature=0) 
    | StrOutputParser() 
    | (lambda x: x.split("\n"))
)

In [None]:
from langchain.load import dumps, loads

def get_unique_union(documents: list[list]):
    """ Unique union of retrieved docs """
    # Flatten list of lists, and convert each Document to string
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    # Get unique documents
    unique_docs = list(set(flattened_docs))
    # Return
    return [loads(doc) for doc in unique_docs]

# Retrieve
question = "What is task decomposition for LLM agents?"
retrieval_chain = generate_queries | retriever.map() | get_unique_union
docs = retrieval_chain.invoke({"question":question})
len(docs)

In [None]:
from operator import itemgetter
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnablePassthrough

# RAG
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

llm = ChatOpenAI(temperature=0)

final_rag_chain = (
    {"context": retrieval_chain, 
     "question": itemgetter("question")} 
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question":question})

# 5.1 RAG FUSION

Unificar as respostas das perguntas e rankea-las

In [1]:
from langchain.prompts import ChatPromptTemplate

# RAG-Fusion: Related
template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
Generate multiple search queries related to: {question} \n
Output (4 queries):"""
prompt_rag_fusion = ChatPromptTemplate.from_template(template)

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

generate_queries = (
    prompt_rag_fusion 
    | ChatOpenAI(temperature=0)
    | StrOutputParser() 
    | (lambda x: x.split("\n"))
)

In [None]:
from langchain.load import dumps, loads

def reciprocal_rank_fusion(results: list[list], k=60):
    """ Reciprocal_rank_fusion that takes multiple lists of ranked documents 
        and an optional parameter k used in the RRF formula """
    
    # Initialize a dictionary to hold fused scores for each unique document
    fused_scores = {}

    # Iterate through each list of ranked documents
    for docs in results:
        # Iterate through each document in the list, with its rank (position in the list)
        for rank, doc in enumerate(docs):
            # Convert the document to a string format to use as a key (assumes documents can be serialized to JSON)
            doc_str = dumps(doc)
            # If the document is not yet in the fused_scores dictionary, add it with an initial score of 0
            if doc_str not in fused_scores:
                fused_scores[doc_str] = 0
            # Retrieve the current score of the document, if any
            previous_score = fused_scores[doc_str]
            # Update the score of the document using the RRF formula: 1 / (rank + k)
            fused_scores[doc_str] += 1 / (rank + k)

    # Sort the documents based on their fused scores in descending order to get the final reranked results
    reranked_results = [
        (loads(doc), score)
        for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    ]

    # Return the reranked results as a list of tuples, each containing the document and its fused score
    return reranked_results

retrieval_chain_rag_fusion = generate_queries | retriever.map() | reciprocal_rank_fusion
docs = retrieval_chain_rag_fusion.invoke({"question": question})
len(docs)

In [None]:
from langchain_core.runnables import RunnablePassthrough

# RAG
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    {"context": retrieval_chain_rag_fusion, 
     "question": itemgetter("question")} 
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question":question})