In [1]:
import time

# Record the wall-clock start time; the last cell subtracts it from its own
# timestamp to report how long the whole notebook took to run.
start_time = time.time()

In [None]:
import getpass
import os

from pinecone import Pinecone, ServerlessSpec

# Never hardcode the credential in the notebook: a saved/committed .ipynb would
# leak it. Prefer the PINECONE_API_KEY environment variable and fall back to an
# interactive, non-echoing prompt when it is unset or empty.
pinecone_api_key = os.environ.get("PINECONE_API_KEY") or getpass.getpass(
    "Pinecone API key: "
)

# Client handle used by the following cells to list, create and open indexes.
pc = Pinecone(api_key=pinecone_api_key)

In [3]:
index_name = "mejorado-test-index"

# Names of the indexes that pc.list_indexes() currently reports.
existing_indexes = [info["name"] for info in pc.list_indexes()]

index_already_exists = index_name in existing_indexes
if not index_already_exists:
    # First run: create a serverless cosine index with 3072-dimensional vectors.
    serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")
    pc.create_index(
        name=index_name,
        dimension=3072,
        metric="cosine",
        spec=serverless_spec,
    )

    # Poll once per second until the index status reports it is ready.
    while True:
        if pc.describe_index(index_name).status["ready"]:
            break
        time.sleep(1)

# Handle to the (pre-existing or freshly created) index.
index = pc.Index(index_name)

In [None]:
import getpass
import os

# Do not hardcode the key, and do not clobber one that is already exported:
# only prompt (without echo) when OPENAI_API_KEY is missing or empty.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

from langchain_openai import OpenAIEmbeddings, ChatOpenAI

# Embedding model used to vectorize documents and queries.
# NOTE(review): the Pinecone index above is created with dimension=3072, which
# must match this model's output size - confirm if the model is ever changed.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# Chat model that generates the final answer in the RAG chain.
llm = ChatOpenAI(model="gpt-4o-mini")

In [5]:
from langchain_pinecone import PineconeVectorStore

vector_store = PineconeVectorStore(index=index, embedding=embeddings)

In [6]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Download the blog post, asking BeautifulSoup to parse only the elements whose
# CSS class is one of the post's content, title or header classes.
content_filter = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": content_filter},
)
docs = loader.load()

# Break the loaded documents into overlapping chunks
# (chunk_size=1000, chunk_overlap=200).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

# Store the chunks in the Pinecone-backed vector store.
vector_store.add_documents(documents=splits)

# Pieces consumed by the RAG chain below: a retriever over the vector store and
# the "rlm/rag-prompt" prompt pulled from the LangChain hub.
retriever = vector_store.as_retriever()
prompt = hub.pull("rlm/rag-prompt")


def format_docs(docs):
    """Join the text of several documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line (``"\\n\\n"``). An empty iterable yields ``""``.
    """
    page_texts = [doc.page_content for doc in docs]
    separator = "\n\n"
    return separator.join(page_texts)


# Input mapping for the prompt: "context" is the retriever output piped through
# format_docs, and "question" is the original input passed through unchanged.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}
answer_parser = StrOutputParser()

# Full pipeline: build prompt inputs -> prompt -> chat model -> string parser.
rag_chain = chain_inputs | prompt | llm | answer_parser

# Last expression of the cell, so the answer is shown as the cell output.
rag_chain.invoke("What is Task Decomposition?")

USER_AGENT environment variable not set, consider setting it to identify your requests.


'Task Decomposition is the process of breaking down a complex task into smaller, more manageable sub-tasks. This approach helps in organizing and prioritizing work, making it easier to tackle each component effectively. It is commonly used in project management and problem-solving to enhance efficiency and clarity.'

In [7]:
# Delete all vectors from the Pinecone index (cleanup after the demo query).
vector_store.delete(delete_all=True)
# Confirmation message (Spanish): "All vectors have been removed from the Vector Store."
print("Todos los vectores han sido eliminados del Vector Store.")

Todos los vectores han sido eliminados del Vector Store.


In [8]:
# Report the wall-clock seconds elapsed since start_time was recorded in the
# first cell, formatted to two decimals (message is in Spanish).
end_time = time.time()
print(f"Tiempo de ejecución del Notebook 2: {end_time - start_time:.2f} segundos")

Tiempo de ejecución del Notebook 2: 18.10 segundos
