In [40]:
import os
import re

import boto3
from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec


In [20]:
# Credentials come from the environment so they are never saved with the
# notebook. An unset variable falls back to '' (the previous placeholder value).
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY', '')
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', '')

In [23]:
# OpenAI embedding client; it is handed to the vector store further down.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large", api_key=OPENAI_API_KEY)
# Pinecone client built from the configured key (kept under a lowercase alias too).
pinecone_api_key = PINECONE_API_KEY
pc = Pinecone(api_key=pinecone_api_key)

In [25]:
# Handle to the 'langgraph-docs' Pinecone index, obtained from the client above.
index_name = 'langgraph-docs'
index = pc.Index(index_name)

In [27]:
# LangChain vector-store wrapper over the Pinecone index, using the OpenAI embeddings.
vector_store = PineconeVectorStore(index=index, embedding=embeddings)

In [30]:
# Similarity-search retriever; k=10 is the number of documents requested per query.
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 10})
# NOTE(review): this query text is repeated verbatim as `question` in the
# generation cell further down - keep the two in sync if either is edited.
context = retriever.invoke("Give me code to build retreiver tool using Langgraph")

In [33]:
# Display the retrieved documents as the cell output.
context

[Document(id='baec1f50-ee29-4e51-ad49-80a238d1009b', metadata={}, page_content='Agentic RAG\nSkip to content\nAgentic RAG\nInitializing search\nGitHub\nHome\nTutorials\nHow-to Guides\nConceptual Guides\nReference\nGitHub\nHome\nTutorials\nTutorials\nQuick Start\nQuick Start\nQuick Start\nðŸš€ LangGraph Quick Start\nQuick Start: Launch Local LangGraph Server\nLangGraph Cloud Quick Start\nChatbots\nChatbots\nChatbots\nBuild a Customer Support Bot\nPrompt Generation from User Requirements\nCode generation with RAG and self-correction\nRAG\nRAG\nRAG\nAdaptive RAG\nLanggraph adaptive rag local\nAgentic RAG\nAgentic RAG\nTable of contents\nSetup\nRetriever\nAgent State\nNodes and Edges\nGraph\nCorrective RAG (CRAG)\nCorrective RAG (CRAG) using local LLMs\nSelf-RAG\nSelf-RAG using local LLMs\nAn agent for interacting with a SQL database\nAgent Architectures\nAgent Architectures\nAgent Architectures\nMulti-Agent Systems\nPlanning Agents\nReflection & Critique\nEvaluation & Analysis\nEvaluation

In [44]:
def replace_s3_locations_with_content(docs, s3_client=None):
    """Inline the contents of S3 objects referenced in each document's text.

    Every ``s3://bucket/key`` URI found in ``doc.page_content`` is replaced by
    the UTF-8 decoded body of that object. Documents are modified in place.

    Args:
        docs (list): Objects exposing a writable ``page_content`` string.
        s3_client: Optional object with a boto3-style
            ``get_object(Bucket=..., Key=...)`` method. When omitted, a client
            is created with ``boto3.client('s3')`` so that credentials come
            from boto3's standard lookup instead of being written into the
            notebook.

    Returns:
        list: The same ``docs`` list that was passed in.

    Notes:
        Best effort: a URI that cannot be parsed, fetched or decoded is
        reported with ``print`` and left in the text unchanged.
    """
    if s3_client is None:
        s3_client = boto3.client('s3')

    # One lookup per distinct URI for the whole call (failures included), so a
    # URI repeated across documents is fetched and reported only once.
    resolved = {}

    def _resolve(match):
        """Return the replacement text for one matched S3 URI."""
        s3_location = match.group(0)
        if s3_location not in resolved:
            try:
                bucket, key = s3_location[len('s3://'):].split('/', 1)
                response = s3_client.get_object(Bucket=bucket, Key=key)
                resolved[s3_location] = response['Body'].read().decode('utf-8')
            except Exception as e:
                print(f"Error processing {s3_location}: {e}")
                # Leave the URI as it was when it cannot be resolved.
                resolved[s3_location] = s3_location
        return resolved[s3_location]

    for doc in docs:
        # Substituting match by match (rather than str.replace per URI) means a
        # URI that is a prefix of another, e.g. s3://b/a vs s3://b/a.txt, cannot
        # corrupt the longer one, and text that was just inlined is not scanned
        # again for further replacements.
        # NOTE(review): the pattern takes everything up to the next whitespace,
        # so punctuation directly after a URI becomes part of the key - confirm
        # the indexed documents never place any there.
        doc.page_content = re.sub(r's3://[^\s\n]+', _resolve, doc.page_content)

    return docs

In [46]:
# The function edits the documents in place and returns the list it was given,
# so `updated_context` and `context` refer to the same (now modified) objects.
updated_context = replace_s3_locations_with_content(context)

In [49]:
def remove_code_file_placeholders(docs):
    """
    Strip ``[CODE FILE: ...]`` markers from the text of every document.

    Each document's ``page_content`` is rewritten in place; text around a
    marker is kept exactly as it was.

    Args:
        docs (list): Documents exposing a writable ``page_content`` string

    Returns:
        list: The same list that was passed in, with the markers deleted
    """
    placeholder = re.compile(r'\[CODE FILE: [^\]]+\]')
    for document in docs:
        cleaned_text = placeholder.sub('', document.page_content)
        document.page_content = cleaned_text
    return docs

In [51]:
# In-place edit again: the returned list is the one passed in, so
# `updated_context_1` is another name for `updated_context`.
updated_context_1 = remove_code_file_placeholders(updated_context)

In [53]:
# Display the processed documents as the cell output.
updated_context_1

 Document(id='09628c95-1c16-4853-af63-7e071c771d38', metadata={}, page_content='Adaptive RAG\nSkip to content\nAdaptive RAG\nInitializing search\nGitHub\nHome\nTutorials\nHow-to Guides\nConceptual Guides\nReference\nGitHub\nHome\nTutorials\nTutorials\nQuick Start\nQuick Start\nQuick Start\nðŸš€ LangGraph Quick Start\nQuick Start: Launch Local LangGraph Server\nLangGraph Cloud Quick Start\nChatbots\nChatbots\nChatbots\nBuild a Customer Support Bot\nPrompt Generation from User Requirements\nCode generation with RAG and self-correction\nRAG\nRAG\nRAG\nAdaptive RAG\nAdaptive RAG\nTable of contents\nSetup\nCreate Index\nLLMs\nWeb Search Tool\nConstruct the Graph\nDefine Graph State\nDefine Graph Flow\nCompile Graph\nUse Graph\nLanggraph adaptive rag local\nAgentic RAG\nCorrective RAG (CRAG)\nCorrective RAG (CRAG) using local LLMs\nSelf-RAG\nSelf-RAG using local LLMs\nAn agent for interacting with a SQL database\nAgent Architectures\nAgent Architectures\nAgent Architectures\nMulti-Agent Syst

In [56]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field

### OpenAI

# Code-generation prompt.
# The system message carries the retrieved documentation in {context}; the
# conversation turns are supplied through the {messages} placeholder.
# The text inside the triple-quoted string, including its line breaks and
# indentation, is sent as written, so edit it with care.
code_gen_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are a coding assistant with expertise in LCEL, Langgraph language. \n 
    Here is a subset set of Langgraph documentation:  \n ------- \n  {context} \n ------- \n Answer the user 
    question based only on the above provided documentation. Ensure any code you provide can be executed \n 
    with all required imports and variables defined. Structure your answer with a description of the code solution. \n
    Then list the imports. And finally list the functioning code block. If you are unable to answer from the context give I don't know Here is the user question:""",
        ),
        ("placeholder", "{messages}"),
    ]
)


# Data model
# Structured-output schema for the code-generation chain: the answer comes back
# as an instance of this class with three string fields.
# NOTE(review): the class docstring and the Field descriptions are presumably
# forwarded to the model as part of the schema - treat them as prompt text and
# confirm before rewording. The docstring still says "LCEL" although the
# questions asked here are about LangGraph.
# NOTE(review): the lowercase class name `code` is also the name of one of its
# own fields; it is left unchanged because the chain refers to the class by name.
class code(BaseModel):
    """Schema for code solutions to questions about LCEL."""

    prefix: str = Field(description="Description of the problem and approach")
    imports: str = Field(description="Code block import statements")
    code: str = Field(description="Code block not including import statements")


# Chat model used for generation (temperature 0).
expt_llm = "gpt-4o"
llm = ChatOpenAI(temperature=0, model=expt_llm, api_key=OPENAI_API_KEY)

# Prompt piped into the LLM, which is constrained to return a `code` instance.
code_gen_chain_oai = code_gen_prompt | llm.with_structured_output(code)

question = "Give me code to build retreiver tool using Langgraph"

# Pass the documents' text rather than the list itself: formatting a list of
# Document objects into {context} inserts their Python repr (ids, empty
# metadata, escaped newlines) instead of readable documentation.
context_text = "\n\n---\n\n".join(doc.page_content for doc in updated_context_1)

solution = code_gen_chain_oai.invoke(
    {"context": context_text, "messages": [("user", question)]}
)
solution

code(prefix='To build a retriever tool using Langgraph, you need to first set up a vector store with indexed documents and then create a retriever tool that can search through these documents. The following code demonstrates how to achieve this using Langgraph and related libraries. It involves loading documents from URLs, splitting them into chunks, storing them in a vector database, and then creating a retriever tool to query these documents.', imports='from langchain_community.document_loaders import WebBaseLoader\nfrom langchain_community.vectorstores import Chroma\nfrom langchain_openai import OpenAIEmbeddings\nfrom langchain_text_splitters import RecursiveCharacterTextSplitter\nfrom langchain.tools.retriever import create_retriever_tool', code='# Define URLs to load documents from\nurls = [\n    "https://lilianweng.github.io/posts/2023-06-23-agent/",\n    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",\n    "https://lilianweng.github.io/posts/2023-10-25-adv-a