# Install the necessary libraries

In [1]:
!pip install -qU langchain langchain_openai langgraph arxiv duckduckgo-search -q
!pip install -qU faiss-cpu pymupdf pypdf -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m817.7/817.7 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.0/64.0 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m299.3/299.3 kB[0m [31m16.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.4/116.4 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m312.3/312.3 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m17.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.1/81.1 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup

In [16]:
from google.colab import userdata
from uuid import uuid4
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import ArxivLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

In [4]:
# Copy the OpenAI key from Colab's `userdata` store into the process environment.
os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')
# Tracing configuration (presumably for LangSmith — confirm); the project name gets
# a random 8-character hex suffix so every session is logged under its own name.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = f"AIE1 - LangGraph - {uuid4().hex[0:8]}"
os.environ["LANGCHAIN_API_KEY"] =  userdata.get('LANGCHAIN_API_KEY')

# Instantiate a Simple Retrieval Chain using LCEL

In [17]:
# Load the course catalog PDF into Document objects.
# `load()` is used instead of `load_and_split()`: the latter already runs a default
# text splitter, and the documents are split again by the token-based splitter
# further down, so the text would otherwise be split twice.
loader = PyPDFLoader("course-catalog.pdf")
pages = loader.load()

In [18]:
# Split the loaded pages into smaller chunks (chunk_size=350, chunk_overlap=50,
# with length measured by the tiktoken encoder).
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=350,
    chunk_overlap=50,
)
chunked_documents = text_splitter.split_documents(pages)

# Embedding model used to vectorise every chunk.
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small",
    openai_api_key=os.environ["OPENAI_API_KEY"],
)

# Build the FAISS index from the document chunks.
faiss_vectorstore = FAISS.from_documents(documents=chunked_documents, embedding=embeddings)

# Expose the index as a retriever for the chain built later.
retriever = faiss_vectorstore.as_retriever()

# Generate RAG prompt

In [19]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt template for the RAG chain. `{question}` and `{context}` are placeholders
# filled in when the prompt is formatted.
RAG_PROMPT = (
    "Use the following context to answer the user's query. "
    "If you cannot answer the question, please respond with 'I don't know'.\n"
    "\n"
    "Question:\n"
    "{question}\n"
    "\n"
    "Context:\n"
    "{context}\n"
)

# Turn the raw template string into a chat prompt with `question` and `context` inputs.
rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)

# Instantiate the LLM

In [20]:
from langchain_openai import ChatOpenAI

# Chat model that answers the RAG prompt in the retrieval chain.
openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo")

In [21]:
from operator import itemgetter
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough

# LCEL pipeline, given an input dict with a "question" key:
#   1. send the question to the retriever to obtain "context", and forward the
#      question itself unchanged;
#   2. re-assign "context" from the existing "context" value (a pass-through step);
#   3. emit the model's "response" to the filled-in prompt together with the
#      retrieved "context".
retrieval_augmented_generation_chain = (
    {
        "context": itemgetter("question") | retriever,
        "question": itemgetter("question"),
    }
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {
        "response": rag_prompt | openai_chat_model,
        "context": itemgetter("context"),
    }
)

In [22]:
# Display the composed chain's repr to inspect its stages.
retrieval_augmented_generation_chain

{
  context: RunnableLambda(itemgetter('question'))
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7c3f08612e30>),
  question: RunnableLambda(itemgetter('question'))
}
| RunnableAssign(mapper={
    context: RunnableLambda(itemgetter('context'))
  })
| {
    response: ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="Use the following context to answer the user's query. If you cannot answer the question, please respond with 'I don't know'.\n\nQuestion:\n{question}\n\nContext:\n{context}\n"))])
              | ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x7c3f0877b6a0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x7c3f08772230>, openai_api_key=SecretStr('**********'), openai_proxy=''),
    context: RunnableLambda(ite

In [23]:
# Smoke-test the chain asynchronously; the result is a dict holding the model
# "response" and the retrieved "context" documents.
await retrieval_augmented_generation_chain.ainvoke({"question" : "How can i get details about GCP databricks cloud integrations "})

{'response': AIMessage(content="To get details about GCP Databricks cloud integrations, you can enroll in the course provided in the document. The course covers topics like deploying workspaces, custom-managed keys, encryption levels, creating GCP buckets, setting up Google Pub/Sub, and more. It also requires beginner-level knowledge of GCP and access to a GCP project. \n\nI don't know where else you could find this information.", response_metadata={'token_usage': {'completion_tokens': 85, 'prompt_tokens': 1172, 'total_tokens': 1257}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_3b956da36b', 'finish_reason': 'stop', 'logprobs': None}, id='run-50e6db25-8366-4150-8538-8d7849dda666-0'),
 'context': [Document(page_content='●\nDeploy\nworkspaces\ninto\nyour\nown\nmanaged\nVPCs.\n●\nCreate\nyour\nown\ncustomer-managed\nkeys.\n●\nApply\ncustomer-managed\nkeys\nto\nachieve\ndifferent\nlevels\nof\nencryption\nin\n●\nyour\nDatabricks\nworkspaces.\nGCP\nDatabricks\nCloud\nIntegrations

# Creating our Tool Belt

As is usually the case, we’ll want to equip our agent with a toolbelt to help answer questions and add external knowledge.

There’s a load of tools in the LangChain Community Repo, but we’ll stick to a single one — DuckDuckGo web search — just so we can observe the cyclic nature of LangGraph in action!

In [24]:
from langchain_community.tools.ddg_search import DuckDuckGoSearchRun
from langgraph.prebuilt import ToolExecutor
# Tools the agent is allowed to call; currently only DuckDuckGo web search.
# NOTE(review): the catalog retriever built earlier is not part of this belt, so the
# agent cannot consult course-catalog.pdf — confirm whether that is intended.
tool_belt = [DuckDuckGoSearchRun()]

# Executor that runs a requested tool from the belt.
tool_executor = ToolExecutor(tool_belt)

In [25]:
from langchain_core.utils.function_calling import convert_to_openai_function

# Describe every tool in the belt as an OpenAI function definition.
functions = [convert_to_openai_function(tool) for tool in tool_belt]

# Chat model with temperature 0, bound to those function definitions so its replies
# can carry a `function_call`.
model = ChatOpenAI(temperature=0).bind_functions(functions)

# Leverage LangGraph

LangGraph leverages a `StateGraph`, which uses an `AgentState` object to pass information between the various nodes of the graph.

There are more options than what we’ll see below — but here `AgentState` is a `TypedDict` with a single key, `messages`, whose value is a `Sequence` of `BaseMessage`s. Because the field is annotated with `operator.add`, the messages returned by a node are appended to the existing sequence whenever the state changes, rather than replacing it.

In [26]:
from typing import TypedDict, Annotated, Sequence
import operator
from langchain_core.messages import BaseMessage

class AgentState(TypedDict):
  """State dictionary handed from node to node in the graph."""
  # Conversation messages; the `operator.add` annotation names the function used to
  # combine the stored sequence with the messages a node returns.
  messages: Annotated[Sequence[BaseMessage], operator.add]

In [27]:
from langgraph.prebuilt import ToolInvocation
import json
from langchain_core.messages import FunctionMessage

def call_model(state):
  """Agent node: send the conversation so far to the function-calling model.

  Args:
    state: graph state; its "messages" entry is passed to the model as-is.

  Returns:
    A dict whose "messages" list contains only the model's new reply.
  """
  reply = model.invoke(state["messages"])
  return {"messages": [reply]}

def call_tool(state):
  """Action node: run the function call requested by the most recent message.

  Reads the `function_call` entry from the last message's `additional_kwargs`,
  decodes its JSON `arguments`, invokes the named tool through `tool_executor`,
  and wraps the stringified result in a `FunctionMessage`.

  Args:
    state: graph state; the last item of "messages" must carry a `function_call`.

  Returns:
    A dict whose "messages" list contains only the resulting `FunctionMessage`.
  """
  requested_call = state["messages"][-1].additional_kwargs["function_call"]
  tool_name = requested_call["name"]
  tool_arguments = json.loads(requested_call["arguments"])

  action = ToolInvocation(tool=tool_name, tool_input=tool_arguments)
  tool_output = tool_executor.invoke(action)

  return {
      "messages": [FunctionMessage(content=str(tool_output), name=action.tool)]
  }

# Workflow

In [28]:
from langgraph.graph import StateGraph, END

# Graph whose state follows the AgentState schema.
workflow = StateGraph(AgentState)

# Register the two nodes: "agent" calls the model, "action" runs the requested tool.
workflow.add_node("agent", call_model)
workflow.add_node("action", call_tool)
# Display the nodes registered so far.
workflow.nodes

{'agent': RunnableLambda(call_model), 'action': RunnableLambda(call_tool)}

In [29]:
# Mark the "agent" node as the graph's entry point.
workflow.set_entry_point("agent")

# Conditional edge for routing

In [30]:
def should_continue(state):
  """Pick the routing key that follows the agent node.

  Args:
    state: graph state; only the last item of "messages" is inspected.

  Returns:
    "continue" when the last message's `additional_kwargs` contains a
    "function_call" entry, otherwise "end".
  """
  wants_tool = "function_call" in state["messages"][-1].additional_kwargs
  return "continue" if wants_tool else "end"

# After the "agent" node, `should_continue` picks the route: "continue" leads to the
# "action" node, "end" leads to END.
workflow.add_conditional_edges(
    "agent",
    should_continue,
    {"continue": "action", "end": END},
)

Finally, add an edge from the action node back to the agent node, so that every tool result is handed back to the model. This closes the cycle.

In [32]:
# Add an edge from the "action" node back to the "agent" node.
workflow.add_edge("action", "agent")

In [33]:
# Compile the workflow definition into the object invoked below.
app = workflow.compile()
# Display the compiled graph's repr.
app

CompiledStateGraph(nodes={'__start__': PregelNode(config={'tags': ['langsmith:hidden']}, channels=['__start__'], triggers=['__start__'], writers=[ChannelWrite<messages>(recurse=True, writes=[ChannelWriteEntry(channel='messages', value=<object object at 0x7c3f0c2cff00>, skip_none=False, mapper=_get_state_key(recurse=False))]), ChannelWrite<start:agent>(recurse=True, writes=[ChannelWriteEntry(channel='start:agent', value='__start__', skip_none=False, mapper=None)])]), 'agent': PregelNode(config={'tags': []}, channels={'messages': 'messages'}, triggers=['action', 'start:agent'], mapper=functools.partial(<function _coerce_state at 0x7c3f0887fd90>, <class '__main__.AgentState'>), writers=[ChannelWrite<agent,messages>(recurse=True, writes=[ChannelWriteEntry(channel='agent', value='agent', skip_none=False, mapper=None), ChannelWriteEntry(channel='messages', value=<object object at 0x7c3f0c2cff00>, skip_none=False, mapper=_get_state_key(recurse=False))]), _route(recurse=True, _is_channel_write

In [34]:
from langchain_core.messages import HumanMessage

# Initial state: a single human message.
# NOTE(review): the tool belt holds only web search, so this catalog-style question is
# not answered from course-catalog.pdf — confirm that is intended.
inputs = {"messages" : [HumanMessage(content="What is pre requisite for Generative AI fundamendals")]}

# Run the graph and print the state it returns.
response = app.invoke(inputs)
print(response)

{'messages': [HumanMessage(content='What is pre requisite for Generative AI fundamendals'), AIMessage(content='The prerequisites for learning Generative AI fundamentals typically include a strong understanding of the following topics:\n\n1. Machine Learning: Knowledge of machine learning concepts and algorithms is essential for understanding how generative models work.\n\n2. Deep Learning: Familiarity with deep learning frameworks like TensorFlow or PyTorch, as well as neural networks, is important for implementing generative models.\n\n3. Probability and Statistics: Understanding probability theory and statistical concepts is crucial for working with generative models and evaluating their performance.\n\n4. Linear Algebra: Proficiency in linear algebra is necessary for understanding the mathematical foundations of generative models.\n\n5. Python Programming: Proficiency in Python programming is essential for implementing generative models using popular libraries like TensorFlow, PyTor

In [35]:
# Text content of the last message in the returned state.
response['messages'][-1].content

'The prerequisites for learning Generative AI fundamentals typically include a strong understanding of the following topics:\n\n1. Machine Learning: Knowledge of machine learning concepts and algorithms is essential for understanding how generative models work.\n\n2. Deep Learning: Familiarity with deep learning frameworks like TensorFlow or PyTorch, as well as neural networks, is important for implementing generative models.\n\n3. Probability and Statistics: Understanding probability theory and statistical concepts is crucial for working with generative models and evaluating their performance.\n\n4. Linear Algebra: Proficiency in linear algebra is necessary for understanding the mathematical foundations of generative models.\n\n5. Python Programming: Proficiency in Python programming is essential for implementing generative models using popular libraries like TensorFlow, PyTorch, or Keras.\n\n6. Data Preprocessing: Knowledge of data preprocessing techniques is important for preparing 