In [None]:
#install required dependencies
!pip install -qU langgraph langchain langchain_openai chromadb beautifulsoup4
!pip install -qU fastapi uvicorn python-dotenv pydantic

In [None]:
import os
from getpass import getpass

try:
    from google.colab import userdata  # secret store, only importable inside Colab
except ImportError:
    userdata = None  # running outside Colab; fall through to env var / prompt

# Prefer the Colab secret when one is available (same precedence as before).
if userdata is not None:
    try:
        colab_key = userdata.get('OPENAI_API_KEY')
    except Exception:
        # Best-effort lookup: a missing secret or denied notebook access must
        # not abort the cell, because the interactive prompt below can still
        # supply the key.
        colab_key = None
    if colab_key:
        os.environ["OPENAI_API_KEY"] = colab_key

# Last resort: ask interactively when no key has been configured yet.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("enter ur openAI API key: ")

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings


# Marketing articles that form the knowledge base for retrieval.
urls = [
    "https://blog.hubspot.com/marketing/what-is-content-marketing",
    "https://www.semrush.com/blog/seo-copywriting/",
    "https://neilpatel.com/blog/how-to-create-a-successful-social-media-marketing-campaign/",
]

# Load each page separately (one list of documents per URL), then flatten.
docs = [WebBaseLoader(page_url).load() for page_url in urls]
docs_list = [page_doc for page_docs in docs for page_doc in page_docs]

# Token-based chunking: 512-token chunks with a 100-token overlap.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=512,
    chunk_overlap=100,
)
doc_splits = text_splitter.split_documents(docs_list)

# Embed the chunks with OpenAI embeddings and index them in a Chroma collection.
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="marketing-blogs",
    embedding=OpenAIEmbeddings(),
)

# Retriever handle used by the graph nodes to query the collection.
retriever = vectorstore.as_retriever()

print("vector store created successfully")

In [None]:
#### The RAG Graph

from typing import Any, List, TypedDict
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain_openai import ChatOpenAI

# 1. Define the state for our graph
# This state will be passed between nodes
class GraphState(TypedDict):
    """State dictionary handed from node to node while the graph runs."""

    # The user's question; every node reads it and passes it along unchanged.
    question: str
    # The answer text produced by the generation step.
    generation: str
    # Retrieved items, exactly as returned by the retriever. The grading node
    # reads `.page_content` on each one, so these are document objects rather
    # than plain strings.
    documents: List[Any]


# 2. Define the Nodes for our graph
def retrieve_docs(state):
    """
    Look up documents for the question stored in the graph state.

    Args:
        state: Current graph state; only its "question" entry is read.

    Returns:
        dict with the retriever's results under "documents" and the
        unchanged "question".
    """
    print("---RETRIEVING DOCUMENTS---")
    user_question = state["question"]
    return {
        "documents": retriever.invoke(user_question),
        "question": user_question,
    }

def _is_relevant_grade(score):
    """Return True when the grader's parsed output marks a document as relevant."""
    # The grader is asked for {"score": "yes" | "no"}, but model output can omit
    # the key, vary the casing, or add whitespace. Anything that is not a clear
    # "yes" counts as not relevant.
    if isinstance(score, dict):
        score = score.get("score", "")
    return str(score).strip().lower() == "yes"


def grade_documents(state):
    """
    Determines whether the retrieved documents are relevant to the question.

    Each document's `page_content` is sent to an LLM grader together with the
    question; only documents graded "yes" are kept.

    Args:
        state: Current graph state; reads "question" and "documents".

    Returns:
        dict with the relevant documents under "documents" (possibly an empty
        list) and the unchanged "question".
    """
    print("---CHECKING DOCUMENT RELEVANCE---")
    question = state["question"]
    documents = state["documents"]

    # Deterministic LLM (temperature 0) whose reply is parsed as JSON
    llm = ChatOpenAI(model="gpt-4o", temperature=0)

    prompt = PromptTemplate(
        template="""You are a grader assessing relevance of a retrieved document to a user question. \n
        Here is the retrieved document: \n\n {document} \n\n
        Here is the user question: {question} \n
        If the document contains keywords related to the user question, grade it as relevant.
        Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.
        Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.""",
        input_variables=["question", "document"],
    )

    retrieval_grader = prompt | llm | JsonOutputParser()

    filtered_docs = []
    for d in documents:
        score = retrieval_grader.invoke({"question": question, "document": d.page_content})
        if _is_relevant_grade(score):
            print("---GRADE: DOCUMENT RELEVANT---")
            filtered_docs.append(d)
        else:
            print("---GRADE: DOCUMENT NOT RELEVANT---")

    return {"documents": filtered_docs, "question": question}


def generate(state):
    """
    Generates an answer using the retrieved documents.

    Args:
        state: Current graph state; reads "question" and "documents".

    Returns:
        dict with the unchanged "documents" and "question" plus the model's
        answer text under "generation".
    """
    print("---GENERATING ANSWER---")
    question = state["question"]
    documents = state["documents"]

    # Pass the model the text of each document rather than the repr of the
    # list. Items without a `page_content` attribute (e.g. plain strings) are
    # used as-is.
    context = "\n\n".join(
        doc.page_content if hasattr(doc, "page_content") else str(doc)
        for doc in documents
    )

    # The extra instruction covers the case where no relevant document
    # survived grading and the context is therefore empty.
    prompt = PromptTemplate(
        template="""You are a marketing assistant AI. Answer the user's question based on the context below.
        If the context is empty or does not contain the answer, say that you cannot answer from the available sources.

        CONTEXT:
        {context}

        QUESTION:
        {question}

        ANSWER:""",
        input_variables=["context", "question"],
    )

    llm = ChatOpenAI(model="gpt-4o", temperature=0.2)

    rag_chain = prompt | llm | StrOutputParser()

    generation = rag_chain.invoke({"context": context, "question": question})
    return {"documents": documents, "question": question, "generation": generation}


# 3. Define the Conditional Edge
def decide_to_generate(state):
    """
    Pick the next step based on whether any documents remain in the state.

    Args:
        state: Current graph state; only its "documents" entry is read.

    Returns:
        "generate" when at least one document is present, otherwise
        "cannot_answer".
    """
    print("---ASSESSING RELEVANCE & DECIDING NEXT STEP---")

    if state["documents"]:
        # Something relevant is available, so an answer can be generated.
        print("---DECISION: RELEVANT DOCUMENTS FOUND, PROCEED TO GENERATE---")
        return "generate"

    # Nothing is left to base an answer on.
    print("---DECISION: NO RELEVANT DOCUMENTS, CANNOT ANSWER---")
    return "cannot_answer"

In [None]:
from langgraph.graph import END, StateGraph

# Define the graph structure
workflow = StateGraph(GraphState)

# Add the nodes
workflow.add_node("retrieve", retrieve_docs)
workflow.add_node("grade_documents", grade_documents)
workflow.add_node("generate", generate)
# If we can't answer, we still use the generate function to formulate a
# "can't answer" response, registered under its own node name.
workflow.add_node("cannot_answer", generate)

# Set the entry point
workflow.set_entry_point("retrieve")

# Add the edges
workflow.add_edge("retrieve", "grade_documents")
workflow.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {
        "generate": "generate",
        # Route to the dedicated node so the run still produces a "generation";
        # mapping this branch straight to END left that node unreachable.
        "cannot_answer": "cannot_answer",
    },
)
workflow.add_edge("generate", END)
workflow.add_edge("cannot_answer", END)

# Compile the graph into a runnable app
app = workflow.compile()
print("Graph compiled successfully.")

In [None]:
import pprint

# Define a query
inputs = {"question": "What is the role of SEO copywriting in content marketing?"}

# Run the graph, remembering the most recent "generation" any node emitted.
# Not every node output carries that key, so reading it from whatever `value`
# happens to hold after the loop is unsafe.
final_generation = None
for output in app.stream(inputs):
    for key, value in output.items():
        pprint.pprint(f"Node: {key}")
        pprint.pprint(value, indent=2, width=80, depth=None)
        if isinstance(value, dict) and "generation" in value:
            final_generation = value["generation"]
    pprint.pprint("\n---\n")

# Print the final generation
print("\nFINAL ANSWER:")
if final_generation is None:
    print("No answer was generated for this question.")
else:
    pprint.pprint(final_generation)

In [None]:
# Another example
inputs_2 = {"question": "How can I create a successful social media campaign for a summer sale?"}

# Remember the most recent "generation" any node emitted; not every node
# output carries that key, so the leftover loop variable cannot be trusted.
final_generation_2 = None
for output in app.stream(inputs_2):
    for key, value in output.items():
        pprint.pprint(f"Node: {key}")
        pprint.pprint(value, indent=2, width=80, depth=None)
        if isinstance(value, dict) and "generation" in value:
            final_generation_2 = value["generation"]
    pprint.pprint("\n---\n")

print("\nFINAL ANSWER:")
if final_generation_2 is None:
    print("No answer was generated for this question.")
else:
    pprint.pprint(final_generation_2)