In [None]:
#1. Install Dependencies
# Install required libraries:
# - langchain: document loading, splitting, embeddings, LLM interface
# - langgraph: workflow orchestration
# - langchain-openai: OpenAI integrations
# - networkx + matplotlib: workflow visualization

!pip install langchain langgraph langchain-openai langchain-text-splitters langchain-community networkx matplotlib

In [None]:
# 2. Setup OpenAI API Key
# Store OpenAI API key as environment variable
# LangChain automatically reads this variable when calling models

import getpass
import os

# Keep a key that is already present in the environment, and otherwise ask for
# it interactively. This avoids committing a secret to the notebook and avoids
# overwriting a valid key with a placeholder string on every run of this cell.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

In [None]:
# 3. Define Application State
# State defines data flowing through the LangGraph pipeline
# question: user input
# context: retrieved documents
# answer: generated LLM response

from typing_extensions import TypedDict, List
from langchain_core.documents import Document

class State(TypedDict):
    """Schema of the data passed between the nodes of the LangGraph pipeline."""
    question: str  # user input
    context: List[Document]  # retrieved documents
    answer: str  # generated LLM response

In [None]:
# 4. Load Knowledge Base
# Load external knowledge from JSON file
# Each entry becomes a LangChain Document object

import json
from langchain_core.documents import Document

# Read the file as UTF-8 explicitly so the result does not depend on the
# platform's default text encoding.
with open('knowledge_base.json', 'r', encoding='utf-8') as f:
    knowledge_items = json.load(f)

# Each entry is indexed by its 'text' field; an entry without that key raises KeyError.
local_docs = [Document(page_content=item['text']) for item in knowledge_items]

In [None]:
#5. Create Embeddings + Vector Store
# Convert documents into vector embeddings
# Store them in an in-memory vector database for similarity search

from langchain_core.vectorstores import InMemoryVectorStore
# OpenAIEmbeddings is provided by the langchain-openai package installed above;
# the `langchain.embeddings` import path is deprecated.
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
vector_store = InMemoryVectorStore(embeddings)

# Index the documents loaded from the knowledge base. No splitting step is
# visible in this notebook, so each entry in local_docs is embedded as-is.
vector_store.add_documents(local_docs)

In [None]:
#6. Prompt Template + LLM Initialization
# Custom prompt instructs the LLM to answer from the retrieved context
# and to say so when the context is insufficient (reduces, but cannot rule out, hallucination)

from langchain.chat_models import init_chat_model

# Template filled via str.format with two fields: {question} and {context}.
CUSTOM_PROMPT = """
You are an advanced assistant. Use the context to answer. If insufficient info, say so clearly.

Question: {question}

Context:
{context}

Answer:
"""

# Chat model used by the generation step; temperature is set to 0.3.
llm = init_chat_model(model="openai:gpt-4.1", temperature=0.3)

In [None]:
#7. Workflow Functions (LangGraph Nodes)
# Retrieves top 5 most relevant document chunks for user query

def retrieve(state: State):
    """Look up the documents most similar to the question.

    Runs a similarity search on ``vector_store`` with ``state["question"]``
    and ``k=5``.

    Returns:
        A partial state update ``{"context": <search results>}``.
    """
    query = state["question"]
    top_matches = vector_store.similarity_search(query, k=5)
    return {"context": top_matches}

In [None]:
#Generate Answer
# Builds final prompt using retrieved context
# Sends to LLM and returns response

def generate(state: State):
    """Answer the question with the LLM, using the retrieved context.

    The ``page_content`` of every document in ``state["context"]`` is joined
    with blank lines, substituted into ``CUSTOM_PROMPT`` together with the
    question, and sent to ``llm`` as a single user message.

    Returns:
        A partial state update ``{"answer": <content of the LLM response>}``.
    """
    passages = [doc.page_content for doc in state["context"]]
    filled_prompt = CUSTOM_PROMPT.format(
        question=state["question"],
        context="\n\n".join(passages),
    )
    user_message = {"role": "user", "content": filled_prompt}
    reply = llm.invoke([user_message])
    return {"answer": reply.content}

In [None]:
#Classify Question (Optional Extension Point)
# Placeholder classifier for routing logic
# Currently passes question unchanged

def classify(state: State):
    """Placeholder classification node: passes the question through unchanged.

    No routing decision is made here. The previous body computed an
    ``is_advanced`` flag that was never read, so it has been removed.

    Returns:
        A partial state update ``{"question": state["question"]}``.
    """
    return {"question": state["question"]}

In [None]:
#Refine Answer
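# Appends a fixed "[Refined for clarity and completeness]" marker to the answer
# (placeholder step: the answer text itself is not rewritten)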

def refine(state: State):
    """Append a fixed refinement marker to the current answer.

    The answer text is left as-is; a blank line and the marker are added
    after it.

    Returns:
        A partial state update ``{"answer": <answer followed by the marker>}``.
    """
    marker = "\n\n[Refined for clarity and completeness]"
    return {"answer": state["answer"] + marker}

In [None]:
#8. Build LangGraph Workflow
# Create directed workflow graph
# Order: classify → retrieve → generate → refine

from langgraph.graph import START, StateGraph

# Register the four node functions as a linear chain, in execution order.
pipeline_steps = [classify, retrieve, generate, refine]
graph_builder = StateGraph(State)
graph_builder.add_sequence(pipeline_steps)

# Entry point: START feeds the first node of the chain.
graph_builder.add_edge(START, "classify")

# Turn the builder into a runnable graph.
graph = graph_builder.compile()

In [None]:
#9. Visualize Workflow
# Draw workflow graph using NetworkX + Matplotlib
# Helps understand RAG pipeline visually

import networkx as nx
import matplotlib.pyplot as plt

def visualize_langgraph_clean(graph_builder):
    """Draw the workflow's nodes and edges as a directed graph.

    Args:
        graph_builder: Object exposing ``nodes`` (iterable of node names) and
            ``edges`` (iterable of ``(source, target)`` pairs).

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    G = nx.DiGraph()

    for node_name in graph_builder.nodes:
        G.add_node(node_name)

    for src, tgt in graph_builder.edges:
        G.add_edge(src, tgt)

    try:
        pos = nx.nx_agraph.graphviz_layout(G, prog='dot')
    except Exception:
        # Best-effort fallback when the Graphviz layout is unavailable or fails.
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        pos = nx.spring_layout(G)

    nx.draw(G, pos, with_labels=True)
    plt.show()

# Render the nodes and edges currently registered on graph_builder.
visualize_langgraph_clean(graph_builder)

In [None]:
#10. Run Interactive RAG System
