In [1]:
from langgraph.graph import StateGraph, END
from typing import TypedDict, List
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
import os

# Keep the real key out of the notebook: export OPENAI_API_KEY in the shell
# (or load it from a .env file) before starting Jupyter.
# setdefault only installs the empty placeholder when the variable is missing,
# so a key that is already configured is no longer overwritten with "".
os.environ.setdefault("OPENAI_API_KEY", "")   # open-AI api key

In [4]:
# Shared chat model; summary_node and flashcard_node both call llm.invoke().
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0.7,
)

In [5]:
class StudyState(TypedDict):
    """State dictionary passed from node to node in the study graph."""

    pdf_path: str            # path handed to PyPDFLoader by load_pdf_node
    docs: list               # documents returned by the PDF loader
    vectorstore: object      # FAISS store built by embed_node
    summary: str             # text produced by summary_node
    flashcards: List[str]    # cards produced by flashcard_node

In [6]:
# State schema shared by every node in the graph.
# NOTE(review): StudyState is defined identically in the previous cell;
# one of the two definitions can be removed.
class StudyState(TypedDict):
    """Keys that the graph nodes read from and write to."""

    # Input: location of the PDF to study.
    pdf_path: str
    # Filled by load_pdf_node with the loader's documents.
    docs: list
    # Filled by embed_node with the FAISS vector store.
    vectorstore: object
    # Filled by summary_node with the model's summary text.
    summary: str
    # Filled by flashcard_node with one string per card.
    flashcards: List[str]

In [7]:
# Graph node: read the PDF from disk
def load_pdf_node(state: StudyState):
    """Load the PDF at ``state["pdf_path"]`` and store the result in ``state["docs"]``.

    The incoming state dict is updated in place and the same dict is returned.
    """
    pdf_loader = PyPDFLoader(state["pdf_path"])
    loaded_docs = pdf_loader.load()
    state["docs"] = loaded_docs
    return state

In [8]:
# Graph node: split the loaded documents into chunks and index them in FAISS
def embed_node(state: StudyState):
    """Chunk ``state["docs"]`` and store a FAISS index in ``state["vectorstore"]``.

    Documents are split into 800-character chunks with a 100-character overlap,
    embedded with OpenAI's ``text-embedding-3-small`` model, and indexed with
    FAISS. The incoming state dict is updated in place and returned.

    Raises:
        ValueError: if splitting produces no chunks (for example a scanned PDF
            with no extractable text), since FAISS cannot build an index from
            zero vectors and would otherwise fail with an opaque IndexError.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
    chunks = splitter.split_documents(state["docs"])

    # Fail early with an actionable message instead of deep inside FAISS.
    if not chunks:
        raise ValueError(
            f"No text chunks could be extracted from {state.get('pdf_path')!r}; "
            "the PDF may be empty or contain only scanned images."
        )

    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
    state["vectorstore"] = FAISS.from_documents(chunks, embeddings)
    return state

In [10]:
# Graph node: summarise the retrieved chunks with the LLM
def summary_node(state: StudyState):
    """Write a student-friendly summary into ``state["summary"]``.

    Retrieves the 12 chunks most similar to the query "overview" from
    ``state["vectorstore"]``, joins them with blank lines, and asks ``llm``
    to summarise the result. The state dict is updated in place and returned.
    """
    store = state["vectorstore"]
    hits = store.similarity_search("overview", k=12)
    context = "\n\n".join([hit.page_content for hit in hits])

    prompt = f"Summarize clearly for a student:\n{context}"
    reply = llm.invoke(prompt)
    state["summary"] = reply.content
    return state

In [11]:
# Graph node: generate Q/A flashcards from the retrieved chunks
def flashcard_node(state: StudyState):
    """Generate flashcards and store them in ``state["flashcards"]``.

    Retrieves the 12 chunks most similar to the query "overview" from
    ``state["vectorstore"]``, asks ``llm`` for 10 Q/A flashcards, and splits
    the reply on blank lines into one string per card. The state dict is
    updated in place and returned.
    """
    docs = state["vectorstore"].similarity_search("overview", k=12)
    combined = "\n\n".join(d.page_content for d in docs)

    reply = llm.invoke(
        f"Generate 10 flashcards in Q/A format only:\n{combined}"
    ).content

    # Blank-line split can leave empty or whitespace-only pieces (leading or
    # trailing newlines, triple newlines); strip each card and drop the blanks.
    state["flashcards"] = [
        card.strip() for card in reply.split("\n\n") if card.strip()
    ]
    return state


We have built these functions separately so that they can serve as nodes in our LangGraph graph.



In [12]:
# Assemble the linear pipeline: LOAD_PDF -> EMBED -> SUMMARY -> FLASHCARDS -> END
graph = StateGraph(StudyState)

# Register each node under its name, in pipeline order.
for node_name, node_fn in (
    ("LOAD_PDF", load_pdf_node),
    ("EMBED", embed_node),
    ("SUMMARY", summary_node),
    ("FLASHCARDS", flashcard_node),
):
    graph.add_node(node_name, node_fn)

graph.set_entry_point("LOAD_PDF")

# Wire every stage to the one that follows it; the last stage ends the run.
for source, target in (
    ("LOAD_PDF", "EMBED"),
    ("EMBED", "SUMMARY"),
    ("SUMMARY", "FLASHCARDS"),
    ("FLASHCARDS", END),
):
    graph.add_edge(source, target)

app = graph.compile()