### Imports

In [1]:
from dotenv import load_dotenv
import nest_asyncio
import os
from llama_index.llms.openai import OpenAI as llama_openai
from llama_index.embeddings.openai import OpenAIEmbedding as llama_openai_embedding
from llama_index.core import Settings as llama_settings
from llama_parse import LlamaParse as llama_parse
from langchain_core.documents import  Document
from langchain_community.graphs import Neo4jGraph
from langchain_core.prompts import ChatPromptTemplate
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain_core.tools import BaseTool, BaseModel, Field
from typing import Type, Any, Dict, List
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.agents import (
    AgentExecutor,
    create_tool_calling_agent,
)

In [2]:
load_dotenv()
nest_asyncio.apply()

In [3]:
OPENAI_API_KEY=os.getenv("OPENAI_API_KEY")

### Parsing

In [4]:
llm = llama_openai(model="gpt-4o")
embed_model = llama_openai_embedding(model="text-embedding-3-small")

llama_settings.llm = llm
llama_settings.embed_model = embed_model

In [None]:
llama_parsing_instruction = """
1. **Text**: Identify and extract all text content, including explanations, descriptions, and definitions.
2. **Image**: Identify and extract all images, ensuring to capture diagrams, illustrations, and visual representations.
3. **Graph**: Identify and extract all graphs, including line graphs, bar graphs, and any other graphical data representations.
4. **Table**: Identify and extract all tables, capturing data and structured information presented in tabular form.
5. **Practice Problems**: Identify and extract all practice problems, including exercises and questions for students to solve.
6. **Examples**: Identify and extract all worked examples that demonstrate how to solve problems or apply concepts.

#### Tagging Requirements:

For each extracted element, tag the following:

1. **Chapter**: The overarching division of the textbook in which the content is located.
2. **Concept**: The specific subsection or topic within a chapter that the content pertains to.
3. **Learning Outcome Name**: The most fundamental unit of learning, which is a combination of the concept, the cognitive ability (based on Bloom's taxonomy), and the context. For example:
    - **Learning Outcome**: "Calculating resistance using VI graph"
        - **Cognitive Ability**: Analyse
        - **Concept**: Calculating resistance
        - **Context**: VI graph

4. **Cognitive Ability**: The level of cognitive ability required, based on Bloom's taxonomy (e.g., Remember, Understand, Apply, Analyze, Evaluate, Create).

#### Special Instructions for Formulas and Equations:

- For any formulas or equations encountered, ensure they are provided in LaTeX format, enclosed in `$$`. For example:
    - Equation: $$ E = mc^2 $$
    - Formula: $$ \text{Area of a circle} = \pi r^2 $$

#### Additional Notes:

- **Multiple Concepts and Learning Outcomes**: Each page can have multiple concepts, and each concept can have multiple learning outcomes.
- **Contextual Awareness**: Ensure the context of each concept and learning outcome is accurately captured, reflecting the specific example, problem, or explanation provided.

#### Example of Tagged Content:

**Chapter**: Electricity  
**Concept**: Ohm's Law  
**Learning Outcome Name**: Calculating resistance using VI graph  
- **Cognitive Ability**: Analyze  
- **Context**: VI graph

**Text**: "Ohm's Law states that the current through a conductor between two points is directly proportional to the voltage across the two points."

**Image**: [Image of a circuit diagram]

**Graph**: [Graph showing Voltage vs. Current]

**Table**: 
| Voltage (V) | Current (I) |
|-------------|-------------|
| 1           | 0.2         |
| 2           | 0.4         |
| 3           | 0.6         |

**Practice Problems**:
1. "Calculate the resistance if the voltage is 5V and the current is 1A."

**Examples**:
- **Example 1**: "If the voltage across a resistor is 10V and the current is 2A, the resistance can be calculated using Ohm's Law as follows: $$ R = \frac{V}{I} = \frac{10V}{2A} = 5 \Omega $$"
"""

### Adjust the target pages here... (set start_page and end_page to None for all pages)

In [None]:
start_page = 14
end_page = 22

target_pages = ""
if start_page is not None and end_page is not None:
    target_pages = ",".join([str(i) for i in range(start_page, end_page+1)])
else:
    target_pages = None

target_pages

In [None]:
docs = llama_parse(
  result_type="markdown", 
  target_pages=target_pages, 
  parsing_instruction=llama_parsing_instruction,
  use_vendor_multimodal_model=True,
  vendor_multimodal_model_name="openai-gpt4o",
  vendor_multimodal_api_key=OPENAI_API_KEY,
).load_data("book.pdf")

### Metadata addition

In [None]:
documents = []
current_page = start_page or 0
for doc in docs:
    document = Document(page_content=doc.text)
    document.metadata = {"page": current_page, "course_id": "1"}
    current_page += 1
    documents.append(document)

In [None]:
documents[0]

### Knowledge Graph Generation

In [6]:
graph = Neo4jGraph(
    username="neo4j",
    password="password",
    url="bolt://localhost:7687",
    database="langchain-rag",
    refresh_schema=False,
)

In [None]:
system_prompt = """
Schema: 
<Entity: CONCEPT> <Relationship: HAS_OUTCOME> <Entity: LEARNING_OUTCOME>

<Entity: LEARNING_OUTCOME> <Relationship: PREREQ_FOR> <Entity: LEARNING_OUTCOME>

<Entity: LEARNING_OUCOME> <Relationship: SUPPORTS> <Entity: QUESTION>


Follow the above schema to construct graph
    """

template = ChatPromptTemplate([
    ("system", system_prompt),
])

In [7]:
llm = ChatOpenAI(temperature=0.5, model_name="gpt-4o")

llm_transformer = LLMGraphTransformer(
    llm=llm, 
    allowed_nodes=["CONCEPT", "LEARNING_OUTCOME", "QUESTION"],
    allowed_relationships=[ "HAS_OUTCOME", "PREREQ_FOR", "SUPPORTS"],
    relationship_properties=['weight'],
    node_properties=['text', 'page', 'course_id'],
    strict_mode=False,
)

In [None]:
graph_documents = llm_transformer.convert_to_graph_documents(documents)

In [None]:
graph.add_graph_documents(graph_documents, baseEntityLabel=True, include_source=True)

In [8]:
vector = Neo4jVector.from_existing_graph(
    OpenAIEmbeddings(),
    node_label='__Entity__',
    embedding_node_property='embedding',
    text_node_properties=['id', 'page', 'text', 'course_id'],
    username="neo4j",
    password="password",
    url="bolt://localhost:7687",
    database="langchain-rag",
)

### Using RAG as retriever to fetch documents

In [9]:
retriever = vector.as_retriever()
retriever.invoke("Fundamental theorem of arithmetic")

[Document(page_content='\nid: The Fundamental Theorem Of Arithmetic\npage: \ntext: The Fundamental Theorem of Arithmetic\ncourse_id: '),
 Document(page_content='\nid: Fundamental Theorem Of Arithmetic\npage: 1.2\ntext: The order of prime factors in ascending order makes the factorisation unique.\ncourse_id: '),
 Document(page_content='\nid: Irrational Numbers\npage: \ntext: In Class IX, you were introduced to irrational numbers and many of their properties. You studied about their existence and how the rationals and the irrationals together made up the real numbers. You even studied how to locate irrationals on the number line. However, we did not prove that they were irrationals. In this section, we will prove that \\( \\sqrt{2}, \\sqrt{3}, \\sqrt{5} \\) and, in general, \\( \\sqrt{p} \\) is irrational, where \\( p \\) is a prime. One of the theorems, we use in our proof, is the Fundamental Theorem of Arithmetic.\ncourse_id: '),
 Document(page_content='\nid: Proof Of Theorem 1.2\npage

## Agent 

### Tools

- Fetch Concepts of a chapter

In [10]:
class GetConceptsOfChapterInput(BaseModel):
    chapter_name: str = Field(description="Chapter name to fetch concepts for")


class GetConceptsOfChapter(BaseTool):
    name = "get_concepts_of_chapter"
    description = """
    Fetch concepts of a specific chapter from the textbook using provided chapter name
    """
    args_schema: Type[BaseModel] = GetConceptsOfChapterInput

    def _run(self, query: str) -> list:
        raise NotImplementedError("This method is not implemented")

    async def _arun(
        self,
        chapter_name: str,
    ):
        chapters_data = [
            {
                "concepts": [
                    {"name": "Prime factorization related to decimal expansions"},
                    {"name": "Terminating and non-terminating decimal expansions"},
                    {
                        "name": "Proof of irrationality using Fundamental Theorem of Arithmetic"
                    },
                    {"name": "Fundamental Theorem of Arithmetic"},
                    {"name": "Euclid's division algorithm"},
                ],
                "name": "Real Numbers",
            },
            {
                "concepts": [
                    {"name": "Division Algorithm for Polynomials"},
                    {"name": "Geometrical Meaning of the Zeroes of a Polynomial"},
                    {
                        "name": "Relationship between Zeroes and Coefficients of a Polynomial"
                    },
                    {"name": "Introduction to Polynomials"},
                ],
                "name": "Polynomials",
            },
            {
                "concepts": [
                    {"name": "Dependent and consistent equations"},
                    {"name": "Graphical method of solution"},
                    {"name": "Elimination method"},
                    {"name": "Consistency and inconsistency of linear equations"},
                    {"name": "Substitution method"},
                ],
                "name": "Pair of Linear Equations in Two Variables",
            },
            {
                "concepts": [
                    {"name": "Quadratic formula for roots"},
                    {"name": "Solution of quadratic equations by factorisation"},
                    {"name": "Nature of roots"},
                    {
                        "name": "Applications of quadratic equations in real-life problems"
                    },
                ],
                "name": "Quadratic Equations",
            },
            {
                "concepts": [
                    {"name": "General form of an AP"},
                    {"name": "nth term of an AP"},
                    {"name": "Definition of Arithmetic Progression (AP)"},
                    {"name": "Sum of first n terms of an AP"},
                ],
                "name": "Arithmetic Progressions",
            },
            {
                "concepts": [
                    {"name": "Properties of similar triangles"},
                    {"name": "Criteria for similarity of triangles"},
                    {"name": "Similarity of triangles"},
                    {"name": "Similar figures and triangles"},
                ],
                "name": "Triangles",
            },
            {
                "concepts": [
                    {"name": "Distance formula for points"},
                    {"name": "Section formula for internal division"},
                    {"name": "Graphical interpretation and applications of formulas"},
                ],
                "name": "Coordinate Geometry",
            },
            {
                "concepts": [
                    {"name": "Trigonometric ratios"},
                    {
                        "name": "Specific values of trigonometric ratios at 0°, 30°, 45°, 60°, and 90°"
                    },
                    {"name": "Trigonometric identities"},
                    {
                        "name": "Examples solving for unknowns using trigonometric ratios"
                    },
                ],
                "name": "Introduction to Trigonometry",
            },
            {
                "concepts": [{"name": "Heights and Distances"}],
                "name": "Some Applications of Trigonometry",
            },
            {
                "concepts": [
                    {"name": "Number of Tangents from a Point on a Circle"},
                    {"name": "Tangent to a Circle"},
                ],
                "name": "Circles",
            },
            {
                "concepts": [
                    {"name": "Length of an arc of a sector"},
                    {"name": "Area of a segment of a circle"},
                    {"name": "Area of a sector of a circle"},
                ],
                "name": "Areas Related to Circles",
            },
            {
                "concepts": [
                    {"name": "Surface areas of combinations of basic solids"},
                    {"name": "Volumes of combinations of basic solids"},
                ],
                "name": "Surface Areas and Volumes",
            },
            {
                "concepts": [
                    {"name": "Mode for grouped data"},
                    {"name": "Median for grouped data"},
                    {"name": "Mean for grouped data"},
                ],
                "name": "Statistics",
            },
        ]

        for chapter_data in chapters_data:
            if chapter_data["name"] == chapter_name:
                return chapter_data["concepts"]
        return None

- Fetch contextual data

In [11]:
class ContextRetrievalToolInput(BaseModel):
    query: str = Field(description="Input query string")

class ContextRetrievalTool(BaseTool):
    name = "context_retrieval_tool"
    description = "Use this tool to fetch context from the textbook via provided query"
    args_schema: Type[BaseModel] = ContextRetrievalToolInput

    def _run(self, query: str) -> list:
        raise NotImplementedError("This method is not implemented")

    async def _arun(
        self,
        query: str,
    ) -> str:
        documents = await retriever.ainvoke(query)
        context = "\n\n".join([doc.page_content for doc in documents])
        return context


- Fetch Concepts, LOs and their relationship from structured graph generated earlier for better navigation

In [12]:
class GetChapterConceptsLearningOutcomesGraphToolInput(BaseModel):
    query: str = Field(description="Cypher query to fetch the data from neo4j")

class GetChapterConceptsLearningOutcomesGraphTool(BaseTool):
    name = "get_chapter_concepts_learningoutcomes_graph_tool"
    description = """
    Use this tool to fetch data about Chapter/Concept/Learning Outcome and their relationships by passing a cypher query
    """
    args_schema: Type[BaseModel] = ContextRetrievalToolInput

    def _run(self, query: str) -> list:
        raise NotImplementedError("This method is not implemented")

    async def _arun(
        self,
        query: str,
    ) -> List[Dict[str, Any]]:
        graph = Neo4jGraph(
            username="neo4j",
            password="password",
            url="bolt://localhost:7687",
            database="neo4j",
        )
        data = graph.query(query)
        return data

In [13]:
chapter_names = [
    {"name": "Real Numbers"},
    {"name": "Polynomials"},
    {"name": "Pair of Linear Equations in Two Variables"},
    {"name": "Quadratic Equations"},
    {"name": "Arithmetic Progressions"},
    {"name": "Triangles"},
    {"name": "Coordinate Geometry"},
    {"name": "Introduction to Trigonometry"},
    {"name": "Some Applications of Trigonometry"},
    {"name": "Circles"},
    {"name": "Areas Related to Circles"},
    {"name": "Surface Areas and Volumes"},
    {"name": "Statistics"},
]

In [14]:
ai_message = ""

### You can edit the system prompt here 

In [15]:
async def get_answer(question: str):
    tools = [ContextRetrievalTool(), GetChapterConceptsLearningOutcomesGraphTool()]

    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                """
                You are a very powerful teacher. You help students learn concepts by first introducing it, discussing real-life examples, and some small quiz to check their understanding. 

                Follow the steps below:
                1. Use `get_concepts_of_chapter` to fetch all the concepts of a chapter by passing chapter name from the provided chapter names data
                2. Use the `get_chapter_concepts_learningoutcomes_graph_tool` to fetch structured data around chapter, concepts and their attached learning outcomes.
                    -> Relationship details:
                        - chapter contains concepts
                        - concept contains learning outcomes
                        - learning outcomes may/may not be connected to other learning outcomes via prerequisite_of relationship
                    -> You can use the chapter name, concept name to identify the chapter or concept node from the graph using name property
                    -> Pass this tool a cypher query to fetch the relationships if necessary
                3. Use the `context_retrieval_tool` to fetch contextual data from the textbook user wants to learn from.
                """,
            ),
            ("ai", ai_message),
            ("user", "Chapter Names: {chapter_names}\n Question: {question} "),
            MessagesPlaceholder(variable_name="agent_scratchpad"),
        ]
    )

    agent = create_tool_calling_agent(
        llm,
        tools,
        prompt,
    )

    agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
    chapter_names_str = f"{chapter_names}".replace("{", "{{").replace("}", "}}")
    response = await agent_executor.ainvoke({"question": question, "chapter_names":chapter_names_str })
    output = response["output"]
    return output

### Q&A

In [16]:
question = "Teach me first chapter's first concept"

In [17]:
answer = await get_answer(question)
ai_message = f"{answer}".replace("{", "{{").replace("}", "}}")
answer



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `get_concepts_of_chapter` with `{'chapter': 'Real Numbers'}`


[0mget_concepts_of_chapter is not a valid tool, try one of [context_retrieval_tool, get_chapter_concepts_learningoutcomes_graph_tool].[32;1m[1;3m
Invoking: `get_chapter_concepts_learningoutcomes_graph_tool` with `{'query': "MATCH (c:Chapter {name: 'Real Numbers'})-[:CONTAINS]->(concept:Concept) RETURN concept.name ORDER BY concept.name LIMIT 1"}`


[0m[33;1m[1;3m[{'concept.name': "Euclid's division algorithm"}][0m[32;1m[1;3m
Invoking: `get_chapter_concepts_learningoutcomes_graph_tool` with `{'query': 'MATCH (c:Concept {name: "Euclid\'s division algorithm"})-[:CONTAINS]->(lo:LearningOutcome) RETURN c, lo'}`


[0m[33;1m[1;3m[{'c': {'name': "Euclid's division algorithm"}, 'lo': {'name': "Evaluate the computational complexity of finding HCF using Euclid's division algorithm compared to prime factorization.", 'cognitive_ability': 'EVALUATE'}}, {'

"### Introduction to Euclid's Division Algorithm\n\nEuclid's Division Algorithm is a method to compute the Highest Common Factor (HCF) of two given positive integers. The algorithm is based on the principle that the HCF of two numbers also divides their difference. \n\n### Real-life Example\n\nImagine you have two pieces of ribbon, one 24 meters long and the other 36 meters long. You want to cut both ribbons into pieces of equal length without any leftover. The longest length of each piece would be the HCF of 24 and 36. Using Euclid's Division Algorithm, you can find this HCF efficiently.\n\n### Concept Learning Outcomes\n\n1. **Identify and describe the process of Euclid's division algorithm.** (REMEMBER)\n2. **Explain why Euclid's division algorithm is efficient for computing the HCF of two integers.** (UNDERSTAND)\n3. **Determine the HCF of any two positive integers using Euclid's division algorithm as illustrated in examples.** (APPLY)\n4. **Analyze how changes in the values of num