In [1]:
# First, let's import the necessary libraries and set up our environment
%load_ext autoreload
%autoreload 2
import sys
import os

# Add the project root directory to the Python path
sys.path.append(os.path.abspath(os.path.join("../../..")))  

from IPython.display import display, Latex
import re
from src.utils.file_utils import read_proof
from src.phase1.extract_triplets import extract_calculation_graph
from src.utils.neo4j_utils import Neo4JUtils

# Load the course content for addition and multiplication through the
# project's read_proof helper. Both paths are relative, so they resolve
# against the directory the notebook kernel is running in.
# NOTE(review): sys.path is extended with "../../.." above, while the data is
# read from "../../data" — confirm both point at the intended project root.
addition_course_latex = read_proof("../../data/courses/addition/course_2.tex")
multiplication_course_latex = read_proof("../../data/courses/multiplication/course_2.tex")

# Hierarchical Pattern Extraction for Mathematical Operations

In this notebook, we'll extract knowledge graph patterns for mathematical operations in a hierarchical manner:

1. First, we'll extract the pattern for "Addition by Recursion" from the addition course
2. Then, we'll use this pattern to inform the extraction of the "Multiplication by Recursion" pattern
3. This approach will create a hierarchical knowledge representation where multiplication builds upon addition

This hierarchical approach reflects the natural relationship between mathematical operations, where multiplication can be defined in terms of repeated addition.

In [2]:
# Define the system prompt for our LLM.
# It is passed as `system_message` to every extract_calculation_graph call in
# this notebook.
# NOTE(review): "quantom" in the text below looks like a typo (perhaps
# "atomic"?). It is left untouched because the string is sent to the model
# verbatim and editing it would change the extraction results.
SYSTEM_PROMPT = """You are an expert in mathematical calculation analysis, specializing in extracting structured knowledge graph from mathematical calculation texts. Your task is to identify quantom/detailed progress/procedural steps in a mathematical calculation process and represent them as a fine-grained knowledge graph with explicit step-by-step reasoning."""

# Define the prompt for extracting the addition course pattern.
# This is a str.format template: `{course_latex}` is its only placeholder, so
# any other literal braces added to the text would need to be doubled.
ADDITION_COURSE_PATTERN_PROMPT = """
Given the following mathematical course content in LaTeX format, extract a VERY DETAILED step-by-step explanatory chain that represents the calculation process. Create a knowledge graph with fine-grained steps that shows exactly how calculations proceed from start to finish.

Focus on identifying:
1. Every individual calculation step, no matter how small (e.g., "Add 2 to both sides", "Apply distributive property", etc.)
2. The precise mathematical operations performed at each step
3. The exact sequence of operations, with clear predecessor-successor relationships
4. Intermediate results at each calculation stage
5. The mathematical justification for each step (e.g., "By the associative property", "By substituting value from step 3")

IMPORTANT: If you see examples in the course content, do not extract them as separate graphs. The examples are only included to help you understand the calculation process better. Focus only on extracting the general calculation pattern/process.

The final knowledge graph MUST:
1. Have clearly marked START node(s) (the initial example statement)
2. Have clearly marked END node(s) (the final result)
3. Include ALL intermediate calculation steps with no gaps in reasoning
4. Form a single connected component with a clear directional flow
5. Use relationship types that precisely describe the mathematical operation performed (e.g., "applies_distributive_property", "substitutes_value", "simplifies_expression")

Extract triplets in the given form of structured output that represent this detailed calculation process.

Note:
- Entities or Nodes should be specified with short mathematical expressions.
- Relations at this mathematical calculation graph is only grounding or simple reasoning connecting one expression to another.
- Steps are as much as possible explaining all the minor steps of the reasoning of the calculation.

Course Content:
```
{course_latex}
```
"""

# Fill the addition template with the course LaTeX and run the extraction
# with the shared system prompt.
addition_course_pattern = extract_calculation_graph(
    system_message=SYSTEM_PROMPT,
    custom_prompt=ADDITION_COURSE_PATTERN_PROMPT.format(course_latex=addition_course_latex),
)

# Caption and pattern go on separate lines, exactly as two print calls would.
print("Addition Course Pattern:", addition_course_pattern, sep="\n")

[SystemMessage(content='You are an expert in mathematical calculation analysis, specializing in extracting structured knowledge graph from mathematical calculation texts. Your task is to identify quantom/detailed progress/procedural steps in a mathematical calculation process and represent them as a fine-grained knowledge graph with explicit step-by-step reasoning.', additional_kwargs={}, response_metadata={}), HumanMessage(content='\nGiven the following mathematical course content in LaTeX format, extract a VERY DETAILED step-by-step explanatory chain that represents the calculation process. Create a knowledge graph with fine-grained steps that shows exactly how calculations proceed from start to finish.\n\nFocus on identifying:\n1. Every individual calculation step, no matter how small (e.g., "Add 2 to both sides", "Apply distributive property", etc.)\n2. The precise mathematical operations performed at each step\n3. The exact sequence of operations, with clear predecessor-successor 

## Hierarchical Pattern Extraction for Multiplication

Now that we have extracted the pattern for addition by recursion, we'll use it to inform the extraction of the multiplication pattern. 

The key insight is that multiplication can be defined as repeated addition, so the multiplication pattern should incorporate the addition pattern as a sub-component. This creates a hierarchical relationship between the two operations.

In [3]:
# Define the prompt for extracting the multiplication course pattern using the addition pattern.
# This is a str.format template with two placeholders: `{addition_pattern}`
# (the previously extracted pattern, inserted in its string form) and
# `{course_latex}` (the multiplication course source).
# NOTE(review): the IMPORTANT paragraph says "even if they are expelicitly
# stated" — this probably means "even if they are not explicitly stated".
# Confirm the intent before editing; the text is sent to the model verbatim,
# so any change alters the extraction results.
MULTIPLICATION_COURSE_PATTERN_PROMPT = """
Given the following mathematical course content in LaTeX format and a previously extracted course pattern in the knowledge graph triplets format, extract a VERY DETAILED step-by-step explanatory chain for this course. Create a reasoning graph with fine-grained steps that shows exactly how calculations proceed from start to finish.

The previously extracted course pattern is:
```
{addition_pattern}
```

IMPORTANT: Courses builds upon each other. For example, a pattern can incorporate multiple pattern of the child pattern. The extracted course pattern should incorporate the child patterns and use them even if they are expelicitly stated in the course content.

The final knowledge graph MUST:
1. Have clearly marked START node(s) (the initial calculation problem)
2. Have clearly marked END node(s) (the final result)
3. Include ALL intermediate calculation steps with no gaps in reasoning
4. Form a single connected component with a clear directional flow
5. Use relationship types that precisely describe the mathematical operations

Extract triplets in the given form of structured output that represent this detailed calculation process.

Note:
- Entities or Nodes should be specified with short mathematical expressions.
- Steps should explain all the minor steps of the reasoning.

Course Content:
```
{course_latex}
```
"""

# Run the multiplication extraction, seeding the prompt with the addition
# pattern extracted earlier so the model can build on it.
multiplication_course_pattern = extract_calculation_graph(
    system_message=SYSTEM_PROMPT,
    custom_prompt=MULTIPLICATION_COURSE_PATTERN_PROMPT.format(
        course_latex=multiplication_course_latex,
        addition_pattern=addition_course_pattern,
    ),
)

# Caption and pattern go on separate lines, exactly as two print calls would.
print("Multiplication Course Pattern:", multiplication_course_pattern, sep="\n")

[SystemMessage(content='You are an expert in mathematical calculation analysis, specializing in extracting structured knowledge graph from mathematical calculation texts. Your task is to identify quantom/detailed progress/procedural steps in a mathematical calculation process and represent them as a fine-grained knowledge graph with explicit step-by-step reasoning.', additional_kwargs={}, response_metadata={}), HumanMessage(content='\nGiven the following mathematical course content in LaTeX format and a previously extracted course pattern in the knowledge graph triplets format, extract a VERY DETAILED step-by-step explanatory chain for this course. Create a reasoning graph with fine-grained steps that shows exactly how calculations proceed from start to finish.\n\nThe previously extracted course pattern is:\n```\nsteps=[MathStep(id=\'step1\', expression=\'a + b\', operation=\'initial_expression\', is_start=True, is_end=False), MathStep(id=\'step2\', expression=\'(a + (b-1)) + 1\', oper

## Storing Patterns in Neo4j

Now we'll store both patterns in Neo4j to visualize the hierarchical relationship between addition and multiplication. This will allow us to see how multiplication operations build upon addition operations.

In [4]:
# Connect to the Neo4j instance used to store the patterns.
# The URI and credentials can be overridden with the NEO4J_URI, NEO4J_USER
# and NEO4J_PASSWORD environment variables, so real secrets never need to be
# written into (or committed with) the notebook. The fallbacks reproduce the
# previous hardcoded local-development values.
neo4j = Neo4JUtils(
    os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    (
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "password"),
    ),
)
# Start from a clean slate before storing anything.
# NOTE(review): presumably this deletes all existing data in the target
# database — verify against Neo4JUtils before pointing at a shared instance.
neo4j.clean_database()


# Helpers to sanitize LaTeX commands in step expressions
def _strip_script_groups(text, marker):
    """Remove every ``<marker>{...}`` group from ``text``, honoring nested braces.

    ``marker`` is the single character introducing the group (``"_"`` or
    ``"^"``). A group whose opening brace is never closed is left untouched.
    """
    kept = []
    pos = 0
    length = len(text)
    while pos < length:
        if text[pos] == marker and pos + 1 < length and text[pos + 1] == "{":
            # Walk forward to the brace that closes this group.
            depth = 0
            cursor = pos + 1
            while cursor < length:
                if text[cursor] == "{":
                    depth += 1
                elif text[cursor] == "}":
                    depth -= 1
                    if depth == 0:
                        break
                cursor += 1
            if cursor < length:
                # Balanced group found: skip it entirely.
                pos = cursor + 1
                continue
        kept.append(text[pos])
        pos += 1
    return "".join(kept)


def sanitize_latex(text):
    """Return ``text`` with a handful of LaTeX constructs stripped or replaced.

    The following rewrites are applied, in this order:

    * ``\\underbrace`` and ``\\text`` are removed (their brace contents stay);
    * ``\\cdots`` becomes ``...`` and ``\\times`` becomes ``×``;
    * braced subscripts ``_{...}`` and then braced superscripts ``^{...}``
      are dropped together with their contents.

    Script groups are matched with balanced braces, so a nested group such as
    ``_{b \\text{ times}}`` is removed as a whole instead of leaving a stray
    ``}`` behind. Unbraced scripts (``x_1``) are not touched.

    Args:
        text: The expression to clean. Empty or ``None`` input is returned
            unchanged.

    Returns:
        The sanitized string.
    """
    if not text:
        return text

    # Replace LaTeX commands with plain text alternatives
    sanitized = text.replace("\\underbrace", "")
    sanitized = sanitized.replace("\\text", "")
    sanitized = sanitized.replace("\\cdots", "...")
    sanitized = sanitized.replace("\\times", "×")

    # Remove LaTeX subscripts first, then superscripts (same order as before).
    sanitized = _strip_script_groups(sanitized, "_")
    sanitized = _strip_script_groups(sanitized, "^")
    return sanitized


# Sanitize the expression of every step in the multiplication pattern.
# This mutates multiplication_course_pattern in place, so the cleaned
# expressions are what get stored below and what any later cell sees.
# The addition pattern is stored as extracted, without sanitization.
for step in multiplication_course_pattern.steps:
    step.expression = sanitize_latex(step.expression)

# Disabled entity-level sanitization, kept for reference.
# NOTE(review): only `.steps` is used on the pattern in this notebook —
# confirm whether `.entities` still exists, otherwise delete this block.
# # Sanitize entity names in multiplication pattern
# for entity in multiplication_course_pattern.entities:
#     entity.name = sanitize_latex(entity.name)
#     if hasattr(entity, "label") and entity.label:
#         entity.label = sanitize_latex(entity.label)

# Store the addition pattern.
# The second argument is presumably the graph identifier that the
# visualization queries filter on as `graph_id` — TODO confirm in Neo4JUtils.
neo4j.store_calculation_graph(addition_course_pattern, "addition_course")

# Store the multiplication pattern
neo4j.store_calculation_graph(multiplication_course_pattern, "multiplication_course")

print("Patterns stored in Neo4j database.")

Patterns stored in Neo4j database.


## Visualization Queries

Here are some Neo4j queries that can be used to visualize the hierarchical relationship between addition and multiplication patterns:

1. View the complete knowledge graph:
```cypher
MATCH (n) 
RETURN n
```

2. View the addition pattern:
```cypher
MATCH (n)-[r]-(m)
WHERE n.graph_id = 'addition_course' OR m.graph_id = 'addition_course'
RETURN n, r, m
```

3. View the multiplication pattern:
```cypher
MATCH (n)-[r]-(m)
WHERE n.graph_id = 'multiplication_course' OR m.graph_id = 'multiplication_course'
RETURN n, r, m
```

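4. View the hierarchical relationship. This returns results only if nodes of the two graphs are actually connected by relationships. The variables are named `mult_node` and `add_node` because `start` and `end` collide with Cypher keywords:
```cypher
MATCH path = (mult_node)-[*]-(add_node)
WHERE mult_node.graph_id = 'multiplication_course' 
  AND add_node.graph_id = 'addition_course'
RETURN path
```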

In [5]:
# Example: Load a test problem and apply the hierarchical pattern
test_problem = "5 × 3"

# Define a prompt to apply the hierarchical pattern to the test problem.
# This is a str.format template with three placeholders: `{addition_pattern}`,
# `{multiplication_pattern}` and `{problem}`.
# NOTE(review): this prompt asks for "<Source Entity, Relationship, Target
# Entity>" triplets, whereas the two extraction prompts ask for "the given
# form of structured output" — confirm the mismatch is intentional.
TEST_PROBLEM_PROMPT = """
Given the following multiplication problem and the hierarchical patterns for addition and multiplication, 
create a detailed step-by-step solution that follows these patterns.

Addition Pattern:
```
{addition_pattern}
```

Multiplication Pattern:
```
{multiplication_pattern}
```

Problem: {problem}

Provide a detailed solution that:
1. First applies the multiplication pattern to break down the problem
2. Then applies the addition pattern for the necessary addition steps
3. Shows all intermediate steps and calculations
4. Maintains the hierarchical relationship between multiplication and addition

Extract triplets in the form <Source Entity, Relationship, Target Entity> that represent this solution.
"""

# Render the test prompt with both extracted patterns and the concrete
# problem, then request the solution graph.
test_solution = extract_calculation_graph(
    system_message=SYSTEM_PROMPT,
    custom_prompt=TEST_PROBLEM_PROMPT.format(
        problem=test_problem,
        multiplication_pattern=multiplication_course_pattern,
        addition_pattern=addition_course_pattern,
    ),
)

# Caption and solution go on separate lines, exactly as two print calls would.
print("Solution for test problem:", test_solution, sep="\n")

# Persist the solution graph under its own identifier.
neo4j.store_calculation_graph(test_solution, "test_solution")
print("Test solution stored in Neo4j database.")

[SystemMessage(content='You are an expert in mathematical calculation analysis, specializing in extracting structured knowledge graph from mathematical calculation texts. Your task is to identify quantom/detailed progress/procedural steps in a mathematical calculation process and represent them as a fine-grained knowledge graph with explicit step-by-step reasoning.', additional_kwargs={}, response_metadata={}), HumanMessage(content='\nGiven the following multiplication problem and the hierarchical patterns for addition and multiplication, \ncreate a detailed step-by-step solution that follows these patterns.\n\nAddition Pattern:\n```\nsteps=[MathStep(id=\'step1\', expression=\'a + b\', operation=\'initial_expression\', is_start=True, is_end=False), MathStep(id=\'step2\', expression=\'(a + (b-1)) + 1\', operation=\'decompose_b\', is_start=False, is_end=False), MathStep(id=\'step3\', expression=\'((a + (b-2)) + 1) + 1\', operation=\'decompose_b\', is_start=False, is_end=False), MathStep(