In [1]:
import sys
print(sys.executable)

/Users/huanxing/Documents/GitHub/Analogical-LLM/.conda/bin/python


In [2]:
import getpass
import os
from langgraph.graph import StateGraph, START, END
from typing import TypedDict
import openai 
# from langgraph_supervisor import create_supervisor
from langchain.chat_models import init_chat_model
from textwrap import dedent

In [23]:
# Prompts
TARGET_DOMAIN = dedent("""
        As a Domain Analysis Specialist, extract all the core innovation domains from the user query. It could be a single one for a simple query, or multiple ones for a complex query.
        Instructions:
        1. Analyze the user's input
        2. Identify the primary domain(s) requiring innovation
        3. Classify it within standard innovation categories
        Output Format:
        Target Domain: [Clear, specific domain label]
        Be very detailed and specific in your response and do not generalize. Respond ONLY with the name of the domain, do NOT include ANY other text like 'Target Domain:'.
        User query: {user_query}
""").strip()

PROBLEM_LANDSCAPE = dedent("""
        You are a Problem Landscape Analyst. Your task is to map out the concrete challenges within the target domain identified.
        Instructions:
        1. Identify all the core problems or challenges currently present in these domains. Aim for at least 3 problems per domain
        2. For each problem, provide:
        - Problem: A short, clear title.
        - Description: 2-3 sentences explaining what the problem is and why it matters.
        - Context: Briefly state the circumstances or environment where this problem occurs.
        - Stakeholders: List the main groups or individuals affected.
        - Root Causes: Identify 1-3 underlying causes, if known.
        - Impact: State the significance of the problem (e.g., social, economic, technical).
        - Current Approaches: How is this problem currently addressed?
        - Limitations: What are the shortcomings of current approaches?
        - Success Metrics: How would you measure if this problem is solved?
        - Interconnections: Note if this problem is linked to or influenced by other problems.
        Output Format:
        Present your findings as a structured list or JSON array, with each problem fully described as above.
        Important:
        - Focus on clarity and completeness.
        - Avoid abstracting or generalizing; stay concrete and domain-specific.
        - Do not propose solutions; only describe the current problem landscape.
        Target domain: {target_domain}
""").strip()

ABSTRACTION = dedent("""
You are a TRIZ Methodology Expert. Transform domain-specific problems into universal contradictions.
        Process:
        1. Read up on TRIZ - the contradiction matrix, and the inventive principles
        2. For each problem in the problem landscape:
        - Abstract to universal parameters (what improves vs. what worsens)
        - Express as 'When we improve X, Y worsens'
        - Ensure parameters are domain-agnostic
        3. Analyze all the abstracted universal parameters, and identify all the core TRIZ contradictions present:
        - Select the most fundamental tensions
        - Map to TRIZ contradiction matrix
        - Note applicable inventive principles
        Output:
        # List all the core contradictions in form of:
        - Improving [parameter] vs. Worsening [parameter]
        - TRIZ Principles: [1-3 relevant principles]
        - Innovation Potential: [High/Medium/Low]
        Focus on contradictions that, if resolved, would create breakthrough value.

        Problem landscape: {problem_landscape}
""").strip()

BASE_DOMAIN = dedent("""
        You are a Cross-Domain Search Specialist. Do the following:
        - For each contradiction provided, identify 3 distinct source domains (fields or industries) where this contradiction has been successfully addressed.
        - Experiment with different subsets of the list of contradictions, and see if you could identify 3 distinct source domains for each of these subsets identified as well. You should find at least 3 different subsets.
        Note: The domains should have A CONCEPTUAL DISTANCE OF AT LEAST 3 DISTINCT HOPS FROM WHAT IMMEDIATELY COMES TO MIND. Be creative! It can be domains within spheres like natural, phsyical, social, artistic, or anything.
        For each domain identified, briefly explain why it is relevant to the single contradiction or the subset of contradictions identified. Do not describe specific solutions just yet-only list the domains and your rationale.
        Output:
        A list for each contradiction and subset of contradictions identified, naming 3 relevant domains with a 2 sentence rationale for each.
        Aim for a total of at least 20 relevant base domains. 
        Contradictions: {contradictions}
""").strip()

BASE_SOLUTIONS = dedent("""
        You are a Solution Pattern Extractor. You are provided with an input with 3 base domains identified per TRIZ (Theory of Inventive Problem Solving) contradiction or a set of contradictions, as well as the contradictions themselves.
        For each of these identified base domains, identify one specific, well-documented solution pattern within the domain that effectively resolves the contradiction (or the set of contradictions).
        For each solution pattern, return:
        - Identify the base domain it's corresponding to
        - Recall the contradiction or the set of contradictions that this base domain faces
        - The name or label of the solution pattern for resolving these contradiction(s) in the base domain
        - A detailed description of the core mechanism or principle involved and how it addressed the domain's contradiction(s)
        - The context or situation in the domain where this pattern is applied\n"
        Do not generalize or adapt the solution-simply describe how the contradiction is addressed within each source domain.
        Output:
        For each of the provided domain, list the base domain name, contradiction(s) faced, solution pattern name, the detailed description of the mechanism of the solution pattern, and the context in which it is used. Articulate the contradictions as problems and considerations faced, through framing them as a tension.

        Input: {input}
""").strip()

ANALOGICAL_TRANSFER = dedent("""
        You are a very innovative Analogical Transfer Specialist.
        You are provided with list the base domain name, tensions faced, solution pattern name, the detailed description of the mechanism of the solution pattern, and the context in which it is used.
        Your task is to propose how solution patterns used to resolve these tensions in various base domains might inspire solution framings for the original target domain.

        Input Overview:
        1. A list the base domains identified, the tensions these domains faced, the name of solution patterns that helped addressed these tensions in these base domains, the detailed description of the mechanism of the solution pattern, and the context in which it is used.
        2. The original target domain.

        Instructions:
        For each pair of base domain and the corresponding tensions identified, review the solution patterns that worked for the base domain. For each pattern:
        - Analyze the core mechanism or principle behind the solution.
        - Map and adapt this mechanism conceptually to the target domain, considering the specific context and needs of the target domain.
        - Clearly describe how this analogical transfer could frame a potential solution in the target domain.
        - Highlight any key adaptations, considerations, or limitations that would be relevant when applying this pattern to the target domain.

        Your expected Output:
        For each base domain, provide a comprehensive description of a proposed solution framing for the target domain, including:
        - The original tension addressed
        - The source domain and solution pattern
        - A detailed explanation of how the pattern could inspire or inform a solution in the target domain
        - Any important adaptations or considerations for successful transfer

        Here are the actualinputs:
        - A list the base domains identified, the tensions these domains faced, the name of solution patterns that helped addressed these tensions in these base domains, the detailed description of the mechanism of the solution pattern, and the context in which it is used.: {contradictions_solutions}
        - Original target domain: {target_domain}
""").strip()

In [8]:
# check for openai API
def _set_if_undefined(var: str):
    if not os.environ.get(var):
        os.environ[var] = getpass.getpass(f"Please provide your {var}")


_set_if_undefined("OPENAI_API_KEY")

In [9]:
# Graph state: workflow
class ReasoningState(TypedDict):
    user_query: str
    target_domain: str
    problem_landscape: str
    abstraction: str
    base_domain: str
    base_solutions: str
    analogical_transfer: str
    solution: str

In [10]:
# Can switch between different LLMs 
llms = init_chat_model("openai:gpt-4.1")

In [11]:
# 1st Agent: Identify Target Domain from User Input
def target_domain_agent(state: ReasoningState):
    u = state['user_query']

    msg = llms.invoke(TARGET_DOMAIN.format(user_query=u))

    return {"target_domain": msg.content}

In [14]:
# 2st Agent: Conduct comprehensive research into problem landscape for target domain 
# (i.e. What specific challenges exist in this domain?)
def problem_landscape_agent(state: ReasoningState):
    t = state['target_domain']

    msg = llms.invoke(PROBLEM_LANDSCAPE.format(target_domain=t))

    return {"problem_landscape": msg.content}

In [15]:
# 3rd Agent: Abstract Problems Identified into Generalized Principles and TRIZ Contradiction
def abstraction_agent(state: ReasoningState):
    p = state['problem_landscape']

    msg = llms.invoke(ABSTRACTION.format(problem_landscape=p))

    return {"abstraction": msg.content}

In [16]:
# 4th Agent: Search for Appropriate Base Domains
def base_domain_agent(state: ReasoningState):
    a = state['abstraction']

    msg = llms.invoke(BASE_DOMAIN.format(contradictions=a))

    return {"base_domain": msg.content}

In [17]:
# 5th Agent: Identify Solution in Base Domain
def base_solution_agent(state: ReasoningState):
    b = state['base_domain']

    msg = llms.invoke(BASE_SOLUTIONS.format(input=b))

    return {"base_solutions": msg.content}

In [18]:
# 6th Agent: Base Domain Solution Informing Target Domain Solution
def analogical_transfer_agent(state: ReasoningState):
    if "base_solutions" not in state:
        raise ValueError("Missing 'base_solutions' key. Check if previous node returned it.")
    b = state['base_solutions']
    t = state['target_domain']
    
    msg = llms.invoke(ANALOGICAL_TRANSFER.format(contradictions_solutions=b, target_domain = t))

    return {"analogical_transfer": msg.content}

In [22]:
# 7th Agent: Summarize everything and respond to the question
def synthesis_agent(state: ReasoningState):
    msg = llms.invoke(
        f"Evaluate the proposed analogical solutions. Find the best ones that balances practicality with innovation. Then, provide a detailed, well-structured response that addresses all aspects of the query.\n\n"
        f"Problem: {state['user_query']}\n"
        f"Analogical Solutions: {state['analogical_transfer']}"
        f"In your output, remember to abstract away the analogy itself such that it is focused on responding to the user input."
        f"Also, check if the users are requesting a specific number of possible solutions. Make sure to answer the user's query in full and provide what is requested."
    )
    return {"solution": msg.content}

In [20]:
# Construct the workflow 
workflow = StateGraph(ReasoningState)

workflow.add_node("target", target_domain_agent)
workflow.add_node("landscape", problem_landscape_agent)
workflow.add_node("abstract", abstraction_agent)
workflow.add_node("base", base_domain_agent)
workflow.add_node("base_soln", base_solution_agent)
workflow.add_node("analogy", analogical_transfer_agent)
workflow.add_node("synthesis", synthesis_agent)

workflow.set_entry_point("target")

# Define edges
workflow.add_edge("target", "landscape")
workflow.add_edge("landscape", "abstract")
workflow.add_edge("abstract", "base")
workflow.add_edge("base", "base_soln")
workflow.add_edge("base_soln", "analogy")
workflow.add_edge("analogy", "synthesis")

workflow.set_finish_point("synthesis")

graph = workflow.compile()

### Test my brainstorm prompt

In [21]:
test_q = "I have an idea that I want to brainstorm with you. I'm not sure what problem I'm going to solve yet, but this is still something interesting to me. For background, I worked on a project called Anonymix, where we take a life story interview transcript from different individuals, convert these textual 'personas' to persona graph (with different types of nodes), and then identified graph clusters using louvain's method to shuffle them across personas to create shuffled persona. This preserves privacy while still making sure that individual agents are sufficiently realistic. Now, I'm going to work at whyhow.ai which is going to use knowledge graph RAG for enterprise use cases like legal and healthcare where accuracy and relevance of information retrieval is of paramount importance. I realised that graphs - knowledge graph in this case - are amazing tools that I want to learn more about. Specifically, I want to understand deeply, at a fundamental level, what *possibilities* it enables, and where might I be able to apply this to. For example, I guess I can take any sort of unstructured input and KG can construct a structured representation of them in a non-hierarchical manner. This is also the way that Roam Research and Logseq works and the core of their value proposition in terms of how they differ as a personal knowledge management system as compared to the hierarchical way of organising information that Notion or Evernote adopts. I can even imagine an enterprise or interpersonal use case, where IF everyone uses the same diary app that's private to themselves but me as the 'admin' behind the scene could see everything, then I could 'engineer serendipity' from, for example, identifying and suggesting to individuals with this same community that 'hey, person X is also thinking along similar lines as what you're thinking (but with a twist!). It'd be good to go find him for a chat, I'm sure you'll love it!'. This is 'engineered serendipity' that's still sufficiently serendipitous nevertheless because, for example, it's not just matching people based on what they PUBLICLY put out there but based on what they privately notes. Specifically, it tries to identify the common underlying 'values' and 'nuances' behind these private notes instead of connecting individuals based on the exact content of the private notes themselves. However, for this to work, I guess there need to have a common input channel that everything in the community would use, and I'm not sure how realistic is it going to be. Help me understand from first principles what this tech (representing ... as KG) might enable. The 'engineered serendipity' idea is an example of what I've thought of but it's not too practical yet. Then, after you've given me an in-depth exploration of the possibilities in terms of first principles, help me ideate 3 different project ideas I could actualize on. Give me rationale for why these might be difficult without the tech (representing ... as KG), an why the projects are tackling a real problem (rationale).  Let's explore the possibilities together!"
input_state = {"user_query": test_q}

final_state = graph.invoke(input_state)

In [None]:
## Without analogical reasoning - raw LLM output
result = basic_output(test_q)
print(result)


Absolutely! I love your background and thoughtful brainstorming. This is a rich, cutting-edge domain — knowledge graphs (KGs) unlock possibilities that are fundamentally different from hierarchical or even “flat” representations of information. Below, I'll walk you through a first-principles analysis of what knowledge graphs as a paradigm unlock — then we'll ideate three projects with rationales about why they're hard *without* KGs and what real-world pain they address.

## 1. First Principles: What Do Knowledge Graphs Enable?

Let’s break this down to fundamentals:

### a) Flexible, Multidimensional Structuring

- Traditional hierarchical structures (folders, trees) force you to choose a single “path” or classification for each item. KGs allow entities to have *multiple* relationships simultaneously, simulating how things work in reality (e.g., “Person X” can be a friend, a co-author, a patient, and a competitor at the same time) without duplication.
- Enables polyhierarchies, rich on

### Test Output

In [24]:
# parse_output
import re

def parse_solution(text):
    # Remove Markdown formatting like "**" and "\n"
    clean_text = text.replace("**", "").replace("\\n", "\n")

    # Split into sections by horizontal rules (---)
    sections = re.split(r'-{3,}', clean_text)

    # Create a readable version of each section
    readable_output = []
    for section in sections:
        section = section.strip()
        if not section:
            continue
        # Optional: separate title and body if present
        lines = section.split("\n")
        if len(lines) > 1 and ":" not in lines[0]:
            # First line is likely a heading
            heading = lines[0]
            body = "\n".join(lines[1:])
            readable_output.append(f"\n=== {heading} ===\n{body}")
        else:
            readable_output.append(section)

    return "\n\n".join(readable_output)

In [25]:
# Save all parsed outputs to a text file
with open("sample_output.txt", "w", encoding="utf-8") as f:
    for key in final_state:
        raw_output = str(final_state[key])  # Ensure it's a string
        final_output = parse_solution(raw_output)
        f.write(f"\n### {key} ###\n")
        f.write(final_output)
        f.write("\n\n")

In [26]:
# Generate output without analogical reasoning Pipeline for comparison
def basic_output(user_input: str):
    
    prompt = f"""
    {user_input}
    """

    response = llms.invoke(prompt) 

    # Parse and format the response
    formatted_response = parse_solution(response.content)

    return(formatted_response)

In [19]:
test_q = "Teachers introduced structured group projects using peer evaluations and rotating roles to boost collaboration in science classes. Despite these measures, anonymous student surveys revealed that individual quiz scores post-project dropped compared to solo assignments, signaling diluted accountability. How can educators redesign design a system to ensure measurable individual mastery while preserving the benefits of teamwork?"
result = basic_output(test_q)

## === GPT-4.1 Response Without Analogical Reasoning Pipeline ===
print("\n=== GPT-4.1 Response Without Analogical Reasoning Pipeline ===\n")
print(result)



=== GPT-4.1 Response Without Analogical Reasoning Pipeline ===



In [37]:
# Generate output without analogical reasoning Pipeline BUT WITH COT for comparison
def basic_output_COT(user_input: str):
    
    prompt = f"""
    {user_input}
    Think step by step.
    """

    response = llms.invoke(prompt) 

    # Parse and format the response
    formatted_response = parse_solution(response.content)

    return(formatted_response)

In [38]:
test_q = "Teachers introduced structured group projects using peer evaluations and rotating roles to boost collaboration in science classes. Despite these measures, anonymous student surveys revealed that individual quiz scores post-project dropped compared to solo assignments, signaling diluted accountability. How can educators redesign design a system to ensure measurable individual mastery while preserving the benefits of teamwork?"
result = basic_output_COT(test_q)

print("\n=== GPT-4.1 Response Without Analogical Reasoning Pipeline WITH COT ===\n")
print(result)


=== GPT-4.1 Response Without Analogical Reasoning Pipeline WITH COT ===

Certainly! To address the problem—group work fostering teamwork but possibly reducing individual mastery/accountability—educators should reconsider both the design and assessment of group projects. Here’s a step-by-step approach:

### Step 1: Diagnose the Core Issues
- Observation: Individual post-project quizzes show lower scores than after solo work.
- Analysis: Students may be "hitchhiking" (letting groupmates do more), or division of labor is masking some students’ conceptual gaps.

### Step 2: Set Clear Dual Objectives
- Maintain collaboration (teamwork, communication, problem-solving).
- Ensure individual understanding and accountability (content mastery).

### Step 3: Redesign Project Structure

A. Roles and Responsibilities
- Continue rotating roles but with defined individual deliverables for each phase.
- Example: For a lab report, each student writes a different section (method, results, analysis) tied

In [39]:
# Generate output without analogical reasoning Pipeline BUT WITH PROMPT ENGINEERING for comparison (ask it to balance innovativeness with practicality)
def basic_output_prompt_engin(user_input: str):
    
    prompt = f"""
    {user_input}
    Try to balance practicality with innovation.
    """

    response = llms.invoke(prompt) 

    # Parse and format the response
    formatted_response = parse_solution(response.content)

    return(formatted_response)

In [40]:
test_q = "Teachers introduced structured group projects using peer evaluations and rotating roles to boost collaboration in science classes. Despite these measures, anonymous student surveys revealed that individual quiz scores post-project dropped compared to solo assignments, signaling diluted accountability. How can educators redesign design a system to ensure measurable individual mastery while preserving the benefits of teamwork?"
result = basic_output_prompt_engin(test_q)

print("\n=== GPT-4.1 Response Without Analogical Reasoning Pipeline WITH Prompt Engineering ===\n")
print(result)


=== GPT-4.1 Response Without Analogical Reasoning Pipeline WITH Prompt Engineering ===

Certainly! Balancing individual accountability with authentic collaboration is a common challenge in group work. Here’s a multi-pronged, practical-yet-innovative redesign educators can consider:


=== 1. Hybrid Assessment Model ===

- Personal Assignments Within Group Projects: Structure group projects so that each student is responsible for a distinct sub-task, deliverable, or research component. For example, in a biology project on ecosystems, one might investigate plant species, another animal relationships, etc. Require each member to submit a short individual report, reflection, or data analysis alongside the collective assignment.
- Individual “Defense”: After project completion, each student briefly presents or answers oral/written questions about *their* contribution and the overall project. This spot-checks true comprehension and holds each accountable for both their AND the group's work.
