In [32]:
import getpass
import os
from langgraph.graph import StateGraph, START, END
from typing import TypedDict, List, Dict, Any
import openai 
# from langgraph_supervisor import create_supervisor
from langchain.chat_models import init_chat_model
from textwrap import dedent
import json

In [33]:
# Prompts
TARGET_DOMAIN = dedent("""
        As a Domain Analysis Specialist, extract all the core innovation domains from the user query. It could be a single one for a simple query, or multiple ones for a complex query.
        Instructions:
        1. Analyze the user's input
        2. Identify the primary domain(s) requiring innovation
        3. Classify it within standard innovation categories
        Output Format:
        Target Domain: [Clear, specific domain label]
        Be very detailed and specific in your response and do not generalize. Respond ONLY with the name of the domain, do NOT include ANY other text like 'Target Domain:'.
        User query: {user_query}
""").strip()

PROBLEM_LANDSCAPE = dedent("""
        You are a Problem Landscape Analyst. Your task is to map out the concrete challenges within the target domain identified.
        Instructions:
        1. Identify all the core problems or challenges currently present in these domains. Aim for at least 3 problems per domain
        2. For each problem, provide:
        - Problem: A short, clear title.
        - Description: 2-3 sentences explaining what the problem is and why it matters.
        - Context: Briefly state the circumstances or environment where this problem occurs.
        - Stakeholders: List the main groups or individuals affected.
        - Root Causes: Identify 1-3 underlying causes, if known.
        - Impact: State the significance of the problem (e.g., social, economic, technical).
        - Current Approaches: How is this problem currently addressed?
        - Limitations: What are the shortcomings of current approaches?
        - Success Metrics: How would you measure if this problem is solved?
        - Interconnections: Note if this problem is linked to or influenced by other problems.
        Output Format:
        Present your findings as a structured list or JSON array, with each problem fully described as above.
        Important:
        - Focus on clarity and completeness.
        - Avoid abstracting or generalizing; stay concrete and domain-specific.
        - Do not propose solutions; only describe the current problem landscape.
        Target domain: {target_domain}
""").strip()

ABSTRACTION = dedent("""
You are a TRIZ Methodology Expert. Transform domain-specific problems into universal contradictions.
        Process:
        1. Read up on TRIZ - the contradiction matrix, and the inventive principles
        2. For each problem in the problem landscape:
        - Abstract to universal parameters (what improves vs. what worsens)
        - Express as 'When we improve X, Y worsens'
        - Ensure parameters are domain-agnostic
        3. Analyze all the abstracted universal parameters, and identify all the core TRIZ contradictions present:
        - Select the most fundamental tensions
        - Map to TRIZ contradiction matrix
        - Note applicable inventive principles
        Output:
        # List all the core contradictions in form of:
        - Improving [parameter] vs. Worsening [parameter]
        - TRIZ Principles: [1-3 relevant principles]
        - Innovation Potential: [High/Medium/Low]
        Focus on contradictions that, if resolved, would create breakthrough value.

        Problem landscape: {problem_landscape}
""").strip()

BASE_DOMAIN = dedent("""
        You are a Cross-Domain Search Specialist. Do the following:
        - For each contradiction provided, identify 3 distinct source domains (fields or industries) where this contradiction has been successfully addressed.
        - Experiment with different subsets of the list of contradictions, and see if you could identify 3 distinct source domains for each of these subsets identified as well. You should find at least 3 different subsets.
        Note: The domains should have A CONCEPTUAL DISTANCE OF AT LEAST 3 DISTINCT HOPS FROM WHAT IMMEDIATELY COMES TO MIND. Be creative! It can be domains within spheres like natural, phsyical, social, artistic, or anything.
        For each domain identified, briefly explain why it is relevant to the single contradiction or the subset of contradictions identified. Do not describe specific solutions just yet-only list the domains and your rationale.
        Output:
        A list for each contradiction and subset of contradictions identified, naming 3 relevant domains with a 2 sentence rationale for each.
        Aim for a total of at least 20 relevant base domains. 
        Contradictions: {contradictions}
""").strip()

BASE_SOLUTIONS = dedent("""
        You are a Solution Pattern Extractor. You are provided with an input with 3 base domains identified per TRIZ (Theory of Inventive Problem Solving) contradiction or a set of contradictions, as well as the contradictions themselves.
        For each of these identified base domains, identify one specific, well-documented solution pattern within the domain that effectively resolves the contradiction (or the set of contradictions).
        For each solution pattern, return:
        - Identify the base domain it's corresponding to
        - Recall the contradiction or the set of contradictions that this base domain faces
        - The name or label of the solution pattern for resolving these contradiction(s) in the base domain
        - A detailed description of the core mechanism or principle involved and how it addressed the domain's contradiction(s)
        - The context or situation in the domain where this pattern is applied\n"
        Do not generalize or adapt the solution-simply describe how the contradiction is addressed within each source domain.
        Output:
        For each of the provided domain, list the base domain name, contradiction(s) faced, solution pattern name, the detailed description of the mechanism of the solution pattern, and the context in which it is used. Articulate the contradictions as problems and considerations faced, through framing them as a tension.

        Input: {input}
""").strip()

ANALOGICAL_TRANSFER = dedent("""
        You are a very innovative Analogical Transfer Specialist.
        You are provided with list the base domain name, tensions faced, solution pattern name, the detailed description of the mechanism of the solution pattern, and the context in which it is used.
        Your task is to propose how solution patterns used to resolve these tensions in various base domains might inspire solution framings for the original target domain.

        Input Overview:
        1. A list the base domains identified, the tensions these domains faced, the name of solution patterns that helped addressed these tensions in these base domains, the detailed description of the mechanism of the solution pattern, and the context in which it is used.
        2. The original target domain.

        Instructions:
        For each pair of base domain and the corresponding tensions identified, review the solution patterns that worked for the base domain. For each pattern:
        - Analyze the core mechanism or principle behind the solution.
        - Map and adapt this mechanism conceptually to the target domain, considering the specific context and needs of the target domain.
        - Clearly describe how this analogical transfer could frame a potential solution in the target domain.
        - Highlight any key adaptations, considerations, or limitations that would be relevant when applying this pattern to the target domain.

        Your expected Output:
        For each base domain, provide a comprehensive description of a proposed solution framing for the target domain, including:
        - The original tension addressed
        - The source domain and solution pattern
        - A detailed explanation of how the pattern could inspire or inform a solution in the target domain
        - Any important adaptations or considerations for successful transfer

        Here are the actual inputs:
        - A list the base domains identified, the tensions these domains faced, the name of solution patterns that helped addressed these tensions in these base domains, the detailed description of the mechanism of the solution pattern, and the context in which it is used.: {contradictions_solutions}
        - Original target domain: {target_domain}
""").strip()

In [34]:
# check for openai API
def _set_if_undefined(var: str):
    if not os.environ.get(var):
        os.environ[var] = getpass.getpass(f"Please provide your {var}")


_set_if_undefined("OPENAI_API_KEY")

In [35]:
# parse_output
import re

def parse_solution(text):
    # Remove Markdown formatting like "**" and "\n"
    clean_text = text.replace("**", "").replace("\\n", "\n")

    # Split into sections by horizontal rules (---)
    sections = re.split(r'-{3,}', clean_text)

    # Create a readable version of each section
    readable_output = []
    for section in sections:
        section = section.strip()
        if not section:
            continue
        # Optional: separate title and body if present
        lines = section.split("\n")
        if len(lines) > 1 and ":" not in lines[0]:
            # First line is likely a heading
            heading = lines[0]
            body = "\n".join(lines[1:])
            readable_output.append(f"\n=== {heading} ===\n{body}")
        else:
            readable_output.append(section)

    return "\n\n".join(readable_output)

In [36]:
# Graph state: workflow
# class ReasoningState(TypedDict):
#     user_query: str
#     target_domain: str
#     problem_landscape: str
#     abstraction: str
#     abstraction_feedback: str
#     base_domain: str
#     base_solutions: str
#     analogical_transfer: str
#     transfer_feedback: str
#     solution: str


class ReasoningState(TypedDict):
    user_query: str
    target_domain: str
    problem_landscape: str
    abstraction: str
    abstraction_feedback: str
    base_domain: str
    base_solutions: str
    analogical_transfer: str
    transfer_feedback: str
    solution: str
    
    # Add for CRIT agents:
    retry_count_abstract: int              # For infinite loop prevention
    retry_count_transfer: int              # For infinite loop prevention
    validated_contradictions: List[str]    # For passing filtered contradictions
    validated_analogical_transfers: List[str]  # For passing filtered transfers

In [37]:
# Can switch between different LLMs 
llms = init_chat_model("openai:gpt-4.1")

In [38]:
# 1st Agent: Identify Target Domain from User Input
def target_domain_agent(state: ReasoningState):
    u = state['user_query']

    msg = llms.invoke(TARGET_DOMAIN.format(user_query=u))

    return {"target_domain": msg.content}

In [39]:
# 2st Agent: Conduct comprehensive research into problem landscape for target domain 
# (i.e. What specific challenges exist in this domain?)
def problem_landscape_agent(state: ReasoningState):
    t = state['target_domain']

    msg = llms.invoke(PROBLEM_LANDSCAPE.format(target_domain=t))

    return {"problem_landscape": msg.content}

In [40]:
# 3rd Agent: Abstract Problems Identified into Generalized Principles and TRIZ Contradiction
def abstraction_agent(state: ReasoningState):
    p = state['problem_landscape']
    feedback = state.get('abstraction_feedback', '')
    
    # If there's feedback, include it in the prompt
    if feedback:
        msg = llms.invoke(ABSTRACTION.format(
            problem_landscape=p,
            feedback=f"\nPrevious attempt feedback: {feedback}\nPlease address these issues in your new abstraction."
        ))
    else:
        msg = llms.invoke(ABSTRACTION.format(problem_landscape=p))
    
    return {"abstraction": msg.content}

In [41]:
## 3.5th agent: CRIT Quality Control for TRIZ Abstraction
def parse_abstraction_output(abstraction_text: str) -> list:
    """Parse the structured abstraction output into individual contradictions."""
    lines = [line.strip() for line in abstraction_text.split('\n') if line.strip()]
    contradictions = []
    
    current_contradiction = None
    for line in lines:
        if line.startswith('- Improving'):
            current_contradiction = {"contradiction": line.strip('- ').strip()}
        elif line.startswith('- TRIZ Principles:') and current_contradiction:
            current_contradiction["principles"] = line.replace('- TRIZ Principles:', '').strip()
        elif line.startswith('- Innovation Potential:') and current_contradiction:
            current_contradiction["potential"] = line.replace('- Innovation Potential:', '').strip()
            contradictions.append(current_contradiction)
            current_contradiction = None
    
    return contradictions

def CRIT_Control_Abstraction_Three_Step(target_domain: str, problem_landscape: str, triz_contradictions: str) -> dict:
    """Three-step CRIT validation workflow for TRIZ contradiction filtering."""
    
    # Parse contradictions from structured output
    parsed_contradictions = parse_abstraction_output(triz_contradictions)
    
    if not parsed_contradictions:
        return {
            "score": 0.0,
            "pass": False,
            "reason": "No valid contradictions found in the input",
            "validated_contradictions": [],
            "filtered_contradictions": []
        }
    
    try:
        # STEP 1: First CRIT Agent - Initial Filtering
        step1_prompt = f"""
# FIRST CRIT AGENT - Initial Socratic Filtering

Apply Professor Chang's CRIT methods to filter TRIZ contradictions into good and bad sets.

## CONTEXT
Target Domain: {target_domain}
Problem Landscape: {problem_landscape}
Contradictions to Evaluate: {parsed_contradictions}

## SOCRATIC FILTERING METHODS

### 1. DEFINITION METHOD
- Format: Does each contradiction follow "Improving [X] vs. Worsening [Y]"?
- Parameters: Are they legitimate TRIZ parameters?
- Abstraction: Are they at first-principles level?

### 2. ELENCHUS METHOD (Cross-Examination)
- Evidence: Can each contradiction trace back to specific problems?
- Logic: Is the causal chain Problem → Root Cause → Contradiction valid?
- Consistency: Any logical gaps in abstraction mapping?

### 3. DIALECTIC METHOD (Counter-Arguments)
- Alternatives: Are there better ways to express the same tension?
- Comparison: Which contradictions capture essential tensions best?
- Weakness: What are the strongest arguments against weak contradictions?

## FILTERING DECISION
Sort contradictions into two sets based on Socratic analysis:

### OUTPUT FORMAT
{{
    "good_contradictions": [
        {{"contradiction": str, "principles": str, "potential": str}}
    ],
    "bad_contradictions": [
        {{"contradiction": str, "principles": str, "potential": str, "reason": str}}
    ],
    "filtering_rationale": str
}}

Apply rigorous Socratic methods. Include rationale for why contradictions are filtered out.
        """
        
        response_1 = llms.invoke(step1_prompt)
        result_1 = parse_json_response(response_1.content)
        
        # STEP 2: Second CRIT Agent - Review and Revision
        step2_prompt = f"""
# SECOND CRIT AGENT - Socratic Review and Revision

Cross-examine the first agent's filtering decisions using CRIT methods.

## FIRST AGENT'S DECISIONS
Good Contradictions: {result_1.get('good_contradictions', [])}
Bad Contradictions: {result_1.get('bad_contradictions', [])}
Filtering Rationale: {result_1.get('filtering_rationale', '')}

## SOCRATIC CROSS-EXAMINATION

### 1. ELENCHUS METHOD (Challenge Decisions)
- Are any "good" contradictions actually flawed? Why?
- Are any "bad" contradictions actually valuable? Why?
- Do the filtering rationales hold up under scrutiny?

### 2. DIALECTIC METHOD (Counter-Arguments)
- Generate counter-arguments to the first agent's filtering rationale
- Test alternative interpretations of contradiction quality
- Challenge assumptions about what makes contradictions "good" or "bad"

### 3. MAIEUTICS METHOD (Surface Hidden Assumptions)
- What unstated assumptions influenced the first agent's decisions?
- Are there valid contradictions being unfairly penalized?
- What criteria were prioritized and why?

## REVIEW DECISION
Based on Socratic cross-examination:

### OUTPUT FORMAT
{{
    "revised_good_contradictions": [
        {{"contradiction": str, "principles": str, "potential": str}}
    ],
    "revised_bad_contradictions": [
        {{"contradiction": str, "principles": str, "potential": str, "reason": str}}
    ],
    "revision_rationale": str,
    "revisions_made": boolean
}}

If no revisions needed, return original sets with revisions_made: false.
        """
        
        response_2 = llms.invoke(step2_prompt)
        result_2 = parse_json_response(response_2.content)
        
        # STEP 3: First CRIT Agent - Final Synthesis
        step3_prompt = f"""
# FIRST CRIT AGENT - Final Synthesis

Synthesize the second agent's review with your own analysis to create final contradiction sets.

## SECOND AGENT'S REVIEW
Revised Good Contradictions: {result_2.get('revised_good_contradictions', [])}
Revised Bad Contradictions: {result_2.get('revised_bad_contradictions', [])}
Revision Rationale: {result_2.get('revision_rationale', '')}
Revisions Made: {result_2.get('revisions_made', False)}

## FINAL SOCRATIC SYNTHESIS

THINK ABOUT THE FOLLOWING
### 1. DIALECTIC INTEGRATION
- Consider the second agent's counter-arguments seriously
- Weigh competing interpretations of contradiction quality
- Resolve any remaining tensions between different quality standards

### 2. ELENCHUS VALIDATION
- Cross-examine the final decisions one more time
- Ensure logical consistency in the final sets
- Verify that filtering decisions are well-justified

### 3. PRACTICAL WISDOM
- Balance perfectionism with progress
- Ensure adequate contradictions for next pipeline stage
- Prioritize contradictions that best serve the target domain

## FINAL DECISION (MANDATORY)
Provide definitive contradiction sets for pipeline progression:

### OUTPUT FORMAT
{{
    "final_good_contradictions": [
        {{"contradiction": str, "principles": str, "potential": str}}
    ],
    "final_bad_contradictions": [
        {{"contradiction": str, "principles": str, "potential": str}}
    ],
    "synthesis_complete": boolean
}}

Must complete synthesis. Final decision required.
        """
        
        response_3 = llms.invoke(step3_prompt)
        result_3 = parse_json_response(response_3.content)
        
        # Extract final results
        final_good = result_3.get('final_good_contradictions', [])
        final_bad = result_3.get('final_bad_contradictions', [])
        
        # Convert to simple list format for pipeline compatibility
        validated_contradictions = [item['contradiction'] for item in final_good if 'contradiction' in item]
        filtered_contradictions = [item['contradiction'] for item in final_bad if 'contradiction' in item]
        
        # Calculate final pass/fail
        if len(validated_contradictions) >= 2:  # Minimum viable set
            final_score = 8.0
            final_pass = True
            final_reason = ""
        else:
            final_score = 6.0
            final_pass = False
            final_reason = f"Insufficient contradictions after 3-step CRIT filtering: {len(validated_contradictions)}"
        
        return {
            "score": final_score,
            "pass": final_pass,
            "reason": final_reason,
            "validated_contradictions": validated_contradictions,
            "filtered_contradictions": filtered_contradictions,
            "three_step_summary": {
                "step1_good_count": len(result_1.get('good_contradictions', [])),
                "step1_bad_count": len(result_1.get('bad_contradictions', [])),
                "step2_revisions_made": result_2.get('revisions_made', False),
                "step3_final_good_count": len(validated_contradictions),
                "step3_final_bad_count": len(filtered_contradictions)
            }
        }
        
    except Exception as e:
        return {
            "score": 0.0,
            "pass": False,
            "reason": f"Three-step CRIT workflow failed: {str(e)}",
            "validated_contradictions": [],
            "filtered_contradictions": []
        }

def parse_json_response(response_text: str) -> dict:
    """Enhanced JSON parser for three-step CRIT responses."""
    try:
        # Find JSON in response
        start_idx = response_text.find('{')
        end_idx = response_text.rfind('}') + 1
        if start_idx >= 0 and end_idx > start_idx:
            json_str = response_text[start_idx:end_idx]
            return json.loads(json_str)
        return {"error": "No valid JSON found"}
    except Exception as e:
        return {"error": f"JSON parsing failed: {str(e)}"}


In [42]:
## 3.5th Agent Logic Flow: if quality control passes, continue to next agent. Else, loop back to retry TRIZ abstraction (taking rationale for why it failed as additional input)
def should_continue_to_base(state: ReasoningState) -> str:
    """
    Revised control flow for abstraction. Determines whether to continue to the 
    base domain or retry abstraction, with a retry limit to prevent infinite loops.
    """
    # Check retry limit FIRST to prevent getting stuck
    retry_count = state.get('retry_count_abstract', 0)
    if retry_count >= 2:  # Max 3 attempts (0, 1, 2)
        print(f"Max retries ({retry_count}) reached for abstraction. Forcing progression to base domain agent.")
        # To prevent downstream errors, pass the last-known validated list or an empty one
        if 'validated_contradictions' not in state:
             state['validated_contradictions'] = []
        return "base"

    # If limit is not reached, run the CRIT validation
    crit_result = CRIT_Control_Abstraction(
        state['target_domain'],
        state['problem_landscape'],
        state['abstraction']
    )
    
    passed = crit_result.get('pass', False)
    
    if passed:
        # IMPORTANT: Update state with the validated contradictions for the next agent
        state['validated_contradictions'] = crit_result.get('validated_contradictions', [])
        return "base"
    else:
        # If CRIT fails, increment retry counter and add feedback for the next loop
        state['retry_count_abstract'] = retry_count + 1
        state['abstraction_feedback'] = crit_result.get('reason', '')
        print(f"Abstraction CRIT failed (attempt {state['retry_count_abstract']}): {crit_result.get('reason', '')}")
        return "abstract"



In [43]:
# 4th Agent: Search for Appropriate Base Domains
def base_domain_agent(state: ReasoningState):
    a = state['abstraction']

    msg = llms.invoke(BASE_DOMAIN.format(contradictions=a))

    return {"base_domain": msg.content}

In [44]:
# 5th Agent: Identify Solution in Base Domain
def base_solution_agent(state: ReasoningState):
    b = state['base_domain']

    msg = llms.invoke(BASE_SOLUTIONS.format(input=b))

    return {"base_solutions": msg.content}

In [45]:
# 6th Agent: Base Domain Solution Informing Target Domain Solution
def analogical_transfer_agent(state: ReasoningState):
    if "base_solutions" not in state:
        raise ValueError("Missing 'base_solutions' key. Check if previous node returned it.")
    b = state['base_solutions']
    t = state['target_domain']
    feedback = state.get('transfer_feedback', '')
    
    # If there's feedback, include it in the prompt
    if feedback:
        msg = llms.invoke(ANALOGICAL_TRANSFER.format(
            contradictions_solutions=b,
            target_domain=t,
            feedback=f"\nPrevious attempt feedback: {feedback}\nPlease address these issues in your new analogical transfer."
        ))
    else:
        msg = llms.invoke(ANALOGICAL_TRANSFER.format(
            contradictions_solutions=b,
            target_domain=t
        ))
    
    return {"analogical_transfer": msg.content}

In [46]:
# 6.5th Agent: CRIT Quality Control for Analogical Reasoning 
## Helper function to parse previous output
def parse_analogical_transfer_output(transfer_text: str) -> list:
    """Parse the analogical transfer output into individual transfer solutions."""
    lines = [line.strip() for line in transfer_text.split('\n') if line.strip()]
    transfers = []
    
    current_transfer = None
    for line in lines:
        # Look for transfer headers or solution descriptions
        if any(keyword in line.lower() for keyword in ['transfer', 'solution', 'approach', 'mechanism']):
            if current_transfer and len(current_transfer.get('description', '')) > 50:
                transfers.append(current_transfer)
            current_transfer = {"transfer": line.strip()}
            current_transfer["description"] = ""
        elif current_transfer:
            # Accumulate description
            current_transfer["description"] += " " + line.strip()
    
    # Add final transfer if exists
    if current_transfer and len(current_transfer.get('description', '')) > 50:
        transfers.append(current_transfer)
    
    # Fallback: split by paragraphs if structured parsing fails
    if not transfers:
        paragraphs = transfer_text.split('\n\n')
        for i, paragraph in enumerate(paragraphs):
            if len(paragraph.strip()) > 100:  # Minimum length for meaningful transfer
                transfers.append({
                    "transfer": f"Transfer Solution {i+1}",
                    "description": paragraph.strip()
                })
    
    return transfers

## CRIT control agent - debate 3 steps to ensure quality
def CRIT_Control_Analogical_Transfer_Three_Step(target_domain: str, original_contradictions: str, base_solutions: str, analogical_transfers: str) -> dict:
    """Three-step CRIT validation workflow for analogical transfer filtering."""
    
    # Parse transfers from output
    parsed_transfers = parse_analogical_transfer_output(analogical_transfers)
    
    if not parsed_transfers:
        return {
            "score": 0.0,
            "pass": False,
            "reason": "No valid analogical transfers found in the input",
            "validated_transfers": [],
            "filtered_transfers": []
        }
    
    try:
        # STEP 1: First CRIT Agent - Initial Transfer Filtering
        step1_prompt = f"""
# FIRST CRIT AGENT - Initial Socratic Transfer Filtering

Apply Professor Chang's CRIT methods to filter analogical transfers into good and bad sets.

## CONTEXT
Target Domain: {target_domain}
Original Contradictions: {original_contradictions}
Base Solutions: {base_solutions}
Transfers to Evaluate: {parsed_transfers}

## SOCRATIC FILTERING METHODS

### 1. DEFINITION METHOD
- Mechanism: Does each transfer preserve core solution patterns from base domains?
- Parameters: Are key elements from base domain mapped correctly to target domain?
- Abstraction: Is transfer at appropriate level (not too superficial, not too abstract)?

### 2. ELENCHUS METHOD (Cross-Examination)
- Evidence: Can each transfer trace back clearly from base solution to target application?
- Logic: Is the causal chain Base Mechanism → Transfer Logic → Target Solution valid?
- Consistency: Do transfers actually address the original contradictions identified?

### 3. DIALECTIC METHOD (Counter-Arguments)
- Alternatives: Are there better ways to transfer the same base mechanisms?
- Comparison: Which transfers best preserve essential solution patterns?
- Weakness: What are strongest arguments against weak or superficial transfers?

## FILTERING DECISION
Sort transfers into two sets based on Socratic analysis:

### OUTPUT FORMAT
{{
    "good_transfers": [
        {{"transfer": str, "description": str}}
    ],
    "bad_transfers": [
        {{"transfer": str, "description": str, "reason": str}}
    ],
    "filtering_rationale": str
}}

Apply rigorous Socratic methods. Focus on mechanism fidelity and target domain feasibility.
        """
        
        response_1 = llms.invoke(step1_prompt)
        result_1 = parse_json_response(response_1.content)
        
        # STEP 2: Second CRIT Agent - Review and Revision
        step2_prompt = f"""
# SECOND CRIT AGENT - Socratic Transfer Review and Revision

Cross-examine the first agent's transfer filtering decisions using CRIT methods.

## FIRST AGENT'S DECISIONS
Good Transfers: {result_1.get('good_transfers', [])}
Bad Transfers: {result_1.get('bad_transfers', [])}
Filtering Rationale: {result_1.get('filtering_rationale', '')}

## SOCRATIC CROSS-EXAMINATION

### 1. ELENCHUS METHOD (Challenge Transfer Decisions)
- Are any "good" transfers actually flawed in mechanism preservation?
- Are any "bad" transfers actually valuable but underestimated?
- Do the filtering rationales properly assess implementation feasibility?

### 2. DIALECTIC METHOD (Counter-Arguments to First Agent)
- Generate counter-arguments to the first agent's transfer quality assessments
- Test alternative interpretations of what makes effective analogical transfer
- Challenge assumptions about mechanism preservation vs. creative adaptation

### 3. MAIEUTICS METHOD (Surface Hidden Transfer Assumptions)
- What unstated assumptions influenced transfer quality judgments?
- Are there valid transfers being unfairly penalized for creativity?
- What criteria should prioritize: fidelity to base vs. target domain fit?

## REVIEW DECISION
Based on Socratic cross-examination of transfer quality:

### OUTPUT FORMAT
{{
    "revised_good_transfers": [
        {{"transfer": str, "description": str}}
    ],
    "revised_bad_transfers": [
        {{"transfer": str, "description": str, "reason": str}}
    ],
    "revision_rationale": str,
    "revisions_made": boolean
}}

If no revisions needed, return original sets with revisions_made: false.
        """
        
        response_2 = llms.invoke(step2_prompt)
        result_2 = parse_json_response(response_2.content)
        
        # STEP 3: First CRIT Agent - Final Transfer Synthesis
        step3_prompt = f"""
# FIRST CRIT AGENT - Final Transfer Synthesis

Synthesize the second agent's review with your own analysis to create final transfer sets.

## SECOND AGENT'S REVIEW
Revised Good Transfers: {result_2.get('revised_good_transfers', [])}
Revised Bad Transfers: {result_2.get('revised_bad_transfers', [])}
Revision Rationale: {result_2.get('revision_rationale', '')}
Revisions Made: {result_2.get('revisions_made', False)}

## FINAL SOCRATIC SYNTHESIS

### 1. DIALECTIC INTEGRATION
- Consider the second agent's counter-arguments about transfer quality seriously
- Weigh competing interpretations of effective analogical transfer
- Resolve tensions between mechanism fidelity and target domain adaptation

### 2. ELENCHUS VALIDATION
- Cross-examine the final transfer decisions one more time
- Ensure logical consistency between transfers and original contradictions
- Verify that transfer mechanisms are implementable in target domain

### 3. METHOD OF MAIEUTICS (Midwife Method)
- Draw out the inherent wisdom about transfer quality revealed through agent dialogue
- Surface the essential understanding about target domain needs that has emerged
- Help bring forth the knowledge about which transfers best serve analogical reasoning

## FINAL DECISION (MANDATORY)
Provide definitive transfer sets for synthesis agent:

### OUTPUT FORMAT
{{
    "final_good_transfers": [
        {{"transfer": str, "description": str}}
    ],
    "final_bad_transfers": [
        {{"transfer": str, "description": str}}
    ],
    "synthesis_complete": boolean
}}

Must complete synthesis. Final decision required for pipeline progression.
        """
        
        response_3 = llms.invoke(step3_prompt)
        result_3 = parse_json_response(response_3.content)
        
        # Extract final results
        final_good = result_3.get('final_good_transfers', [])
        final_bad = result_3.get('final_bad_transfers', [])
        
        # Convert to simple list format for pipeline compatibility
        validated_transfers = [item['description'] for item in final_good if 'description' in item]
        filtered_transfers = [item['description'] for item in final_bad if 'description' in item]
        
        # Calculate final pass/fail
        if len(validated_transfers) >= 2:  # Minimum viable transfer set
            final_score = 8.0
            final_pass = True
            final_reason = ""
        else:
            final_score = 6.0
            final_pass = False
            final_reason = f"Insufficient quality transfers after 3-step CRIT filtering: {len(validated_transfers)}"
        
        return {
            "score": final_score,
            "pass": final_pass,
            "reason": final_reason,
            "validated_transfers": validated_transfers,
            "filtered_transfers": filtered_transfers,
            "three_step_summary": {
                "step1_good_count": len(result_1.get('good_transfers', [])),
                "step1_bad_count": len(result_1.get('bad_transfers', [])),
                "step2_revisions_made": result_2.get('revisions_made', False),
                "step3_final_good_count": len(validated_transfers),
                "step3_final_bad_count": len(filtered_transfers)
            }
        }
        
    except Exception as e:
        return {
            "score": 0.0,
            "pass": False,
            "reason": f"Three-step transfer CRIT workflow failed: {str(e)}",
            "validated_transfers": [],
            "filtered_transfers": []
        }

def parse_json_response(response_text: str) -> dict:
    """Enhanced JSON parser for three-step CRIT responses."""
    try:
        # Find JSON in response
        start_idx = response_text.find('{')
        end_idx = response_text.rfind('}') + 1
        if start_idx >= 0 and end_idx > start_idx:
            json_str = response_text[start_idx:end_idx]
            return json.loads(json_str)
        return {"error": "No valid JSON found"}
    except Exception as e:
        return {"error": f"JSON parsing failed: {str(e)}"}


In [47]:
# Routing function for workflow integration
def should_continue_to_synthesis(state: ReasoningState) -> str:
    """
    Control flow for transfer validation. Determines whether to proceed to synthesis
    or retry, with a retry limit to prevent infinite loops.
    """
    # Check retry limit FIRST to break potential loops
    retry_count = state.get('retry_count_transfer', 0)
    if retry_count >= 2:  # Allow up to 2 retries (3 total attempts)
        print(f"Max retries ({retry_count}) reached for analogical transfer. Forcing progression to synthesis.")
        # To avoid errors, pass the last known (unfiltered) transfers to the synthesis agent
        state['validated_analogical_transfers'] = parse_analogical_transfer_output(state['analogical_transfer'])
        return "synthesis"

    # If within limits, run CRIT validation
    crit_result = CRIT_Control_Analogical_Transfer_Three_Step(
        state['target_domain'],
        state.get('validated_contradictions', state['abstraction']), # Use validated contradictions if available
        state['base_solutions'],
        state['analogical_transfer']
    )
    
    passed = crit_result.get('pass', False)
    
    if passed:
        # On success, update the state with the validated transfers for the synthesis agent
        state['validated_analogical_transfers'] = crit_result.get('validated_transfers', [])
        return "synthesis"
    else:
        # On failure, increment the retry counter and add feedback for the next loop
        state['retry_count_transfer'] = retry_count + 1
        state['transfer_feedback'] = crit_result.get('reason', '')
        print(f"Transfer CRIT failed (attempt {state['retry_count_transfer']}): {crit_result.get('reason', '')}")
        return "analogical_transfer"

 


In [48]:
# 7th Agent: Summarize everything and respond to the question
def synthesis_agent(state: ReasoningState):
    msg = llms.invoke(
        f"Evaluate the proposed analogical solutions. Find the best ones that balances practicality with innovation. Then, provide a detailed, well-structured response that addresses all aspects of the query.\n\n"
        f"Problem: {state['user_query']}\n"
        f"Analogical Solutions: {state['analogical_transfer']}"
        f"In your output, remember to abstract away the analogy itself such that it is focused on responding to the user input."
        f"Also, check if the users are requesting a specific number of possible solutions. Make sure to answer the user's query in full and provide what is requested."
    )
    return {"solution": msg.content}

In [49]:
# Construct the workflow 
workflow = StateGraph(ReasoningState)

workflow.add_node("target", target_domain_agent)
workflow.add_node("landscape", problem_landscape_agent)
workflow.add_node("abstract", abstraction_agent)
workflow.add_node("base", base_domain_agent)
workflow.add_node("base_soln", base_solution_agent)
workflow.add_node("analogy", analogical_transfer_agent)
workflow.add_node("synthesis", synthesis_agent)

workflow.set_entry_point("target")

# Define edges
workflow.add_edge("target", "landscape")
workflow.add_edge("landscape", "abstract")
# workflow.add_edge("abstract", "base")

# Define conditional edges - Use CRIT TO control quality for Abstraction TRIZ
workflow.add_conditional_edges(
    "abstract",
    should_continue_to_base,
    {
        "base": "base",
        "abstract": "abstract" 
    }
)

workflow.add_edge("base", "base_soln")
workflow.add_edge("base_soln", "analogy")
#workflow.add_edge("analogy", "synthesis")

# Define conditional edges - Use CRIT TO control quality for Analogical Transfer
workflow.add_conditional_edges(
    "analogy",
    should_continue_to_synthesis,
    {
        "synthesis": "synthesis",
        "analogy": "analogy"
    }
)

workflow.set_finish_point("synthesis")

graph = workflow.compile()

In [25]:
def analogical_output(user_input: str) -> str:
    input_state = {"user_query": user_input}

    final_state = graph.invoke(input_state)

    for key in final_state:
        raw_output = str(final_state[key])  # Ensure it's a string
        final_output = parse_solution(raw_output)

    return final_output

### Initialize Different Functions for Prompts

In [50]:
# Basic Output
def basic_output(user_input: str):
    
    prompt = f"""
    {user_input}
    """

    response = llms.invoke(prompt) 

    # Parse and format the response
    formatted_response = parse_solution(response.content)

    return(formatted_response)

In [51]:
# Basic Output with COT
## TO EDIT WITH FULL PROMPT?
def basic_output_COT(user_input: str):
    
    prompt = f"""
    {user_input}
    Think step by step.
    """

    response = llms.invoke(prompt) 

    # Parse and format the response
    formatted_response = parse_solution(response.content)

    return(formatted_response)

In [52]:
# Basic Output with Prompt Engineering
def basic_output_prompt_engin(user_input: str):
    
    prompt = f"""
    {user_input}
    Try to balance practicality with innovation.
    """

    response = llms.invoke(prompt) 

    # Parse and format the response
    formatted_response = parse_solution(response.content)

    return(formatted_response)

In [7]:
def basic_output_full_COT(user_input: str):
    prompt = f"""
    {user_input}
    Step one: As a Domain Analysis Specialist, extract all the core innovation domains from the user query. It could be a single one for a simple query, or multiple ones for a complex query.
        Instructions:
        1. Analyze the user's input
        2. Identify the primary domain(s) requiring innovation
        3. Classify it within standard innovation categories
        Output Format:
        Target Domain: [Clear, specific domain label]
        Be very detailed and specific in your response and do not generalize. Respond ONLY with the name of the domain, do NOT include ANY other text like 'Target Domain:'.
    Step two: You are a Problem Landscape Analyst. Your task is to map out the concrete challenges within the target domain identified.
        Instructions:
        1. Identify all the core problems or challenges currently present in these domains. Aim for at least 3 problems per domain
        2. For each problem, provide:
        - Problem: A short, clear title.
        - Description: 2-3 sentences explaining what the problem is and why it matters.
        - Context: Briefly state the circumstances or environment where this problem occurs.
        - Stakeholders: List the main groups or individuals affected.
        - Root Causes: Identify 1-3 underlying causes, if known.
        - Impact: State the significance of the problem (e.g., social, economic, technical).
        - Current Approaches: How is this problem currently addressed?
        - Limitations: What are the shortcomings of current approaches?
        - Success Metrics: How would you measure if this problem is solved?
        - Interconnections: Note if this problem is linked to or influenced by other problems.
        Output Format:
        Present your findings as a structured list or JSON array, with each problem fully described as above.
        Important:
        - Focus on clarity and completeness.
        - Avoid abstracting or generalizing; stay concrete and domain-specific.
        - Do not propose solutions; only describe the current problem landscape.
    Step three: You are a TRIZ Methodology Expert. Transform domain-specific problems into universal contradictions.
        Process:
        1. Read up on TRIZ - the contradiction matrix, and the inventive principles
        2. For each problem in the problem landscape:
        - Abstract to universal parameters (what improves vs. what worsens)
        - Express as 'When we improve X, Y worsens'
        - Ensure parameters are domain-agnostic
        3. Analyze all the abstracted universal parameters, and identify all the core TRIZ contradictions present:
        - Select the most fundamental tensions
        - Map to TRIZ contradiction matrix
        - Note applicable inventive principles
        Output:
        # List all the core contradictions in form of:
        - Improving [parameter] vs. Worsening [parameter]
        - TRIZ Principles: [1-3 relevant principles]
        - Innovation Potential: [High/Medium/Low]
        Focus on contradictions that, if resolved, would create breakthrough value.
    Step four: You are a Cross-Domain Search Specialist. Do the following:
        - For each contradiction provided, identify 3 distinct source domains (fields or industries) where this contradiction has been successfully addressed.
        - Experiment with different subsets of the list of contradictions, and see if you could identify 3 distinct source domains for each of these subsets identified as well. You should find at least 3 different subsets.
        Note: The domains should have A CONCEPTUAL DISTANCE OF AT LEAST 3 DISTINCT HOPS FROM WHAT IMMEDIATELY COMES TO MIND. Be creative! It can be domains within spheres like natural, phsyical, social, artistic, or anything.
        For each domain identified, briefly explain why it is relevant to the single contradiction or the subset of contradictions identified. Do not describe specific solutions just yet-only list the domains and your rationale.
        Output:
        A list for each contradiction and subset of contradictions identified, naming 3 relevant domains with a 2 sentence rationale for each.
        Aim for a total of at least 20 relevant base domains. 
    Step five: You are a Solution Pattern Extractor. You are provided with an input with 3 base domains identified per TRIZ (Theory of Inventive Problem Solving) contradiction or a set of contradictions, as well as the contradictions themselves.
        For each of these identified base domains, identify one specific, well-documented solution pattern within the domain that effectively resolves the contradiction (or the set of contradictions).
        For each solution pattern, return:
        - Identify the base domain it's corresponding to
        - Recall the contradiction or the set of contradictions that this base domain faces
        - The name or label of the solution pattern for resolving these contradiction(s) in the base domain
        - A detailed description of the core mechanism or principle involved and how it addressed the domain's contradiction(s)
        - The context or situation in the domain where this pattern is applied\n"
        Do not generalize or adapt the solution-simply describe how the contradiction is addressed within each source domain.
        Output:
        For each of the provided domain, list the base domain name, contradiction(s) faced, solution pattern name, the detailed description of the mechanism of the solution pattern, and the context in which it is used. Articulate the contradictions as problems and considerations faced, through framing them as a tension.
    Step six:         You are a very innovative Analogical Transfer Specialist.
        You are provided with list the base domain name, tensions faced, solution pattern name, the detailed description of the mechanism of the solution pattern, and the context in which it is used.
        Your task is to propose how solution patterns used to resolve these tensions in various base domains might inspire solution framings for the original target domain.

        Input Overview:
        1. A list the base domains identified, the tensions these domains faced, the name of solution patterns that helped addressed these tensions in these base domains, the detailed description of the mechanism of the solution pattern, and the context in which it is used.
        2. The original target domain.

        Instructions:
        For each pair of base domain and the corresponding tensions identified, review the solution patterns that worked for the base domain. For each pattern:
        - Analyze the core mechanism or principle behind the solution.
        - Map and adapt this mechanism conceptually to the target domain, considering the specific context and needs of the target domain.
        - Clearly describe how this analogical transfer could frame a potential solution in the target domain.
        - Highlight any key adaptations, considerations, or limitations that would be relevant when applying this pattern to the target domain.

        Your expected Output:
        For each base domain, provide a comprehensive description of a proposed solution framing for the target domain, including:
        - The original tension addressed
        - The source domain and solution pattern
        - A detailed explanation of how the pattern could inspire or inform a solution in the target domain
        - Any important adaptations or considerations for successful transfer
    Step seven: Evaluate the proposed analogical solutions. Find the best ones that balances practicality with innovation. Then, provide a detailed, well-structured response that addresses all aspects of the query.
        In your output, remember to abstract away the analogy itself such that it is focused on responding to the user input.
        Also, check if the users are requesting a specific number of possible solutions. Make sure to answer the user's query in full and provide what is requested.   
    """
    response = llms.invoke(prompt) 

    # Parse and format the response
    formatted_response = parse_solution(response.content)

    return(formatted_response)

### Test prompt 1 - Education

#### User Input

In [53]:
test_q = "Teachers introduced structured group projects using peer evaluations and rotating roles to boost collaboration in science classes. Despite these measures, anonymous student surveys revealed that individual quiz scores post-project dropped compared to solo assignments, signaling diluted accountability. How can educators redesign design a system to ensure measurable individual mastery while preserving the benefits of teamwork?"

#### With Analogical Reasoning

In [54]:
response = analogical_output(test_q)

In [56]:
print(response)

Certainly! The central challenge described is ensuring measurable individual mastery (fairness, accountability) within collaborative science projects—where teamwork and shared learning are essential, yet "free-riding" or dilution of individual responsibility can undermine both scores and student motivation. To address this, let's analyze and synthesize the most practical and innovative solutions from the analogies above, focusing on those with high impact and feasibility for secondary science classes.


=== ## Best Solution Approaches ===

### 1. Integrated Dual-Mode Assessment System

How It Works:
- Baseline Individual Assessment: Each student submits individual "artifacts" related to the group project: e.g., concept explanations, reflection logs, or quiz responses demonstrating their personal understanding of the project's scientific principles.
- Collaborative Product Assessment: The group’s shared output—lab report, model, presentation—is assessed for core competencies (experiment

In [None]:
final_state = graph.invoke()

In [57]:
with open("test1.txt", "w", encoding="utf-8") as f:
    f.write(response)
    f.write("\n\n")


In [61]:
# # Save all parsed outputs to a text file
# with open("sample_output.txt", "w", encoding="utf-8") as f:
#     for key in final_state:
#         raw_output = str(final_state[key])  # Ensure it's a string
#         final_output = parse_solution(raw_output)
#         f.write(f"\n### {key} ###\n")
#         f.write(final_output)
#         f.write("\n\n")

#### Without Analogical Reasoning - GPT 4.1 RAW

In [58]:
## Without analogical reasoning - raw LLM output
result = basic_output(test_q)

print("\n=== GPT-4.1 RAW Response ===\n")
print(result)



=== GPT-4.1 RAW Response ===

This scenario highlights a common tension: collaborative learning can build important skills, but it sometimes leads to “social loafing”—students may rely on peers and engage less deeply with content, hurting individual learning as measured by solo assessments.

To address this, educators can redesign group work with strategies that emphasize both individual mastery and authentic collaboration:


=== ### 1. Integrate Individual Accountability into Group Projects ===

Possible structures:
- “Jigsaw” Method: Each student is responsible for mastering (and teaching) a distinct part of the project. Individually assess their knowledge of their assigned piece and/or others’ pieces.
- Individual Deliverables: Alongside the group product, require each student to submit a reflection, solution, or section independently (graded separately).
- Embedded Quizzes/Check-Ins: After group work, administer short individual quizzes focused on the same content—count these for 

#### Without Analogical Reasoning but with COT

In [59]:
## Without analogical reasoning - raw LLM output
result = basic_output_COT(test_q)

print("\n=== GPT-4.1 Response WITH COT ===\n")
print(result)



=== GPT-4.1 Response WITH COT ===

Certainly! Here’s a structured step-by-step approach to redesign the group project system so it preserves teamwork benefits and ensures clear individual accountability:

### Step 1: Diagnose the Issue
- Problem Identified: Students are benefitting less individually from group projects (as shown by lower quiz scores).
- Possible Cause: Some students may rely too much on peers ("social loafing") or not engage deeply enough, diluting individual learning.

### Step 2: Define Goals
- Measurable Individual Mastery: Each student should demonstrate understanding of key concepts.
- Teamwork Benefits: Students should still practice collaboration, communication, and peer learning.

### Step 3: Introduce Individual Accountability Measures

#### a) Hybrid Assessment Design
- Group Output: Keep the collaborative project work.
- Individual Output: Require each student to submit a short, related individual assignment (e.g., reflection, explanation, or mini-report de

#### Without Analogical Reasoning but with Basic Prompt Engineering

In [62]:
## Without analogical reasoning - raw LLM output
result = basic_output_prompt_engin(test_q)

print("\n=== GPT-4.1 Response WITH Prompt Engineering ===\n")
print(result)


=== GPT-4.1 Response WITH Prompt Engineering ===

You’re confronting a classic challenge in collaborative learning: fostering teamwork without sacrificing individual accountability and measurable mastery. Here are several practical yet innovative redesign strategies:


=== ### 1. Hybrid Assessment Model ===
Combine group and individual assessments within each project cycle:
- Individual Prep Quizzes: Before group work, give a short quiz on key concepts. This ensures baseline understanding.
- Group Project: Proceed with collaborative tasks, rotating roles to ensure equity.
- Post-Project Individual Reflection/Quiz: After the group work, require each student to submit an individual assignment (e.g., analysis, quiz, application task) directly tied to the project. This can count significantly toward their grade.


=== ### 2. “Jigsaw Plus” Structure ===
Modify the classic jigsaw strategy:
- Assign each student a unique subtopic to master and teach their team.
- Peer-teaching accountability

## Automatic Evaluation

In [2]:
# Helper Functions
import json
import pandas as pd

# function to import the evaluation questions
def import_questions(file_name: str) -> list[str]:
    with open(file_name, 'r') as file:
        data = json.load(file)
    
    problems = [case['problem_description'] for case in data['cases']]
    return problems

# For each question, get outputs from four/five different agents 
# function to load the agents 
def auto_eval(question: str) -> list[str]:
    response = []
    response.append(analogical_output(question))
    response.append(basic_output(question))
    response.append(basic_output_COT(question))
    response.append(basic_output_prompt_engin(question))
    response.append(basic_output_full_COT(question))

    return response

def auto_eval_batch(questions: list[str]) -> list[list[str]]:
    responses = []
    for question in questions:
        response = auto_eval(question)
        responses.append(response)
    return responses

def llm_judge(user_query: str, response: list[str]) -> pd.DataFrame: 
    response_1 = response[0]
    response_2 = response[1]
    response_3 = response[2]
    response_4 = response[3]
    response_5 = response[4]

    prompt = f"""
    Here is the user query: {user_query}
    I will give you 5 sample responses generated using different reasoning methods. I need you to evaluate them side by side on 2 criteria: innovativeness (insightfulness) and practicality. 
    Score each criterion from 1 to 10, and return the result **strictly in the following JSON format**:

    {{
      "Response A": {{"Innovativeness": int, "Practicality": int}},
      "Response B": {{"Innovativeness": int, "Practicality": int}},
      "Response C": {{"Innovativeness": int, "Practicality": int}},
      "Response D": {{"Innovativeness": int, "Practicality": int}}
      "Response E": {{"Innovativeness": int, "Practicality": int}}
    }}

    User Query: {user_query}
    
    Response A: {response[0]}
    
    Response B: {response[1]}
    
    Response C: {response[2]}
    
    Response D: {response[3]}

    Response E: {response[3]}
    """
    raw_output = llms.invoke(prompt)
    # Extract content from AIMessage
    content = raw_output.content if hasattr(raw_output, 'content') else str(raw_output)
    
    json_start = content.find('{')
    json_data = content[json_start:]

    try:
        scores = json.loads(json_data)
        df = pd.DataFrame.from_dict(scores, orient='index')
        df.index.name = "response_label"
        df = df.reset_index()
        return df
    except json.JSONDecodeError as e:
        print("JSON parsing failed:", e)
        print("Raw LLM output was:\n", content)
        return None

def evaluate_multiple_questions(user_queries: list[str], all_responses: list[list[str]]) -> pd.DataFrame:
    """
    user_queries: list of queries, one per question
    all_responses: list of response lists, each list has 4 responses for the corresponding query
    lms: the LLM interface with .invoke(prompt)
    """
    results = []
    

    for i, (query, responses) in enumerate(zip(user_queries, all_responses)):
        df = llm_judge(query, responses)  # assume it returns a DataFrame as defined earlier
        if df is not None:
            df = df.reset_index().rename(columns={"index": "response_label"})
            df['question_id'] = i
            results.append(df)

    combined_df = pd.concat(results, ignore_index=True)
    return combined_df
    
def find_mean(data: pd.DataFrame) -> pd.DataFrame:
    mean_scores = data.groupby("response_label")[["Innovativeness", "Practicality"]].mean()
    return mean_scores

In [37]:
questions_list = import_questions("Questions.json")

In [44]:
response = auto_eval(questions_list[0])

In [62]:
output = llm_judge(questions_list[0], response)

In [78]:
responses = auto_eval_batch(questions_list[:2])

In [83]:
evaluate_multiple_questions(questions_list[:2], responses)

Unnamed: 0,response_label,response_label.1,Innovativeness,Practicality,question_id
0,0,Response A,10,8,0
1,1,Response B,6,9,0
2,2,Response C,7,9,0
3,3,Response D,8,9,0
4,0,Response A,10,8,1
5,1,Response B,7,10,1
6,2,Response C,6,9,1
7,3,Response D,8,9,1


In [6]:
import matplotlib.pyplot as plt
def plot_innovativeness(data: pd.DataFrame): 
    # Plot for Innovativeness
    plt.figure(figsize=(10, 5))
    for label in data['response_label'].unique():
        subset = data[data['response_label'] == label]
        plt.plot(subset['question_id'], subset['Innovativeness'], label=label, marker='o')
        plt.title("Innovativeness Scores by Response Type")
        plt.xlabel("Question ID")
        plt.ylabel("Innovativeness Score")
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.show()

def plot_practicality(data: pd.DataFrame):
    # Plot for Practicality
    plt.figure(figsize=(10, 5))
    for label in data['response_label'].unique():
        subset = data[data['response_label'] == label]
        plt.plot(subset['question_id'], subset['Practicality'], label=label, marker='o')
    plt.title("Practicality Scores by Response Type")
    plt.xlabel("Question ID")
    plt.ylabel("Practicality Score")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

In [None]:
# modify questions
# add full_COT