In [21]:
# Evaluation set: each entry pairs a question ("user_input") with its
# ground-truth answer ("reference").
#
# Long texts are assembled from adjacent string literals instead of indented
# triple-quoted blocks, so no leading/trailing newlines or source-code
# indentation leak into the question or into the reference answer.
# Paragraphs of the reference are separated by a single "\n".
qa_pairs = [
    {
        "user_input": "What is the forward problem?",
        "reference": "The forward problem can be thought of as: given x what is y?",
    },
    {
        "user_input": (
            "How are over-determined and under-determined systems both addressed "
            "using optimisation techniques, and how do their respective solution "
            "strategies differ?"
        ),
        "reference": (
            "Over-determined systems, where there are more equations than unknowns "
            "(m>n), are typically addressed using the Least Squares method. "
            "This involves finding the solution that minimizes the sum of the "
            "squared differences between the observed and predicted "
            "values—effectively projecting the observation vector onto the column "
            "space of the matrix."
            "\n"
            "Under-determined systems, where there are fewer equations than "
            "unknowns (m<n), require finding a solution that satisfies the "
            "equations but also minimizes the norm of the solution vector—this is "
            "called the Minimum Norm Solution. This choice imposes an additional "
            "optimisation criterion because the solution space is not unique."
            "\n"
            "Both scenarios transform the inversion problem into an optimisation "
            "one: least squares minimizes residuals, while the minimum norm "
            'approach selects the "smallest" solution from an infinite set.'
        ),
    },
]

In [22]:
import sys
import os
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall,
)
from ragas import evaluate, EvaluationDataset
from pipeline.orchestrator_agent_wise_feedback import pipeline

# Append the absolute path of "tests" (resolved against the current working
# directory) to the module search path.
# NOTE(review): this runs after the `pipeline` import above, so it cannot
# affect that import; presumably it is meant for imports in later cells — confirm.
sys.path.append(os.path.abspath("tests"))

In [23]:
# Metrics handed to ragas `evaluate` for every collected sample.
metrics = [
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall,
]

# Run the pipeline once per QA pair, in order, and keep one record per question.
# The loop variables are deliberately kept at notebook (module) level.
results = []
for qa_pair in qa_pairs:
    question, answer = qa_pair["user_input"], qa_pair["reference"]
    result = pipeline(question)

    # `.get` leaves the field as None when the pipeline result has no such key
    # (assuming `result` is dict-like — TODO confirm against the pipeline).
    ragas_result = {
        "user_input": question,
        "retrieved_contexts": result.get("contexts"),
        "response": result.get("answers"),
        "reference": answer,
    }
    results.append(ragas_result)

# Wrap the collected records in a dataset and score them with the metrics above.
evaluation_dataset = EvaluationDataset.from_list(results)
ragas_results = evaluate(evaluation_dataset, metrics=metrics)

INFO:pipeline.orchestrator_agent_wise_feedback:=== Pipeline Iteration 1 ===
INFO:agents.decomposition_agent:User query detected, processing...
INFO:agents.decomposition_agent:Rewriting query...
INFO:agents.decomposition_agent:Choosing the best rewrite
INFO:agents.decomposition_agent:Decomposing into sub-tasks...
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"forward problem in physics engineering"}
INFO:agents.brain_agent:Executing tool: vector_search
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"forward problem example"}
INFO:agents.brain_agent:Executing tool: vector_search
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"solving the forward problem steps in an example"}
INFO:agents.brain_agent:Executing tool: vector_search
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"difference between forward problem and inverse problem"}
INFO:a

Time taken for DecisionAgent: 1.41s


INFO:pipeline.orchestrator_agent_wise_feedback:Iteration 1 scores:
INFO:pipeline.orchestrator_agent_wise_feedback:Decomposition: 8.00
INFO:pipeline.orchestrator_agent_wise_feedback:Retrieval avg: 4.00
INFO:pipeline.orchestrator_agent_wise_feedback:Generation: 6.00
INFO:pipeline.orchestrator_agent_wise_feedback:Total: 6.00
INFO:pipeline.orchestrator_agent_wise_feedback:=== Pipeline Iteration 2 ===
INFO:agents.decomposition_agent:User query detected, processing...
INFO:agents.decomposition_agent:Rewriting query...
INFO:agents.decomposition_agent:Choosing the best rewrite
INFO:agents.decomposition_agent:Decomposing into sub-tasks...
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"forward problem in structural mechanics"}
INFO:agents.brain_agent:Executing tool: vector_search
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"forward problem example calculating displacements under a given load"}
INFO:agents.brain_agent

Time taken for DecisionAgent: 9.95s


INFO:pipeline.orchestrator_agent_wise_feedback:Iteration 2 scores:
INFO:pipeline.orchestrator_agent_wise_feedback:Decomposition: 7.00
INFO:pipeline.orchestrator_agent_wise_feedback:Retrieval avg: 5.80
INFO:pipeline.orchestrator_agent_wise_feedback:Generation: 7.00
INFO:pipeline.orchestrator_agent_wise_feedback:Total: 6.60
INFO:pipeline.orchestrator_agent_wise_feedback:=== Pipeline Iteration 3 ===
INFO:agents.decomposition_agent:User query detected, processing...
INFO:agents.decomposition_agent:Rewriting query...
INFO:agents.decomposition_agent:Choosing the best rewrite
INFO:agents.decomposition_agent:Decomposing into sub-tasks...
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"forward problem in structural mechanics"}
INFO:agents.brain_agent:Executing tool: vector_search
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"unique solutions concept implications"}
INFO:agents.brain_agent:Executing tool: vector_search


Time taken for DecisionAgent: 5.05s


INFO:pipeline.orchestrator_agent_wise_feedback:Iteration 3 scores:
INFO:pipeline.orchestrator_agent_wise_feedback:Decomposition: 8.00
INFO:pipeline.orchestrator_agent_wise_feedback:Retrieval avg: 3.60
INFO:pipeline.orchestrator_agent_wise_feedback:Generation: 7.00
INFO:pipeline.orchestrator_agent_wise_feedback:Total: 6.20
INFO:pipeline.orchestrator_agent_wise_feedback:=== Pipeline Iteration 1 ===
INFO:agents.decomposition_agent:User query detected, processing...
INFO:agents.decomposition_agent:Rewriting query...
INFO:agents.decomposition_agent:Choosing the best rewrite
INFO:agents.decomposition_agent:Decomposing into sub-tasks...
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"least squares solution over-determined systems"}
INFO:agents.brain_agent:Executing tool: vector_search
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"existence and uniqueness of solutions for under-determined systems constraints"}
INFO:a

Time taken for DecisionAgent: 8.80s


INFO:pipeline.orchestrator_agent_wise_feedback:Iteration 1 scores:
INFO:pipeline.orchestrator_agent_wise_feedback:Decomposition: 8.00
INFO:pipeline.orchestrator_agent_wise_feedback:Retrieval avg: 4.33
INFO:pipeline.orchestrator_agent_wise_feedback:Generation: 7.00
INFO:pipeline.orchestrator_agent_wise_feedback:Total: 6.44
INFO:pipeline.orchestrator_agent_wise_feedback:=== Pipeline Iteration 2 ===
INFO:agents.decomposition_agent:User query detected, processing...
INFO:agents.decomposition_agent:Rewriting query...
INFO:agents.decomposition_agent:Choosing the best rewrite
INFO:agents.decomposition_agent:Decomposing into sub-tasks...
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"normal equations approach for solving under-determined systems"}
INFO:agents.brain_agent:Executing tool: vector_search
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"least-squares solution over-determined systems"}
INFO:agents.brain_agen

Time taken for DecisionAgent: 7.26s


INFO:pipeline.orchestrator_agent_wise_feedback:Iteration 2 scores:
INFO:pipeline.orchestrator_agent_wise_feedback:Decomposition: 8.00
INFO:pipeline.orchestrator_agent_wise_feedback:Retrieval avg: 6.17
INFO:pipeline.orchestrator_agent_wise_feedback:Generation: 7.00
INFO:pipeline.orchestrator_agent_wise_feedback:Total: 7.06
INFO:pipeline.orchestrator_agent_wise_feedback:=== Pipeline Iteration 3 ===
INFO:agents.decomposition_agent:User query detected, processing...
INFO:agents.decomposition_agent:Rewriting query...
INFO:agents.decomposition_agent:Choosing the best rewrite
INFO:agents.decomposition_agent:Decomposing into sub-tasks...
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"normal equations under-determined systems"}
INFO:agents.brain_agent:Executing tool: vector_search
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"least-squares solution for over-determined systems"}
INFO:agents.brain_agent:Executing tool:

Time taken for DecisionAgent: 6.09s


INFO:pipeline.orchestrator_agent_wise_feedback:Iteration 3 scores:
INFO:pipeline.orchestrator_agent_wise_feedback:Decomposition: 8.00
INFO:pipeline.orchestrator_agent_wise_feedback:Retrieval avg: 4.67
INFO:pipeline.orchestrator_agent_wise_feedback:Generation: 7.00
INFO:pipeline.orchestrator_agent_wise_feedback:Total: 6.56
Evaluating: 100%|██████████| 8/8 [00:28<00:00,  3.60s/it]


In [24]:
# Second evaluation run; records and scores are stored under the `_aw` names.
# NOTE(review): this calls the same `pipeline` object as the previous run, so
# the two score sets compare the same pipeline unless `pipeline` was rebound
# in a cell not shown here — confirm this is intended.
results_aw = []
for qa_pair in qa_pairs:
    question, answer = qa_pair["user_input"], qa_pair["reference"]
    result = pipeline(question)

    # `.get` leaves the field as None when the pipeline result has no such key
    # (assuming `result` is dict-like — TODO confirm against the pipeline).
    ragas_result = {
        "user_input": question,
        "retrieved_contexts": result.get("contexts"),
        "response": result.get("answers"),
        "reference": answer,
    }
    results_aw.append(ragas_result)

# Score the second run with the same `metrics` list defined earlier.
evaluation_dataset_aw = EvaluationDataset.from_list(results_aw)
ragas_results_aw = evaluate(evaluation_dataset_aw, metrics=metrics)

INFO:pipeline.orchestrator_agent_wise_feedback:=== Pipeline Iteration 1 ===
INFO:agents.decomposition_agent:User query detected, processing...
INFO:agents.decomposition_agent:Rewriting query...
INFO:agents.decomposition_agent:Choosing the best rewrite
INFO:agents.decomposition_agent:Decomposing into sub-tasks...
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"forward problem example heat conduction"}
INFO:agents.brain_agent:Executing tool: vector_search
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"forward problem numerical simulation Finite Element Analysis"}
INFO:agents.brain_agent:Executing tool: vector_search
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"role of discretization and approximation in solving the forward problem"}
INFO:agents.brain_agent:Executing tool: vector_search
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"m

Time taken for DecisionAgent: 1.93s


INFO:pipeline.orchestrator_agent_wise_feedback:Iteration 1 scores:
INFO:pipeline.orchestrator_agent_wise_feedback:Decomposition: 7.00
INFO:pipeline.orchestrator_agent_wise_feedback:Retrieval avg: 3.20
INFO:pipeline.orchestrator_agent_wise_feedback:Generation: 6.00
INFO:pipeline.orchestrator_agent_wise_feedback:Total: 5.40
INFO:pipeline.orchestrator_agent_wise_feedback:=== Pipeline Iteration 2 ===
INFO:agents.decomposition_agent:User query detected, processing...
INFO:agents.decomposition_agent:Rewriting query...
INFO:agents.decomposition_agent:Choosing the best rewrite
INFO:agents.decomposition_agent:Decomposing into sub-tasks...
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"forward problem definition in physics engineering finance"}
INFO:agents.brain_agent:Executing tool: vector_search
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"core steps in solving the forward problem"}
INFO:agents.brain_agent:Executin

Time taken for DecisionAgent: 1.87s


INFO:pipeline.orchestrator_agent_wise_feedback:Iteration 2 scores:
INFO:pipeline.orchestrator_agent_wise_feedback:Decomposition: 8.00
INFO:pipeline.orchestrator_agent_wise_feedback:Retrieval avg: 6.60
INFO:pipeline.orchestrator_agent_wise_feedback:Generation: 6.00
INFO:pipeline.orchestrator_agent_wise_feedback:Total: 6.87
INFO:pipeline.orchestrator_agent_wise_feedback:=== Pipeline Iteration 3 ===
INFO:agents.decomposition_agent:User query detected, processing...
INFO:agents.decomposition_agent:Rewriting query...
INFO:agents.decomposition_agent:Choosing the best rewrite
INFO:agents.decomposition_agent:Decomposing into sub-tasks...
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"forward problem definition in physics engineering biology"}
INFO:agents.brain_agent:Executing tool: vector_search
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"inputs required to solve forward problem"}
INFO:agents.brain_agent:Executing

Time taken for DecisionAgent: 2.41s


INFO:pipeline.orchestrator_agent_wise_feedback:Iteration 3 scores:
INFO:pipeline.orchestrator_agent_wise_feedback:Decomposition: 7.00
INFO:pipeline.orchestrator_agent_wise_feedback:Retrieval avg: 3.80
INFO:pipeline.orchestrator_agent_wise_feedback:Generation: 7.00
INFO:pipeline.orchestrator_agent_wise_feedback:Total: 5.93
INFO:pipeline.orchestrator_agent_wise_feedback:=== Pipeline Iteration 1 ===
INFO:agents.decomposition_agent:User query detected, processing...
INFO:agents.decomposition_agent:Rewriting query...
INFO:agents.decomposition_agent:Choosing the best rewrite
INFO:agents.decomposition_agent:Decomposing into sub-tasks...
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"methods for solving over-determined systems"}
INFO:agents.brain_agent:Executing tool: vector_search
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"properties of solutions in over-determined systems existence uniqueness error representati

Time taken for DecisionAgent: 12.50s


INFO:pipeline.orchestrator_agent_wise_feedback:Iteration 1 scores:
INFO:pipeline.orchestrator_agent_wise_feedback:Decomposition: 8.00
INFO:pipeline.orchestrator_agent_wise_feedback:Retrieval avg: 4.43
INFO:pipeline.orchestrator_agent_wise_feedback:Generation: 8.00
INFO:pipeline.orchestrator_agent_wise_feedback:Total: 6.81
INFO:pipeline.orchestrator_agent_wise_feedback:=== Pipeline Iteration 2 ===
INFO:agents.decomposition_agent:User query detected, processing...
INFO:agents.decomposition_agent:Rewriting query...
INFO:agents.decomposition_agent:Choosing the best rewrite
INFO:agents.decomposition_agent:Decomposing into sub-tasks...
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"existence and uniqueness of solutions for under-determined systems"}
INFO:agents.brain_agent:Executing tool: vector_search
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"methods to obtain solutions in under-determined systems Gaussian el

Time taken for DecisionAgent: 7.54s


INFO:pipeline.orchestrator_agent_wise_feedback:Iteration 2 scores:
INFO:pipeline.orchestrator_agent_wise_feedback:Decomposition: 8.00
INFO:pipeline.orchestrator_agent_wise_feedback:Retrieval avg: 6.17
INFO:pipeline.orchestrator_agent_wise_feedback:Generation: 7.00
INFO:pipeline.orchestrator_agent_wise_feedback:Total: 7.06
INFO:pipeline.orchestrator_agent_wise_feedback:=== Pipeline Iteration 3 ===
INFO:agents.decomposition_agent:User query detected, processing...
INFO:agents.decomposition_agent:Rewriting query...
INFO:agents.decomposition_agent:Choosing the best rewrite
INFO:agents.decomposition_agent:Decomposing into sub-tasks...
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"existence and uniqueness of solutions for under-determined systems"}
INFO:agents.brain_agent:Executing tool: vector_search
INFO:agents.brain_agent:Tool decision: use tool 'vector_search', arguments: {"query":"iterative methods for over-determined systems gradient descent Leve

Time taken for DecisionAgent: 5.67s


INFO:pipeline.orchestrator_agent_wise_feedback:Iteration 3 scores:
INFO:pipeline.orchestrator_agent_wise_feedback:Decomposition: 8.00
INFO:pipeline.orchestrator_agent_wise_feedback:Retrieval avg: 5.33
INFO:pipeline.orchestrator_agent_wise_feedback:Generation: 6.00
INFO:pipeline.orchestrator_agent_wise_feedback:Total: 6.44
Evaluating: 100%|██████████| 8/8 [00:19<00:00,  2.40s/it]


In [25]:
# Show both score summaries, one per line: first run, then the `_aw` run.
print(ragas_results, ragas_results_aw, sep="\n")

{'faithfulness': 0.9643, 'answer_relevancy': 0.9230, 'context_precision': 1.0000, 'context_recall': 1.0000}
{'faithfulness': 1.0000, 'answer_relevancy': 0.9402, 'context_precision': 0.9167, 'context_recall': 1.0000}


In [27]:
# Dump the raw records of the `_aw` run (questions, retrieved contexts,
# responses, references) for manual inspection; a list prints as its repr.
print(repr(results_aw))

[{'user_input': 'What is the forward problem?', 'retrieved_contexts': ['[{\'id\': 8, \'score\': 0.727762, \'payload\': {\'original_id\': \'uploads\\\\L1_Introduction.ipynb_cell_8_md\', \'type\': \'markdown\', \'content\': \'- The <a class="definition" href="#definitions" id="forwardproblem">forward problem</a> can be thought of as: given $x$ what is $y$?\', \'source_document\': \'uploads\\\\L1_Introduction.ipynb\', \'cell_number\': 8}}]', "I need more context or specifics about the steps you're referring to in order to provide an explanation of the output. Could you please specify the steps or the process you have in mind?", '[{\'id\': 8, \'score\': 0.7539221, \'payload\': {\'original_id\': \'uploads\\\\L1_Introduction.ipynb_cell_8_md\', \'type\': \'markdown\', \'content\': \'- The <a class="definition" href="#definitions" id="forwardproblem">forward problem</a> can be thought of as: given $x$ what is $y$?\', \'source_document\': \'uploads\\\\L1_Introduction.ipynb\', \'cell_number\': 8