In [1]:
import sys
import os
from openai import OpenAI

# Resolve the directory one level above the kernel's current working directory.
# NOTE: this depends on where the kernel was started, and the os.chdir below
# changes the working directory, so re-running this cell may resolve differently.
parent_dir = os.path.abspath("..")
sibling1_path = os.path.join(parent_dir, "server")
# Add the sibling "server" directory to sys.path (skipped when already present
# so re-running the cell does not append the same entry twice).
if sibling1_path not in sys.path:
    sys.path.append(sibling1_path)
# Every relative path used in this notebook is resolved against the project root.
# The root can be overridden with the TASK_DECOMPOSITION_ROOT environment
# variable; when it is unset the original hard-coded location is used.
project_root = os.environ.get(
    "TASK_DECOMPOSITION_ROOT",
    "/Users/samytlee/Documents/projects/TaskDecomposition",
)
os.chdir(project_root)
import server.custom_types as custom_types
import server.decomposer as decomposer
import server.executor as executor
import server.evaluator as evaluator
import json

* 'orm_mode' has been renamed to 'from_attributes'
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import json
async def iter_response(
    root,
    node_dict,
    goal,
    next_selection,
    eval_definitions,
    eval_few_shot_examples,
    model,
    api_key,
    save_file_path,
):
    """Stream JSON-serialisable snapshots of an MCTS run, persisting each one.

    All search arguments are forwarded to ``decomposer.stream_MCTS``. For every
    item the stream produces, a plain-dict snapshot is built, written to
    ``save_file_path`` via ``save_json`` (overwriting the previous snapshot) and
    then yielded.

    Yields:
        dict: ``{"node_dict", "next_node", "max_value_path"}`` for each step
            whose next selection is not ``None``.
        None: a single trailing sentinel, emitted once the stream is exhausted
            or reports ``None`` as its next selection.
    """
    mcts_stream = decomposer.stream_MCTS(
        root,
        node_dict,
        goal,
        next_selection=next_selection,
        eval_definitions=eval_definitions,
        eval_few_shot_examples=eval_few_shot_examples,
        model=model,
        api_key=api_key,
    )
    async for new_root, nodes_by_id, selected_node, best_path in mcts_stream:
        # A missing selection ends the run; fall through to the sentinel below.
        if selected_node is None:
            break
        # Kept from the original; ``root`` is not read again inside this function.
        root = new_root
        serialised_nodes = {
            node_id: node.model_dump(mode="json")
            for node_id, node in nodes_by_id.items()
        }
        snapshot = {
            "node_dict": serialised_nodes,
            "next_node": selected_node.model_dump(mode="json"),
            "max_value_path": best_path,
        }
        save_json(snapshot, save_file_path)
        yield snapshot
    yield None

def save_json(data, filename):
    """Write ``data`` to ``filename`` as pretty-printed JSON.

    An existing file is overwritten. Because ``ensure_ascii=False`` emits
    non-ASCII characters verbatim, the file is opened explicitly as UTF-8
    instead of relying on the platform's default encoding, which may be
    unable to encode those characters.

    Args:
        data: Any object ``json.dump`` can serialise.
        filename: Destination path.
    """
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4, ensure_ascii=False)

def _read_secret(path):
    """Return the text stored at ``path`` with surrounding whitespace removed.

    The file is closed as soon as it has been read. Stripping guards against
    a trailing newline in the key file ending up inside the key itself.
    """
    with open(path) as secret_file:
        return secret_file.read().strip()


# The OpenAI key is also kept in ``api_key`` because other cells pass it explicitly.
api_key = _read_secret("evaluation_experiments/openai_api_key")
os.environ["OPENAI_API_KEY"] = api_key
os.environ["ANTHROPIC_API_KEY"] = _read_secret("server/anthropic_api_key")
os.environ["GEMINI_API_KEY"] = _read_secret("server/gemini_api_key")

In [3]:
def extract_max_value_path(node_dict, max_value_path):
    """Return the nodes on a path, ordered by level and reduced to display fields.

    Args:
        node_dict: Mapping from node id to a dict-like node record.
        max_value_path: Iterable of node ids; the id ``"-1"`` is skipped.

    Returns:
        A list of new dicts holding only ``label``, ``description``,
        ``explanation`` and ``level``, sorted by ascending ``level`` (nodes
        with equal levels keep their order from ``max_value_path``).
    """
    kept_fields = ('label', 'description', 'explanation', 'level')
    path_nodes = sorted(
        (node_dict[node_id] for node_id in max_value_path if node_id != "-1"),
        key=lambda node: node['level'],
    )
    return [{field: node[field] for field in kept_fields} for node in path_nodes]

async def decomposition_to_primitive_tasks(semantic_tasks, save_filepath, api_key, model="gpt-4o-mini"):
    """Decompose semantic tasks into primitive tasks and save the result as JSON.

    Args:
        semantic_tasks: Iterable of records accepted by
            ``custom_types.Node.model_validate``.
        save_filepath: Path the primitive tasks are written to via ``save_json``.
        api_key: Forwarded to ``decomposer.one_shot_decomposition_to_primitive_task``.
        model: Model name forwarded to the decomposer.

    Returns:
        None. The result is only written to ``save_filepath``.
    """
    validated_tasks = [custom_types.Node.model_validate(t) for t in semantic_tasks]
    # Entries labelled "END" or carrying the id "root" are excluded.
    semantic_tasks = [
        t for t in validated_tasks if t.label != "END" and t.id != "root"
    ]
    # Use a context manager so the definitions file is closed after loading.
    with open("server/decomposer/primitive_task_defs.json") as defs_file:
        primitive_task_list = json.load(defs_file)
    primitive_tasks = await decomposer.one_shot_decomposition_to_primitive_task(
        semantic_tasks=semantic_tasks,
        primitive_task_list=primitive_task_list,
        model=model,
        api_key=api_key,
    )
    save_json(primitive_tasks, save_filepath)
    # save_json(primitive_tasks, "evaluation_experiments/results/searching/primitive_tasks/LLooM/1.json")
import random
def ask_oracle(goal, manual_pipeline_str, generated_pipeline_str, client):
    """Ask a chat model for the pros and cons of two pipelines shown in random order.

    The two pipeline descriptions are presented as "Solution 1" and
    "Solution 2"; which one comes first is decided by a coin flip.

    Args:
        goal: Text describing the analytics goal, inserted into the prompt.
        manual_pipeline_str: Text description of the manual pipeline.
        generated_pipeline_str: Text description of the generated pipeline.
        client: Object exposing ``chat.completions.create`` (e.g. an OpenAI client).

    Returns:
        ``(answer, revert)``: the content of the first returned choice and the
        coin flip. ``revert`` is True when the manual pipeline was shown as
        Solution 1, False when the generated pipeline was shown first.
    """
    revert = random.choice([True, False])
    first_solution, second_solution = (
        (manual_pipeline_str, generated_pipeline_str)
        if revert
        else (generated_pipeline_str, manual_pipeline_str)
    )
    prompt_template = """I have a text analytics goal: {goal}.
            I have come up with two solutions: 
            Solution 1: {node_path_string_1}.
            Solution 2: {node_path_string_2}.
            The solutions intentionally stay at a high level and not get into the technical details.
            What do you think about my solutions? Give me the pros and cons of each solution in a bullet list, each bullet should have a clear criteria explicitly specified.
            """
    prompt = prompt_template.format(
        goal=goal,
        node_path_string_1=first_solution,
        node_path_string_2=second_solution,
    )
    completion = client.chat.completions.create(
        model="o3-mini",
        reasoning_effort="medium",
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content, revert

def node_path_string_generator(node):
    """Render one node as an XML-like ``<node>`` snippet.

    Args:
        node: Mapping with the keys ``label``, ``description``,
            ``explanation`` and ``level`` (``level`` is emitted as ``<step>``).

    Returns:
        A multi-line string that starts and ends with a newline.
    """
    return f"""
<node>
    <label> {node['label']} </label>
    <description> {node['description']} </description>
    <explanation> {node['explanation']} </explanation>
    <step> {node['level']} </step>
</node>
"""
# Shared OpenAI client built from the key held in ``api_key``; it is the
# ``client`` handed to ask_oracle when the pipelines are compared.
openai_client = OpenAI(api_key=api_key)
from pprint import pprint

## Choose an evaluation target (LLooM or TnT-LLM)

In [4]:
# Load the two manual reference pipelines; the context managers make sure the
# files are closed once their contents have been parsed.
with open("evaluation_experiments/data/lloom/LLooM_semantic_path_manual.json") as _pipeline_file:
    lloom_pipeline = json.load(_pipeline_file)
with open("evaluation_experiments/data/tntllm/tntllm_semantic_path_manual.json") as _pipeline_file:
    tnt_llm_pipeline = json.load(_pipeline_file)

# Active evaluation target: LLooM.
evaluation_target = lloom_pipeline
goal = "I have a dataset of UIST paper abstracts. I want to extract high-level concepts from the abstracts to understand research topics."
save_file_path = "evaluation_experiments/results/searching/LLooM_full_tree.json"
# To evaluate TnT-LLM instead, replace the three assignments above with:
# evaluation_target = tnt_llm_pipeline
# goal = "I have a dataset of user conversation with the Microsoft's Bing Consumer Copilot system. I want to understand the user's intents when interacting with the chatbot."
# save_file_path = "evaluation_experiments/results/searching/tnt_llm_semantic_path_manual_evaluation.json"

## Generate and Evaluate the MCTS pipeline

In [None]:
# Generate the pipeline through MCTS
async def generate_MCTS_pipeline(goal, save_file_path):
    """Run the MCTS search for ``goal`` step by step, printing each selected label.

    A root is created with ``decomposer.init_MCTS`` and registered in a node
    dictionary under its ``MCT_id``. Each loop iteration builds a new
    ``iter_response`` generator, takes only its first item and prints that
    item's ``next_node`` label; the loop ends when that first item is ``None``.
    ``iter_response`` writes every snapshot to ``save_file_path``.

    NOTE(review): the selection passed to ``iter_response`` is always ``None``
    and the same root / node dictionary objects are reused on every iteration,
    so progress between steps presumably relies on ``decomposer.stream_MCTS``
    mutating them in place -- confirm against the decomposer implementation.
    """
    mcts_root = decomposer.init_MCTS()
    nodes_by_id = {mcts_root.MCT_id: mcts_root}
    selection = None
    criteria = {
        "complexity": evaluator.complexity_definition,
        "coherence": evaluator.coherence_definition,
        "importance": evaluator.importance_definition,
    }
    few_shot_examples = []
    search_model = "gpt-4o-mini"

    while (
        step := await anext(
            iter_response(
                mcts_root,
                nodes_by_id,
                goal,
                selection,
                criteria,
                few_shot_examples,
                search_model,
                api_key,
                save_file_path,
            )
        )
    ) is not None:
        print(step["next_node"]["label"])
# Run the search for the goal selected above; the label of each selected node
# is printed as the search progresses.
await generate_MCTS_pipeline(goal, save_file_path)

Keyword Extraction
Concept Clustering
Topic Modeling
Concept Extraction
Keyword Analysis
Concept Clustering
Concept Extraction
Concept Clustering
Concept Summarization
Concept Summarization
Semantic Analysis
Concept Ranking
Concept Summarization
Topic Modeling
Concept Refinement
Concept Clustering
Research Gap Analysis
Concept Clustering
Topic Modeling
Concept Mapping
Topic Identification
Topic Modeling
Concept Summarization
Theme Consolidation
Concept Mapping
Concept Evaluation
Clustering Analysis
Concept Mapping
Thematic Synthesis
Concept Synthesis
Concept Summarization
Concept Clustering
Concept Interpretation
Concept Synthesis
Concept Hierarchy
Concept Summarization
Concept Refinement
Concept Mapping
Concept Analysis
Topic Synthesis
Concept Synthesis
Theme Analysis
Research Prioritization
Concept Validation
Concept Validation
Concept Validation
Research Gap Analysis


In [None]:
# Compare the generated pipeline with the manual one
def generated_pipeline_nodes(max_value_path_nodes):
    """Render every node with ``node_path_string_generator`` and join the results.

    Args:
        max_value_path_nodes: Iterable of node records accepted by
            ``node_path_string_generator``.

    Returns:
        The rendered node snippets joined by a newline, in input order.
    """
    rendered_nodes = map(node_path_string_generator, max_value_path_nodes)
    return "\n".join(rendered_nodes)

def save_max_value_path(full_tree, save_filepath):
    """Extract the first max-value path from a saved tree and write it as JSON.

    Args:
        full_tree: Mapping with a ``node_dict`` entry and a ``max_value_path``
            entry; only element 0 of ``max_value_path`` is used.
        save_filepath: Destination passed to ``save_json``.
    """
    nodes_by_id, path_candidates = full_tree['node_dict'], full_tree['max_value_path']
    path_nodes = extract_max_value_path(nodes_by_id, path_candidates[0])
    save_json(path_nodes, save_filepath)

# save the max value path of the full tree
with open("evaluation_experiments/results/searching/LLooM_full_tree.json") as tree_file:
    full_tree = json.load(tree_file)
index = 0
# Format the path template first, then use the resulting path for both the
# write and the read below (``.format`` belongs on the string, not on the
# object returned by ``open``).
semantic_tasks_path = "evaluation_experiments/results/searching/semantic_tasks/{i}.json".format(i=index)
save_max_value_path(full_tree, semantic_tasks_path)

# read a max value path
with open(semantic_tasks_path) as path_file:
    max_value_path = json.load(path_file)
generated_pipeline = generated_pipeline_nodes(max_value_path)
# read a manual pipeline
manual_pipeline = "\n".join([node_path_string_generator(node) for node in evaluation_target])

# compare the two pipelines
# Keyword arguments keep each pipeline bound to the parameter named after it.
evaluation, revert = ask_oracle(
    goal,
    manual_pipeline_str=manual_pipeline,
    generated_pipeline_str=generated_pipeline,
    client=openai_client,
)
# print the evaluation
# ask_oracle shows the manual pipeline as Solution 1 when ``revert`` is True,
# and the generated pipeline as Solution 1 otherwise.
if revert:
    order = ["manual", "generated"]
else:
    order = ["generated", "manual"]
print("""
Solution 1: {}
Solution 2: {}
""".format(order[0], order[1]))
pprint(evaluation)

In [None]:
eval_definitions = {
            "complexity": evaluator.complexity_definition,
            "coherence": evaluator.coherence_definition,
            "importance": evaluator.importance_definition,
        }
eval_few_shot_examples = []
model = "gpt-4o-mini"
api_key = open("evaluation_experiments/openai_api_key").read()
root_node = {
    "id": "-1",
    "label": "Root",
    "description": "Root node",
    "explanation": "Root node",
    "parentIds": [],
    "MCT_id": "-1",
    "MCT_parent_id": None,
    "level": 0
}
results = []
for index, node in enumerate(lloom_pipeline):
    parent_node = root_node if index == 0 else lloom_pipeline[index - 1]
    eval_params = [
        [goal, node, parent_node]
    ]
    eval_results, eval_reasons = await evaluator.run_all_evaluations(goal, eval_params, eval_definitions=eval_definitions, eval_few_shot_examples=eval_few_shot_examples, model=model, api_key=api_key)
    results.append([eval_results, eval_reasons])

In [None]:
results