In [None]:
from dotenv import load_dotenv

load_dotenv("../../.env")

import pickle
import dspy

from agentic_system.litl_data.litl_utils import (
    summarizer_lm,
    summarizer_module,
    reflection_lm,
    reflection_module,
)

In [None]:
MODEL = "gemini"

# Read the pickled results of the earlier run and keep only its "agent_runs" entry.
# NOTE(review): unpickling executes arbitrary code embedded in the file, so this
# must only ever be pointed at a locally produced, trusted pickle.
with open("0.no_litl_runs.pkl", "rb") as f:
    before_litl_results = pickle.load(f)

before_litl_agent_runs = before_litl_results["agent_runs"]

In [None]:
NUM_THREADS = 20

# Build one dspy.Example per recorded agent run. Each entry of
# before_litl_agent_runs is indexed positionally: element 0 is a mapping that
# holds the compound name, element 1 is the run object whose trajectory,
# reasoning, predicted efficacy and confidence are passed to the summarizer.
# All five fields are declared as inputs.
summarize_examples = [
    dspy.Example(
        compound_name=result[0]["compound_name"],
        trajectory=result[1].trajectory,
        reasoning=result[1].reasoning,
        predicted_efficacy=result[1].predicted_efficacy,
        confidence=result[1].confidence,
    ).with_inputs(
        "compound_name", "trajectory", "reasoning", "predicted_efficacy", "confidence"
    )
    for result in before_litl_agent_runs
]

# Summarize every run concurrently, with summarizer_lm as the active LM
# for the duration of the batch call.
with dspy.context(lm=summarizer_lm):
    summary_results = summarizer_module.batch(
        summarize_examples, num_threads=NUM_THREADS
    )

# Prepare examples for reflection.
#
# The batch call is expected to return one result per input example, with None
# left in the slot of any example whose LM call failed (see the dspy Parallel /
# Module.batch documentation). Check that here so a failed summary is reported
# with its index instead of surfacing as an opaque AttributeError on `.summary`,
# and so no reflection LM calls are spent on an incomplete set of summaries.
if len(summary_results) != len(before_litl_agent_runs):
    raise RuntimeError(
        f"Expected {len(before_litl_agent_runs)} summaries but got "
        f"{len(summary_results)}; summaries are no longer aligned with the agent runs."
    )

failed_summary_indices = [
    idx for idx, summary_result in enumerate(summary_results) if summary_result is None
]
if failed_summary_indices:
    raise RuntimeError(
        f"Summarization failed for {len(failed_summary_indices)} run(s) at indices "
        f"{failed_summary_indices}; re-run the summarization step before reflecting."
    )

# Pair each run with its summary. The reflection module receives the summary
# text together with the run's "cf_efficacy" value rounded to two decimals.
reflection_examples = []
for result, summary_result in zip(before_litl_agent_runs, summary_results):
    efficacy = round(result[0]["cf_efficacy"], 2)
    example = dspy.Example(
        summarized_run=summary_result.summary,
        real_efficacy=efficacy,
    )
    example = example.with_inputs("summarized_run", "real_efficacy")
    reflection_examples.append(example)

# Parallel reflection, with reflection_lm as the active LM for the batch call.
with dspy.context(lm=reflection_lm):
    reflection_results = reflection_module.batch(
        reflection_examples, num_threads=NUM_THREADS
    )

# Generate docs: one markdown-style document per agent run, combining the run
# summary with the reflection on its accuracy.
#
# The batch calls are expected to return one result per input, with None in the
# slot of any example whose LM call failed (see the dspy Parallel / Module.batch
# documentation). Validate before formatting so that a failed reflection is
# reported with its index rather than as an AttributeError halfway through.
expected_doc_count = len(before_litl_agent_runs)
if len(summary_results) != expected_doc_count or len(reflection_results) != expected_doc_count:
    raise RuntimeError(
        f"Expected {expected_doc_count} summaries and reflections but got "
        f"{len(summary_results)} summaries and {len(reflection_results)} reflections."
    )

failed_reflection_indices = [
    idx
    for idx, reflection_result in enumerate(reflection_results)
    if reflection_result is None
]
if failed_reflection_indices:
    raise RuntimeError(
        f"Reflection failed for {len(failed_reflection_indices)} run(s) at indices "
        f"{failed_reflection_indices}; re-run the reflection step before building docs."
    )

docs = []
for summary_result, reflection_result in zip(summary_results, reflection_results):
    doc = f"""# Agent Run Summary
{summary_result.summary}

# Accuracy Reflection
{reflection_result.reflection}"""
    docs.append(doc)

# Display the number of generated docs as the cell output.
len(docs)

Processed 25 / 210 examples:  11%|█▏        | 24/210 [00:00<00:01, 108.47it/s]



Processed 210 / 210 examples: 100%|██████████| 210/210 [00:42<00:00,  4.99it/s]
Processed 210 / 210 examples: 100%|██████████| 210/210 [03:37<00:00,  1.04s/it]


210

In [None]:
# Persist the generated docs list as a pickle file so it can be reloaded
# later without repeating the summarization and reflection calls.
with open("1.no_litl_reflections.pkl", "wb") as f:
    pickle.dump(docs, f)