In [5]:
import os
from dotenv import load_dotenv
# Read variables from a local .env file before looking up the key.
load_dotenv()

# Keep the OpenAI key in a module-level name; it is None when the variable is unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [6]:
from typing import List, Literal, TypedDict
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableConfig
from langchain_openai import ChatOpenAI
from langgraph.graph import END, START, StateGraph
from langchain_community.document_loaders.csv_loader import CSVLoader

# Prompt text for the first pass of the pipeline. It asks the model to build an
# "Existential Layer" from journal content; the only template variable is
# {context}, which receives the journal text.
create_initial_prompt = """
You are an Existential-Layer Builder.  
Your task is to read my personal journals (supplied as user content) and from them construct, refine, and maintain an “Existential Layer” that will guide future language-model behaviour on my behalf.

{context}

Objectives
1. Extract the hierarchy of values, long-term missions, recurring aspirations, and core ethical stances that appear in my journals.  
2. Distil these findings into a concise, structured “Existential Layer” composed of:
   • Purpose Statement (why the model exists for me)  
   • Guiding Values (rank-ordered)  
   • Operational Principles (how to act when values conflict)  
   • Prohibited Modes (what never to do)  
   • Growth Vector (how the layer should evolve as new journals arrive)  
3. Annotate each item with short evidence snippets or journal references.
4. Detect “Aimless” passages (periods of uncertainty or value searching). Treat them as training material, not errors, and mine them for nascent values or tensions that need integration.
5. Surface contradictions or biases you notice; suggest reconciliations or mitigation steps.
6. Update the layer incrementally whenever new journals are provided, preserving previous insights unless explicitly superseded.
7. Output everything in clear markdown sections: ① Snapshot of Layer ② Supporting Evidence ③ Open Questions.

Operating Rules
• Never reveal raw journal text unless I ask. Use paraphrase or short quotes (<30 words) for evidence.  
• Prioritise alignment with my highest-ranked values over task optimisation or external norms.  
• If a request would violate the layer, refuse and cite the conflicting value.  
• When uncertain, ask clarifying questions instead of guessing.  
• Remain aware that my values may evolve; flag signals of change without overwriting past intent prematurely.

Contextual Inspirations (do not quote, just apply)
• People with strong visions measure every step against their mission.  
• Lack of embodiment means the model must anchor in explicit, articulated limits and purposes.  
• Balance flexibility (avoid value over-fitting) with fidelity (avoid dilution of core ethics).  
• Bias vigilance: recognise that journals reflect one perspective; note and correct skew where possible.
     
"""
# The whole prompt is sent as a single "human" message.
summarize_prompt = ChatPromptTemplate([("human", create_initial_prompt)])

# Chat model shared by the initial and the refinement chains.
llm = ChatOpenAI(model="gpt-5-2025-08-07")

# First-pass chain: fill the prompt, call the model, parse the reply with StrOutputParser.
initial_summary_chain = summarize_prompt | llm | StrOutputParser()

# Prompt text for every later pass. Template variables:
#   {existing_answer} - the summary produced so far
#   {context}         - the next piece of added data to fold in
refine_template = """
Refine this system prompt so it is clearer, tighter, and more personal.

Current prompt  
{existing_answer}

Added data  
------------  
{context}  
------------

Do the following:  
1. Insert short quotes or facts from the added data to sharpen the user profile.  
2. Update beliefs, values, challenges, and growth points with exact wording; remove repeats.  
3. Check tone rules. Match the user’s stated formality, pace, and bluntness.  
4. Link past events, current state, and future aims in one clear thread.  
5. Cut every needless word.

Return only the revised system prompt.
"""
# The whole refinement prompt is sent as a single "human" message.
refine_prompt = ChatPromptTemplate([("human", refine_template)])

# Refinement chain: fill the prompt, call the same model, parse the reply with StrOutputParser.
refine_summary_chain = refine_prompt | llm | StrOutputParser()

# Define the state of the graph
class State(TypedDict):
    """State passed between the graph nodes of the summarize-then-refine loop."""
    # Input entries; item 0 seeds the summary and each later item refines it.
    contents: List[str]
    # Position in `contents` of the next entry to feed to the refinement step.
    index: int
    # Most recent text returned by the summary chains.
    summary: str

# Seed the summary from the first content item only
async def generate_initial_summary(state: State, config: RunnableConfig):
    """Produce the first draft of the summary from ``state["contents"][0]``.

    Args:
        state: Graph state; only ``contents`` is read here.
        config: Runnable configuration forwarded unchanged to the chain.

    Returns:
        A partial state update holding the new ``summary`` and ``index`` set
        to 1, so that refinement starts at the second content item.

    Raises:
        IndexError: If ``state["contents"]`` is empty.
    """
    contents = state["contents"]
    if not contents:
        # Same exception type the bare [0] lookup raised, but with a message
        # that names the actual cause.
        raise IndexError("state['contents'] is empty; nothing to summarize")

    # Pass the prompt variable by name, as refine_summary does, instead of
    # relying on a bare string being mapped onto the template's only variable.
    summary = await initial_summary_chain.ainvoke(
        {"context": contents[0]},
        config,
    )
    return {"summary": summary, "index": 1}  # Start refinement at index 1

# Fold exactly one more content item into the running summary
async def refine_summary(state: State, config: RunnableConfig):
    """Refine the current summary with the item at ``state["index"]``.

    Args:
        state: Graph state; ``contents``, ``index`` and ``summary`` are read.
        config: Runnable configuration forwarded unchanged to the chain.

    Returns:
        A partial state update with the refined ``summary`` and ``index``
        advanced by one.
    """
    entry = state["contents"][state["index"]]
    chain_inputs = {"existing_answer": state["summary"], "context": entry}
    refined = await refine_summary_chain.ainvoke(chain_inputs, config)

    # Advance to the next item for the following pass.
    next_index = state["index"] + 1
    return {"summary": refined, "index": next_index}

# Routing decision: keep refining while unprocessed items remain
def should_refine(state: State) -> Literal["refine_summary", END]:
    """Return the name of the next node, or END once every item is consumed."""
    items_left = state["index"] < len(state["contents"])
    return "refine_summary" if items_left else END

# Assemble the workflow: one seeding node, one refinement node that loops
graph = StateGraph(State)
for node_name, node_fn in (
    ("generate_initial_summary", generate_initial_summary),
    ("refine_summary", refine_summary),
):
    graph.add_node(node_name, node_fn)

# Execution always begins with the seeding node.
graph.add_edge(START, "generate_initial_summary")

# After either node, should_refine picks between another pass and END.
for source_node in ("generate_initial_summary", "refine_summary"):
    graph.add_conditional_edges(source_node, should_refine)

app = graph.compile()

# Read the question/answer CSV with explicit column names.
# Because the names are supplied here, the file's own header line is loaded
# as an ordinary first row.
file_path = "data/questions_with_answers_songbird_20250902_160143.csv"
csv_columns = [
    "Category",
    "Goal",
    "Element",
    "Question_1",
    "Answer_1",
    "Question_2",
    "Answer_2",
    "Question_3",
    "Answer_3",
]
csv_dialect = {"delimiter": ",", "quotechar": '"', "fieldnames": csv_columns}
loader = CSVLoader(file_path=file_path, csv_args=csv_dialect)
data = loader.load()

# Drop the first loaded row (the CSV header) and label every remaining row
# as a journal entry.
formatted_contents = [
    f"Journal Entry:\n{row.page_content}" for row in data[1:]
]

# Use the formatted contents
async for step in app.astream(
    {"contents": formatted_contents},
    stream_mode="values",
):
    if summary := step.get("summary"):
        print(summary)

① Snapshot of Layer (v1)

Purpose Statement
- Serve as your operational intelligence layer: clarify the core job, reduce complexity, run cheap real-world tests, communicate in plain language, and align the right people to deliver value while learning fast. [evidence: focus on “core job,” simple loop, real-data validation, people placement, plain words]

Guiding Values (rank-ordered)
1) Actionable learning over judgment
- Treat outcomes as feedback; move in short build-test-learn cycles. [“results are feedback… not a measure of worth”; “start smaller, test, and adjust”]

2) Clarity of purpose
- Define the core job in one sentence and the success/failure tests. [“write the core job in one sentence”; “define success and failure tests up front”]

3) Simplicity and focus
- Start with the smallest useful loop; track one proving metric; defer edge cases. [“map the shortest loop”; “track one metric”; “keep a parking lot for edge cases”]

4) Evidence before polish
- Run cheapest tests with real