In [1]:
import os
import re
import uuid
import nest_asyncio
import operator
import functools
from pathlib import Path
import tiktoken
from typing import TypedDict, Annotated, List, Optional, Sequence, Any, Callable, Union, Dict
from operator import itemgetter
import getpass

# LangChain Imports
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain_openai.chat_models import ChatOpenAI
from langchain_community.vectorstores import Qdrant
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain.agents import AgentExecutor, create_openai_functions_agent
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, ToolMessage
from langchain_core.runnables import Runnable, RunnableLambda, RunnablePassthrough
from langchain_core.tools import BaseTool, tool
from langchain.schema.output_parser import StrOutputParser
from pydantic import BaseModel, Field

# LangGraph Imports
from langgraph.graph import END, StateGraph
from langgraph.graph.message import add_messages
from langgraph.prebuilt import ToolNode

# ArXiv and Document Processing
import arxiv
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.documents import Document

# Apply nest_asyncio to allow nested event loops
nest_asyncio.apply()


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  from langgraph.pregel import Channel, Pregel


In [2]:
os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")
os.environ["TAVILY_API_KEY"] = getpass.getpass("TAVILY_API_KEY")

In [3]:
# ==================== UTILITY FUNCTIONS ====================

def tiktoken_len(text):
    """Calculate the token length using tiktoken."""
    encoding = tiktoken.encoding_for_model("gpt-4o-mini")
    return len(encoding.encode(text))

def extract_arxiv_id(url_or_id):
    """Extracts the core ArXiv ID without the version suffix from a URL or ID string."""
    if not url_or_id:
        return None
    match = re.search(r'(\d+\.\d+)', url_or_id)
    if match:
        return match.group(1)
    return None

def create_random_subdirectory():
    """Create a random subdirectory for storing files."""
    random_id = str(uuid.uuid4())[:8]  # Use first 8 characters of a UUID
    subdirectory_path = os.path.join('./content/data', random_id)
    os.makedirs(subdirectory_path, exist_ok=True)
    return subdirectory_path

# Create the working directory
os.makedirs('./content/data', exist_ok=True)
WORKING_DIRECTORY = Path(create_random_subdirectory())
print(f"Using working directory: {WORKING_DIRECTORY}")

# Default platform - this can be changed as needed
PLATFORM = "LinkedIn"

Using working directory: content/data/1c492203


In [4]:
# ==================== ARXIV RAG TOOL ====================

class ArxivRAGInput(BaseModel):
    """Input schema for the ArXiv RAG tool."""
    domain: str = Field(description="The research domain or topic to search for papers on ArXiv")
    question: str = Field(description="The specific question to answer based *only* on the content of the retrieved ArXiv papers.")
    max_papers: Optional[int] = Field(default=1, description="Maximum number of relevant papers to download and process.")

@tool("arxiv_rag_tool", args_schema=ArxivRAGInput)
def arxiv_rag_tool_func(domain: str, question: str, max_papers: int = 1) -> str:
    """ArXiv RAG Tool for searching, retrieving and answering questions based on ArXiv papers."""
    global rag_chain, qdrant_retriever
    
    print(f"\n=== Executing ArXiv RAG Tool ===\n")
    print(f"Domain: {domain}")
    print(f"Question: {question}")
    print(f"Max Papers: {max_papers}")

    # 1. Search ArXiv for relevant paper IDs and URLs
    print("Searching ArXiv...")
    try:
        client = arxiv.Client()
        search = arxiv.Search(
            query=domain,
            max_results=max_papers,
            sort_by=arxiv.SortCriterion.Relevance
        )
        results = list(client.results(search))
    except Exception as e:
        return f"Error searching ArXiv: {e}"

    if not results:
        return f"No papers found on ArXiv for the domain: '{domain}'"

    papers_metadata = []
    for result in results:
        arxiv_id = extract_arxiv_id(result.entry_id)
        pdf_url = result.pdf_url
        if arxiv_id and pdf_url:
            papers_metadata.append({
                "title": result.title,
                "arxiv_id": arxiv_id,
                "pdf_url": pdf_url,
                "summary": result.summary.replace('\n', ' ')
            })
        else:
            print(f"Skipping paper '{result.title}' due to missing ID or PDF URL.")

    if not papers_metadata:
        return f"Found papers for '{domain}', but none had valid metadata (ID and PDF URL)."

    print(f"Found {len(papers_metadata)} papers with valid metadata.")

    # 2. Download and Load Documents
    all_docs = []
    print("Downloading and loading papers...")
    for i, paper in enumerate(papers_metadata):
        print(f"Loading paper {i+1}/{len(papers_metadata)}: {paper['title']} ({paper['arxiv_id']})")
        try:
            loader = PyMuPDFLoader(paper['pdf_url'])
            docs = loader.load()
            # Add metadata to each page
            for doc in docs:
                doc.metadata.update({
                    "source": f"{paper['title']} (ID: {paper['arxiv_id']})",
                    "arxiv_id": paper['arxiv_id'],
                    "title": paper['title'],
                })
            all_docs.extend(docs)
        except Exception as e:
            print(f"Error loading paper {paper['title']}: {e}. Skipping.")

    if not all_docs:
        return "Failed to download or load content from the found ArXiv papers."

    print(f"Loaded a total of {len(all_docs)} pages.")

    # 3. Chunk Documents
    print("Splitting documents into chunks...")
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=200, 
        chunk_overlap=20,
        length_function=tiktoken_len
    )
    chunks = text_splitter.split_documents(all_docs)
    print(f"Split into {len(chunks)} chunks.")

    if not chunks:
        return "Failed to split documents into chunks."

    # 4. Create Vector Store and update global RAG chain
    print("Creating vector store...")
    try:
        embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
        vectorstore = Qdrant.from_documents(
            chunks,
            embeddings,
            location=":memory:",
            collection_name="arxiv_rag_collection"
        )
        qdrant_retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
        rag_chain = create_rag_chain(qdrant_retriever)
        print("Vector store created and global RAG chain updated.")
    except Exception as e:
        return f"Error creating vector store: {e}"

    # 5. Retrieve and Generate Answer
    print(f"Answering question: {question}")
    try:
        final_answer = rag_chain.invoke({"question": question})     
        print(f"--- ArXiv RAG Tool Execution Finished ---")
        return final_answer
    except Exception as e:
        print(f"--- ArXiv RAG Tool Execution Failed ---")
        print(f"Error details: {e}")
        return f"Error generating answer with RAG chain: {e}"

In [5]:
# ==================== FILE OPERATION TOOLS ====================

@tool
def create_outline(
    points: Annotated[List[str], "List of main points or sections."],
    file_name: Annotated[str, "File path to save the outline."],
) -> Annotated[str, "Path of the saved outline file."]:
    """Create and save an outline."""
    with (WORKING_DIRECTORY / file_name).open("w") as file:
        for i, point in enumerate(points):
            file.write(f"{i + 1}. {point}\n")
    return f"Outline saved to {file_name}"

@tool
def read_document(
    file_name: Annotated[str, "File path to save the document."],
    start: Annotated[Optional[int], "The start line. Default is 0"] = None,
    end: Annotated[Optional[int], "The end line. Default is None"] = None,
) -> str:
    """Read the specified document."""
    with (WORKING_DIRECTORY / file_name).open("r") as file:
        lines = file.readlines()
    if start is not None and start == 0:
        start = 0
    return "\n".join(lines[start:end])

@tool
def write_document(
    content: Annotated[str, "Text content to be written into the document."],
    file_name: Annotated[str, "File path to save the document."],
) -> Annotated[str, "Path of the saved document file."]:
    """Create and save a text document."""
    with (WORKING_DIRECTORY / file_name).open("w") as file:
        file.write(content)
    return f"Document saved to {file_name}"

@tool
def edit_document(
    file_name: Annotated[str, "Path of the document to be edited."],
    inserts: Annotated[
        Dict[int, str],
        "Dictionary where key is the line number (1-indexed) and value is the text to be inserted at that line.",
    ] = {},
) -> Annotated[str, "Path of the edited document file."]:
    """Edit a document by inserting text at specific line numbers."""
    with (WORKING_DIRECTORY / file_name).open("r") as file:
        lines = file.readlines()

    sorted_inserts = sorted(inserts.items())
    for line_number, text in sorted_inserts:
        if 1 <= line_number <= len(lines) + 1:
            lines.insert(line_number - 1, text + "\n")
        else:
            return f"Error: Line number {line_number} is out of range."

    with (WORKING_DIRECTORY / file_name).open("w") as file:
        file.writelines(lines)

    return f"Document edited and saved to {file_name}"

@tool
def retrieve_information(
    query: Annotated[str, "query to ask the retrieve information tool"]
    ):
    """Use Retrieval Augmented Generation to retrieve information about the paper."""
    # This will be initialized when the LLM is loaded
    return rag_chain.invoke({"question": query})



In [6]:
# ==================== AGENT CREATION UTILITIES ====================

def create_agent(
    llm: ChatOpenAI,
    tools: list,
    system_prompt: str,
) -> AgentExecutor:
    """Create a function-calling agent with the given LLM, tools, and system prompt."""
    system_prompt += ("\nWork autonomously according to your specialty, using the tools available to you."
    " Do not ask for clarification."
    " Your other team members (and other teams) will collaborate with you with their own specialties."
    " You are chosen for a reason! You are one of the following team members: {team_members}.")
    
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                system_prompt,
            ),
            MessagesPlaceholder(variable_name="messages"),
            MessagesPlaceholder(variable_name="agent_scratchpad"),
        ]
    )
    agent = create_openai_functions_agent(llm, tools, prompt)
    executor = AgentExecutor(agent=agent, tools=tools)
    return executor

def create_team_supervisor(llm: ChatOpenAI, system_prompt, members) -> Runnable:
    """Create an LLM-based routing supervisor."""
    options = ["FINISH"] + members

    function_def = {
        "name": "route",
        "description": "Select the next role.",
        "parameters": {
            "title": "routeSchema",
            "type": "object",
            "properties": {
                "next": {
                    "title": "Next",
                    "anyOf": [
                        {"enum": options},
                    ],
                },
            },
            "required": ["next"],
        },
    }

    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            MessagesPlaceholder(variable_name="messages"),
            (
                "system",
                "Given the conversation above, who should act next?"
                " Or should we FINISH? Select one of: {options}",
            ),
        ]
    ).partial(options=str(options), team_members=", ".join(members))
    
    return (
        prompt
        | llm.bind_functions(functions=[function_def], function_call="route")
        | JsonOutputFunctionsParser()
    )

def prelude(state):
    """Get current file information to provide context to agents."""
    written_files = []
    if not WORKING_DIRECTORY.exists():
        WORKING_DIRECTORY.mkdir()
    try:
        written_files = [
            f.relative_to(WORKING_DIRECTORY) for f in WORKING_DIRECTORY.rglob("*")
        ]
    except:
        pass
    if not written_files:
        return {**state, "current_files": "No files written."}
    return {
        **state,
        "current_files": "\nBelow are files your team has written to the directory:\n"
        + "\n".join([f" - {f}" for f in written_files]),
    }

def prelude_verification(state):
    """Get current file information with platform information."""
    base_state = prelude(state) 
    base_state['target_platform'] = state.get('target_platform', PLATFORM)
    return base_state

def agent_node(state, agent, name):
    """Standard agent node function that adds agent name to messages."""
    result = agent.invoke(state)
    return {"messages": [HumanMessage(content=result["output"], name=name)]}

def route_next(x):
    """Extract the next agent from the supervisor's output and normalize case."""
    next_step = x["next"]
    # Normalize case to prevent 'FinISH' vs 'FINISH' errors
    if isinstance(next_step, str) and next_step.upper() == "FINISH":
        return "FINISH"
    return next_step

In [7]:
# ==================== STATE DEFINITIONS ====================

# Base state for all teams
class State(TypedDict):
    messages: Annotated[List[BaseMessage], operator.add]
    next: str

# Research team state
class ResearchTeamState(TypedDict):
    messages: Annotated[List[BaseMessage], operator.add]
    team_members: List[str] 
    next: str

# Authoring team state
class DocWritingState(TypedDict):
    messages: Annotated[List[BaseMessage], operator.add]
    team_members: str
    next: str
    current_files: str

# Verification team state
class VerificationState(TypedDict):
    messages: Annotated[List[BaseMessage], operator.add]
    team_members: str
    next: str
    current_files: str
    target_platform: str

In [8]:
# ==================== INITIALIZE LLMs ====================

# Use consistent LLM instances
llm = ChatOpenAI(model="gpt-4o-mini")
llm_tools = ChatOpenAI(model="gpt-4o-mini")

# Initialize tools that need LLM
tavily_tool = TavilySearchResults(max_results=5)
tool_belt = [arxiv_rag_tool_func]
tool_node = ToolNode(tool_belt)
model_with_tools = llm_tools.bind_tools(tool_belt)

In [9]:
# ==================== DEFINE GLOBAL RAG CHAIN ====================
# Define a global RAG chain for use across tools
def format_docs(docs: List[Document]) -> str:
    """Formats retrieved documents into a single string for the prompt context."""
    if not docs:
        return "No relevant context found in the documents."
    context_str = "\n\n---\n\n".join([
        f"Source: {doc.metadata.get('source', 'Unknown')}\nContent: {doc.page_content}"
        for doc in docs
    ])
    return context_str

# Initialize empty vectorstore and retriever placeholder
qdrant_retriever = None

# Prompt template for RAG
rag_prompt = ChatPromptTemplate.from_template("""
CONTEXT:
{context}

QUESTION:
{question}

Answer the question based ONLY on the provided context from the ArXiv papers.
If the context doesn't contain the answer, state that the information is not available in the retrieved documents.
Cite the source document title(s) if possible based on the context metadata.
""")

# Define the RAG chain (will use the retriever once it's created)
def create_rag_chain(retriever):
    return (
        {
            "context": itemgetter("question") | retriever | RunnableLambda(format_docs),
            "question": itemgetter("question")
        }
        | rag_prompt
        | llm
        | StrOutputParser()
    )

# Initialize with a dummy chain - will be replaced when vectorstore is created
dummy_response = "RAG chain not initialized yet. Please run ArXiv search first."
rag_chain = RunnablePassthrough() | (lambda _: dummy_response)


In [10]:
# ==================== RESEARCH TEAM ====================

# Search Agent - IMPROVED INSTRUCTIONS
search_agent = create_agent(
    llm,
    [tavily_tool],
    "You are a research assistant who searches for up-to-date info using the tavily search engine. "
    "Your job is ONLY to find factual information about the paper - NOT to write content. "
    "Focus on finding: title, authors, publication date, key findings, methodology. "
    "Present information in a structured format with clear headings."
)
search_node = functools.partial(agent_node, agent=search_agent, name="Search")

# Research Agent
research_agent = create_agent(
    llm,
    [retrieve_information],
    "You are a research assistant who can provide specific information on scientific papers. "
    "You must only respond with factual information about the paper related to the request. "
    "Do NOT create a draft post or final content - just provide structured research findings."
)
research_node = functools.partial(agent_node, agent=research_agent, name="PaperInformationRetriever")

# Research Team Supervisor
research_supervisor = create_team_supervisor(
    llm,
    ("You are a supervisor tasked with managing a research team consisting of: Search, PaperInformationRetriever. "
     "Your team's ONLY responsibility is to gather factual information about the requested paper. "
     "DO NOT create a draft post - that will be handled by a separate team. "
     "First, use Search to find basic information about the paper. "
     "Then use PaperInformationRetriever to get detailed information. "
     "IMPORTANT: If Search cannot find information, try PaperInformationRetriever directly. "
     "DO NOT FINISH until you have ACTUAL research content. If team members report they 'cannot access' "
     "or 'unable to retrieve' information, that means research is NOT complete. "
     "Only respond with FINISH when you have collected real information about the paper."),
    ["Search", "PaperInformationRetriever"]
)

# Research Team Graph
research_graph = StateGraph(ResearchTeamState)
research_graph.add_node("Search", search_node)
research_graph.add_node("PaperInformationRetriever", research_node)
research_graph.add_node("supervisor", research_supervisor)

# Add edges
research_graph.add_edge("Search", "supervisor")
research_graph.add_edge("PaperInformationRetriever", "supervisor")
research_graph.add_conditional_edges(
    "supervisor",
    route_next,
    {"Search": "Search", "PaperInformationRetriever": "PaperInformationRetriever", "FINISH": END}
)
research_graph.set_entry_point("supervisor")
compiled_research_graph = research_graph.compile()

# Research Chain
def enter_research_chain(message: str):
    return {
        "messages": [HumanMessage(content=message)],
    }

research_chain = enter_research_chain | compiled_research_graph

  | llm.bind_functions(functions=[function_def], function_call="route")


In [11]:
# ==================== PLATFORM-AGNOSTIC AUTHORING TEAM ====================

# Note Taker Agent
agnostic_note_taker_agent = create_agent(
    llm, 
    [create_outline, read_document],
    ("You are a senior researcher creating a factual outline based on the research findings provided. "
     "Create a detailed outline that captures the key points about the paper. "
     "This outline will be used by the document writer to create the full post. "
     "Save your outline as 'post_outline.md'.\n"
     "Below are files currently in your directory:\n{current_files}")
)
context_aware_agnostic_note_taker = prelude | agnostic_note_taker_agent
agnostic_note_taking_node = functools.partial(
    agent_node, 
    agent=context_aware_agnostic_note_taker, 
    name="AgnosticNoteTaker"
)

# Document Writer Agent - IMPROVED INSTRUCTIONS
agnostic_doc_writer_agent = create_agent(
    llm, 
    [write_document, edit_document, read_document],
    ("You are a technical writer focused on creating clear, accurate, and platform-agnostic content. "
     "IMPORTANT: You MUST save your final draft to a file named 'draft_social_post.md'. "
     "This specific filename is critical as other teams depend on it. "
     "Create a well-structured post based on the research findings. "
     "Below are files currently in your directory:\n{current_files}")
)
context_aware_agnostic_doc_writer = prelude | agnostic_doc_writer_agent
agnostic_doc_writing_node = functools.partial(
    agent_node, 
    agent=context_aware_agnostic_doc_writer, 
    name="AgnosticDocWriter"
)

# Copy Editor Agent
agnostic_copy_editor_agent = create_agent(
    llm, 
    [read_document, edit_document],
    ("You are an expert copy editor who focuses on fixing grammar, spelling, and tone issues. "
     "Read the draft post and make necessary edits to improve readability and correctness. "
     "The final edited version must remain in the file 'draft_social_post.md'. "
     "Below are files currently in your directory:\n{current_files}")
)
context_aware_agnostic_copy_editor = prelude | agnostic_copy_editor_agent
agnostic_copy_editing_node = functools.partial(
    agent_node, 
    agent=context_aware_agnostic_copy_editor, 
    name="AgnosticCopyEditor"
)

# Supervisor for the Agnostic Authoring Team
agnostic_authoring_supervisor = create_team_supervisor(
    llm,
    ("You are a supervisor tasked with managing content creation for a scientific paper post. "
     "Your team members are: {team_members}. "
     "Follow this precise workflow:"
     "1. First, route to AgnosticNoteTaker to create an outline."
     "2. Next, route to AgnosticDocWriter to write the full post."
     "3. Finally, route to AgnosticCopyEditor to refine the post."
     "CRITICAL: Ensure the AgnosticDocWriter saves the post as 'draft_social_post.md'."
     "After all team members have completed their work, respond with FINISH."),
    ["AgnosticNoteTaker", "AgnosticDocWriter", "AgnosticCopyEditor"]
)

# Graph for the Agnostic Authoring Team
agnostic_authoring_graph = StateGraph(DocWritingState)
agnostic_authoring_graph.add_node("AgnosticNoteTaker", agnostic_note_taking_node)
agnostic_authoring_graph.add_node("AgnosticDocWriter", agnostic_doc_writing_node)
agnostic_authoring_graph.add_node("AgnosticCopyEditor", agnostic_copy_editing_node)
agnostic_authoring_graph.add_node("supervisor", agnostic_authoring_supervisor)

# Add edges
agnostic_authoring_graph.add_edge("AgnosticNoteTaker", "supervisor")
agnostic_authoring_graph.add_edge("AgnosticDocWriter", "supervisor")
agnostic_authoring_graph.add_edge("AgnosticCopyEditor", "supervisor")

agnostic_authoring_graph.add_conditional_edges(
    "supervisor", 
    route_next,
    {
        "AgnosticNoteTaker": "AgnosticNoteTaker", 
        "AgnosticDocWriter": "AgnosticDocWriter",
        "AgnosticCopyEditor": "AgnosticCopyEditor", 
        "FINISH": END
    }
)
agnostic_authoring_graph.set_entry_point("supervisor")
compiled_agnostic_authoring_graph = agnostic_authoring_graph.compile()

# Authoring Chain
def enter_agnostic_authoring_chain(message: str):
    return {
        "messages": [HumanMessage(content=message)],
        "team_members": ", ".join(["AgnosticNoteTaker", "AgnosticDocWriter", "AgnosticCopyEditor"])
    }

agnostic_authoring_chain = enter_agnostic_authoring_chain | compiled_agnostic_authoring_graph


In [12]:
# ==================== VERIFICATION TEAM ====================

# Fact Checker Agent
fact_checker_agent = create_agent(
    llm,
    [read_document], 
    ("You are a meticulous fact-checker. Your task is to read the draft social media post "
     "in 'draft_social_post.md'. If this file doesn't exist, report that it's missing. "
     "Compare the post content against the research findings provided earlier. "
     "Ensure all claims are accurate and supported by the research. "
     "Be specific in your response - state 'FACT CHECK: APPROVED' if accurate, "
     "or 'FACT CHECK: ISSUES FOUND' followed by the specific inaccuracies."
     "\nBelow are files currently in your directory:\n{current_files}")
)
context_aware_fact_checker = prelude | fact_checker_agent
fact_checking_node = functools.partial(
    agent_node, 
    agent=context_aware_fact_checker,
    name="FactChecker"
)

# Style Validator Agent
style_validator_agent = create_agent(
    llm,
    [read_document],
    ("You are a social media expert. Your task is to read the draft post "
     "in 'draft_social_post.md'. If this file doesn't exist, report that it's missing. "
     "Verify that its tone, style, length, and formatting are appropriate for {target_platform}. "
     "Be specific in your response - state 'STYLE CHECK: APPROVED' if it fits the platform, "
     "or 'STYLE CHECK: ISSUES FOUND' followed by the specific style issues."
     "\nBelow are files currently in your directory:\n{current_files}")
)
context_aware_style_validator = prelude_verification | style_validator_agent
style_validation_node = functools.partial(
    agent_node, 
    agent=context_aware_style_validator, 
    name="StyleValidator"
)

# Verification Team Supervisor
verification_supervisor = create_team_supervisor(
    llm,
    ("You are a supervisor for the Verification Team. Your team consists of: {team_members}. "
     "Your goal is to ensure the draft post in 'draft_social_post.md' is factually correct and "
     "stylistically appropriate for {target_platform}. "
     "First, route to the FactChecker. Then, only after the FactChecker has completed, "
     "route to the StyleValidator. "
     "After BOTH agents have approved the post, respond with FINISH. "
     "If any issues are found, report them and then respond with FINISH."),
    ["FactChecker", "StyleValidator"]
)

# Verification Graph
verification_graph = StateGraph(VerificationState)
verification_graph.add_node("FactChecker", fact_checking_node)
verification_graph.add_node("StyleValidator", style_validation_node)
verification_graph.add_node("supervisor", verification_supervisor)

# Add edges
verification_graph.add_edge("FactChecker", "supervisor")
verification_graph.add_edge("StyleValidator", "supervisor")
verification_graph.add_conditional_edges(
    "supervisor",
    route_next,
    {
        "FactChecker": "FactChecker",
        "StyleValidator": "StyleValidator",
        "FINISH": END
    }
)
verification_graph.set_entry_point("supervisor")
compiled_verification_graph = verification_graph.compile()

def enter_verification_chain(state: State):
    """Prepare the state for the verification team."""
    last_message = state['messages'][-1]
    
    # Check for specific files we expect
    written_files = [f.relative_to(WORKING_DIRECTORY) for f in WORKING_DIRECTORY.rglob("*")]
    post_file_exists = any("draft_social_post.md" in str(f) for f in written_files)
    
    current_files_str = "\n".join([f" - {f}" for f in written_files]) if written_files else "No files found."
    
    # Add information about expected files
    if not post_file_exists:
        current_files_str += "\n\nWARNING: Expected file 'draft_social_post.md' not found!"

    return {
        "messages": [last_message], 
        "team_members": "FactChecker, StyleValidator", 
        "target_platform": PLATFORM,
        "current_files": current_files_str
    }

In [13]:
# ==================== TOP-LEVEL SUPERVISOR AND GRAPH ====================

# Top-level Supervisor
top_level_supervisor = create_team_supervisor(
    llm,
    ("You are a top-level supervisor managing a multi-team workflow to create posts about scientific papers. "
     "You MUST follow this EXACT sequence in order WITHOUT SKIPPING ANY TEAM: "
     "1. FIRST: 'Research team' gathers facts about the paper. "
     "2. NEXT: 'Agnostic Authoring team' creates a draft post saved to 'draft_social_post.md'. "
     "3. FINALLY: 'Verification team' checks the draft. "
     "\n\nSTRICTLY ENFORCE this sequence. You CANNOT skip any team. "
     "You MUST NOT route to 'FINISH' until ALL THREE teams have completed their work in sequence. "
     "Even if a team reports problems, you must complete the entire sequence."),
    ["Research team", "Agnostic Authoring team", "Verification team"]
)

# Node functions for super graph
def run_research_chain(state: State):
    """Run the research chain with the last message."""
    last_message_content = state['messages'][-1].content
    result = research_chain.invoke(last_message_content)
    return {"messages": result["messages"]}

def run_agnostic_authoring_chain(state: State):
    """Run the agnostic authoring chain with the last message."""
    last_message_content = state['messages'][-1].content
    result = agnostic_authoring_chain.invoke(last_message_content)
    return {"messages": result["messages"]}

# Main Workflow Graph
super_graph = StateGraph(State)
super_graph.add_node("Research team", run_research_chain)
super_graph.add_node("Agnostic Authoring team", run_agnostic_authoring_chain)

# For verification, we chain preparation and execution
verification_node_chain = enter_verification_chain | compiled_verification_graph
super_graph.add_node("Verification team", verification_node_chain)

super_graph.add_node("supervisor", top_level_supervisor)

# Add edges
super_graph.add_edge("Research team", "supervisor")
super_graph.add_edge("Agnostic Authoring team", "supervisor")
super_graph.add_edge("Verification team", "supervisor")

# Add conditional edges
super_graph.add_conditional_edges(
    "supervisor",
    route_next,
    {
        "Research team": "Research team",
        "Agnostic Authoring team": "Agnostic Authoring team",
        "Verification team": "Verification team",
        "FINISH": END
    }
)
super_graph.set_entry_point("supervisor")
compiled_super_graph = super_graph.compile()

In [14]:
# ==================== EXECUTION ====================

def run_workflow(query, platform=None):
    """Run the entire workflow with the given query and improved debugging."""
    global WORKING_DIRECTORY, PLATFORM
    
    # Update platform if specified
    if platform is not None:
        PLATFORM = platform
    
    # Create a new working directory for this run
    WORKING_DIRECTORY = Path(create_random_subdirectory())
    print(f"Using working directory: {WORKING_DIRECTORY}")
    
    print("Starting graph execution...")
    
    initial_message = HumanMessage(
        content=f"Write a post about the paper '{query}'. "
                f"Use the standard process: Research, create a neutral draft, and then verify it "
                f"for factual accuracy and suitability for the {PLATFORM} platform. Save the final post."
    )
    
    try:
        # Add step tracking for debugging
        current_step = "Initializing"
        step_completion = {
            "Research team": False,
            "Agnostic Authoring team": False,
            "Verification team": False
        }
        
        for s in compiled_super_graph.stream(
            {
                "messages": [initial_message],
            },
            {"recursion_limit": 50},
        ):
            if "__end__" not in s:
                # Track steps for debugging
                if "supervisor" in s:
                    next_step = s["supervisor"].get("next", "UNKNOWN")
                    if next_step != "FINISH":
                        current_step = next_step
                        print(f"\n--- MOVING TO: {current_step} ---\n")
                    else:
                        print(f"\n--- WORKFLOW ATTEMPTING TO FINISH ---\n")
                        print(f"Step completion status: {step_completion}")
                
                # Mark steps as complete when we see their output
                for team in step_completion.keys():
                    if team in s:
                        step_completion[team] = True
                        print(f"Completed: {team}")
                
                print(s)
                print("---")
        
        print("Graph execution finished.")
        print(f"Final step completion status: {step_completion}")

    except Exception as e:
        import traceback
        print("\nAn error occurred during graph execution:")
        print(traceback.format_exc())

    # List and read files
    print("\nFiles in working directory:")
    found_post = False
    for item in WORKING_DIRECTORY.iterdir():
        print(f"- {item.name}")
        if item.name == "draft_social_post.md":
            found_post = True
            print("\nFinal post content:")
            with open(item, "r") as f:
                print(f.read())
    
    if not found_post:
        print("\nWARNING: draft_social_post.md was not created!")
        
    return found_post

In [15]:
# LinkedIn version
result1 = run_workflow("Extending Llama-3's Context Ten-Fold Overnight", platform="LinkedIn")

Using working directory: content/data/5e9ac346
Starting graph execution...

--- MOVING TO: Research team ---

{'supervisor': {'next': 'Research team'}}
---
Completed: Research team
{'Research team': {'messages': [HumanMessage(content="Write a post about the paper 'Extending Llama-3's Context Ten-Fold Overnight'. Use the standard process: Research, create a neutral draft, and then verify it for factual accuracy and suitability for the LinkedIn platform. Save the final post.", additional_kwargs={}, response_metadata={}), HumanMessage(content="### Title\n**Extending Llama-3's Context Ten-Fold Overnight**\n\n### Authors\nThe specific authors are not listed in the search results. Further reviews or the original paper may provide this information.\n\n### Publication Date\n**April 30, 2024**\n\n### Key Findings\n- The research successfully extended the context length of the Llama-3-8B-Instruct model from **8,000 tokens to 80,000 tokens**.\n- This extension enables the model to process and und

In [16]:
#  Twitter version
result2 = run_workflow("Extending Llama-3's Context Ten-Fold Overnight", platform="Twitter") 


Using working directory: content/data/9a54acd1
Starting graph execution...

--- MOVING TO: Research team ---

{'supervisor': {'next': 'Research team'}}
---
Completed: Research team
{'Research team': {'messages': [HumanMessage(content="Write a post about the paper 'Extending Llama-3's Context Ten-Fold Overnight'. Use the standard process: Research, create a neutral draft, and then verify it for factual accuracy and suitability for the Twitter platform. Save the final post.", additional_kwargs={}, response_metadata={}), HumanMessage(content="# Paper Information\n\n## Title\nExtending Llama-3's Context Ten-Fold Overnight\n\n## Authors\n(Not specified in the search results)\n\n## Publication Date\nApril 30, 2024\n\n## Key Findings\n- The study successfully extended the context length of the Llama-3-8B-Instruct model from 8,000 tokens to 80,000 tokens.\n- The new configuration allows the model to process and understand significantly larger pieces of text, enhancing its usability in long-con

In [17]:
# Medium version
result3 = run_workflow("Extending Llama-3's Context Ten-Fold Overnight", platform="Medium")

Using working directory: content/data/8cdeb2ef
Starting graph execution...

--- MOVING TO: Research team ---

{'supervisor': {'next': 'Research team'}}
---
Completed: Research team
{'Research team': {'messages': [HumanMessage(content="Write a post about the paper 'Extending Llama-3's Context Ten-Fold Overnight'. Use the standard process: Research, create a neutral draft, and then verify it for factual accuracy and suitability for the Medium platform. Save the final post.", additional_kwargs={}, response_metadata={}), HumanMessage(content="# Extending Llama-3's Context Ten-Fold Overnight\n\n## Title\nExtending Llama-3's Context Ten-Fold Overnight\n\n## Authors\nNot explicitly listed in the search results.\n\n## Publication Date\nApril 30, 2024\n\n## Key Findings\n- The study presents a method to significantly increase the context length of the Llama-3-8B-Instruct model from 8,000 tokens to 80,000 tokens.\n- This enhancement allows the model to process much longer text effectively.\n- Th

In [18]:
result4  = run_workflow("Offline Reinforcement Learning", platform="LinkedIn")

Using working directory: content/data/f56625b2
Starting graph execution...

--- MOVING TO: Research team ---

{'supervisor': {'next': 'Research team'}}
---
Completed: Research team
{'Research team': {'messages': [HumanMessage(content="Write a post about the paper 'Offline Reinforcement Learning'. Use the standard process: Research, create a neutral draft, and then verify it for factual accuracy and suitability for the LinkedIn platform. Save the final post.", additional_kwargs={}, response_metadata={}), HumanMessage(content='## Offline Reinforcement Learning: Key Information\n\n### Title\n- **"Offline Reinforcement Learning: Tutorial, Review, and Perspectives on Open Problems"**\n\n### Authors\n- The paper does not specify unique authors in the search result. However, it is commonly attributed to a collaboration of multiple authors in the reinforcement learning community.\n\n### Publication Date\n- The paper was initially submitted to arXiv on **May 5, 2020**.\n\n### Key Findings\n- Th

In [19]:
result5  = run_workflow("Offline Reinforcement Learning", platform="Tweeter")

Using working directory: content/data/d28bb8a0
Starting graph execution...

--- MOVING TO: Research team ---

{'supervisor': {'next': 'Research team'}}
---
Completed: Research team
{'Research team': {'messages': [HumanMessage(content="Write a post about the paper 'Offline Reinforcement Learning'. Use the standard process: Research, create a neutral draft, and then verify it for factual accuracy and suitability for the Tweeter platform. Save the final post.", additional_kwargs={}, response_metadata={}), HumanMessage(content="# Paper Information\n\n### Title\n**Offline Reinforcement Learning: Tutorial, Review, and Perspectives on Open Problems**\n\n### Authors\nThe paper does not list specific authors in the extracted data. It's advisable to check the original paper for a complete authorship list.\n\n### Publication Date\nThe submission history indicates the paper can be found on arXiv, with revisions. The original version was submitted on May 5, 2020.\n\n### Key Findings\n- **Concept**: