TASK 7

In [2]:
# langgraph_dual_llm_history_with_checkpoint.py
#
# Dual-LLM agent (Llama <-> Qwen) with shared canonical history, sanitization,
# and persistent checkpointing for crash recovery.
#
# Interactive commands:
#   verbose / quiet   -- toggle tracing
#   history           -- print canonical history
#   reset             -- clear history (keeps initial system message)
#   save_and_exit     -- save checkpoint and exit cleanly
#   quit / exit / q   -- exit (state saved automatically)
#
# NOTE: SIGINT and SIGTERM are trapped and will trigger a checkpoint save before exit.

import os
import sys
import json
import re
import signal
import atexit
import tempfile
import traceback
from typing import TypedDict, List, Dict, Any, Tuple

# Defensive imports (torch / transformers). If unavailable the script runs with fallback models.
TRANSFORMERS_OK = True
try:
    import torch
except Exception as e:
    TRANSFORMERS_OK = False
    print("[WARN] torch import failed; running in fallback mode.")
    traceback.print_exception(type(e), e, e.__traceback__, file=sys.stdout)

if TRANSFORMERS_OK:
    try:
        from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
    except Exception as e:
        TRANSFORMERS_OK = False
        print("[WARN] transformers import failed; running in fallback mode.")
        traceback.print_exception(type(e), e, e.__traceback__, file=sys.stdout)

# langgraph
try:
    from langgraph.graph import StateGraph, START, END
except Exception as e:
    print("[ERROR] Could not import langgraph.graph. Install langgraph or run in an environment with it available.")
    traceback.print_exception(type(e), e, e.__traceback__, file=sys.stdout)
    raise

# -------------------------
# Types & checkpoint config
# -------------------------
SpeakerMessage = Dict[str, str]  # {"speaker": "Human"|"Llama"|"Qwen"|"Tool"|"System", "content": "..."}

class AgentState(TypedDict):
    user_input: str
    should_exit: bool
    last_model: str
    verbose: bool
    messages: List[SpeakerMessage]
    llama_response: str
    qwen_response: str

CHECKPOINT_PATH = os.environ.get("LG_CHECKPOINT_PATH", "lg_checkpoint.json")

# -------------------------
# Checkpoint helpers
# -------------------------
def atomic_write_json(path: str, data: dict):
    """Write JSON atomically to avoid partial files."""
    dirnm = os.path.dirname(os.path.abspath(path)) or "."
    with tempfile.NamedTemporaryFile(mode="w", dir=dirnm, delete=False, encoding="utf-8") as tf:
        json.dump(data, tf, ensure_ascii=False, indent=2)
        tmpname = tf.name
    # Rename to target atomically
    os.replace(tmpname, path)

def save_checkpoint(state: AgentState, path: str = CHECKPOINT_PATH):
    """Persist minimal state needed for recovery: messages, verbose, last_model."""
    try:
        data = {
            "messages": state.get("messages", []),
            "verbose": bool(state.get("verbose", False)),
            "last_model": state.get("last_model", ""),
        }
        atomic_write_json(path, data)
        if state.get("verbose", False):
            print(f"[TRACE] Checkpoint saved to {path} (messages={len(data['messages'])})")
    except Exception as e:
        print("[ERROR] Failed to save checkpoint:", e)
        traceback.print_exc()

def load_checkpoint(path: str = CHECKPOINT_PATH) -> Dict[str, Any]:
    """Load checkpoint if available; return dict with keys messages, verbose, last_model."""
    if not os.path.exists(path):
        return {"messages": None, "verbose": False, "last_model": ""}
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
        # Validate shape
        messages = data.get("messages", [])
        verbose = bool(data.get("verbose", False))
        last_model = data.get("last_model", "")
        return {"messages": messages, "verbose": verbose, "last_model": last_model}
    except Exception as e:
        print("[ERROR] Failed to load checkpoint:", e)
        traceback.print_exc()
        return {"messages": None, "verbose": False, "last_model": ""}

# Ensure save on process exit
def _on_exit_save(state: AgentState):
    try:
        # state may be mutated; just attempt to persist what's currently set
        save_checkpoint(state)
    except Exception:
        pass

# We'll register signal handlers below once we have the running state.

# -------------------------
# Sanitization utilities
# -------------------------
def sanitize_model_output(text: Any, speaker: str) -> str:
    if text is None:
        return ""
    if not isinstance(text, str):
        try:
            text = str(text)
        except Exception:
            text = ""
    text = text.strip()
    sp = re.escape(speaker)
    text = re.sub(rf'^(?:\s*(?:{sp}|Assistant)\s*:)+\s*', '', text, flags=re.IGNORECASE)
    text = re.sub(rf'(?:\s*(?:{sp}|Assistant)\s*:\s*)+$', '', text, flags=re.IGNORECASE)
    text = re.sub(r'\n\s*\n+', '\n\n', text).strip()
    return text

def ensure_speaker_prefix(content: str, speaker_label: str) -> str:
    if content is None:
        content = ""
    content = content.strip()
    if content == "":
        return content
    if re.match(rf'^\s*{re.escape(speaker_label)}\s*:', content, flags=re.IGNORECASE):
        return content
    if re.match(r'^\s*Human\s*:', content, flags=re.IGNORECASE):
        return content
    return f"{speaker_label}: {content}"

# -------------------------
# Device selection, fallback models, HF loader
# -------------------------
def get_device() -> str:
    if TRANSFORMERS_OK:
        try:
            if torch.cuda.is_available():
                print("Using CUDA (NVIDIA GPU)")
                return "cuda"
            elif getattr(torch.backends, "mps", None) and torch.backends.mps.is_available():
                print("Using MPS (Apple Silicon)")
                return "mps"
        except Exception:
            pass
    print("Using CPU")
    return "cpu"

class SimpleFallbackModel:
    def __init__(self, name: str):
        self.name = name
    def invoke(self, prompt: str) -> str:
        preview = (prompt.replace("\n", " ")[:240] + "...") if prompt else "[no prompt]"
        return f"[{self.name.upper()}-FALLBACK] No HF model available. Prompt preview: {preview}"

class PipelineAdapter:
    def __init__(self, pipe):
        self.pipe = pipe
    def invoke(self, prompt: str) -> str:
        try:
            out = self.pipe(prompt)
            if isinstance(out, list) and len(out) > 0 and isinstance(out[0], dict):
                txt = out[0].get("generated_text")
                if isinstance(txt, str):
                    return txt
            if isinstance(out, str):
                return out
            return str(out)
        except Exception as e:
            return f"[MODEL-ERROR] pipeline invocation failed: {e}"

def load_and_wrap(model_id: str, device: str):
    if not TRANSFORMERS_OK:
        raise RuntimeError("Transformers not available")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=(None if device == "cpu" else None),
        device_map="auto" if device == "cuda" else None,
    )
    if device == "mps":
        try:
            model = model.to(device)
        except Exception:
            pass
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        pad_token_id=getattr(tokenizer, "eos_token_id", None),
    )
    return PipelineAdapter(pipe)

def create_models() -> Tuple[Any, Any]:
    device = get_device()
    # Llama
    if TRANSFORMERS_OK:
        try:
            print("Loading Llama (meta-llama/Llama-3.2-1B-Instruct) ...")
            llama_llm = load_and_wrap("meta-llama/Llama-3.2-1B-Instruct", device)
            print("Llama loaded.")
        except Exception as e:
            print("[WARN] Failed to load Llama, falling back to gpt2 or SimpleFallbackModel:", e)
            traceback.print_exc()
            try:
                llama_llm = load_and_wrap("gpt2", device)
                print("gpt2 loaded as Llama fallback.")
            except Exception:
                llama_llm = SimpleFallbackModel("llama")
    else:
        llama_llm = SimpleFallbackModel("llama")

    # Qwen - attempt but allow disabled (None)
    qwen_llm = None
    if TRANSFORMERS_OK:
        try:
            print("Attempting to load Qwen (Qwen/Qwen2.5-1.5B-Instruct) ...")
            qwen_llm = load_and_wrap("Qwen/Qwen2.5-1.5B-Instruct", device)
            print("Qwen loaded.")
        except Exception as e:
            print("[WARN] Could not load Qwen (it will be treated as disabled):", e)
            traceback.print_exc()
            qwen_llm = None
    else:
        qwen_llm = None

    return llama_llm, qwen_llm

# -------------------------
# Role-message conversion & prompt building
# -------------------------
# --- REPLACEMENT: build_role_messages_for_target ---
def build_role_messages_for_target(target: str, canonical_messages: List[SpeakerMessage]) -> List[Dict[str, str]]:
    """
    Convert canonical history into role-messages for a given target ('Llama' or 'Qwen').

    Differences from prior version:
     - DO NOT include canonical 'System' messages here; the caller will prepend exactly one
       target-specific system prompt (system_message_for_target). This prevents duplicate system lines.
     - Human -> role:user with content "Human: <content>"
     - Prior LLM utterance -> assistant if it was from target; otherwise user with "<Speaker>: <content>"
     - Uses ensure_speaker_prefix to avoid double-labeling.
    """
    role_msgs: List[Dict[str, str]] = []

    # iterate canonical messages but skip system messages (they will be added separately)
    for m in canonical_messages:
        sp = m.get("speaker", "")
        content = m.get("content", "") or ""
        sp_norm = sp.lower()

        if sp_norm == "system":
            # Skip canonical system entries here; the per-target system prompt is injected by the caller.
            continue

        if sp_norm == "tool":
            role_msgs.append({"role": "tool", "content": content})
            continue

        if sp_norm == "human":
            # Human messages become user messages prefixed with "Human: "
            cont = content
            if not re.match(r'^\s*Human\s*:', cont, flags=re.IGNORECASE):
                cont = f"Human: {cont}"
            role_msgs.append({"role": "user", "content": cont})
            continue

        # For prior LLM utterances (Llama/Qwen), avoid double prefixes
        speaker_label = sp  # e.g., "Llama" or "Qwen"
        prefixed = ensure_speaker_prefix(content, speaker_label)
        if speaker_label.lower() == target.lower():
            # prior utterance by same model -> assistant role
            role_msgs.append({"role": "assistant", "content": prefixed})
        else:
            # prior utterance by other model -> user role with speaker prefix
            role_msgs.append({"role": "user", "content": prefixed})

    return role_msgs


# --- REPLACEMENT: prompt_from_role_messages ---
def prompt_from_role_messages(role_messages: List[Dict[str, str]]) -> str:
    """
    Build prompt text from role-based messages.
    - system messages should already have been placed first by the caller (if any).
    - For prior assistant messages: include their content verbatim (they are already prefixed
      like "Llama: ..." or "Qwen: ...") â€” DO NOT prefix with "Assistant: " again.
    - For user/tool messages: format as "User: ..." / "[Tool] ...".
    - End the prompt with a single "Assistant:" cue for the model to complete.
    """
    lines: List[str] = []

    # system lines first (if present)
    for rm in role_messages:
        if rm["role"] == "system":
            lines.append(f"[System] {rm['content']}")

    # then the rest in order
    for rm in role_messages:
        role = rm["role"]
        content = rm["content"]
        if role == "system":
            continue
        if role == "user":
            lines.append(f"User: {content}")
        elif role == "tool":
            lines.append(f"[Tool] {content}")
        elif role == "assistant":
            # IMPORTANT: do not prefix with "Assistant:" here because content already carries speaker prefix.
            # Example: content might be "Llama: Vanilla is best." â€” include verbatim.
            lines.append(content)
    # final cue for the model to respond
    lines.append("Assistant:")
    return "\n".join(lines)

def system_message_for_target(target: str) -> SpeakerMessage:
    if target.lower() == "llama":
        return {"speaker": "System", "content": "You are Llama (assistant). Participants: Human and Qwen. When others speak, their name will be prefixed (e.g. 'Qwen: ...'). Answer helpfully and concisely."}
    else:
        return {"speaker": "System", "content": "You are Qwen (assistant). Participants: Human and Llama. When others speak, their name will be prefixed (e.g. 'Llama: ...'). Answer helpfully and concisely."}

# -------------------------
# Graph & nodes (with checkpointing calls)
# -------------------------
def create_graph(llama_llm, qwen_llm):
    def get_user_input(state: AgentState) -> dict:
        if state.get("verbose", False):
            print("[TRACE] Entering get_user_input")
        print("\n" + "=" * 60)
        print("Enter text (or 'quit' to exit). Type 'verbose' or 'quiet' to toggle tracing.")
        print("Special commands: history, reset, save_and_exit")
        print("=" * 60)
        print("> ", end="")
        raw = input()

        low = raw.strip().lower()
        if low == "verbose":
            return {"user_input": raw, "should_exit": False, "verbose": True}
        if low == "quiet":
            return {"user_input": raw, "should_exit": False, "verbose": False}
        if low in ("quit", "exit", "q"):
            return {"user_input": raw, "should_exit": True}
        if low == "history":
            msgs = state.get("messages", [])
            print("\n[HISTORY] canonical messages (most recent last):")
            for m in msgs:
                print(f"  {m.get('speaker')}: {m.get('content')}")
            return {"user_input": "", "should_exit": False}
        if low == "reset":
            sys_msg = {"speaker": "System", "content": "You are an assistant participating in a multi-agent chat: Human, Llama, Qwen."}
            print("[NOTICE] History reset.")
            new_state = {"user_input": "", "should_exit": False, "messages": [sys_msg], "last_model": ""}
            # Save checkpoint immediately so reset is durable
            save_checkpoint(new_state)  # safe: save minimal fields
            return new_state
        if low == "save_and_exit":
            # persist and exit gracefully
            new_state = {"user_input": raw, "should_exit": True}
            # Save full state now
            full_state = {
                "messages": state.get("messages", []),
                "verbose": bool(state.get("verbose", False)),
                "last_model": state.get("last_model", "")
            }
            atomic_write_json(CHECKPOINT_PATH, full_state)
            print(f"[NOTICE] Checkpoint saved to {CHECKPOINT_PATH}; exiting.")
            sys.exit(0)

        # Normal input - append human message to canonical history
        msgs = list(state.get("messages", []))
        msgs.append({"speaker": "Human", "content": raw})

        # immediately persist the human turn before heavy model work (durable checkpoint)
        new_state = {"user_input": raw, "should_exit": False, "messages": msgs, "last_model": ""}
        try:
            # Save checkpoint with current canonical messages & flags
            ck = {"messages": msgs, "verbose": bool(state.get("verbose", False)), "last_model": ""}
            atomic_write_json(CHECKPOINT_PATH, ck)
            if state.get("verbose", False):
                print(f"[TRACE] Checkpoint saved after human turn (messages={len(msgs)})")
        except Exception as e:
            print("[ERROR] Failed to checkpoint after human input:", e)
            traceback.print_exc()

        return new_state

    def route_after_input(state: AgentState) -> str:
        if state.get("verbose", False):
            print("[TRACE] route_after_input - user_input:", repr(state.get("user_input")))
        if state.get("should_exit", False):
            return END
        raw = str(state.get("user_input", "") or "")
        if raw.strip() == "":
            print("[NOTICE] Empty input received â€” please type something.")
            return "get_user_input"
        return "call_model"

    def call_model(state: AgentState) -> dict:
        if state.get("verbose", False):
            print("[TRACE] call_model invoked")

        raw = str(state.get("user_input", "") or "")
        canonical_msgs = list(state.get("messages", []))

        # Determine target
        target_initial = "Qwen" if raw.lstrip().lower().startswith("hey qwen") else "Llama"
        target = target_initial

        model_obj = qwen_llm if target.lower() == "qwen" else llama_llm
        if target.lower() == "qwen" and model_obj is None:
            # append tool notice and route to Llama
            canonical_msgs.append({"speaker": "Tool", "content": "Qwen is disabled in this runtime; routed to Llama instead."})
            if state.get("verbose", False):
                print("[TRACE] Qwen disabled; appended Tool message and routing to Llama")
            target = "Llama"
            model_obj = llama_llm

        # Build role messages for target and prepend system message
        role_msgs = build_role_messages_for_target(target, canonical_msgs)
        sys_msg = system_message_for_target(target)
        role_msgs_with_sys = [{"role": "system", "content": sys_msg["content"]}] + role_msgs

        # If original human message started with Hey Qwen, remove that trigger from the prompt copy
        if raw.lstrip().lower().startswith("hey qwen"):
            role_msgs_mod = [dict(rm) for rm in role_msgs]
            for i in range(len(role_msgs_mod) - 1, -1, -1):
                rm = role_msgs_mod[i]
                if rm["role"] == "user" and rm["content"].lower().startswith("human:"):
                    after = rm["content"][len("Human:"):].lstrip()
                    if after.lower().startswith("hey qwen"):
                        new_after = after[len("hey qwen"):].lstrip()
                        rm["content"] = f"Human: {new_after}" if new_after != "" else rm["content"]
                    break
            role_msgs_with_sys = [{"role": "system", "content": sys_msg["content"]}] + role_msgs_mod

        prompt_text = prompt_from_role_messages(role_msgs_with_sys)
        if state.get("verbose", False):
            print("[TRACE] Prompt for target", target, "(truncated):")
            print(prompt_text[:1200])

        # Call the model (may take time)
        try:
            response_text = model_obj.invoke(prompt_text)
        except Exception as e:
            response_text = f"[MODEL-ERROR] {e}"
            print("[ERROR] Model invocation failed:", e)
            traceback.print_exc()

        # Sanitize model output and append
        response_text_clean = sanitize_model_output(response_text, target)
        canonical_msgs.append({"speaker": target, "content": response_text_clean})

        # Save checkpoint after model reply (durable)
        try:
            ck = {"messages": canonical_msgs, "verbose": bool(state.get("verbose", False)), "last_model": target}
            atomic_write_json(CHECKPOINT_PATH, ck)
            if state.get("verbose", False):
                print(f"[TRACE] Checkpoint saved after model reply (messages={len(canonical_msgs)})")
        except Exception as e:
            print("[ERROR] Failed to checkpoint after model reply:", e)
            traceback.print_exc()

        # Return updated state
        llama_snip = response_text_clean if target.lower() == "llama" else ""
        qwen_snip = response_text_clean if target.lower() == "qwen" else ""
        return {
            "messages": canonical_msgs,
            "last_model": target,
            "llama_response": llama_snip,
            "qwen_response": qwen_snip
        }

    def print_response(state: AgentState) -> dict:
        if state.get("verbose", False):
            print("[TRACE] print_response invoked; last_model=", state.get("last_model"))

        last_model = state.get("last_model", "")
        llama_text = (state.get("llama_response") or "").strip()
        qwen_text = (state.get("qwen_response") or "").strip()

        printed = False
        if last_model.lower() == "llama" and llama_text:
            print("\n" + "=" * 70)
            print("ðŸ¦™ LLaMA Response")
            print("=" * 70)
            print(llama_text)
            printed = True
        elif last_model.lower() == "qwen" and qwen_text:
            print("\n" + "=" * 70)
            print("ðŸ§  Qwen Response")
            print("=" * 70)
            print(qwen_text)
            printed = True
        else:
            msgs = state.get("messages", []) or []
            if msgs:
                last_msg = msgs[-1]
                sp = last_msg.get("speaker", "")
                cont = (last_msg.get("content") or "").strip()
                if sp.lower() in ("llama", "qwen") and cont:
                    header = "ðŸ¦™ LLaMA Response" if sp.lower() == "llama" else "ðŸ§  Qwen Response"
                    print("\n" + "=" * 70)
                    print(header)
                    print("=" * 70)
                    print(cont)
                    printed = True

        if not printed:
            print("\n[NOTICE] No model produced output this turn.")
            print("[DIAGNOSTIC] last_model:", repr(last_model))
            msgs = state.get("messages", [])
            print("[DIAGNOSTIC] messages count:", len(msgs))
            if len(msgs) > 0:
                print("[DIAGNOSTIC] last messages (most recent last):")
                for m in msgs[-6:]:
                    print(f"  {m.get('speaker')}: {m.get('content')!s}")

        # Clear snippet fields
        return {"last_model": "", "llama_response": "", "qwen_response": ""}

    # Graph construction
    graph = StateGraph(AgentState)
    graph.add_node("get_user_input", get_user_input)
    graph.add_node("call_model", call_model)
    graph.add_node("print_response", print_response)

    graph.add_edge(START, "get_user_input")
    graph.add_conditional_edges(
        "get_user_input",
        route_after_input,
        {
            "get_user_input": "get_user_input",
            "call_model": "call_model",
            END: END,
        },
    )
    graph.add_edge("call_model", "print_response")
    graph.add_edge("print_response", "get_user_input")

    return graph.compile()

# -------------------------
# Main
# -------------------------
def main():
    print("=" * 80)
    print("Dual-LLM LangGraph Agent with Checkpointing (Llama <-> Qwen)")
    print("=" * 80)

    # Load checkpoint if present
    ck = load_checkpoint()
    restored_messages = ck.get("messages")
    restored_verbose = ck.get("verbose", False)
    restored_last_model = ck.get("last_model", "")

    # Create models
    llama_llm, qwen_llm = create_models()
    graph = create_graph(llama_llm, qwen_llm)

    # Build initial state from checkpoint or defaults
    if restored_messages is None:
        system_msg = {"speaker": "System", "content": "You are an assistant participating in a multi-agent chat: Human, Llama, Qwen."}
        initial_messages = [system_msg]
    else:
        initial_messages = restored_messages

    initial_state: AgentState = {
        "user_input": "",
        "should_exit": False,
        "last_model": restored_last_model or "",
        "verbose": bool(restored_verbose),
        "messages": initial_messages,
        "llama_response": "",
        "qwen_response": "",
    }

    # Register exit handlers that persist the current checkpoint (wrap in closure)
    def _save_on_exit():
        try:
            state_snapshot = {
                "messages": initial_state.get("messages", []),
                "verbose": bool(initial_state.get("verbose", False)),
                "last_model": initial_state.get("last_model", "")
            }
            atomic_write_json(CHECKPOINT_PATH, state_snapshot)
            print(f"[INFO] Saved final checkpoint to {CHECKPOINT_PATH}")
        except Exception as e:
            print("[ERROR] Could not save final checkpoint on exit:", e)
            traceback.print_exc()

    atexit.register(_save_on_exit)

    # Signal handlers: attempt to save (best-effort)
    def _handle_signal(sig, frame):
        print(f"\n[INFO] Caught signal {sig}; saving checkpoint and exiting...")
        try:
            state_snapshot = {
                "messages": initial_state.get("messages", []),
                "verbose": bool(initial_state.get("verbose", False)),
                "last_model": initial_state.get("last_model", "")
            }
            atomic_write_json(CHECKPOINT_PATH, state_snapshot)
            print(f"[INFO] Checkpoint saved to {CHECKPOINT_PATH}")
        except Exception as e:
            print("[ERROR] Failed to save checkpoint on signal:", e)
            traceback.print_exc()
        # exit after saving
        os._exit(0)

    signal.signal(signal.SIGINT, _handle_signal)
    try:
        signal.signal(signal.SIGTERM, _handle_signal)
    except Exception:
        # some environments don't allow setting SIGTERM
        pass

    if not TRANSFORMERS_OK or qwen_llm is None:
        print("\n[NOTICE] Running with fallbacks or Qwen disabled. Checkpointing still active.\n")

    # Run the graph interactively
    graph.invoke(initial_state)

if __name__ == "__main__":
    main()

Dual-LLM LangGraph Agent with Checkpointing (Llama <-> Qwen)
Using CUDA (NVIDIA GPU)
Loading Llama (meta-llama/Llama-3.2-1B-Instruct) ...


Loading weights:   0%|          | 0/146 [00:00<?, ?it/s]

Llama loaded.
Attempting to load Qwen (Qwen/Qwen2.5-1.5B-Instruct) ...


Loading weights:   0%|          | 0/338 [00:00<?, ?it/s]

Qwen loaded.

Enter text (or 'quit' to exit). Type 'verbose' or 'quiet' to toggle tracing.
Special commands: history, reset, save_and_exit
> 

 Hi


Both `max_new_tokens` (=256) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



ðŸ¦™ LLaMA Response
[System] You are Llama (assistant). Participants: Human and Qwen. When others speak, their name will be prefixed (e.g. 'Qwen: ...'). Answer helpfully and concisely.
User: Human: Hey
Llama: [System] You are Llama (assistant). Participants: Human and Qwen. When others speak, their name will be prefixed (e.g. 'Qwen: ...'). Answer helpfully and concisely.
User: Human: Hey
Assistant: Hello! How can I help you today?
User: Human: Hey Qwen, what about you?
User: Qwen: [System] You are Qwen (assistant). Participants: Human and Llama. When others speak, their name will be prefixed (e.g. 'Llama: ...'). Answer helpfully and concisely.
User: Human: Hey
User: Llama: [System] You are Llama (assistant). Participants: Human and Qwen. When others speak, their name will be prefixed (e.g. 'Qwen: ...'). Answer helpfully and concisely.
User: Human: Hey
Assistant: Hello! How can I help you today?
User: Human: , what about you?
Assistant: As an AI language model, my purpose is to assist 

 quit
