TASK 1

In [10]:
# langgraph_simple_agent.py
# Simple LangGraph agent using a Hugging Face LLM.
# Runtime tracing toggle: type "verbose" to enable tracing, "quiet" to disable.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_huggingface import HuggingFacePipeline
from langgraph.graph import StateGraph, START, END
from typing import TypedDict


# =============================================================================
# DEVICE SELECTION
# =============================================================================

def get_device():
    """
    Pick best available device: cuda > mps > cpu
    """
    if torch.cuda.is_available():
        print("Using CUDA (NVIDIA GPU) for inference")
        return "cuda"
    elif torch.backends.mps.is_available():
        print("Using MPS (Apple Silicon) for inference")
        return "mps"
    else:
        print("Using CPU for inference")
        return "cpu"


# =============================================================================
# STATE DEFINITION
# =============================================================================

class AgentState(TypedDict):
    """
    State that flows through nodes.
    """
    user_input: str
    should_exit: bool
    llm_response: str
    verbose: bool


# =============================================================================
# LLM CREATION
# =============================================================================

def create_llm():
    """
    Load Llama-3.2-1B-Instruct via Hugging Face, wrap with HuggingFacePipeline.
    Apply defensive fixes to top-level langchain module attributes that langchain_core expects.
    """

    device = get_device()
    model_id = "meta-llama/Llama-3.2-1B-Instruct"

    print(f"Loading model: {model_id}")
    print("This may take a moment on first run...")

    tokenizer = AutoTokenizer.from_pretrained(model_id)

    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        dtype=torch.float16 if device != "cpu" else torch.float32,
        device_map=device if device == "cuda" else None,
    )

    if device == "mps":
        # Move model to MPS explicitly
        model = model.to(device)

    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        pad_token_id=tokenizer.eos_token_id,
    )

    # ------------------------------------------------------------------
    # Defensive compatibility fixes for langchain_core
    # Some installations of `langchain` do not expose top-level attributes
    # that langchain_core expects (verbose, debug, llm_cache). Ensure they exist.
    # ------------------------------------------------------------------
    try:
        import langchain

        # ensure verbose/debug exist
        if not hasattr(langchain, "verbose"):
            langchain.verbose = False
        if not hasattr(langchain, "debug"):
            langchain.debug = False

        # ensure llm_cache exists (langchain_core may check this)
        if not hasattr(langchain, "llm_cache"):
            # Default to None (no global cache). This is safe.
            langchain.llm_cache = None

    except Exception:
        # If anything goes wrong while trying to set attributes, ignore and continue.
        # The worst case is the subsequent LangChain calls may raise errors; those will show up later.
        pass
    # ------------------------------------------------------------------

    llm = HuggingFacePipeline(pipeline=pipe)

    print("Model loaded successfully!")
    return llm


# =============================================================================
# GRAPH CREATION
# =============================================================================

def create_graph(llm):
    """
    Build the LangGraph state graph with nodes:
      - get_user_input
      - call_llm
      - print_response
    and a conditional route after get_user_input.
    """

    # --------------------------
    # Node: get_user_input
    # --------------------------
    def get_user_input(state: AgentState) -> dict:
        if state.get("verbose", False):
            print("[TRACE] Entering node: get_user_input")

        print("\n" + "=" * 50)
        print("Enter your text (or 'quit' to exit):")
        print("=" * 50)
        print("\n> ", end="")

        user_input = input()
        lowered = user_input.strip().lower()

        # Toggle tracing
        if lowered == "verbose":
            print("Verbose tracing enabled.")
            return {"user_input": user_input, "should_exit": False, "verbose": True}

        if lowered == "quiet":
            print("Verbose tracing disabled.")
            return {"user_input": user_input, "should_exit": False, "verbose": False}

        # Exit commands
        if lowered in ["quit", "exit", "q"]:
            print("Goodbye!")
            return {"user_input": user_input, "should_exit": True}

        # Default: continue to LLM; preserve existing verbose flag if present
        return {"user_input": user_input, "should_exit": False}

    # --------------------------
    # Node: call_llm
    # --------------------------
    def call_llm(state: AgentState) -> dict:
        if state.get("verbose", False):
            print("[TRACE] Entering node: call_llm")
            print(f"[TRACE] State input user_input={repr(state.get('user_input'))}")

        user_input = state["user_input"]
        prompt = f"User: {user_input}\nAssistant:"

        if state.get("verbose", False):
            print(f"[TRACE] Prepared prompt (truncated to 120 chars): {repr(prompt[:120])}")
            print("[TRACE] Invoking LLM...")

        print("\nProcessing your input...")

        # Use the wrapped HuggingFacePipeline LLM
        response = llm.invoke(prompt)

        if state.get("verbose", False):
            print("[TRACE] LLM returned response (truncated to 120 chars):")
            print(repr(response[:120]))

        return {"llm_response": response}

    # --------------------------
    # Node: print_response
    # --------------------------
    def print_response(state: AgentState) -> dict:
        if state.get("verbose", False):
            print("[TRACE] Entering node: print_response")
            print(f"[TRACE] llm_response length = {len(state.get('llm_response', ''))}")

        print("\n" + "-" * 50)
        print("LLM Response:")
        print("-" * 50)
        print(state["llm_response"])

        if state.get("verbose", False):
            print("[TRACE] Exiting node: print_response")

        return {}

    # --------------------------
    # Router after input
    # --------------------------
    def route_after_input(state: AgentState) -> str:
        if state.get("verbose", False):
            print("[TRACE] route_after_input evaluating...")
            print(f"[TRACE] should_exit={state.get('should_exit', False)} user_input={repr(state.get('user_input'))}")

        if state.get("should_exit", False):
            if state.get("verbose", False):
                print("[TRACE] route_after_input -> END")
            return END

        if state.get("verbose", False):
            print("[TRACE] route_after_input -> call_llm")
        return "call_llm"

    # Build graph
    graph_builder = StateGraph(AgentState)

    graph_builder.add_node("get_user_input", get_user_input)
    graph_builder.add_node("call_llm", call_llm)
    graph_builder.add_node("print_response", print_response)

    graph_builder.add_edge(START, "get_user_input")

    graph_builder.add_conditional_edges(
        "get_user_input",
        route_after_input,
        {
            "call_llm": "call_llm",
            END: END
        }
    )

    graph_builder.add_edge("call_llm", "print_response")
    graph_builder.add_edge("print_response", "get_user_input")

    graph = graph_builder.compile()
    return graph


# =============================================================================
# SAVE GRAPH IMAGE (Mermaid)
# =============================================================================

def save_graph_image(graph, filename="lg_graph.png"):
    try:
        png_data = graph.get_graph(xray=True).draw_mermaid_png()
        with open(filename, "wb") as f:
            f.write(png_data)
        print(f"Graph image saved to {filename}")
    except Exception as e:
        print(f"Could not save graph image: {e}")
        print("You may need to install additional dependencies: pip install grandalf")


# =============================================================================
# MAIN
# =============================================================================

def main():
    print("=" * 50)
    print("LangGraph Simple Agent with Llama-3.2-1B-Instruct")
    print("=" * 50)
    print()

    # Create LLM (with defensive langchain fixes)
    llm = create_llm()

    # Build graph
    print("\nCreating LangGraph...")
    graph = create_graph(llm)
    print("Graph created successfully!")

    # Save visualization
    print("\nSaving graph visualization...")
    save_graph_image(graph)

    # Initial state
    initial_state: AgentState = {
        "user_input": "",
        "should_exit": False,
        "llm_response": "",
        "verbose": False
    }

    # Invoke the graph (it loops internally until user requests exit)
    graph.invoke(initial_state)


# =============================================================================
# ENTRY POINT
# =============================================================================

if __name__ == "__main__":
    main()

LangGraph Simple Agent with Llama-3.2-1B-Instruct

Using CUDA (NVIDIA GPU) for inference
Loading model: meta-llama/Llama-3.2-1B-Instruct
This may take a moment on first run...


Loading weights:   0%|          | 0/146 [00:00<?, ?it/s]

Model loaded successfully!

Creating LangGraph...
Graph created successfully!

Saving graph visualization...
Graph image saved to lg_graph.png

Enter your text (or 'quit' to exit):

> 

 verbose


Both `max_new_tokens` (=256) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Verbose tracing enabled.
[TRACE] route_after_input evaluating...
[TRACE] should_exit=False user_input='verbose'
[TRACE] route_after_input -> call_llm
[TRACE] Entering node: call_llm
[TRACE] State input user_input='verbose'
[TRACE] Prepared prompt (truncated to 120 chars): 'User: verbose\nAssistant:'
[TRACE] Invoking LLM...

Processing your input...
[TRACE] LLM returned response (truncated to 120 chars):
"User: verbose\nAssistant: verbose\n\nI'd like to inquire about the process of obtaining a patent for a new invention. Here "
[TRACE] Entering node: print_response
[TRACE] llm_response length = 1300

--------------------------------------------------
LLM Response:
--------------------------------------------------
User: verbose
Assistant: verbose

I'd like to inquire about the process of obtaining a patent for a new invention. Here is a brief summary of what I've found so far:

1. Determine if your invention meets the basic requirements of patentability under the Patent Act.
2. Conduct 

 quiet


Both `max_new_tokens` (=256) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Verbose tracing disabled.

Processing your input...

--------------------------------------------------
LLM Response:
--------------------------------------------------
User: quiet
Assistant: I'm here to help you, quiet. What's on your mind? Is there something you'd like to talk about or ask?

Enter your text (or 'quit' to exit):

> 

 quit


Goodbye!
