In [1]:
#@title Install required packages
!uv pip install -U pse # proxy structuring engine
!uv pip install sentencepiece
!uv pip install accelerate
!uv pip install transformers
!uv pip install torch
!uv pip install numpy
!uv pip install bitsandbytes
!uv pip install sentencepiece
!uv pip install protobuf
!uv pip install -U tqdm
!uv pip install ipywidgets

[2mUsing Python 3.12.7 environment at /Users/jckwind/Documents/proxy-structuring-engine/.venv[0m
[2K[2mResolved [1m26 packages[0m [2min 387ms[0m[0m                                        [0m
[2mAudited [1m26 packages[0m [2min 0.10ms[0m[0m
[2mUsing Python 3.12.7 environment at /Users/jckwind/Documents/proxy-structuring-engine/.venv[0m
[2mAudited [1m1 package[0m [2min 1ms[0m[0m
[2mUsing Python 3.12.7 environment at /Users/jckwind/Documents/proxy-structuring-engine/.venv[0m
[2mAudited [1m1 package[0m [2min 2ms[0m[0m
[2mUsing Python 3.12.7 environment at /Users/jckwind/Documents/proxy-structuring-engine/.venv[0m
[2mAudited [1m1 package[0m [2min 2ms[0m[0m
[2mUsing Python 3.12.7 environment at /Users/jckwind/Documents/proxy-structuring-engine/.venv[0m
[2mAudited [1m1 package[0m [2min 1ms[0m[0m
[2mUsing Python 3.12.7 environment at /Users/jckwind/Documents/proxy-structuring-engine/.venv[0m
[2mAudited [1m1 package[0m [2min 1ms[0m[0m
[2mU

In [2]:
# @title Setup Llama 3.2 1B
import torch
from transformers import AutoTokenizer, LlamaForCausalLM

from pse.engine.structuring_engine import StructuringEngine
from pse.util.torch_mixin import PSETorchMixin


class PSE_Torch(PSETorchMixin, LlamaForCausalLM):
    pass


model_path = "meta-llama/Llama-3.2-1B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = PSE_Torch.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

model.config.pad_token_id = model.config.eos_token_id[0]
if model.generation_config:
    model.generation_config.top_p = None
    model.generation_config.top_k = 8
    model.generation_config.do_sample = True
    model.generation_config.temperature = 0.9
    model.generation_config.pad_token_id = model.config.eos_token_id[0]
    model.generation_config.max_new_tokens = 1000

model.engine = StructuringEngine(tokenizer)

In [3]:

#@title Create engine and test json generation
import json

SIMPLE_JSON_SCHEMA = {
    "type": "object",
    "properties": {"value": {"type": "number"}},
    "required": ["value"],
}
model.engine.configure(SIMPLE_JSON_SCHEMA)
prompt = (
    "Please generate a json object with the value 9.11, with the following schema:\n"
)
prompt += json.dumps(SIMPLE_JSON_SCHEMA, indent=2)

messages = [{"role": "user", "content": prompt}]
input_ids = tokenizer.apply_chat_template(
    messages, return_tensors="pt", add_generation_prompt=True
)
assert isinstance(input_ids, torch.Tensor)
input_ids = input_ids.to(model.device)
assert isinstance(input_ids, torch.Tensor)
output = model.generate(
    input_ids,
    do_sample=True,
)
print("Output:\n" + 100 * "-")
print(tokenizer.decode(output[0]))

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Output:
----------------------------------------------------------------------------------------------------
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 13 Feb 2025

<|eot_id|><|start_header_id|>user<|end_header_id|>

Please generate a json object with the value 9.11, with the following schema:
{
  "type": "object",
  "properties": {
    "value": {
      "type": "number"
    }
  },
  "required": [
    "value"
  ]
}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

{
  "value": 9.11
}


In [4]:
# @title Test advanced-json generation
ADVANCED_JSON_SCHEMA = {
    "type": "object",
    "properties": {
        "name": {"const": "metacognition"},
        "arguments": {
            "type": "object",
            "properties": {
                "chain_of_thoughts": {
                    "type": "array",
                    "description": "A sequence of step by step thoughts and reasoning.\n",
                    "items": {
                        "type": "string",
                        "minLength": 20,
                        "maxLength": 200,
                    },
                    "minItems": 1,  # floor the number of thoughts
                    "maxItems": 3,  # limit the number of thoughts
                },
            },
            "required": ["chain_of_thoughts"],
        },
    },
    "required": ["name", "arguments"],
}
raw_prompt = (
    f"This is a test of your abilities."
    f"Please format your response to follow the following schema:\n{json.dumps(ADVANCED_JSON_SCHEMA, indent=2)}\n"
    f"The assistant should use the metacognition structure to reason and respond."
)
model.engine.configure(ADVANCED_JSON_SCHEMA)
messages = [{"role": "user", "content": raw_prompt}]
input_ids = tokenizer.apply_chat_template(
    messages, return_tensors="pt", add_generation_prompt=True
)
assert isinstance(input_ids, torch.Tensor)
input_ids = input_ids.to(model.device)
assert isinstance(input_ids, torch.Tensor)
greedy_output = model.generate(
    input_ids,
    do_sample=True,
)
print("Output:\n" + 100 * "-")
print(tokenizer.decode(greedy_output[0]))


Output:
----------------------------------------------------------------------------------------------------
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 13 Feb 2025

<|eot_id|><|start_header_id|>user<|end_header_id|>

This is a test of your abilities.Please format your response to follow the following schema:
{
  "type": "object",
  "properties": {
    "name": {
      "const": "metacognition"
    },
    "arguments": {
      "type": "object",
      "properties": {
        "chain_of_thoughts": {
          "type": "array",
          "description": "A sequence of step by step thoughts and reasoning.\n",
          "items": {
            "type": "string",
            "minLength": 20,
            "maxLength": 200
          },
          "minItems": 1,
          "maxItems": 3
        }
      },
      "required": [
        "chain_of_thoughts"
      ]
    }
  },
  "required": [
    "name",
    "arguments"
  ]
}
The assistant shoul

In [5]:
# @title Test pydantic generation
from pydantic import BaseModel


class CursorPositionModel(BaseModel):
    """
    An object representing the position and click state of a cursor.

    Attributes:
        x_pos: The horizontal position of the cursor in pixels
        y_pos: The vertical position of the cursor in pixels
        left_click: Whether the left mouse button is currently pressed. Default is False.
    """

    x_pos: int
    y_pos: int
    left_click: bool = False


json_schema: dict = model.engine.configure(
    CursorPositionModel, json_delimiters=("<cursor>", "</cursor>")
)
prompt = (
    "Please use the following schema to generate a cursor position:\n"
    f"{json.dumps(json_schema, indent=2)}.\n"
    "Pretend to move the cursor to x = 100 and y = 100, with the left mouse button clicked.\n"
    "Wrap your response in <cursor>CursorPositionModel</cursor>."
)
messages = [{"role": "user", "content": prompt}]
input_ids = tokenizer.apply_chat_template(
    messages, return_tensors="pt", add_generation_prompt=True
)
assert isinstance(input_ids, torch.Tensor)
input_ids = input_ids.to(model.device)
assert isinstance(input_ids, torch.Tensor)
output = model.generate(
    input_ids,
    do_sample=True,
)
print("Output:\n" + 100 * "-")
print(tokenizer.decode(output[0]))


Output:
----------------------------------------------------------------------------------------------------
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 13 Feb 2025

<|eot_id|><|start_header_id|>user<|end_header_id|>

Please use the following schema to generate a cursor position:
{
  "name": "CursorPositionModel",
  "description": "An object representing the position and click state of a cursor.\n\nAttributes:\n    x_pos: The horizontal position of the cursor in pixels\n    y_pos: The vertical position of the cursor in pixels\n    left_click: Whether the left mouse button is currently pressed. Default is False.",
  "properties": {
    "x_pos": {
      "title": "X Pos",
      "type": "integer",
      "description": "The horizontal position of the cursor in pixels"
    },
    "y_pos": {
      "title": "Y Pos",
      "type": "integer",
      "description": "The vertical position of the cursor in pixels"
    },
    "left_cl