In [1]:
import torch

from transformers import pipeline
from transformers import AutoTokenizer
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Hugging Face Hub id of the base model shared by every cell in this notebook.
checkpoint = "meta-llama/Llama-2-7b-hf"

# Quantization settings handed to `from_pretrained`: 4-bit NF4 weights with
# double quantization enabled and bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# Reuse the end-of-sequence token for padding.
# NOTE(review): presumably needed because this tokenizer defines no pad token
# of its own -- confirm against the checkpoint's tokenizer config.
tokenizer.pad_token = tokenizer.eos_token

# `trust_remote_code=True` was dropped on purpose: Llama is a built-in
# Transformers architecture, so nothing has to be executed from the Hub
# repository, and leaving the flag on would allow arbitrary repository code to
# run at load time.
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    device_map="auto",  # let the library decide where the weights are placed
    quantization_config=bnb_config,
    pad_token_id=tokenizer.eos_token_id,  # keep the model config in sync with the tokenizer
)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [5]:
from jsonformer.format import highlight_values
from jsonformer.main import Jsonformer

# JSON schema of one inventory entry. Property order is kept exactly as
# declared, since dict order is part of the schema value passed downstream.
product_schema = {
    "type": "object",
    "properties": {
        "productId": {"type": "string"},
        "name": {"type": "string"},
        "description": {"type": "string"},
        "category": {"type": "string"},
        "price": {"type": "number"},
        "inStock": {"type": "boolean"},
        "rating": {"type": "number"},
        "images": {"type": "array", "items": {"type": "string"}},
    },
}

# A store: its name, its location and an array of inventory entries.
store_schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "location": {"type": "string"},
        "inventory": {"type": "array", "items": product_schema},
    },
}

# Top-level schema: a single object whose only property is "store".
ecomm = {"type": "object", "properties": {"store": store_schema}}


# Set up schema-constrained generation for the e-commerce schema.
# `debug=True` makes Jsonformer print its per-field trace while generating;
# `max_string_token_length=20` presumably caps each generated string value at
# 20 tokens (confirm against the Jsonformer documentation).
builder = Jsonformer(
    json_schema=ecomm,
    prompt="write a description about mike's ski shop which sells premium skis and snowboards",
    model=model,
    tokenizer=tokenizer,
    debug=True,
    max_string_token_length=20,
)

print("Generating...")
# Calling the builder runs the generation and hands back the filled-in result.
output = builder()

# Last expression of the cell: render the result with generated values highlighted.
highlight_values(output)

Generating...
[32m[generate_object] generating value for[0m [34mstore[0m
[32m[generate_object] generating value for[0m [34mname[0m
[32m[generate_string][0m [33mwrite a description about mike's ski shop which sells premium skis and snowboards
Output result in the following JSON schema format:
{"type": "object", "properties": {"store": {"type": "object", "properties": {"name": {"type": "string"}, "location": {"type": "string"}, "inventory": {"type": "array", "items": {"type": "object", "properties": {"productId": {"type": "string"}, "name": {"type": "string"}, "description": {"type": "string"}, "category": {"type": "string"}, "price": {"type": "number"}, "inStock": {"type": "boolean"}, "rating": {"type": "number"}, "images": {"type": "array", "items": {"type": "string"}}}}}}}}}
Result: {"store": {"name": "[0m
[32m[generate_string][0m [34m|Mike's Ski Shop",|[0m
[32m[generate_object] generating value for[0m [34mlocation[0m
[32m[generate_string][0m [33mwrite a descrip

In [3]:
# Properties of the flat car record, in the order they should be generated.
car_fields = {
    "make": {"type": "string"},
    "model": {"type": "string"},
    "year": {"type": "number"},
    "colors_available": {"type": "array", "items": {"type": "string"}},
}

# JSON schema describing a single car object.
car = {"type": "object", "properties": car_fields}

# Set up schema-constrained generation for the flat `car` schema, leaving all
# optional Jsonformer settings at their defaults.
builder = Jsonformer(
    json_schema=car,
    prompt="generate an example car",
    model=model,
    tokenizer=tokenizer,
)

print("Generating...")
# Calling the builder runs the generation and hands back the filled-in result.
output = builder()

# Last expression of the cell: render the result with generated values highlighted.
highlight_values(output)


Generating...
{
  make: [32m"Ford"[0m,
  model: [32m"F-150"[0m,
  year: [32m2017.0[0m,
  colors_available: [
    [32m"Blue"[0m,
    [32m"White"[0m,
    [32m"Red"[0m,
    [32m"Black"[0m
  ]
}


In [4]:
# Nested JSON schema: a "car" object (with a nested "features" object made of
# audio / safety / performance groups) next to an "owner" object.
complex_car = {
    "type": "object",
    "properties": {
        "car": {
            "type": "object",
            "properties": {
                "make": {"type": "string"},
                "model": {"type": "string"},
                "year": {"type": "number"},
                "colors": {"type": "array", "items": {"type": "string"}},
                "features": {
                    "type": "object",
                    "properties": {
                        "audio": {
                            "type": "object",
                            "properties": {
                                "brand": {"type": "string"},
                                "speakers": {"type": "number"},
                                "hasBluetooth": {"type": "boolean"},
                            },
                        },
                        "safety": {
                            "type": "object",
                            "properties": {
                                "airbags": {"type": "number"},
                                "parkingSensors": {"type": "boolean"},
                                "laneAssist": {"type": "boolean"},
                            },
                        },
                        "performance": {
                            "type": "object",
                            "properties": {
                                "engine": {"type": "string"},
                                "horsepower": {"type": "number"},
                                "topSpeed": {"type": "number"},
                            },
                        },
                    },
                },
            },
        },
        "owner": {
            "type": "object",
            "properties": {
                "firstName": {"type": "string"},
                "lastName": {"type": "string"},
                "age": {"type": "number"},
            },
        },
    },
}
# Set up schema-constrained generation for the nested `complex_car` schema,
# leaving all optional Jsonformer settings at their defaults.
builder = Jsonformer(
    json_schema=complex_car,
    prompt="generate an example Rolls Royce Phantom",
    model=model,
    tokenizer=tokenizer,
)

print("Generating...")
# Calling the builder runs the generation and hands back the filled-in result.
output = builder()

# Last expression of the cell: render the result with generated values highlighted.
highlight_values(output)


Generating...
{
  car: {
    make: [32m"Rolls Royce"[0m,
    model: [32m"Phantom"[0m,
    year: [32m2015.0[0m,
    colors: [
      [32m"Silver"[0m,
      [32m"Black"[0m,
      [32m"Red"[0m,
      [32m"Blue"[0m,
      [32m"Grey"[0m,
      [32m"White"[0m,
      [32m"Green"[0m,
      [32m"Yellow"[0m,
      [32m"Brown"[0m,
      [32m"Purple"[0m
    ],
    features: {
      audio: {
        brand: [32m"Bose"[0m,
        speakers: [32m12.0[0m,
        hasBluetooth: [32mTrue[0m
      },
      safety: {
        airbags: [32m6.0[0m,
        parkingSensors: [32mFalse[0m,
        laneAssist: [32mFalse[0m
      },
      performance: {
        engine: [32m"V12"[0m,
        horsepower: [32m563.0[0m,
        topSpeed: [32m155.0[0m
      }
    }
  },
  owner: {
    firstName: [32m"John"[0m,
    lastName: [32m"Doe"[0m,
    age: [32m32.0[0m
  }
}
