In [1]:
from llm_core.schema import to_json_schema
from dataclasses import dataclass
from pydantic import BaseModel

# Target structure for the extraction demo: four typed fields that
# to_json_schema() turns into the JSON schema displayed as this cell's output.
@dataclass
class Book: #(BaseModel):
    title: str
    summary: str
    author: str
    published_year: int

# Derive the JSON schema from the dataclass; the bare `schema` expression makes
# the notebook display it.
schema = to_json_schema(Book)
schema

{'type': 'object',
 'properties': {'title': {'type': 'string'},
  'summary': {'type': 'string'},
  'author': {'type': 'string'},
  'published_year': {'type': 'integer'}},
 'required': ['title', 'summary', 'author', 'published_year']}

In [2]:
from llm_core.parsers import LLaMACPPParser
# Model file handed to LLaMACPPParser (a GGUF quantization, going by the name).
# NOTE(review): bare file name — how llm_core resolves it to a path is not
# visible here; confirm.
model = "mistral-7b-instruct-v0.1.Q5_K_M.gguf"


# Passage the Book fields are extracted from.
text = """Foundation is a science fiction novel by American writer
Isaac Asimov. It is the first published in his Foundation Trilogy (later
expanded into the Foundation series). Foundation is a cycle of five
interrelated short stories, first published as a single book by Gnome Press
in 1951. Collectively they tell the early story of the Foundation,
an institute founded by psychohistorian Hari Seldon to preserve the best
of galactic civilization after the collapse of the Galactic Empire.
"""

# The parser is used as a context manager; parse() yields a Book instance
# (see the printed output of this cell).
# NOTE(review): n_gpu_layers=-1 / ctx_size=0 presumably mean "offload all
# layers" / "use the model's own context size" — confirm against llm_core.
with LLaMACPPParser(Book, model=model, n_gpu_layers=-1, ctx_size=0) as parser:
    book = parser.parse(text)
    print(book)

ggml_init_cublas: GGML_CUDA_FORCE_MMQ:   no
ggml_init_cublas: CUDA_USE_TENSOR_CORES: yes
ggml_init_cublas: found 1 CUDA devices:
  Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes


Book(title='Foundation', summary='Foundation is a science fiction novel by Isaac Asimov. It is the first published in his Foundation Trilogy and tells the early story of the Foundation, an institute founded by psychohistorian Hari Seldon to preserve the best of galactic civilization after the collapse of the Galactic Empire.', author='Isaac Asimov', published_year=1951)


In [3]:
#!/usr/bin/env python3
import argparse
import json
import re
import sys

# whitespace is constrained to a single space char to prevent model "running away" in
# whitespace. Also maybe improves generation quality?
SPACE_RULE = '" "?'

# Grammar rule bodies for the JSON primitive types, keyed by JSON-schema type
# name. Every body ends with `space` so trailing whitespace is consumed.
PRIMITIVE_RULES = {
    'boolean': '("true" | "false") space',
    'number': '("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space',
    'integer': '("-"? ([0-9] | [1-9] [0-9]*)) space',
    'string': r''' "\"" (
        [^"\\] |
        "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
      )* "\"" space ''',
    'null': '"null" space',
}

# Any run of characters that is not allowed in a rule name is replaced by "-".
INVALID_RULE_CHARS_RE = re.compile(r'[^a-zA-Z0-9-]+')
# Characters that must be backslash-escaped inside a double-quoted grammar
# literal. The backslash itself is included: json.dumps() emits sequences such
# as `\\`, `\n` or `\uXXXX`, and if those backslashes were copied verbatim the
# grammar parser would treat them as its own escapes and match different text
# than the JSON encoding of the value.
GRAMMAR_LITERAL_ESCAPE_RE = re.compile(r'[\r\n"\\]')
GRAMMAR_LITERAL_ESCAPES = {'\r': '\\r', '\n': '\\n', '"': '\\"', '\\': '\\\\'}


class SchemaConverter:
    """Translate a JSON schema into llama.cpp-style grammar rules.

    Usage: call :meth:`visit` on the root schema with an empty name, then
    :meth:`format_grammar` to get the grammar text.

    Handled schema shapes: ``oneOf``/``anyOf``, ``const``, ``enum``, objects
    with ``properties``, arrays with ``items`` and the primitive types listed
    in ``PRIMITIVE_RULES``. The ``required`` keyword is not interpreted: every
    listed property is emitted as mandatory, in a fixed order.
    """

    def __init__(self, prop_order):
        """Create a converter.

        Args:
            prop_order: mapping ``property name -> position`` used to order
                object properties. Properties missing from the mapping come
                after the listed ones, sorted by name.
        """
        self._prop_order = prop_order
        # Rule name -> rule body; insertion order is the output order.
        self._rules = {'space': SPACE_RULE}

    def _format_literal(self, literal):
        """Return a quoted grammar terminal matching the JSON encoding of ``literal``."""
        escaped = GRAMMAR_LITERAL_ESCAPE_RE.sub(
            lambda m: GRAMMAR_LITERAL_ESCAPES[m.group(0)], json.dumps(literal)
        )
        return f'"{escaped}"'

    def _add_rule(self, name, rule):
        """Register ``rule`` and return the name it was stored under.

        The name is sanitised first. If a *different* rule already uses that
        name, the first free numeric suffix (``name0``, ``name1``, ...) is
        appended; an identical rule is simply reused.
        """
        esc_name = INVALID_RULE_CHARS_RE.sub('-', name)
        if esc_name not in self._rules or self._rules[esc_name] == rule:
            key = esc_name
        else:
            i = 0
            while f'{esc_name}{i}' in self._rules:
                i += 1
            key = f'{esc_name}{i}'
        self._rules[key] = rule
        return key

    def visit(self, schema, name):
        """Add the rules needed for ``schema`` and return the top rule's name.

        Args:
            schema: JSON-schema fragment (a dict).
            name: rule-name prefix for this fragment; an empty string denotes
                the root, whose rule is called ``root``.

        Raises:
            AssertionError: if the fragment matches none of the supported
                shapes and its ``type`` is not a known primitive.
        """
        schema_type = schema.get('type')
        rule_name = name or 'root'

        if 'oneOf' in schema or 'anyOf' in schema:
            # Each alternative gets its own sub-rule, named by its index.
            rule = ' | '.join((
                self.visit(alt_schema, f'{name}{"-" if name else ""}{i}')
                for i, alt_schema in enumerate(schema.get('oneOf') or schema['anyOf'])
            ))
            return self._add_rule(rule_name, rule)

        elif 'const' in schema:
            return self._add_rule(rule_name, self._format_literal(schema['const']))

        elif 'enum' in schema:
            rule = ' | '.join((self._format_literal(v) for v in schema['enum']))
            return self._add_rule(rule_name, rule)

        elif schema_type == 'object' and 'properties' in schema:
            # TODO: `required` keyword
            prop_order = self._prop_order
            prop_pairs = sorted(
                schema['properties'].items(),
                # sort by position in prop_order (if specified) then by key
                key=lambda kv: (prop_order.get(kv[0], len(prop_order)), kv[0]),
            )

            rule = '"{" space'
            for i, (prop_name, prop_schema) in enumerate(prop_pairs):
                prop_rule_name = self.visit(prop_schema, f'{name}{"-" if name else ""}{prop_name}')
                if i > 0:
                    rule += ' "," space'
                rule += fr' {self._format_literal(prop_name)} space ":" space {prop_rule_name}'
            rule += ' "}" space'

            return self._add_rule(rule_name, rule)

        elif schema_type == 'array' and 'items' in schema:
            # TODO `prefixItems` keyword
            item_rule_name = self.visit(schema['items'], f'{name}{"-" if name else ""}item')
            rule = f'"[" space ({item_rule_name} ("," space {item_rule_name})*)? "]" space'
            return self._add_rule(rule_name, rule)

        else:
            assert schema_type in PRIMITIVE_RULES, f'Unrecognized schema: {schema}'
            # Primitive rules are shared under the type's own name, unless the
            # primitive is itself the root of the schema.
            return self._add_rule(
                'root' if rule_name == 'root' else schema_type,
                PRIMITIVE_RULES[schema_type]
            )

    def format_grammar(self):
        """Return all registered rules as ``name ::= body`` lines joined by newlines."""
        return '\n'.join((f'{name} ::= {rule}' for name, rule in self._rules.items()))



In [4]:
def generate_grammar(schema, prop_order=None):
    """Build the grammar text for a JSON schema.

    Args:
        schema: JSON schema (dict) to convert.
        prop_order: optional sequence of property names giving the order in
            which object properties must appear. Properties not listed follow
            the listed ones, sorted by name. With the default (``None``) all
            properties are sorted by name.

    Returns:
        The grammar as a single string, one ``name ::= rule`` entry per rule.
    """
    # SchemaConverter expects a name -> position mapping.
    order = {prop: position for position, prop in enumerate(prop_order or ())}
    converter = SchemaConverter(order)
    converter.visit(schema, '')
    return converter.format_grammar()

In [5]:
# Display the grammar for the Book schema derived in the first cell. With no
# explicit property order the object properties are emitted sorted by name.
generate_grammar(schema)

'space ::= " "?\nstring ::=  "\\"" (\n        [^"\\\\] |\n        "\\\\" (["\\\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])\n      )* "\\"" space \ninteger ::= ("-"? ([0-9] | [1-9] [0-9]*)) space\nroot ::= "{" space "\\"author\\"" space ":" space string "," space "\\"published_year\\"" space ":" space integer "," space "\\"summary\\"" space ":" space string "," space "\\"title\\"" space ":" space string "}" space'

In [6]:
import requests
def parse(text, schema, url="http://localhost:8080/completion", n_predict=512,
          temperature=0.1, timeout=None):
    """Ask a completion server for JSON that conforms to ``schema``.

    The schema is converted to a grammar with ``generate_grammar`` and sent
    with the prompt, so the generated text is constrained to that grammar.

    Args:
        text: input text, wrapped in an ``[INST] ... [/INST]`` prompt.
        schema: JSON schema (dict) describing the expected object.
        url: completion endpoint to POST to.
        n_predict: value of the ``n_predict`` request field.
        temperature: value of the ``temperature`` request field.
        timeout: passed to ``requests.post``; ``None`` waits indefinitely.

    Returns:
        The decoded JSON value found in the response's ``content`` field.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        KeyError: if the response body has no ``content`` field.
        json.JSONDecodeError: if ``content`` is not valid JSON (for example
            when generation stopped before the object was complete).
    """
    headers = {"Content-Type": "application/json"}
    grammar = generate_grammar(schema)

    prompt = f"""<s>[INST]
    {text}
    [/INST]
    """

    data = {
        "prompt": prompt,
        "n_predict": n_predict,
        "temperature": temperature,
        "grammar": grammar,
    }
    response = requests.post(url, headers=headers, json=data, timeout=timeout)
    # Surface HTTP failures (wrong path, server error) as such instead of
    # letting them show up later as a confusing KeyError / JSON error.
    response.raise_for_status()
    return json.loads(response.json()["content"])

In [7]:
# Experiment: send the grammar to a chat-style endpoint instead of /completion.
# NOTE(review): the recorded output of this cell is <Response [404]>, so the
# server did not serve this path. llama.cpp's HTTP server documents its
# OpenAI-compatible chat route as /v1/chat/completions — confirm and update.
url = "http://localhost:8080/chat/completion"
headers = {"Content-Type": "application/json"}
grammar = generate_grammar(schema)

# NOTE(review): the prompt body looks like an accidentally pasted server
# launch command rather than text to analyse — confirm what should be sent.
prompt = f"""<s>[INST]./server -t 4 -ngl 16 -m /media/maitre/HDD1/Models/mistral-7b-instruct-v0.1.Q5_K_M.gguf -c 4096
"""

data = {
    "messages":[{"role": "user", "content": str(prompt)}],
    "n_predict": 512,
    "temperature": 0.1,
    "grammar": grammar,
}
response = requests.post(url, headers=headers, json=data)
# Prints only the Response repr (status code), not the response body.
print(response)

<Response [404]>


In [8]:
# Same passage as in the LLaMACPPParser cell.
text = """Foundation is a science fiction novel by American writer
Isaac Asimov. It is the first published in his Foundation Trilogy (later
expanded into the Foundation series). Foundation is a cycle of five
interrelated short stories, first published as a single book by Gnome Press
in 1951. Collectively they tell the early story of the Foundation,
an institute founded by psychohistorian Hari Seldon to preserve the best
of galactic civilization after the collapse of the Galactic Empire.
"""

# Redefinition of Book: shadows the dataclass of the same name from the first
# cell; the fields are identical, the only addition is schema().
@dataclass
class Book: #(BaseModel):
    title: str
    summary: str
    author: str
    published_year: int

    @classmethod
    def schema(cls):
        """Return the JSON schema produced by to_json_schema for this class."""
        return to_json_schema(cls)


# parse() returns the decoded JSON object as a plain dict (see printed output).
data = parse(text, Book.schema())
print(data)

{'author': 'Isaac Asimov', 'published_year': 1951, 'summary': 'Foundation is a science fiction novel by American writer Isaac Asimov. It is the first published in his Foundation Trilogy (later expanded into the Foundation series). Foundation is a cycle of five interrelated short stories, first published as a single book by Gnome Press in 1951. Collectively they tell the early story of the Foundation, an institute founded by psychohistorian Hari Seldon to preserve the best of galactic civilization after the collapse of the Galactic Empire.', 'title': 'Foundation'}


In [9]:
# Rebuild a Book from the parsed dict; its keys match the dataclass fields.
Book(**data)

Book(title='Foundation', summary='Foundation is a science fiction novel by American writer Isaac Asimov. It is the first published in his Foundation Trilogy (later expanded into the Foundation series). Foundation is a cycle of five interrelated short stories, first published as a single book by Gnome Press in 1951. Collectively they tell the early story of the Foundation, an institute founded by psychohistorian Hari Seldon to preserve the best of galactic civilization after the collapse of the Galactic Empire.', author='Isaac Asimov', published_year=1951)

In [12]:
from typing import List

# Second extraction target. `categories` is a list of strings, so this schema
# presumably exercises the converter's array rule — the recorded output shows
# a JSON list for it.
@dataclass
class Publication: #(BaseModel):
    title: str
    main_topic: str
    summary_in_50_words: str
    categories: List[str]

    @classmethod
    def schema(cls):
        """Return the JSON schema produced by to_json_schema for this class."""
        return to_json_schema(cls)

# Paper abstract used as input; sent to the model verbatim.
text = """
Abstract

Selecting the “right” amount of information to include in a summary is
a difficult task. A good summary should be detailed and entity-centric
without being overly dense and hard to follow.

To better understand this tradeoff, we solicit increasingly dense GPT-4
summaries with what we refer to as a “Chain of Density” (CoD) prompt.

Specifically, GPT-4 generates an initial entity- sparse summary before
iteratively incorporating missing salient entities without increasing the
length. Summaries generated by CoD are more abstractive, exhibit more
fusion, and have less of a lead bias than GPT-4 summaries generated by
a vanilla prompt.

We conduct a human preference study on 100 CNN DailyMail articles and
find that that humans prefer GPT-4 summaries that are more dense than
those generated by a vanilla prompt and almost as dense as human
written summaries.

Qualitative analysis supports the notion that there exists a tradeoff between
informativeness and readability. 500 annotated CoD summaries, as well as
an extra 5,000 unannotated summaries, are freely available on HuggingFace.
"""

# Overwrites the `data` dict from the Book example with the Publication result.
data = parse(text, Publication.schema())
print(data)

{'categories': ['Natural Language Processing', 'Summarization'], 'main_topic': 'Summary Generation', 'summary_in_50_words': 'This study investigates the tradeoff between informativeness and readability in summary generation. GPT-4 generates increasingly dense summaries using a Chain of Density (CoD) prompt, which leads to more abstractive, fused summaries with less lead bias. Human preference studies show that CoD summaries are preferred over vanilla prompts and almost as dense as human-written summaries.', 'title': 'Tradeoff between Informativeness and Readability in Summary Generation using GPT-4'}


In [13]:
# Rebuild a Publication from the parsed dict; its keys match the dataclass fields.
Publication(**data)

Publication(title='Tradeoff between Informativeness and Readability in Summary Generation using GPT-4', main_topic='Summary Generation', summary_in_50_words='This study investigates the tradeoff between informativeness and readability in summary generation. GPT-4 generates increasingly dense summaries using a Chain of Density (CoD) prompt, which leads to more abstractive, fused summaries with less lead bias. Human preference studies show that CoD summaries are preferred over vanilla prompts and almost as dense as human-written summaries.', categories=['Natural Language Processing', 'Summarization'])

In [None]:
from llama_cpp import Llama
# Load a local GGUF model through llama-cpp-python using the "chatml" chat format.
# NOTE(review): absolute, machine-specific model path — consider making the
# model location configurable.
# NOTE(review): "chatml" may not be the prompt format this Mixtral instruct
# model was trained with — confirm.
llm = Llama(model_path="/media/maitre/HDD1/Models/mixtral-8x7b-instruct-v0.1.Q3_K_M.gguf", chat_format="chatml")
# Chat completion with response_format requesting a JSON object described by
# an inline schema: one required string property, "team_name".
llm.create_chat_completion(
    messages=[
        {
            "role": "system",
            "content": "You are a helpful assistant that outputs in JSON.",
        },
        {"role": "user", "content": "Who won the world series in 2020"},
    ],
    response_format={
        "type": "json_object",
        "schema": {
            "type": "object",
            "properties": {"team_name": {"type": "string"}},
            "required": ["team_name"],
        },
    },
    temperature=0.7,
)