# JSON parser - Tutorial

## Creating the Parser

In [29]:
from lark import Lark
# First version of the JSON grammar.
# `value` is the start rule; strings and numbers are matched by terminals
# imported from lark's `common` grammar, and whitespace is ignored.
json_parser = Lark(r"""
    value: dict
         | list
         | ESCAPED_STRING
         | SIGNED_NUMBER
         | "true" | "false" | "null"

    list : "[" [value ("," value)*] "]"

    dict : "{" [pair ("," pair)*] "}"
    pair : ESCAPED_STRING ":" value

    %import common.ESCAPED_STRING
    %import common.SIGNED_NUMBER
    %import common.WS
    %ignore WS

    """, start='value')

In [30]:
# Sample JSON document (an object holding a list) used to exercise the parser.
text = '{"key": ["item0", "item1", 3.14]}'
# Last expression of the cell, so the resulting parse tree is shown as the cell output.
json_parser.parse(text)

Tree(Token('RULE', 'value'), [Tree(Token('RULE', 'dict'), [Tree(Token('RULE', 'pair'), [Token('ESCAPED_STRING', '"key"'), Tree(Token('RULE', 'value'), [Tree(Token('RULE', 'list'), [Tree(Token('RULE', 'value'), [Token('ESCAPED_STRING', '"item0"')]), Tree(Token('RULE', 'value'), [Token('ESCAPED_STRING', '"item1"')]), Tree(Token('RULE', 'value'), [Token('SIGNED_NUMBER', '3.14')])])])])])])

In [31]:
# Parse explicitly instead of relying on IPython's `_` (the previous cell
# output): `_` only holds the tree if the preceding cell was the last one run.
print(json_parser.parse(text).pretty())

value
  dict
    pair
      "key"
      value
        list
          value	"item0"
          value	"item1"
          value	3.14



## Shaping the Tree

In [32]:
from lark import Lark
# Second version of the JSON grammar, replacing the parser defined earlier.
# Compared with the first version, `value` is prefixed with `?`, the literal
# alternatives carry `->` aliases, and strings get their own `string` rule.
# The tree printed by the next cell accordingly has no `value` nodes and
# labels its branches `string`, `number`, `true`, ...
json_parser = Lark(r"""
    ?value: dict
          | list
          | string
          | SIGNED_NUMBER      -> number
          | "true"             -> true
          | "false"            -> false
          | "null"             -> null

    list : "[" [value ("," value)*] "]"

    dict : "{" [pair ("," pair)*] "}"
    pair : string ":" value

    string : ESCAPED_STRING

    %import common.ESCAPED_STRING
    %import common.SIGNED_NUMBER
    %import common.WS
    %ignore WS

    """, start='value')

In [33]:
# Same sample document as before, extended with a `true` literal so the
# aliased branch shows up in the printed tree.
text = '{"key": ["item0", "item1", 3.14, true]}'
print(json_parser.parse(text).pretty())

dict
  pair
    string	"key"
    list
      string	"item0"
      string	"item1"
      number	3.14
      true



## Evaluating the Tree

In [34]:
from lark import Transformer

class MyTransformer(Transformer):
    """Partial evaluator: turns `list`, `pair` and `dict` branches into Python containers.

    Only these three rules have callbacks here; no callback is defined for
    `string`, `number` or the literal branches.
    """

    def list(self, items):
        # The children of a `list` branch become a plain Python list.
        return [*items]

    def pair(self, key_value):
        # A pair has exactly two children: the key and the value.
        key, value = key_value
        return (key, value)

    def dict(self, items):
        # Each child is a (key, value) tuple produced by `pair`.
        return dict(items)

In [35]:
# Parse the sample document, then apply the partial transformer.
tree = json_parser.parse(text)
# Last expression of the cell: the transformed result is shown as the cell output.
MyTransformer().transform(tree)

{Tree(Token('RULE', 'string'), [Token('ESCAPED_STRING', '"key"')]): [Tree(Token('RULE', 'string'), [Token('ESCAPED_STRING', '"item0"')]),
  Tree(Token('RULE', 'string'), [Token('ESCAPED_STRING', '"item1"')]),
  Tree('number', [Token('SIGNED_NUMBER', '3.14')]),
  Tree('true', [])]}

In [38]:
from lark import Transformer

class TreeToJson(Transformer):
    """Transformer that converts the JSON parse tree into plain Python objects."""

    def string(self, s):
        # `s` holds exactly one child: the quoted string token.
        (token,) = s
        # Slice off the surrounding quotation marks.
        return token[1:-1]

    def number(self, n):
        # `n` holds exactly one child: the numeric token.
        (token,) = n
        return float(token)

    # The builtin constructors serve directly as callbacks: each is called
    # with the branch's children and builds the matching container.
    list = list
    pair = tuple
    dict = dict

    # Literal branches ignore their children and return a constant.
    def null(self, _):
        return None

    def true(self, _):
        return True

    def false(self, _):
        return False

In [39]:
# Parse the sample document and convert the tree with the complete transformer.
tree = json_parser.parse(text)
# Last expression of the cell: the resulting Python object is shown as the cell output.
TreeToJson().transform(tree)

[Token('SIGNED_NUMBER', '3.14')]
3.14


{'key': ['item0', 'item1', 3.14, True]}

# Basic calculator

In [19]:
from lark import Lark, Transformer, v_args

# Grammar for the calculator.
# Rules are layered `sum` -> `product` -> `atom`, so `*` and `/` are nested
# below `+` and `-`; a statement is either a sum or an assignment to a NAME.
# The `->` aliases name the branches that CalculateTree provides callbacks for.
calc_grammar = """
    ?start: sum
          | NAME "=" sum    -> assign_var

    ?sum: product
        | sum "+" product   -> add
        | sum "-" product   -> sub

    ?product: atom
        | product "*" atom  -> mul
        | product "/" atom  -> div

    ?atom: NUMBER           -> number
         | "-" atom         -> neg
         | NAME             -> var
         | "(" sum ")"

    %import common.CNAME -> NAME
    %import common.NUMBER
    %import common.WS_INLINE

    %ignore WS_INLINE
"""


@v_args(inline=True)    # Affects the signatures of the methods
class CalculateTree(Transformer):
    """Evaluates calculator branches and keeps the values of assigned variables.

    Attributes:
        vars: dict mapping variable names to the values assigned to them.
    """
    # Imported into the class namespace so the operator functions act as the
    # callbacks for the `add`, `sub`, `mul`, `div` and `neg` branches.
    from operator import add, sub, mul, truediv as div, neg
    number = float

    def __init__(self):
        # Run the base initializer so the instance does not depend on
        # class-level defaults for state that Transformer.__init__ sets up.
        super().__init__()
        self.vars = {}

    def assign_var(self, name, value):
        """Store `value` under `name` and return `value`."""
        self.vars[name] = value
        return value

    def var(self, name):
        """Return the value stored for `name`.

        Raises:
            Exception: if no value has been assigned to `name`.
        """
        try:
            return self.vars[name]
        except KeyError:
            raise Exception("Variable not found: %s" % name)


# LALR parser with the transformer passed in directly; as the results printed
# by test() show, parsing then yields the computed value.
calc_parser = Lark(calc_grammar, parser='lalr', transformer=CalculateTree())
# Short alias used by main() and test().
calc = calc_parser.parse


def main():
    """Interactive prompt: read a line, evaluate it with `calc`, print the result.

    Loops until input() raises EOFError.
    """
    while True:
        try:
            line = input('> ')
        except EOFError:
            # End of input: leave the prompt loop.
            return
        print(calc(line))


def test():
    """Smoke test: assign a variable, then evaluate an expression that uses it."""
    for expression in ("a = 1+2", "1+a*-3"):
        print(calc(expression))


# if __name__ == '__main__':
#     # test()
#     main()

In [20]:
# Run the smoke test; the recorded output of this cell is 3.0 followed by -8.0.
test()

3.0
-8.0


# Creating an AST from the parse tree

In [23]:
import sys
from typing import List
from dataclasses import dataclass

from lark import Lark, ast_utils, Transformer, v_args
from lark.tree import Meta

# Module object for the namespace this code runs in; it is handed to
# ast_utils.create_transformer() further down.
this_module = sys.modules[__name__]


#
#   Define AST
#
class _Ast(ast_utils.Ast):
    """Common base class for the AST node classes defined below."""
    # This will be skipped by create_transformer(), because it starts with an underscore
    pass

class _Statement(_Ast):
    """Base class for the statement nodes (If, SetVar, Print)."""
    # This will be skipped by create_transformer(), because it starts with an underscore
    pass

@dataclass
class Value(_Ast, ast_utils.WithMeta):
    """Uses WithMeta to include line-number metadata in the meta attribute.

    NOTE(review): whether `meta` actually carries line numbers presumably
    depends on how the parser is configured -- confirm.
    """
    # Meta object of the matched tree (first field, as used with WithMeta).
    meta: Meta
    # The wrapped child: a Name node, a string, or an int (see the `value` rule).
    value: object

@dataclass
class Name(_Ast):
    """AST node for a variable reference; presumably matches the `name` rule in the grammar."""
    # The identifier (the NAME token matched by the rule).
    name: str

@dataclass
class CodeBlock(_Ast, ast_utils.AsList):
    """AST node for a sequence of statements."""
    # Corresponds to code_block in the grammar
    # AsList: the children are received as one list rather than separate fields
    # (NOTE(review): confirm against the lark ast_utils documentation).
    statements: List[_Statement]

@dataclass
class If(_Statement):
    """AST node for an `if` statement; presumably matches the `if` rule in the grammar."""
    # Condition value tested by the statement.
    cond: Value
    # Block of statements inside the braces.
    then: CodeBlock

@dataclass
class SetVar(_Statement):
    """AST node for a variable assignment."""
    # Corresponds to set_var in the grammar
    # Name of the variable being assigned (the NAME token).
    name: str
    # Value assigned to the variable.
    value: Value

@dataclass
class Print(_Statement):
    """AST node for a `print` statement; presumably matches the `print` rule in the grammar."""
    # Value to be printed.
    value: Value


class ToAst(Transformer):
    """Extra callbacks for terminals and rules that don't correspond to an AST class."""

    def STRING(self, s):
        # Drop the first and last character, i.e. the quotation marks.
        unquoted = s[1:-1]
        return unquoted

    def DEC_NUMBER(self, n):
        # Convert the matched text to a Python int.
        number = int(n)
        return number

    @v_args(inline=True)
    def start(self, x):
        # With inline=True the single child arrives as a positional argument;
        # returning it unwraps the `start` branch.
        return x

#
#   Define Parser
#

# Parser for the small statement language (if / assignment / print).
# NAME, STRING and DEC_NUMBER are imported from lark's bundled python grammar.
parser = Lark("""
    start: code_block

    code_block: statement+

    ?statement: if | set_var | print

    if: "if" value "{" code_block "}"
    set_var: NAME "=" value ";"
    print: "print" value ";"

    value: name | STRING | DEC_NUMBER
    name: NAME

    %import python (NAME, STRING, DEC_NUMBER)
    %import common.WS
    %ignore WS
    """,
    parser="lalr",
    # Record line/column information in each tree's `meta`; without this the
    # Meta object handed to WithMeta nodes such as Value stays empty.
    propagate_positions=True,
)

# Build the tree-to-AST transformer from this module's AST classes plus the
# extra callbacks in ToAst. Names starting with an underscore are skipped.
# NOTE(review): presumably each remaining AST class is matched to the grammar
# rule with the corresponding snake_case name -- confirm in the lark docs.
transformer = ast_utils.create_transformer(this_module, ToAst())

def parse(text):
    """Parse `text` with the module-level `parser`, then convert the tree with `transformer`.

    Returns whatever `transformer.transform` produces for the parse tree.
    """
    return transformer.transform(parser.parse(text))

#
#   Test
#

# if __name__ == '__main__':
#     print(parse("""
#         a = 1;
#         if a {
#             print "a is 1";
#             a = 2;
#         }
#     """))

In [25]:
# Sample program covering all three statement kinds: assignment, if and print.
text = """
    a = 1;
    if a {
        print "a is 1";
        a = 2;
    }
"""
# Last expression of the cell: the resulting AST is shown as the cell output.
parse(text)

CodeBlock(statements=[SetVar(name=Token('NAME', 'a'), value=Value(meta=<lark.tree.Meta object at 0x0000024D3A3141D0>, value=1)), If(cond=Value(meta=<lark.tree.Meta object at 0x0000024D3A314890>, value=Name(name=Token('NAME', 'a'))), then=CodeBlock(statements=[Print(value=Value(meta=<lark.tree.Meta object at 0x0000024D3A314920>, value='a is 1')), SetVar(name=Token('NAME', 'a'), value=Value(meta=<lark.tree.Meta object at 0x0000024D3A315F40>, value=2))]))])