Skip to content

Commit

Permalink
A real parser for s-expressions
Browse files Browse the repository at this point in the history
With a tokenizer, lookahead, and recursive descent, alright.  Also,
supports strings, which I think can be pretty useful, and is
considerably more flexible as to what is or is not a 'word'.
  • Loading branch information
bdw committed Jul 11, 2017
1 parent b81cd68 commit 6854e97
Show file tree
Hide file tree
Showing 4 changed files with 138 additions and 167 deletions.
2 changes: 1 addition & 1 deletion docs/jit/todo.org
Expand Up @@ -133,7 +133,7 @@ it should crash at compile time.
Challenge is to specify the information in a way that the expr
template compiler (perl) and the expr tree processing code can use.

* Fix S-EXPR parser for tile list
* DONE Fix S-EXPR parser for tile list

I think it currently counts balancing parentheses, and it doesn't
always work when the last line doesn't end with a line. And, it
Expand Down
175 changes: 53 additions & 122 deletions tools/expr-template-compiler.pl
@@ -1,6 +1,7 @@
#!/usr/bin/env perl
use strict;
use warnings;
use Data::Dumper;

use Getopt::Long;

Expand All @@ -26,19 +27,17 @@

my $OPLIST = 'src/core/oplist'; # We need this for generating the lookup table
my $PREFIX = 'MVM_JIT_'; # Prefix of all nodes
my %MACROS; # hash table of defined macros
my ($INPUT, $OUTPUT);
my $TESTING;
GetOptions(
'test' => \$TESTING,
'prefix=s' => \$PREFIX,
'input=s' => sub { open $INPUT, '<', $_[1] or die "Could not open $_[1]"; },
'output=s' => sub { open $OUTPUT, '>', $_[1] or die "Could not open $_[1]"; }
);
$OUTPUT = \*STDOUT unless defined $OUTPUT;
# any file-like arguments left, that's our input
if (@ARGV && -f $ARGV[0]) {
open $INPUT, '<', $ARGV[0] or die "Could not open $ARGV[0]";
open $INPUT, '<', $ARGV[0] or die "Could not open $ARGV[0]: $!";
}
$INPUT = \*STDIN unless defined $INPUT;

Expand Down Expand Up @@ -178,130 +177,62 @@ sub write_template {
}


if ($TESTING) {
eval {
# Neither Test, nor Test::More are available everywhere.
# Thanks, Fedora!
require Test::More;
Test::More->import;
1;
} or do {
die "Cannot load Test::More and run tests: $@";
};

$PREFIX = 'MJ_';
my $parser = sexpr->parser();
sub parse_sexp {
return $parser->parse(shift);
}
sub check_parse {
my ($exp, $expected, $msg) = @_;
my ($parsed, $rest) = parse_sexp($exp);
is_deeply($parsed, $expected, $msg);
}
check_parse('', undef, 'empty string should parse to undef');
check_parse('()', [], 'empty parens should be empty list');
check_parse('(foo)', ['foo'], 'single item list');
check_parse('(foo bar)', [qw<foo bar>]);
check_parse('(foo (bar))', ['foo', ['bar']]);
check_parse('((foo) (bar))', [['foo'], ['bar']]);
check_parse('(0)', ['0']);

eval { compile_template(parse_sexp('()')) }; ok ($@, 'Cannot compile empty template');
eval { compile_template(parse_sexp('(foo bar)')) };
ok (!$@, 'a simple expression should work');
eval { compile_template(parse_sexp('(&offsetof foo bar)')) };
ok ($@, 'Template root must be simple expr');
eval { compile_template(parse_sexp('(foo (&sizeof 1))')) }; ok (!$@, 'use sizeof as a macro');
eval { compile_template(parse_sexp('(let: (($foo (bar)) ($quix (quam $1))) (bar $foo $quix))')) };
ok (!$@, 'let expressions should live and take more than one argument');
eval { compile_template(parse_sexp('(foo $bar)')) };
ok ($@, 'Cannot compile with undefined variables');
eval { compile_template(parse_sexp('($1 bar)')) }; ok ($@, 'First argument should be bareword');
eval { compile_template(parse_sexp('(let: (($foo (bar))) (let: (($quix (quam (bar $foo)))) (a $foo $quix)))')); };
ok (!$@, 'Nested lets are ok');
eval { compile_template(parse_sexp('(let: (($foo (bar))) (let: (($foo (bar))) (quix $foo)))')); };
ok ($@, 'Redeclarations are not');
my $simple = compile_template(parse_sexp('(foo bar)'));
is ($simple->{root}, 0, 'root of a very simple expression should be 0');
is ($simple->{desc}, '..', 'simple expression without filling or linking');
my $let = compile_template(parse_sexp('(let: (($foo (baz))) (quix $foo))'));
is ($let->{root}, 1, 'baz expression requires only one node');
is ($let->{desc}, 'r.l');
is_deeply($let->{template}, [qw<MJ_BAZ MJ_QUIX 0>]);
my $par = compile_template(parse_sexp('(foo bar $1)'));
is($par->{desc}, '..f');
is_deeply($par->{template}, [qw<MJ_FOO MJ_BAR 1>]);
my $subex = compile_template(parse_sexp('(foo bar (baz $1))'));
is ($subex->{root}, 2);
is ($subex->{desc}, '.f..l', 'Fill subexpression, link to parent');
is_deeply($subex->{template}, [qw<MJ_BAZ 1 MJ_FOO MJ_BAR 0>]);
my $complex_sexp = '(let: (($foo (bar $1))) (foo zum $2 (zaf $foo 3) (&sizeof int)))';
my ($complex_expr, $rest) = parse_sexp($complex_sexp);
my $complex = compile_template($complex_expr);
is ($complex->{root}, 5);
is ($complex->{desc}, 'rf.l...fl.');
is_deeply($complex->{template}, [qw(MJ_BAR 1 MJ_ZAF 0 3 MJ_FOO MJ_ZUM 2 2 sizeof(int))]);

eval { my ($macro, $rest) = parse_sexp('((,a) (quix quam ,a))', 1);
$parser->decl_macro('^foo', $macro); };
ok (!$@, 'macro parsing lives');
my ($macrod, $restmacro) = parse_sexp('(oh (^foo $1) hai)');
is_deeply($macrod, ['oh', ['quix', 'quam', '$1'], 'hai'], 'macro is spliced in correctly');

done_testing();
} else {
# first read the correct order of opcodes
my (@opcodes, %names);
open my $oplist, '<', $OPLIST or die "Could not open $OPLIST";
while (<$oplist>) {
next unless (m/^\w+/);
my $opcode = substr $_, 0, $+[0];
$names{$opcode} = scalar(@opcodes);
push @opcodes, $opcode;
}
close $oplist;
my (@opcodes, %names);
open my $oplist, '<', $OPLIST or die "Could not open $OPLIST";
while (<$oplist>) {
next unless (m/^\w+/);
my $opcode = substr $_, 0, $+[0];
push @opcodes, $opcode;
$names{$opcode} = $#opcodes;

# read input, which should use the expresison-list
# syntax. generate template info table and template array
my %info;
my @templates;
my $parser = sexpr->parser($INPUT);
}
close $oplist;

while (my $tree = $parser->read) {
my $keyword = shift @$tree;
if ($keyword eq 'macro:') {
my $name = shift @$tree;
$parser->decl_macro($name, $tree);
} elsif ($keyword eq 'template:') {
my $opcode = shift @$tree;
my $template = shift @$tree;
my $flags = 0;
if (substr($opcode, -1) eq '!') {
# destructive template
$opcode = substr $opcode, 0, -1;
$flags |= 1;
}
die "Opcode '$opcode' unknown" unless defined $names{$opcode};
die "Opcode '$opcode' redefined" if defined $info{$opcode};
# Validate template for consistency with expr.h node definitions
validate_template($template);
my $compiled = compile_template($template);
my $idx = scalar(@templates); # template index into array is current array top
$info{$opcode} = { idx => $idx, info => $compiled->{desc},
root => $compiled->{root},
len => length($compiled->{desc}),
flags => $flags };
# read input, which should use the expresison-list
# syntax. generate template info table and template array
my %info;
my @templates;
my $parser = sexpr->parser($INPUT);

push @templates, @{$compiled->{template}};
} else {
die "I don't know what to do with '$keyword' ";

while (my $tree = $parser->parse) {
my $keyword = shift @$tree;
if ($keyword eq 'macro:') {
my $name = shift @$tree;
$parser->decl_macro($name, $tree);
} elsif ($keyword eq 'template:') {
my $opcode = shift @$tree;
my $template = shift @$tree;
my $flags = 0;
if (substr($opcode, -1) eq '!') {
# destructive template
$opcode = substr $opcode, 0, -1;
$flags |= 1;
}
}
close $INPUT;
die "Opcode '$opcode' unknown" unless defined $names{$opcode};
die "Opcode '$opcode' redefined" if defined $info{$opcode};
# Validate template for consistency with expr.h node definitions
validate_template($template);
my $compiled = compile_template($template);
my $idx = scalar(@templates); # template index into array is current array top
$info{$opcode} = {
idx => $idx,
info => $compiled->{desc},
root => $compiled->{root},
len => length($compiled->{desc}),
flags => $flags
};

push @templates, @{$compiled->{template}};
} else {
die "I don't know what to do with '$keyword' ";
}
}
close $INPUT;

# write a c output header file.
print $OUTPUT <<"HEADER";
# write a c output header file.
print $OUTPUT <<"HEADER";
/* FILE AUTOGENERATED BY $0. DO NOT EDIT.
* Defines tables for expression templates. */
HEADER
Expand Down Expand Up @@ -334,4 +265,4 @@ sub write_template {
return &MVM_jit_expr_template_info[opcode];
}
FOOTER
}

125 changes: 82 additions & 43 deletions tools/sexpr.pm
@@ -1,71 +1,110 @@
package sexpr;
use strict;
use warnings;

# declare keyword syntax regex
my $keyword = qr/^[&\$^,]?[\w\.\[\]_:\*]+[!]?/;
my $tokenize = qr/
\A
(?<open>\() |
(?<close>\)) |
(?<space>\s+) |
(?<comment>\#.+) |
(?<string>\".*?") |
(?<word>[^\s\(\)\#"']+)
/x;

sub parser {
my ($class, $input) = @_;
return bless {
input => $input,
buffer => '',
token => undef,
match => undef,
macros => {},
}, $class;
}

sub read {
sub empty {
my $self = shift;
my $file = $self->{input};
my $expr = $self->{buffer};
my ($open, $close) = (0, 0);
while (!eof($file)) {
my $line = <$file>;
next if $line =~ m/^#|^\s*$/;
$expr .= $line;
$open = $expr =~ tr/(//;
$close = $expr =~ tr/)//;
last if ($open > 0) && ($open == $close);
length($self->{buffer}) == 0 and eof($self->{input});
}

sub current {
my $self = shift;
unless (length($self->{buffer}) or eof($self->{input})) {
$self->{buffer} = readline($self->{input});
}
$self->{buffer};
}


sub token {
my $self = shift;
my $line = $self->current;
# cache token
return @$self{'token','match'} if $self->{token};
return unless length($line);
return unless $line =~ $tokenize;
@$self{'token','match'} = %+;
}

sub _shift {
my ($self) = @_;
my $length = length($self->{match});
@$self{'token','match'} = (undef,undef);
substr($self->{buffer}, 0, $length, '');
}

sub expect {
my ($self, $expect) = @_;
my ($token, $match) = $self->token;
die "Got $token but expected $expect" unless $expect eq $token;
$self->_shift;
}

sub peek {
my ($self, $expect) = @_;
my ($token, $match) = $self->token or return;
return $match if $token eq $expect;
}

sub skip {
my ($self, @possible) = @_;
my %check = map { $_ => 1 } @possible;
while (my ($token, $match) = $self->token) {
last unless $check{$token};
$self->_shift;
}
die "End of input with unclosed template" if $open > $close;
my ($tree, $rest) = $self->parse($expr);
$self->{buffer} = $rest;
return $tree;
}

sub parse {
my ($self, $expr) = @_;
my $tree = [];
# consume initial opening parenthesis
return (undef, $expr) unless $expr =~ m/^\s*\(/;
$expr = substr($expr, $+[0]);
while ($expr) {
# remove initial space
$expr =~ s/^\s*//;
if (substr($expr, 0, 1) eq '(') {
# descend on opening parenthesis
my ($child, $rest) = $self->parse($expr);
$expr = $rest;
push @$tree, $child;
} elsif (substr($expr, 0, 1) eq ')') {
# ascend on closing parenthesis
$expr = substr $expr, 1;
last;
} elsif ($expr =~ m/$keyword/) {
# consume keyword
push @$tree, substr($expr, $-[0], $+[0] - $-[0]);
$expr = substr $expr, $+[0];
my $self = shift;
$self->skip('comment', 'space');
return if $self->empty;
$self->expect('open');
my @expr;
until ($self->peek('close')) {
die "Could not continue reading" if $self->empty;
my ($token, $what) = $self->token or
die "Could not read a token";
if ($token eq 'word' or $token eq 'string') {
push @expr, $self->_shift;
} elsif ($token eq 'open') {
push @expr, $self->parse;
} else {
die "Could not parse $expr";
$self->_shift;
}
}
if (@$tree && substr($tree->[0], 0, 1) eq '^') {
if (defined $self->{macros}->{$tree->[0]}) {
$tree = apply_macro($self->{macros}->{$tree->[0]}, $tree);
$self->_shift;
if (@expr and $expr[0] =~ m/\A\^/) {
my $macro = $self->{macros}{$expr[0]};
if (defined $macro) {
@expr = @{apply_macro($macro, \@expr)};
} else {
die "Attempted to invoke undefined macro $tree->[0]";
die "Attempt to invoke undefined macro by name: $expr[0]";
}
}
return ($tree, $expr);
return \@expr;
}

sub decl_macro {
Expand Down
3 changes: 2 additions & 1 deletion tools/tiler-table-generator.pl
Expand Up @@ -362,7 +362,8 @@ sub compute_costs {

# Collect rules from the grammar
my $parser = sexpr->parser($input);
while (my $tree = $parser->read) {

while (my $tree = $parser->parse) {
my $keyword = shift @$tree;
if ($keyword eq 'tile:') {
# (tile: name pattern symbol cost)
Expand Down

0 comments on commit 6854e97

Please sign in to comment.