Skip to content

Commit

Permalink
Implement protoregexes
Browse files Browse the repository at this point in the history
  • Loading branch information
sorear committed Aug 14, 2010
1 parent 986e36b commit 3ba22dc
Show file tree
Hide file tree
Showing 10 changed files with 241 additions and 24 deletions.
2 changes: 2 additions & 0 deletions CodeGen.pm
Expand Up @@ -20,6 +20,7 @@ use 5.010;
DynMetaObject =>
{ BuildC3MRO => [m => 'Void'],
HasMRO => [m => 'Boolean'],
AddMultiRegex=> [m => 'Void'],
typeObject => [f => 'IP6'],
how => [f => 'IP6'],
local => [f => 'Dictionary<string,IP6>'],
Expand Down Expand Up @@ -68,6 +69,7 @@ use 5.010;
'System.IO.File.ReadAllText' => [m => 'System.String'],
'Lexer.RunProtoregex' => [m => 'IP6[]'],
'Kernel.CoTake' => [c => 'Variable'],
'Kernel.Take' => [c => 'Variable'],
'Kernel.GatherHelper' => [c => 'Frame'],
Expand Down
64 changes: 58 additions & 6 deletions Cursor.cs
Expand Up @@ -184,10 +184,7 @@ public sealed class Edge {
LAD sub = null;
if (method_cache.TryGetValue(name, out sub))
return sub;
IP6 method = null;
foreach (DynMetaObject dmo in cursor_class.mro)
if (dmo.local.TryGetValue(name, out method))
break;
IP6 method = cursor_class.Can(name);

if (Lexer.LtmTrace && method != null)
Console.WriteLine("+ Found method");
Expand Down Expand Up @@ -403,8 +400,6 @@ public class LADNull : LAD {
public class LADMethod : LAD {
public readonly string name;

//XXX need to handle literal prefix *soon*

public LADMethod(string name) { this.name = name; }

public override void ToNFA(NFA pad, int from, int to) {
Expand Down Expand Up @@ -452,6 +447,22 @@ public class LADMethod : LAD {
Console.WriteLine(new string(' ', indent) + "methodcall " + name);
}
}

public class LADProtoRegex : LAD {
public readonly string name;

public LADProtoRegex(string name) { this.name = name; }

public override void ToNFA(NFA pad, int from, int to) {
foreach (DynObject cand in Lexer.ResolveProtoregex(pad.cursor_class, name)) {
((LAD)cand.slots["ltm-prefix"]).ToNFA(pad, from, to);
}
}

public override void Dump(int indent) {
Console.WriteLine(new string(' ', indent) + "protorx " + name);
}
}
// These objects get put in hash tables, so don't change nstates[] after
// that happens
public class LexerState {
Expand Down Expand Up @@ -617,6 +628,47 @@ public class Lexer {
return uniqfates.ToArray();
}

public static IP6[] RunProtoregex(IP6 cursor, string name) {
DynObject dc = (DynObject)cursor;
DynObject[] candidates = ResolveProtoregex(dc.klass, name);
LAD[] branches = new LAD[candidates.Length];
for (int i = 0; i < candidates.Length; i++)
branches[i] = (LAD) candidates[i].slots["ltm-prefix"];
Lexer l = new Lexer(dc, name, branches);
Cursor c = (Cursor)Kernel.UnboxAny(cursor);
int[] brnum = l.Run(c.backing, c.pos);
IP6[] ret = new IP6[brnum.Length];
for (int i = 0; i < brnum.Length; i++)
ret[i] = candidates[brnum[i]];
return ret;
}

public static DynObject[] ResolveProtoregex(DynMetaObject cursor_class,
string name) {
IP6 proto = cursor_class.Can(name);

List<DynObject> raword = new List<DynObject>();

foreach (DynMetaObject k in cursor_class.mro) {
if (proto != k.Can(name))
continue;
if (k.multiregex == null)
continue;
List<DynObject> locord;
if (k.multiregex.TryGetValue(name, out locord))
foreach (DynObject o in locord)
raword.Add(o);
}

HashSet<IP6> unshadowed = cursor_class.AllMethodsSet();
List<DynObject> useord = new List<DynObject>();
foreach (DynObject o in raword)
if (unshadowed.Contains(o))
useord.Add(o);

return useord.ToArray();
}

public static void SelfTest() {
Lexer l = new Lexer(null, "[for|forall]", new LAD[] {
new LADStr("for"),
Expand Down
19 changes: 19 additions & 0 deletions Decl.pm
Expand Up @@ -408,6 +408,25 @@ use CgOp;
no Moose;
}

{
package Decl::HasMultiRx;
use Moose;
extends 'Decl';

has name => (is => 'ro', isa => 'Str', required => 1);
has var => (is => 'ro', isa => 'Str', required => 1);

sub preinit_code {
my ($self, $body) = @_;
CgOp::sink(
CgOp::methodcall(CgOp::letvar("how"), "add-multiregex",
CgOp::string_var($self->name), CgOp::scopedlex($self->var)));
}

__PACKAGE__->meta->make_immutable;
no Moose;
}

{
package Decl::Super;
use Moose;
Expand Down
37 changes: 37 additions & 0 deletions Kernel.cs
Expand Up @@ -201,6 +201,8 @@ public List<DynMetaObject> superclasses
public Dictionary<string, IP6> local_attr
= new Dictionary<string, IP6>();

public Dictionary<string, List<DynObject>> multiregex;

public List<DynMetaObject> mro;
public HashSet<DynMetaObject> isa;

Expand All @@ -213,6 +215,41 @@ public List<DynMetaObject> superclasses
isa.Add(this);
}

public void AddMultiRegex(string name, IP6 m) {
if (multiregex == null)
multiregex = new Dictionary<string, List<DynObject>>();
List<DynObject> dl;
if (! multiregex.TryGetValue(name, out dl)) {
dl = new List<DynObject>();
multiregex[name] = dl;
}
dl.Add((DynObject)m);
}

public IP6 Can(string name) {
IP6 m;
foreach (DynMetaObject k in mro)
if (k.local.TryGetValue(name, out m))
return m;
return null;
}

public Dictionary<string,IP6> AllMethods() {
Dictionary<string,IP6> r = new Dictionary<string,IP6>();
foreach (DynMetaObject k in mro)
foreach (KeyValuePair<string,IP6> kv in k.local)
if (!r.ContainsKey(kv.Key))
r[kv.Key] = kv.Value;
return r;
}

public HashSet<IP6> AllMethodsSet() {
HashSet<IP6> r = new HashSet<IP6>();
foreach (KeyValuePair<string,IP6> kv in AllMethods())
r.Add(kv.Value);
return r;
}

public bool HasMRO(DynMetaObject m) {
return isa.Contains(m);
}
Expand Down
59 changes: 53 additions & 6 deletions Niecza/Actions.pm
Expand Up @@ -281,6 +281,31 @@ sub regex_def { my ($cl, $M) = @_;
return;
}

my $isproto;
local $::symtext =
!defined($name) ? undef :
($name =~ /:sym<(.*)>/) ? $1 :
($name =~ /:(\w+)/) ? $1 :
undef; #XXX
my $unsymtext =
!defined($name) ? undef :
($name =~ /(.*):sym<.*>/) ? $1 :
($name =~ /(.*):\w+/) ? $1 :
undef;
if ($::MULTINESS eq 'proto') {
if ($M->{signature}[0] || !$M->{regex_block}{onlystar} || $scope ne 'has') {
$M->sorry("Only simple {*} protoregexes with no parameters are supported");
return;
}
$isproto = 1;
} else {
my $m2 = defined($::symtext) ? 'multi' : 'only';
if ($::MULTINESS && $::MULTINESS ne $m2) {
$M->sorry("Inferred multiness disagrees with explicit");
return;
}
}

if ($path && $scope ne 'our') {
$M->sorry("Putting a regex in a package requires using the our scope.");
return;
Expand All @@ -303,15 +328,18 @@ sub regex_def { my ($cl, $M) = @_;
: '&' . $name;

local $::parenid = 0;
local $::symtext =
($name =~ /:sym<(.*)>/) ? $1 :
($name =~ /:(\w+)/) ? $1 :
undef; #XXX
my ($cn, $op) = $M->{regex_block}{_ast}->term_rx;

my $ast = $M->{regex_block}{_ast};
if ($isproto) {
$ast = RxOp::ProtoRedis->new(name => $name);
}

my ($cn, $op) = $ast->term_rx;
$M->{_ast} = Op::SubDef->new(
var => $var, class => 'Regex',
method_too => ($scope eq 'has' ? $name : undef),
ltm => $M->{regex_block}{_ast}->lad,
proto_too => ($scope eq 'has' ? $unsymtext : undef),
ltm => $ast->lad,
body => Body->new(
type => 'regex',
signature => $sig->for_regex($cn),
Expand Down Expand Up @@ -1451,7 +1479,17 @@ sub scope_declarator__S_has {}
sub scope_declarator__S_state {}
sub scope_declarator__S_anon {}

sub multi_declarator { my ($cl, $M) = @_;
$M->{_ast} = ($M->{declarator} // $M->{routine_def})->{_ast};
}
sub multi_declarator__S_multi {}
sub multi_declarator__S_proto {}
sub multi_declarator__S_only {}

sub variable_declarator { my ($cl, $M) = @_;
if ($::MULTINESS) {
$M->sorry("Multi variables NYI");
}
if ($M->{trait}[0] || $M->{post_constraint}[0] || $M->{shape}[0]) {
$M->sorry("Traits, postconstraints, and shapes on variable declarators NYI");
return;
Expand Down Expand Up @@ -1508,6 +1546,9 @@ sub variable_declarator { my ($cl, $M) = @_;

sub type_declarator {}
sub type_declarator__S_constant { my ($cl, $M) = @_;
if ($::MULTINESS) {
$M->sorry("Multi variables NYI");
}
my $scope = $::SCOPE // 'my';
if (!$M->{identifier} && !$M->{variable}) {
$M->sorry("Anonymous constants NYI"); #wtf?
Expand Down Expand Up @@ -1763,6 +1804,9 @@ sub statement_control__S_use { my ($cl, $M) = @_;
# All package defs have a couple things in common - a special-ish block,
# with a special decl, and some nice runtimey code
sub package_def { my ($cl, $M) = @_;
if ($::MULTINESS) {
$M->sorry("Multi variables NYI");
}
my $scope = $::SCOPE;
if (!$M->{longname}[0]) {
$scope = 'anon';
Expand Down Expand Up @@ -1916,6 +1960,9 @@ sub get_placeholder_sig { my ($cl, $M) = @_;

# always a sub, though sometimes it's an implied sub after multi/proto/only
sub routine_def { my ($cl, $M) = @_;
if ($::MULTINESS) {
$M->sorry("Multi routines NYI");
}
if ($M->{sigil}[0] && $M->{sigil}[0]->Str eq '&*') {
$M->sorry("Contextuals NYI");
return;
Expand Down
3 changes: 3 additions & 0 deletions Op.pm
Expand Up @@ -764,6 +764,7 @@ use CgOp;
has class => (isa => 'Str', is => 'ro', default => 'Sub');
has ltm => (isa => 'Maybe[CgOp]', is => 'ro', default => undef);
has method_too => (isa => 'Maybe[Str]', is => 'ro', required => 0);
has proto_too => (isa => 'Maybe[Str]', is => 'ro', required => 0);
has exports => (isa => 'ArrayRef[Str]', is => 'ro', default => sub { [] });

sub lift_decls {
Expand All @@ -773,6 +774,8 @@ use CgOp;
code => $self->body, ltm => $self->ltm);
push @r, Decl::HasMethod->new(name => $self->method_too,
var => $self->var) if defined($self->method_too);
push @r, Decl::HasMultiRx->new(name => $self->proto_too,
var => $self->var) if defined($self->proto_too);
push @r, Decl::PackageAlias->new(slot => $self->var,
name => $self->var, path => [ 'OUR', 'EXPORT', $_ ])
for (@{ $self->exports });
Expand Down
30 changes: 30 additions & 0 deletions RxOp.pm
Expand Up @@ -356,4 +356,34 @@ use CgOp;
no Moose;
}

{
package RxOp::ProtoRedis;
use Moose;
extends 'RxOp';

has name => (isa => 'Str', is => 'ro', required => 1);
has cutltm => (isa => 'Bool', is => 'ro', default => 0);

sub op {
my ($self, $cn, $cont) = @_;
my $icn = Niecza::Actions->gensym;
$icn, Op::CallSub->new(
invocant => Op::Lexical->new(name => '&_rxproto'),
positionals => [
Op::Lexical->new(name => $icn),
$self->_close_k($cn, $cont),
Op::StringLiteral->new(text => $self->name)
]);
}

sub lad {
my ($self) = @_;
$self->cutltm ? CgOp::rawnew('LADImp') :
CgOp::rawnew('LADProtoRegex', CgOp::clr_string($self->name));
}

__PACKAGE__->meta->make_immutable;
no Moose;
}

1;
25 changes: 25 additions & 0 deletions SAFE.setting
Expand Up @@ -956,6 +956,31 @@ sub _rxcall(@list, $k) {
$k(@list.shift) while @list;
}
PRE-INIT {
ClassHOW.HOW.add-method("add-multiregex",
anon method add-multiregex($name, $rx) {
Q:CgOp { (prog
[rawcall (unwrap DynMetaObject (getattr meta-object (@ (pos 0))))
AddMultiRegex (unbox String (@ (l $name))) (@ (l $rx))]
[l True])
}
});
}
sub _rxproto($C, $k, $name) {
sub lbody($fn) { $fn($C).for($k) }
Q:CgOp {
(letn branches (rawscall Lexer.RunProtoregex
(@ (l $C)) (unbox String (@ (l $name))))
ix (int 0)
max (getfield Length (l branches))
(whileloop 0 0 (< (l ix) (l max))
(sink (subcall (@ (l &lbody))
(newscalar (getindex (l ix) (l branches))))))
(null Variable))
};
}
# A call to a subrule could return a cursor of a different type, or with
# unwanted subcaptures that need to be cleared for <.foo>
sub _rxbind($C, @names, $fun, $k) {
Expand Down
15 changes: 14 additions & 1 deletion test.pl
Expand Up @@ -2,7 +2,7 @@

use Test;

plan 320;
plan 322;

ok 1, "one is true";
ok 2, "two is also true";
Expand Down Expand Up @@ -814,3 +814,16 @@
is "foo $cow bar", "foo hi bar", '$-interpolation works';
is "foo $cow.substr(0,1) bar", "foo h bar", 'methodcall interpolation works';
}
{
my grammar G7 {
proto token tok {*}
token tok:sym<+> { <sym> }
token tok:foo { <sym> }
rule TOP { <tok> }
}
ok G7.parse('+'), "can parse :sym<> symbols";
ok G7.parse('foo'), "can parse : symbols";
}

0 comments on commit 3ba22dc

Please sign in to comment.