Skip to content
This repository has been archived by the owner on Oct 23, 2019. It is now read-only.

Commit

Permalink
- regexps group names are allowed to start with a number
Browse files Browse the repository at this point in the history
Originally checked in by DEVSENSE\Miloslav on 2011-11-07 19:26:23.733 as Changeset 2125

git-tfs-id: [https://devsense.visualstudio.com/DefaultCollection]$/Phalanger/Main;C6277
  • Loading branch information
Miloslav committed Feb 7, 2015
1 parent 3b3a7e1 commit 2320c53
Showing 1 changed file with 126 additions and 8 deletions.
134 changes: 126 additions & 8 deletions Source/ClassLibrary/RegExpPerl.cs
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,9 @@ private static int Match(object pattern, object data, out PhpArray matches, Matc
for (int i = 0; i <= GetLastSuccessfulGroup(m.Groups); i++)
{
groupName = converter.Regex.GroupNameFromNumber(i);
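// strip PerlRegExpConverter.GroupPrefix from the beginning of the group name
// NOTE(review): unnamed groups presumably come back as their number, so "10" would become "0" here and pass the check below - confirm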
groupName = groupName.Remove(0, PerlRegExpConverter.GroupPrefix.Length);

if (!String.IsNullOrEmpty(groupName) && groupName != i.ToString())
{
matches[groupName] = NewArrayItem(m.Groups[i].Value, m.Groups[i].Index, (flags & MatchFlags.OffsetCapture) != 0);
Expand Down Expand Up @@ -1139,11 +1142,16 @@ private static int FillMatchesArrayAllPatternOrder(Regex r, Match m, ref PhpArra

// named group?
string name;
if ((name = r.GroupNameFromNumber(i)) != String.Empty && name != i.ToString(CultureInfo.InvariantCulture))
{
if (j == 0) matches[name] = new PhpArray();
((PhpArray)matches[name])[j] = arr;
}

name = r.GroupNameFromNumber(i);
// strip PerlRegExpConverter.GroupPrefix from the beginning of the group name
name = name.Remove(0, PerlRegExpConverter.GroupPrefix.Length);

if (!String.IsNullOrEmpty(name) && name != i.ToString())
{
if (j == 0) matches[name] = new PhpArray();
((PhpArray)matches[name])[j] = arr;
}

if (j == 0) matches[i] = new PhpArray();
((PhpArray)matches[i])[j] = arr;
Expand Down Expand Up @@ -1180,9 +1188,15 @@ private static int FillMatchesArrayAllSetOrder(Regex r, Match m, ref PhpArray ma
object arr = NewArrayItem(m.Groups[j].Value, m.Groups[j].Index, addOffsets);

// named group?
string name;
if (j > 0 && (name = r.GroupNameFromNumber(j)) != String.Empty)
pa[name] = arr;
string name = r.GroupNameFromNumber(j);
// strip PerlRegExpConverter.GroupPrefix from the beginning of the group name
name = name.Remove(0, PerlRegExpConverter.GroupPrefix.Length);

if (!String.IsNullOrEmpty(name) && name != j.ToString())
{
pa[name] = arr;
}


pa[j] = arr;
}
Expand Down Expand Up @@ -1820,6 +1834,13 @@ private static string ConvertReplacement(int max_number, string replacement)
/// </summary>
internal sealed class PerlRegExpConverter
{
/// <summary>
/// Prefix put in front of the name of every named group when a Perl regexp is converted,
/// so that group names starting with a number can be used.
/// Code that reads group names back from the converted regex removes this prefix again.
/// </summary>
internal const string GroupPrefix = "a";


#region Properties

/// <summary>
Expand Down Expand Up @@ -2359,6 +2380,14 @@ private static string ConvertRegex(string perlExpr, PerlRegexOptions opt, Encodi
}

// In perl regexps, named groups are written like this: "(?P<name> ... )"
// (\k<name>...)
// (\k'name'...)
// (\k{name}...)
// (\g{name}...)
// (?'name'...)
// (?<name>...)
// (?P=name)

// If the group is starting here, we need to skip the 'P' character (see state 4)
switch (inner_state)
{
Expand All @@ -2381,6 +2410,20 @@ private static string ConvertRegex(string perlExpr, PerlRegexOptions opt, Encodi
inner_state = 3;
continue; //skip 'P' from resulting pattern
}
else if (ch == '<')
{
inner_state = 15;
break;
}
else if (ch == '\'')
{
i++;
result.Append('\'');
result.Append(GroupPrefix);

inner_state = 0;
continue;
}

inner_state = 0;
break;
Expand All @@ -2395,6 +2438,15 @@ private static string ConvertRegex(string perlExpr, PerlRegexOptions opt, Encodi
{
result.Append('P');
}
else if (ch == '<')
{
i++;
result.Append('<');
result.Append(GroupPrefix);

inner_state = 0;
continue;
}

inner_state = 0;
break;
Expand Down Expand Up @@ -2430,6 +2482,7 @@ private static string ConvertRegex(string perlExpr, PerlRegexOptions opt, Encodi
{
// it can be named group
result.Append("k<");
result.Append(GroupPrefix);
inner_state = 10;

//result.Append("g{"); // unexpected character after '/g{', so put it back to pattern
Expand All @@ -2450,6 +2503,15 @@ private static string ConvertRegex(string perlExpr, PerlRegexOptions opt, Encodi
{
inner_state = 8;
}
else
{
// the name of the group starts with a number:
// turn it into a named back reference ("k<") and put GroupPrefix in front of the name
result.Insert(result.Length - 1,"k<");
result.Insert(result.Length - 1, GroupPrefix);
inner_state = 14;
}



break;
Expand All @@ -2462,6 +2524,14 @@ private static string ConvertRegex(string perlExpr, PerlRegexOptions opt, Encodi
inner_state = 9;
continue; // skip '}' from resulting pattern
}
else
{
// the name of the group starts with a number:
// turn it into a named back reference ("k<") and put GroupPrefix in front of the name
result.Insert(result.Length - 1, "k<");
result.Insert(result.Length - 2, GroupPrefix);
inner_state = 14;
}

// there are just 99 back references possible

Expand Down Expand Up @@ -2500,8 +2570,27 @@ private static string ConvertRegex(string perlExpr, PerlRegexOptions opt, Encodi
i++;
inner_state = 10;
result.Append('<');
result.Append(GroupPrefix);
continue; // skip '{' from resulting pattern
}
else if (ch == '<')
{
i++;
result.Append('<');
result.Append(GroupPrefix);

inner_state = 0;
continue;
}
else if (ch == '\'')
{
i++;
result.Append('\'');
result.Append(GroupPrefix);

inner_state = 0;
continue;
}

inner_state = 0;

Expand All @@ -2516,6 +2605,7 @@ private static string ConvertRegex(string perlExpr, PerlRegexOptions opt, Encodi

// add '<' so it is '\k<'
result.Append('<');
result.Append(GroupPrefix);

inner_state = 13;

Expand All @@ -2533,6 +2623,34 @@ private static string ConvertRegex(string perlExpr, PerlRegexOptions opt, Encodi

break;

case 14:// '\g{[0-9].*?'

if (ch == '}')
{
i++;
inner_state = 9;
result.Append(">");
continue; // skip '}' from resulting pattern
}

break;

case 15:// (?<

// Add the group prefix only if this is not a lookbehind assertion:
//(?<! negative
//(?<= positive
if (ch != '!' && ch != '=')
{
result.Append(GroupPrefix);

}

inner_state = 0;

break;



default: inner_state = 0; break;
}
Expand Down

0 comments on commit 2320c53

Please sign in to comment.