Skip to content

Commit

Permalink
SDF -> BGF extractor is made much more advanced
Browse files Browse the repository at this point in the history
git-svn-id: https://slps.svn.sourceforge.net/svnroot/slps@950 ab42f6e0-554d-0410-b580-99e487e6eeb2
  • Loading branch information
grammarware committed Feb 11, 2011
1 parent eea62bd commit abc637a
Show file tree
Hide file tree
Showing 9 changed files with 137 additions and 50 deletions.
19 changes: 18 additions & 1 deletion shared/tools/sdf2bgf
Expand Up @@ -3,7 +3,7 @@
# Get our hands on basedir
LOCAL1=${PWD}
cd `dirname $0`
cd ../../topics/extraction/sdf2bgf
cd ../../topics/extraction/sdf
XTR=${PWD}
cd ${LOCAL1}

Expand Down Expand Up @@ -32,6 +32,23 @@ else
else
echo ")" >> _.def
cat _.def | sglr -p ${XTR}/Main.tbl | asfe -e ${XTR}/Main.eqs | unparsePT > $i
# a fix for ->
perl -pi -w -e 's/strcon2chardata\(\"\-\>\"\)/\-\>/g;' $i
# a fix for <=
perl -pi -w -e 's/strcon2chardata\(\"\<\=\"\)/\&lt;\=/g;' $i
perl -pi -w -e "s/sqstrcon2chardata\(\'\<\=\'\)/\&lt;\=/g;" $i
# a fix for <<
perl -pi -w -e 's/strcon2chardata\(\"\<\<\"\)/\&lt;\&lt;/g;' $i
# a fix for <<=
perl -pi -w -e 's/strcon2chardata\(\"\<\<\=\"\)/\&lt;\&lt;\=/g;' $i
# a fix for >>
perl -pi -w -e 's/strcon2chardata\(\"\>\>\"\)/\&gt;\&gt;/g;' $i
# a fix for &=
perl -pi -w -e 's/strcon2chardata\(\"\&\=\"\)/\&amp;\=/g;' $i
# a fix for >=
perl -pi -w -e 's/\<terminal\>\>\=\<\/terminal\>/\<terminal\>\&gt;\=\<\/terminal\>/g;' $i
# a fix for >>=
perl -pi -w -e 's/\<terminal\>\>\>\=\<\/terminal\>/\<terminal\>\&gt;\&gt;\=\<\/terminal\>/g;' $i
fi
done
rm -f _.def
Expand Down
Expand Up @@ -93,14 +93,22 @@ trafoSymbol(&N1) =
<nonterminal>&C1</nonterminal>
</bgf:expression>

[terminal]
[terminal-1]
&C1 := strcon2chardata(&Z1)
==================
trafoSymbol(&Z1) =
<bgf:expression>
<terminal>&C1</terminal>
</bgf:expression>

[terminal-2]
&C1 := sqstrcon2chardata(&Y1)
==================
trafoSymbol(&Y1) =
<bgf:expression>
<terminal>&C1</terminal>
</bgf:expression>

equations

[plus]
Expand Down Expand Up @@ -175,6 +183,16 @@ trafoSymbol((&S1 &S2 &S+1)) =
</sequence>
</bgf:expression>

%% A labelled symbol (&I1:&N1) becomes a BGF <selectable>: the label, converted
%% with idcon2chardata, is the <selector>, and the already transformed symbol
%% (&E1, the result of trafoSymbol on &N1) is nested after it.
[selector-1]
&E1 := trafoSymbol(&N1)
================
trafoSymbol(&I1:&N1) =
<bgf:expression>
<selectable>
<selector>idcon2chardata(&I1)</selector>
&E1
</selectable>
</bgf:expression>

[cons-to-label-only-cons]
guessLabel({cons(&I1)}) = <label>idcon2chardata(&I1)</label>
Expand Down
Expand Up @@ -49,4 +49,5 @@ module Main
"&S+" [0-9]+ -> Symbol+
"&Ss" [0-9]+ -> Symbols
"&Z" [0-9]+ -> StrCon
"&Y" [0-9]+ -> SingleQuotedStrCon
"&I" [0-9]+ -> IdCon
File renamed without changes.
File renamed without changes.
Expand Up @@ -14,10 +14,25 @@ sort2chardata(sort(&Head1 &Middle1 &Middles1 &Last1)) = chardata(&Char1 &Middle1

equations

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% StrCon -> CharData
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%[strcon-of-length-1]
%% Not working! But needed for &, <, >, etc.
%%strcon2chardata(strcon(\" strchar(\&) \")) = entityref(& name(letter(a) namechar(letter(m)) namechar(letter(p))) ;)

%% One-character double-quoted literals holding <, > or & are not copied
%% verbatim; each is rewritten to the corresponding XML entity reference.
[strcon-of-length-1-lt]
strcon2chardata(strcon(\" strchar(\<) \")) = &lt;

[strcon-of-length-1-gt]
strcon2chardata(strcon(\" strchar(\>) \")) = &gt;

[strcon-of-length-1-amp]
strcon2chardata(strcon(\" strchar(\&) \")) = &amp;

%%[strcon-of-length-1-minus]
%%strcon2chardata(strcon(\" strchar(\-) \")) = "-"

[default-strcon-of-length-1]
strchar(&Chr1) := &StrChar1
Expand All @@ -30,6 +45,34 @@ chardata(&Char1 &Chars1) := strcon2chardata(strcon(\" &StrChar+2 \"))
===========================================================
strcon2chardata(strcon(\" &StrChar1 &StrChar+2 \")) = chardata(&Chr1 &Char1 &Chars1)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% SingleQuotedStrCon -> CharData
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% One-character single-quoted literals holding <, > or & are rewritten to the
%% corresponding XML entity reference.
[sqstrcon-of-length-1-lt]
sqstrcon2chardata(singlequotedstrcon(\' singlequotedstrchar(\<) \')) = &lt;

[sqstrcon-of-length-1-gt]
sqstrcon2chardata(singlequotedstrcon(\' singlequotedstrchar(\>) \')) = &gt;

[sqstrcon-of-length-1-amp]
sqstrcon2chardata(singlequotedstrcon(\' singlequotedstrchar(\&) \')) = &amp;

%% Any other one-character literal whose character matches &Ch1 is copied into
%% character data as is.
[default-sqstrcon-of-length-1]
singlequotedstrchar(&Ch1) := &SQStrChar1
========================================
sqstrcon2chardata(singlequotedstrcon(\' &SQStrChar1 \')) = chardata(&Ch1)

%% Longer literal: the first character is prepended to the character data
%% obtained by converting the remaining characters recursively.
%% NOTE(review): both conditions must match, so this appears not to apply when
%% the first character is outside &Ch or when the recursive result is an entity
%% reference rather than chardata(...) -- confirm how such literals are handled.
[sqstrcon-of-length-greater-than-1]
singlequotedstrchar(&Ch1) := &SQStrChar1,
chardata(&Char1 &Chars1) := sqstrcon2chardata(singlequotedstrcon(\' &SQStrChar+2 \'))
===========================================================
sqstrcon2chardata(singlequotedstrcon(\' &SQStrChar1 &SQStrChar+2 \')) = chardata(&Ch1 &Char1 &Chars1)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% IdCon -> CharData
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

[idcon-of-length-1]
idcon2chardata(idcon(&IdHead1)) = chardata(&IdHead1)

Expand Down
56 changes: 56 additions & 0 deletions topics/extraction/sdf/Tokens.sdf
@@ -0,0 +1,56 @@
module Tokens
%% Declares the conversion functions that turn SDF tokens (sort names, string
%% literals, identifiers) into XML content, together with the lexical variables
%% used to take those tokens apart character by character.

imports
languages/sdf2/syntax/Literals
languages/sdf2/syntax/Sorts
languages/xml/syntax/XML
basic/StrCon
basic/IdentifierCon

exports
sorts
CharData
context-free syntax
%% Sort names and identifiers map to CharData; the string-literal conversions
%% are typed as Content instead of CharData.
sort2chardata(Sort) -> CharData
strcon2chardata(StrCon) -> Content
sqstrcon2chardata(SingleQuotedStrCon) -> Content
idcon2chardata(IdCon) -> CharData
%% NOTE(review): the name says "strcon" but the result sort is Content -- confirm intent.
strchar2strcon(StrChar) -> Content

hiddens
lexical syntax
%% Let's remind ourselves of the relevant lexical syntax.
%%
%% 1. languages/sdf2/syntax/Sorts
%% head:[A-Z] -> Sort {cons("one-char")}
%% head:[A-Z] middle:[A-Za-z0-9\-]* last:[A-Za-z0-9] -> Sort {cons("more-chars")}
%%
%% 2. languages/xml/syntax/XML
%% ~[\<\&]+ -> CharData {avoid}
%% ~[\<\&]* "]]>" ~[\<\&]* -> CharData {reject}
%%
%% 3. basic/StrCon
%% "\\n" -> StrChar {cons("newline")}
%% "\\t" -> StrChar {cons("tab")}
%% "\\\"" -> StrChar {cons("quote")}
%% "\\\\" -> StrChar {cons("backslash")}
%% "\\" a:[0-9]b:[0-9]c:[0-9] -> StrChar {cons("decimal")}
%% ~[\0-\31\n\t\"\\] -> StrChar {cons("normal")}
%% [\"] chars:StrChar* [\"] -> StrCon {cons("default")}

lexical variables
%% Each variable name may carry an optional numeric suffix (e.g. &Head1).
"&Head" [0-9]* -> [A-Z] %% Part of Sort
"&IdHead" [0-9]* -> [A-Za-z] %% Part of IdCon
"&Middle" [0-9]* -> [A-Za-z0-9\-] %% Part of Sort
"&Middles" [0-9]* -> [A-Za-z0-9\-]* %% Part of Sort
"&Last" [0-9]* -> [A-Za-z0-9] %% Part of Sort
"&Char" [0-9]* -> ~[\<\&] %% Part of CharData
"&Chars" [0-9]* -> ~[\<\&]* %% Part of CharData
"&StrChar" [0-9]* -> StrChar %% Part of StrCon
"&StrChar+" [0-9]* -> StrChar+ %% Part of StrCon
%% &Chr: the "normal" StrChar class from the reminder above with < and & removed
%% as well, i.e. characters acceptable both as a StrChar and inside CharData.
"&Chr" [0-9]* -> ~[\0-\31\n\t\"\\\<\&]
%% &Ch: same class as &Chr but excluding the single quote instead of the double
%% quote -- presumably the single-quoted counterpart; verify against Literals.
"&Ch" [0-9]* -> ~[\0-\31\n\t\'\\\<\&]
%% NOTE(review): unlike every other variable here, &Both takes no numeric suffix.
"&Both" -> ~[\0-\31\n\t\"\\\<\&] /\ ~[\<\&]
"&StrCon" [0-9]* -> StrCon
"&SQStrChar" [0-9]* -> SingleQuotedStrChar %% Part of SingleQuotedStrCon
"&SQStrChar+" [0-9]* -> SingleQuotedStrChar+ %% Part of SingleQuotedStrCon
File renamed without changes.
48 changes: 0 additions & 48 deletions topics/extraction/sdf2bgf/Tokens.sdf

This file was deleted.

0 comments on commit abc637a

Please sign in to comment.