Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

全新的parser #11

Closed
RubyLouvre opened this issue Jun 30, 2013 · 17 comments
Closed

全新的parser #11

RubyLouvre opened this issue Jun 30, 2013 · 17 comments

Comments

@RubyLouvre
Copy link
Owner

为了解决soom提出的BUG, 由于短路与或短路或导致一开始无法进入某些分支,就无法取得其依赖关系。另外,之前的使用with,效率有点低。

<!DOCTYPE HTML>
<html id="html">
<head>
  <meta charset="utf-8">
  <title>测试用例</title>
</head>
<body>
  <!-- Reproduction page: two buttons (ms-click="one" / "two") flip test1 and test2 to true
       in opposite orders, while the interpolations below display short-circuit
       expressions (&& and ||) built from both properties. -->
  <div ms-controller="test">
    <button ms-click="one">测试1</button>
    <button ms-click="two">测试2</button>
    <br>test1: {{test1}}
    <br>test2: {{test2}}
    <br>上边两个变量变true的时候,下面的表达式是false的
    <br>test1 && test2, result: {{test1 && test2}} and {{test2 && test1}}
    <br>ps: {{test2 || test1}}
  </div>

  <script src="avalon.mobile.js"></script>
  <script>
  // Define the "test" view model; both flags start out false.
  avalon.define("test", function(vm) {
    vm.test1 = false;
    vm.test2 = false;
    // Handler "one": reset both flags, then set test1 before test2.
    vm.one = function() {
      vm.test1 = false;
      vm.test2 = false;

      vm.test1 = true;
      vm.test2 = true;
    };
    // Handler "two": reset both flags, then set test2 before test1.
    vm.two = function() {
      vm.test1 = false;
      vm.test2 = false;

      vm.test2 = true;
      vm.test1 = true;
    };
  });
  avalon.scan();
  </script>
</body>
</html>

新的parser, 设法取得里面的变量,然后把所有赋值语句放在前面,从而解决这问题

    // Comma-separated list of words that must never be reported as variables.
    var KEYWORDS =
            // keywords
            'break,case,catch,continue,debugger,default,delete,do,else,false'
            + ',finally,for,function,if,in,instanceof,new,null,return,switch,this'
            + ',throw,true,try,typeof,var,void,while,with'

            // reserved words
            + ',abstract,boolean,byte,char,class,const,double,enum,export,extends'
            + ',final,float,goto,implements,import,int,interface,long,native'
            + ',package,private,protected,public,short,static,super,synchronized'
            + ',throws,transient,volatile'

            // ECMA 5 - use strict
            + ',arguments,let,yield'

            + ',undefined';
    // Removes block comments, line comments (the newline is kept so the tokens around
    // the comment stay separated), string literals (a backslash always consumes the
    // following character, so escaped quotes do not end the literal) and
    // property-access chains such as ".foo.bar".
    var REMOVE_RE = /\/\*[\w\W]*?\*\/|\/\/[^\n]*|'(?:[^'\\]|\\[\w\W])*'|"(?:[^"\\]|\\[\w\W])*"|[\s\t\n]*\.[\s\t\n]*[$\w\.]+/g;
    // Everything that cannot be part of an identifier becomes a separator.
    var SPLIT_RE = /[^\w$]+/g;
    // Matches a whole comma-delimited token that is a keyword. Token edges are anchored
    // on commas instead of \b, because \b also fires between "$" and a letter and would
    // turn "$this" into "$".
    var KEYWORDS_RE = new RegExp('(?:^|,)(?:' + KEYWORDS.replace(/,/g, '|') + ')(?=,|$)', 'g');
    // Matches a whole token that starts with a digit; anchored on commas so the "1" in
    // "a$1" is left alone.
    var NUMBER_RE = /^\d[^,]*|,\d[^,]*/g;
    // Leading and trailing separators left over after the removals above.
    var BOUNDARY_RE = /^,+|,+$/g;
    /**
     * Extracts the free identifier names used in a JavaScript expression.
     *
     * @param {string} code - expression source text
     * @returns {string[]} identifier names in source order (duplicates are kept);
     *     keywords, numeric tokens, string contents, comments and property names
     *     after a dot are excluded. Returns [] when nothing is left.
     */
    var getVariables = function(code) {
        code = code
                .replace(REMOVE_RE, '')

                .replace(SPLIT_RE, ',')


                .replace(KEYWORDS_RE, '')
                .replace(NUMBER_RE, '')
                .replace(BOUNDARY_RE, '');

        code = code ? code.split(/,+/) : [];

        return code;
    };


    /**
     * Builds "name = vmN.name" assignment fragments for every pending variable that
     * is an own property of the given scope, and removes those variables from the
     * pending list.
     *
     * @param {string[]} vars - pending variable names; matched entries are spliced out
     * @param {Object} scope - scope object probed with hasOwnProperty
     * @param {number} index - position of the scope, used to form the "vm" + index name
     * @returns {string[]} assignment fragments, collected from the end of vars backwards
     */
    function addScope(vars, scope, index) {
        var assignments = []
        var accessor = " = vm" + index + "."
        var pos = vars.length - 1
        // Walk backwards so splice() never shifts an entry that is still to be visited.
        // The walk ends at index -1 (undefined) or at the first falsy entry.
        while (vars[pos]) {
            var ident = vars[pos]
            if (scope.hasOwnProperty(ident)) {
                assignments.push(ident + accessor + ident)
                vars.splice(pos, 1)
            }
            pos -= 1
        }
        return assignments

    }
    /**
     * Compiles an expression into an evaluator function whose parameters are the
     * scopes (vm0, vm1, ...). Every variable found in a scope is copied into a local
     * up front, so all of them are read regardless of short-circuit operators.
     *
     * @param {string} code - expression source text
     * @param {Object[]} scopes - scope objects, searched in order
     * @returns {Function} function (vm0, vm1, ...) returning the value of the expression
     */
    function getValueFunction(code, scopes) {
        var vars = getVariables(code), variables = [], uniq = {};
        var hasOwn = Object.prototype.hasOwnProperty
        // Drop duplicates. An own-property test is required here: a plain lookup such as
        // uniq["valueOf"] finds the inherited Object.prototype member and would wrongly
        // treat that name as already seen.
        vars = vars.filter(function(el) {
            if (hasOwn.call(uniq, el)) {
                return false
            }
            uniq[el] = 1
            return true
        })
        var args = []
        for (var i = 0, n = scopes.length; i < n; i++) {
            // Later scopes are only consulted while some variable is still unresolved.
            if (vars.length) {
                args.push("vm" + i)
                variables.push.apply(variables, addScope(vars, scopes[i], i))
            }
        }
        var pre = variables.join(", ")
        if (pre) {
            pre = "var " + pre
        }
        args.push(pre + "\nreturn " + code)
        // NOTE(review): the Function constructor evaluates `code` as JavaScript; callers
        // must only pass trusted expression text.
        return Function.apply(Function, args)
    }
    // Demo: a single scope object exposing the two properties used by the expression.
    var obj = {test1:23, test2:"sdfsd"}

 // Compile the expression against that one scope.
 var fn = getValueFunction("test1 && test2 ",[obj])  

 // Print the source text of the generated function.
 console.log(fn+"")

 // Evaluate the expression, passing the scope as the first argument.
 fn(obj)

生成的求值函数为:

function anonymous(vm0) {
      var test2 = vm0.test2, test1 = vm0.test1
      return test1 && test2 
}

如果存在过滤器,那么应该生成

    function anonymous(vm0, filters123143213) {
        var test1= vm0.test1
        var ret123456 = test1
        if (filters123143213.html) {
            ret123456 = filters123143213.html(ret123456)
        }
        return ret123456
    }
@RubyLouvre
Copy link
Owner Author

function anonymous(vm0) {
'use strict';
var test2 = vm0.test2, test1 = vm0.test1
return test1 && test2
}



function anonymous(vm0) {
'use strict';
var test1 = vm0.test1, test2 = vm0.test2
return test2 && test1
}



function anonymous(vm0) {
'use strict';
var test1 = vm0.test1, test2 = vm0.test2
return test2 || test1
}

这是新parser生成的求值函数

function anonymous(test1372575919386) {
with(test1372575919386){
var ret1372575919386 = test1 && test2
}

return ret1372575919386
}


function anonymous(test1372575919387) {
with(test1372575919387){
var ret1372575919387 = test2 && test1
}

return ret1372575919387
}


function anonymous(test1372575919389) {
with(test1372575919389){
var ret1372575919389 = test2 || test1
}

return ret1372575919389
}

这是旧parser生成的求值函数

@yxnino
Copy link

yxnino commented Jul 4, 2013

var rule = item.rule;
var names = rule.replace(/["|']\w*["|']/g, ' ').match(/\w+/g); // 先replace掉字符串,匹配到的变量名赋给names

/*去掉重复的项和JS关键字*/
var unique = {};
names = dojo.filter(names, function(name) {
    if (!unique[name]) {
        unique[name] = true;
        return dojo.indexOf(smartdot.Validator.keyword, name) === -1;
    } else {
        return false;
    }
});

var param = [];
var arg = [];
dojo.forEach(names, function(name) {
    if (name in smartdot.Validator.cache.index) {
        param.push(smartdot.Validator.cache.get(name).get('value'));
        arg.push(name);
    }
});

return item.lambda('(' + arg.join(', ') + ') => (' + item.rule + ')').apply(item, param);

跟我之前做的一个小功能思路一样

@Gaubee
Copy link

Gaubee commented Jul 5, 2013

这里可以帮你省下几个字符。

按你的需求来说,是要提取类似全局变量的变量名,除了eval之类的使用字符串生产变量的东西。
所以,在遇到那些关键字时,直接用try、catch来走一遍并缓存成相应的hash表,就可以了,并不会影响性能,毕竟缓存了。

贴上地址:https://gist.github.com/Gaubee/5938254

因为有用到循环,所以顺便把你getValueFunction里面那个filter写在一起。

@Gaubee
Copy link

Gaubee commented Jul 6, 2013

另外细看你的正则,有坑:单双引号的字符串匹配竟然就这样写:'[^']*'|"[^"]*"
遇到"nihao\"dajiahao"这种带转义引号的字符串就出问题了。

比如

"nihao\"dajiahao"  ; test1&&test2;"nihao\"dajiahao" 

中间那部分就直接被过滤掉了……

// Matches a double-quoted string literal. A backslash always consumes the character
// after it, so both \" and \\ are handled and a literal ending in an escaped
// backslash (e.g. "a\\") is not merged with the next literal.
var DoubleQuotedString = /"(?:\\.|[^"\\\n])*"/g;
// Same rule for single-quoted string literals.
var SingleQuotedString = /'(?:\\.|[^'\\\n])*'/g;

@RubyLouvre
Copy link
Owner Author

abcd 搞出的抽取变量的函数,我做了部分调整

            // Matches one run of characters that contains neither a comma nor a space.
            var rword = /[^, ]+/g
            /**
             * Turns a list of names into a lookup object.
             *
             * @param {string|Array} array - names, either as an array or as a string
             *     separated by commas and/or spaces
             * @param {*} [val] - value stored under every key; 1 when omitted (undefined)
             * @returns {Object} plain object mapping each name to the value
             */
            function oneObject(array, val) {
                var keys = typeof array === "string" ? (array.match(rword) || []) : array
                var fill = val === void 0 ? 1 : val
                var lookup = {}
                var total = keys.length
                var idx = 0
                while (idx < total) {
                    lookup[keys[idx]] = fill
                    idx += 1
                }
                return lookup
            }
            /**
             * Tokenizes a JavaScript expression and collects the identifiers that are
             * neither keywords nor property names following a dot.
             *
             * @param {string} s - source text to scan
             * @returns {string[]} identifiers in source order (duplicates are kept)
             * @throws {Error} "error: <index>" when a character matches no token pattern
             */
            function getVars(s) {
                var s_kws = " break case catch continue default delete do else finally for function if in instanceof new return switch this throw try typeof var void while with null true false"
                        + " abstract boolean byte char class const debugger double enum export extends final float goto implements import int interface long native package private protected public short static super synchronized throws transient volatile ";

                var keywords = oneObject(s_kws)
                // Own-property test so identifiers such as "constructor" or "toString" are
                // not mistaken for keywords through Object.prototype.
                var hasOwn = Object.prototype.hasOwnProperty
                var re_ws = /\s+/;
                var re_comm = /\/\/[^\r\n\u2028\u2029]*|\/\*(?:\/|\**[^*/])*\*+\//;
                var re_id = /[a-zA-Z_$][\w$]*/;
                var re_punc = /~|\}|\|\||\|=|\||\{|\^=|\^|\]|\[|\?|>>>=|>>>|>>=|>>|>=|>|===|==|=|<=|<<=|<<|<|;|:|\.|-=|--|-|,|\+=|\+\+|\+|\*=|\*|\)|\(|&=|&&|&|%=|%|!==|!=|!|\/=?/;
                var re_num = /0x[\dA-Fa-f]+|(?:(?:0|[1-9]\d*)(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?/;
                var re_str = /"(?:[^"\\]|\\[\s\S])*"|'(?:[^'\\]|\\[\s\S])*'/;

                // One capture group per token class, plus a trailing empty alternative so
                // exec() always matches; an empty match marks either the end of input or
                // an unrecognised character.
                var re = function(a) {
                    for (var i = 0; i < a.length; ++i)
                        a[i] = '(' + a[i].source + ')';
                    return RegExp(a.join('|') + '|', 'g');
                }([re_ws, re_comm, re_id, re_punc, re_num, re_str]);

                var beforeIsDot = false;
                var r = [];
                var t;

                function f(all, ws, comm, id, punc, num, str) {
                    // Whitespace and comments are not significant tokens: they must not
                    // consume the "previous token was a dot" state ("a . b", "a./*x*/b").
                    if (ws || comm) {
                        return;
                    }
                    if (beforeIsDot) {
                        // Token right after a dot is a property name, never a variable.
                        beforeIsDot = false;
                        return;
                    }
                    if (all == '.') {
                        beforeIsDot = true;
                        return;
                    }
                    if (id && !hasOwn.call(keywords, id))
                        r.push(id);
                }
                while (t = re.exec(s)) {
                    if (!t[0]) {
                        if (t.index != s.length)
                            throw Error("error: " + t.index);
                        break;
                    }
                    f.apply(null, t);
                }

                return r;
            }
            console.log(getVars("new Date()"))

@Gaubee
Copy link

Gaubee commented Jul 8, 2013

@RubyLouvre 这种写法说实话,很不稳定,而且可读性又低,你让getVars解析一下getVars自己。

自己做一下测试就知道,我用之前从你那边修改的Gist(这个还是用上eval的低效)和这个新的做比较,对getVariables这个函数体进行解析,不论从速度、可读性还是稳定性上,都比你后来贴上来的这段代码好多了。

附上测试地址:https://gist.github.com/Gaubee/5945554

@RubyLouvre
Copy link
Owner Author

那个注释正则,可以简化一下
var re_comm = /\/\/[^\r\n\u2028\u2029]*|\/\*[\s\S]*?\*\//;

这样也更快一点
其中的 \u2028\u2029 也可以去掉
因为一般代码中不会出现这种字符

@Gaubee
Copy link

Gaubee commented Jul 8, 2013

@RubyLouvre 你可以运行一下你自己自己写的那段代码,相比abcd写的,特别是遇上简单结构的,速度优势更加明显,因为代码结构上的优势。

你也可以运行一下我上面贴给你的测试代码。

总之我觉得还是用你之前自己写的那段比较好。abcd这代码明显是用在更复杂的分析,而你的代码只是用于变量的提取。

@RubyLouvre
Copy link
Owner Author

我现在不想用with了
比如说我的绑定属性里面有个items
而这个绑定属性在select元素上
如果用with, 它就找到元素的select属性,而不是VM的items属性,这真吐血

@RubyLouvre
Copy link
Owner Author

我早期想到的怪招,利用TypeError进行抽取

    // Turns the value of a binding attribute, or the inside of an interpolation
    // expression, into a function (compileFn) that may reference ViewModel properties;
    // those properties are setters/getters and become part of the two-way binding chain.

    // Characters that have a special meaning inside a regular expression.
    var regEscape = /([-.*+?^${}()|[\]\/\\])/g;

    /**
     * Escapes a string so it can be embedded safely in regular-expression source.
     *
     * @param {string} target - raw text
     * @returns {string} text with every special character prefixed by a backslash
     */
    function escapeRegExp(target) {
        return target.replace(regEscape, function(special) {
            return "\\" + special;
        });
    }
    // True when a plain function call receives a falsy `this` (strict-mode semantics);
    // false when `this` is an object.
    var isStrict = (function() {
        if (this) {
            return false;
        }
        return true;
    })();

    /**
     * Given the error raised by an undefined variable, extracts that variable's name
     * from the error message, looks for the first scope that owns it, and returns
     * `text` with the scope's generated name prefixed to each occurrence.
     *
     * @param {Error} e - error caught by the caller
     * @param {string} text - expression source to rewrite
     * @param {Object[]} scopeList - scope objects; each is read for `$id` and probed with hasOwnProperty
     * @param {string[]} names - generated scope names; a new name is appended when not yet present
     * @param {Object[]} args - scopes matching `names`; appended together with the name
     * @param {*} random - suffix appended to `scope.$id` to form the scope name
     * @returns {string|undefined} rewritten text, or undefined when the error is not of
     *     the expected type or no scope owns the variable
     */
    function insertScopeNameBeforeVariableName(e, text, scopeList, names, args, random) {
        var ok = false;
        if (window.dispatchEvent) { // detects IE9-11 or a standards-compliant browser
            ok = e instanceof ReferenceError;
        } else {
            ok = e instanceof TypeError;
        }
        // Sample error messages per browser (variable name: nickName / eee):
        //opera9.61
        //message: Statement on line 810: Undefined variable: nickName
        //opera12
        //Undefined variable: nickName
        //safari 5
        //Can't find variable: nickName
        //firefox3-20  chrome
        //ReferenceError: nickName is not defined
        //IE10 (Chinese locale; 未定义 = "is undefined")
        //“nickName”未定义
        //IE6 (Chinese locale)
        //'eee' 未定义
        if (ok) {
            if (window.opera) {
                var varName = e.message.split("Undefined variable: ")[1];
            } else {
                // Strip the Safari prefix and a leading quote so the name starts the string.
                varName = e.message.replace("Can't find variable: ", "")
                        .replace("“", "").replace("'", "");
            }
            varName = (varName.match(/^[\w$]+/) || [""])[0]; // the undefined variable's name
            // The loop stops at the first falsy entry of scopeList.
            for (var i = 0, scope; scope = scopeList[i++]; ) {
                if (scope.hasOwnProperty(varName)) {
                    var scopeName = scope.$id + random;
                    if (names.indexOf(scopeName) === -1) {
                        names.push(scopeName);
                        args.push(scope);
                    }
                    // The original author notes that stricter handling is still needed here.
                    var reg = new RegExp("(^|[^\\w\\u00c0-\\uFFFF_])(" + escapeRegExp(varName) + ")($|[^\\w\\u00c0-\\uFFFF_])", "g");
                    return text.replace(reg, function(a, b, c, d) {
                        return b + scopeName + "." + c + d; // prepend the scope name
                    });
                }
            }

        }
    }
    // Matches a double-quoted string literal: a run of characters other than backslash,
    // double quote or newline, mixed with backslash-escaped characters.
    var doubleQuotedString = /"([^\\"\n]|\\.)*"/g;
    // Same pattern for single-quoted string literals.
    var singleQuotedString = /'([^\\'\n]|\\.)*'/g;

@RubyLouvre
Copy link
Owner Author

@aui
Copy link

aui commented Jul 9, 2013

@RubyLouvre 过滤算法有更新,1、完善数字判断,避免BUG 2、完善字符串正则:

aui/art-template#27

欢迎共同完善

@limodou
Copy link
Contributor

limodou commented Jul 9, 2013

做广告?

@aui
Copy link

aui commented Jul 9, 2013

@limodou 楼主采用了我写的parser,他不知道其中存在BUG,我修复后再告诉他--这何来广告一说?

@limodou
Copy link
Contributor

limodou commented Jul 9, 2013

抱歉,不知道前因后果。

@yolio2003
Copy link

原来几大高手一起合作在弄, 太赞了

@RubyLouvre
Copy link
Owner Author

 var str = "aaa.bbb.ccc+ddd";
            var str = "aaa[bbb].ccc-888+ikk";
            var str = "aaa[ bbb ].ccc-888+ikk";
            var str = "aaa['bbb'].ccc||a888&&ikk(ddd.eee)";
            var str = 'aaa["aaa"]+null';
            // Sample inputs for extracting variable or property names; every declaration
            // overwrites the previous one, so only the last sample is in effect.

            // Comma-separated list of words that are not variables.
            var KEYWORDS = [
                    // keywords
                    'break,case,catch,continue,debugger,default,delete,do,else,false',
                    'finally,for,function,if,in,instanceof,new,null,return,switch,this',
                    'throw,true,try,typeof,var,void,while,with',

                    // reserved words
                    'abstract,boolean,byte,char,class,const,double,enum,export,extends',
                    'final,float,goto,implements,import,int,interface,long,native',
                    'package,private,protected,public,short,static,super,synchronized',
                    'throws,transient,volatile',

                    // ECMA 5 - use strict
                    'arguments,let,yield',

                    'undefined'
            ].join(',');
            /**
             * Builds a lookup object whose keys are the given names.
             *
             * @param {string|Array} array - names as an array, or as a string separated
             *     by commas and/or spaces
             * @param {*} [val] - value assigned to every key; defaults to 1 when undefined
             * @returns {Object} plain object mapping each name to the value
             */
            function oneObject(array, val) {
                var names = array
                if (typeof names === "string") {
                    names = names.match(/[^, ]+/g) || []
                }
                var marker = 1
                if (val !== void 0) {
                    marker = val
                }
                var table = {}
                for (var k = 0, count = names.length; k < count; k += 1) {
                    var key = names[k]
                    table[key] = marker
                }
                return table
            }
            // Lookup object with one key per entry of KEYWORDS.
            var keywordOne = oneObject(KEYWORDS)
            // An identifier starting with $, _ or a letter, followed by any number of
            // ".name" or "[...]" accessors (case-insensitive, global).
            var rvar = /\b[\$\_a-z][\w$]*(?:\.[$\w]+|\[[^\]]+\])*/ig

            // A quoted string with at least one character between the quotes; \1 refers
            // to the opening quote, and an escaped quote does not close the string.
            var rstringLiterals = /(['"])(\\\1|.)+?\1/g
            // A regex literal preceded by a non-slash character (captured as group 1)
            // and followed by up to three of the flags g, i, m.
            var rregexp = /([^\/])(\/(?!\*|\/)(\\\/|.)+?\/[gim]{0,3})/g
            // Line comments that contain a "*" (optionally "/*"), and block comments
            // that contain "//".
            var rcomment1 = /\/\/.*?\/?\*.+?(?=\n|\r|$)|\/\*[\s\S]*?\/\/[\s\S]*?\*\//g
            // Ordinary line comments and block comments (at least one character long).
            var rcomment2 = /\/\/.+?(?=\n|\r|$)|\/\*[\s\S]+?\*\//g
            /**
             * Collects the variable / property-path expressions used in a piece of
             * source text (e.g. "aaa.bbb.ccc", "aaa['bbb'].ccc"). String and regex
             * literals are swapped for placeholders while comments are stripped, then
             * restored inside the reported paths.
             * Comment-removal approach:
             * http://james.padolsey.com/javascript/javascript-comment-removal-revisted/
             *
             * @param {string} str - source text to scan
             * @returns {string[]} the paths found, in source order; the list is also
             *     logged to the console
             */
            function getVars(str) {
                var uid = '_' + +new Date(),
                        primatives = [],
                        primIndex = 0;
                var hasOwn = Object.prototype.hasOwnProperty
                // Any placeholder inside a token, and a token that is only a placeholder.
                var rplaceholder = RegExp(uid + '(\\d+)', 'g')
                var rliteralOnly = RegExp('^' + uid + '\\d+$')
                str = str
                        /* replace every string literal with a placeholder */
                        .replace(rstringLiterals, function(match) {
                            primatives[primIndex] = match;
                            return (uid + '') + primIndex++;
                        })

                        /* replace every regex literal with a placeholder */
                        .replace(rregexp, function(match, $1, $2) {
                            primatives[primIndex] = $2;
                            return $1 + (uid + '') + primIndex++;
                        })

                        .replace(rcomment1, "")

                        .replace(rcomment2, "")

                        /* drop text running from a block-comment opener to the last placeholder */
                        .replace(RegExp('\\/\\*[\\s\\S]+' + uid + '\\d+', 'g'), "")


                var vars = []

                str.replace(rvar, function(a) {
                    // Own-property test: names such as "constructor" must not be treated
                    // as keywords through Object.prototype.
                    if (hasOwn.call(keywordOne, a))
                        return
                    // A token made of a placeholder alone is a bare literal, not a variable.
                    if (rliteralOnly.test(a))
                        return
                    vars.push(a.replace(rplaceholder, function(match, n) {
                        return primatives[n];
                    }))
                })
                console.log(vars)

                // Hand the result back to the caller instead of only logging it.
                return vars
            }

            getVars(str)

RubyLouvre pushed a commit that referenced this issue Feb 18, 2015
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

6 participants