GitHub - BLKStone/ply-simple-demo: ply(python lex yacc) simple demos

# 说明

本笔记是将 Lex&YACC HOWTO[1] 的部分内容用PLY重新实现。本文不是正则表达式的教程，相关内容请寻找其他教程。PLY文档[2]的翻译（以及原文）也是非常重要的参考资料。

#安装PLY

pip install ply

# lex简易示例

## 认识lex

#!/usr/bin/python
# -*- coding: utf-8 -*-  
# exam1.py

import ply.lex as lex

tokens = ['START','STOP']

# Lexer rule for the START token.  PLY takes the pattern from the function's
# docstring, so the raw string must stay the first statement of the body.
def t_START(t):
    r'start'
    # Parenthesised single-argument print: same output under Python 2, and
    # (unlike the bare print statement) also valid syntax under Python 3.
    print("start command received")
    return t

# Lexer rule for the STOP token.  PLY takes the pattern from the function's
# docstring, so the raw string must stay the first statement of the body.
def t_STOP(t):
    r'stop'
    # Parenthesised single-argument print: same output under Python 2, and
    # (unlike the bare print statement) also valid syntax under Python 3.
    print("stop command received")
    return t

# Line-number bookkeeping: matches a run of newlines and advances the lexer's
# line counter by the number of "\n" characters in the match.  Nothing is
# returned, so the matched text does not become a token.
def t_newline(t):
    r'\n+'
    newline_count = t.value.count("\n")
    lexer_state = t.lexer
    lexer_state.lineno = lexer_state.lineno + newline_count

# Error handling: called with the unmatched remainder of the input in
# t.value.  Reports its first character, then asks the lexer to skip one
# character so scanning can continue.
def t_error(t):
    # Parenthesised form prints the same text on Python 2 and is also valid
    # Python 3 syntax.
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)


# Build the lexer
lexer = lex.lex()

# Sample input
s = '''
stop and start
'''

# Give the lexer some input
lexer.input(s)

# Pull tokens until the lexer reports end of input with a falsy result.
# The tokens are discarded here; the visible output comes from the print
# calls inside the rule functions.
tok = lexer.token()
while tok:
    tok = lexer.token()

## 一个更复杂的示例

假设下面是一个我们想解析的文件：

logging {
    category lame-servers { null; };
    category cname { null; };
};

zone "." {
    type hint;
    file "/etc/bind/db.root";
};

这个文件中有以下几类符号(tokens)

WORDs ，如zone和type
FILENAMEs ，如*/etc/bind/db.root*
QUOTEs ，如包括文件名的符号
OBRACEs ，左花括号*{*
EBRACEs ，右花括号*}*
SEMICOLONs ，;

对应的lex文件如下

#!/usr/bin/python
# -*- coding: utf-8 -*-
# exam2.py  

import ply.lex as lex

tokens = ["WORD","FILENAME","QUOTE","OBRACE","EBRACE","SEMICOLON"]

# Lexer rule for WORD: a letter followed by letters, digits or hyphens.
# The docstring is the pattern PLY uses and must stay the first statement.
def t_WORD(t):
    r'[a-zA-Z][a-zA-Z0-9-]*'
    # The original `print "WORD ",` (trailing comma = no newline) is a
    # SyntaxError on Python 3.  Writing to sys.stdout emits the label without
    # a newline on both Python 2 and 3; unlike the Python 2 print statement it
    # leaves no pending "soft space" for the next print.
    import sys
    sys.stdout.write("WORD ")
    return t

# Lexer rule for FILENAME: one or more letters, digits, '/', '.' or '-'.
# The docstring is the pattern PLY uses and must stay the first statement.
def t_FILENAME(t):
    r'[a-zA-Z0-9/.-]+'
    # sys.stdout.write replaces the Python-2-only `print "...",` form; it
    # emits the label without a trailing newline on both Python 2 and 3.
    import sys
    sys.stdout.write("FILENAME ")
    return t

# Lexer rule for QUOTE: a single double-quote character.
# The docstring is the pattern PLY uses and must stay the first statement.
def t_QUOTE(t):
    r'"'
    # sys.stdout.write replaces the Python-2-only `print "...",` form; it
    # emits the label without a trailing newline on both Python 2 and 3.
    import sys
    sys.stdout.write("QUOTE ")
    return t

# Lexer rule for OBRACE: the opening curly brace.
# The docstring is the pattern PLY uses and must stay the first statement.
def t_OBRACE(t):
    r'{'
    # sys.stdout.write replaces the Python-2-only `print "...",` form; it
    # emits the label without a trailing newline on both Python 2 and 3.
    import sys
    sys.stdout.write("OBRACE ")
    return t

# Lexer rule for EBRACE: the closing curly brace.
# The docstring is the pattern PLY uses and must stay the first statement.
def t_EBRACE(t):
    r'}'
    # sys.stdout.write replaces the Python-2-only `print "...",` form; it
    # emits the label without a trailing newline on both Python 2 and 3.
    import sys
    sys.stdout.write("EBRACE ")
    return t

# Lexer rule for SEMICOLON: a single ';'.
# The docstring is the pattern PLY uses and must stay the first statement.
def t_SEMICOLON(t):
    r';'
    # sys.stdout.write replaces the Python-2-only `print "...",` form; it
    # emits the label without a trailing newline on both Python 2 and 3.
    import sys
    sys.stdout.write("SEMICOLON ")
    return t


# 不做处理的符号 空格与tab
t_ignore = " \t"

# Line-number bookkeeping: matches a run of newlines, advances the lexer's
# line counter by the number of "\n" characters matched, and ends the current
# line of token labels on stdout.  Nothing is returned, so no token results.
def t_newline(t):
    r'\n+'
    t.lexer.lineno += t.value.count("\n")
    # Parenthesised form: prints an empty line on both Python 2 and Python 3
    # (the bare `print ''` statement is Python 2 only).
    print('')

# Error handling: called with the unmatched remainder of the input in
# t.value.  Reports its first character, then asks the lexer to skip one
# character so scanning can continue.
def t_error(t):
    # Parenthesised form prints the same text on Python 2 and is also valid
    # Python 3 syntax.
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)

# Build the lexer
lexer = lex.lex()


# Read the whole input file.  The with-block closes the handle as soon as the
# text has been read; the original left the file open for the rest of the run.
with open('test.conf') as file_object:
    s = file_object.read()
# Echo the raw input before tokenizing it (parenthesised print is valid on
# both Python 2 and Python 3).
print(s)


# Give the lexer some input
lexer.input(s)

# Drain the token stream; each rule function prints its own label, so the
# tokens themselves are discarded here.
while True:
    tok = lexer.token()
    if not tok:
        break

# yacc示例

## 一个简单的温度调节控制器

我们想用一门简单的语言去控制一个温度调节器，例如：

heat on
    Heater on!
heat off
    Heater off!
target temperature 22
    New temperature set!

我们需要辨别的符号有：heat,on/off(STATE),target,temperature,NUMBER。对应的lex文件如下（Example 3）:

#!/usr/bin/python
# -*- coding: utf-8 -*-  
# exam3lex.py

import ply.lex as lex

tokens = ['NUMBER','TOKHEAT','STATE','TOKTARGET','TOKTEMPRATURE']

# Lexer rule for NUMBER: one or more decimal digits.  The token is returned
# unchanged, so its value stays the matched text (a string).
def t_NUMBER(t):
    r'[0-9]+'
    return t

# Lexer rule for TOKHEAT: the literal keyword "heat".  The raw-string
# docstring is the pattern; the token is returned unchanged.
def t_TOKHEAT(t):
    r'heat'
    return t

# Lexer rule for STATE: either of the literals "on" or "off".  The raw-string
# docstring is the pattern; the token is returned unchanged.
def t_STATE(t):
    r'on|off'
    return t

# Lexer rule for TOKTARGET: the literal keyword "target".  The raw-string
# docstring is the pattern; the token is returned unchanged.
def t_TOKTARGET(t):
    r'target'
    return t

# Lexer rule for the "temperature" keyword.  The token name keeps its
# historical spelling (TOKTEMPRATURE) because the token list and the grammar
# refer to it by that name.
def t_TOKTEMPRATURE(t):
    r'tempe?rature'
    # The pattern accepts the correct spelling "temperature" (as used in the
    # documented command "target temperature 22") and, for backward
    # compatibility, the misspelling "temprature" that the rule originally
    # required.
    return t

# 不做处理的符号 空格与tab
t_ignore = " \t"

# Line-number bookkeeping: matches a run of newlines and advances the lexer's
# line counter by the number of "\n" characters in the match.  Nothing is
# returned, so the matched text does not become a token.
def t_newline(t):
    r'\n+'
    newline_count = t.value.count("\n")
    lexer_state = t.lexer
    lexer_state.lineno = lexer_state.lineno + newline_count

# Error handling: called with the unmatched remainder of the input in
# t.value.  Reports its first character, then asks the lexer to skip one
# character so scanning can continue.
def t_error(t):
    # Parenthesised form prints the same text on Python 2 and is also valid
    # Python 3 syntax.
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)

# Build the lexer (this module defines only token rules; no parser is built
# here).
lexer = lex.lex()

# # 测试数据
# s = '''
# heat on
# '''

# # Give the lexer some input
# lexer.input(s)

# while True:
#     tok = lexer.token()
#     if not tok: break
#     print '(',tok.type,','+str(tok.value)+')'

接下来是在yacc文件中用产生式说明文法。

#!/usr/bin/python
# -*- coding: utf-8 -*-  
# exam3.py

import ply.yacc as yacc
from exam3lex import tokens

# Grammar rule: `commands` is either empty or a shorter `commands` followed by
# one `command` (left recursion).  The docstring is the production text; the
# function has no other body, so no value is assigned to p[0].
def p_commands(p):
    '''commands : empty
                            | commands command
    '''

# Grammar rule: a `command` is either a `heatswitch` or a `targetset`.
# The docstring is the production text; no semantic action is attached.
def p_command(p):
    '''command : heatswitch
                            | targetset'''

# Grammar rule: `heat` followed by a STATE token.  The docstring is the
# production text and must stay the first statement.
def p_heatswitch(p):
    'heatswitch : TOKHEAT STATE'
    # Parenthesised single-argument print: same output on Python 2, and also
    # valid syntax on Python 3.
    print("Heat turned on or off")

# Grammar rule: `target`, the temperature keyword, then a NUMBER.  The
# docstring is the production text and must stay the first statement.
def p_targetset(p):
    'targetset : TOKTARGET TOKTEMPRATURE NUMBER'
    # Parenthesised single-argument print: same output on Python 2, and also
    # valid syntax on Python 3.
    print("temprature set")

# Grammar rule with an empty right-hand side; other rules reference `empty`
# to express an optional / zero-length alternative.  No action is taken.
def p_empty(p):
    'empty :'
    pass

# Error rule for syntax errors.  The argument is accepted but not inspected;
# a fixed message is printed.
def p_error(p):
    # Parenthesised single-argument print: same output on Python 2, and also
    # valid syntax on Python 3.
    print("Syntax error in input!")

# Build the parser
parser = yacc.yacc()

# `raw_input` exists only on Python 2 (Python 3 renamed it to `input`);
# pick whichever line-reading builtin this interpreter provides.
try:
    _read_line = raw_input
except NameError:
    _read_line = input

# Read-parse loop: one command line per iteration, until end-of-file on stdin.
while True:
    try:
        s = _read_line('calc > ')
    except EOFError:
        break
    if not s:
        # Blank line: prompt again without invoking the parser.
        continue
    result = parser.parse(s)
    # Parenthesised print is valid on both Python 2 and Python 3.
    print(result)

## 拓展温度调节器使其可处理参数

在语法动作中，利用 p[1]、p[2] 等下标读取产生式右侧各符号的属性值（p[0] 用于保存产生式左侧符号的值）。

#!/usr/bin/python
# -*- coding: utf-8 -*-  
# exam4.py

import ply.lex as lex
import ply.yacc as yacc

tokens = ['NUMBER','TOKHEAT','STATE','TOKTARGET','TOKTEMPRATURE']

# Lexer rule for NUMBER: one or more decimal digits.  The token is returned
# unchanged, so its value stays the matched text (a string).
def t_NUMBER(t):
    r'[0-9]+'
    return t

# Lexer rule for TOKHEAT: the literal keyword "heat".  The raw-string
# docstring is the pattern; the token is returned unchanged.
def t_TOKHEAT(t):
    r'heat'
    return t

# Lexer rule for STATE: either of the literals "on" or "off".  The raw-string
# docstring is the pattern; the token is returned unchanged.
def t_STATE(t):
    r'on|off'
    return t

# Lexer rule for TOKTARGET: the literal keyword "target".  The raw-string
# docstring is the pattern; the token is returned unchanged.
def t_TOKTARGET(t):
    r'target'
    return t

# Lexer rule for the "temperature" keyword.  The token name keeps its
# historical spelling (TOKTEMPRATURE) because the token list and the grammar
# refer to it by that name.
def t_TOKTEMPRATURE(t):
    r'tempe?rature'
    # The pattern accepts the correct spelling "temperature" and, for backward
    # compatibility, the misspelling "temprature" that the rule originally
    # required.
    return t

# 不做处理的符号 空格与tab
t_ignore = " \t"

# Line-number bookkeeping: matches a run of newlines and advances the lexer's
# line counter by the number of "\n" characters in the match.  Nothing is
# returned, so the matched text does not become a token.
def t_newline(t):
    r'\n+'
    newline_count = t.value.count("\n")
    lexer_state = t.lexer
    lexer_state.lineno = lexer_state.lineno + newline_count

# Error handling: called with the unmatched remainder of the input in
# t.value.  Reports its first character, then asks the lexer to skip one
# character so scanning can continue.
def t_error(t):
    # Parenthesised form prints the same text on Python 2 and is also valid
    # Python 3 syntax.
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)

# Grammar rule: `commands` is either empty or a shorter `commands` followed by
# one `command` (left recursion).  The docstring is the production text; the
# function has no other body, so no value is assigned to p[0].
def p_commands(p):
    '''commands : empty
                            | commands command
    '''

# Grammar rule: a `command` is either a `heat_switch` or a `target_set`.
# The docstring is the production text; no semantic action is attached.
def p_command(p):
    '''command : heat_switch
                            | target_set'''

# Grammar rule: `heat` followed by a STATE token.  p[2] is the value of the
# STATE symbol, which is appended to the message.
def p_heatswitch(p):
    'heat_switch : TOKHEAT STATE'
    # Parenthesised single-argument print: same output on Python 2, and also
    # valid syntax on Python 3.
    print("Heat turned " + p[2])

# Grammar rule: `target`, the temperature keyword, then a NUMBER.  p[3] is the
# value of the NUMBER symbol; it is concatenated to the message, so it must be
# a string.
def p_targetset(p):
    'target_set : TOKTARGET TOKTEMPRATURE NUMBER'
    # Parenthesised single-argument print: same output on Python 2, and also
    # valid syntax on Python 3.
    print("temprature set " + p[3])

# Grammar rule with an empty right-hand side; other rules reference `empty`
# to express an optional / zero-length alternative.  No action is taken.
def p_empty(p):
    'empty :'
    pass

# Error rule for syntax errors.  The argument is accepted but not inspected;
# a fixed message is printed.
def p_error(p):
    # Parenthesised single-argument print: same output on Python 2, and also
    # valid syntax on Python 3.
    print("Syntax error in input!")


# Build the lexer
lexer = lex.lex()

# Build the parser
parser = yacc.yacc()

# `raw_input` exists only on Python 2 (Python 3 renamed it to `input`);
# pick whichever line-reading builtin this interpreter provides.
try:
    _read_line = raw_input
except NameError:
    _read_line = input

# Read-parse loop: one command line per iteration, until end-of-file on stdin.
# Feedback is printed by the grammar actions; the parse result is not shown.
while True:
    try:
        s = _read_line('input > ')
    except EOFError:
        break
    if not s:
        # Blank line: prompt again without invoking the parser.
        continue
    result = parser.parse(s)

## 解析配置文件

让我们继续讨论前面提到的配置文件：

zone "." {
        type hint;
        file "/etc/bind/db.root";
};

example 5:

#!/usr/bin/python
# -*- coding: utf-8 -*-  
# exam5.py

import ply.lex as lex
import ply.yacc as yacc

# Reserved words: maps a keyword's text to the token type that t_WORD assigns
# when it matches that text.  Only keywords used by the grammar in this script
# are listed; the former 'else' -> 'ELSE' entry was referenced by no
# production, so the word "else" in the input became a token that could only
# produce a syntax error.
reserved = {
   'zone' : 'ZONETOK',
   'file' : 'FILETOK',
}

# Full token list: the generic tokens plus one token type per reserved word.
tokens = ['WORD','FILENAME','QUOTE','OBRACE','EBRACE','SEMICOLON'] + list(reserved.values())

# Simple tokens defined by a regular-expression string (no action code).
t_FILENAME = r'[a-zA-Z0-9/.-]+'

t_QUOTE = r'"'

t_OBRACE = r'{'

t_EBRACE = r'}'

t_SEMICOLON =  r';'

# Lexer rule for words: a letter followed by one or more letters or digits
# (so at least two characters).  If the matched text is a key of `reserved`,
# the token type becomes the mapped keyword type; otherwise it is 'WORD'.
def t_WORD(t):
    r'[a-zA-Z][a-zA-Z0-9]+'
    if t.value in reserved:
        t.type = reserved[t.value]
    else:
        t.type = 'WORD'
    return t

# 不做处理的符号 空格与tab
t_ignore = " \t"

# Line-number bookkeeping: matches a run of newlines and advances the lexer's
# line counter by the number of "\n" characters in the match.  Nothing is
# returned, so the matched text does not become a token.
def t_newline(t):
    r'\n+'
    newline_count = t.value.count("\n")
    lexer_state = t.lexer
    lexer_state.lineno = lexer_state.lineno + newline_count

# Error handling: called with the unmatched remainder of the input in
# t.value.  Reports its first character, then asks the lexer to skip one
# character so scanning can continue.
def t_error(t):
    # Parenthesised form prints the same text on Python 2 and is also valid
    # Python 3 syntax.
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)


# Grammar rule: `commands` is either empty or a shorter `commands` followed by
# one `command`.  For the two-symbol alternative the result is the value of
# the newest command; for the `empty` alternative p[0] is left untouched.
def p_commands(p):
    '''commands : empty 
                            | commands command
    '''
    if len(p) != 3:
        return
    p[0] = p[2]

# Grammar rule: a `command` is a single `zone_set`; its value is passed
# through unchanged.
def p_command(p):
    'command : zone_set'
    p[0] = p[1]

# Grammar rule: the `zone` keyword, a quoted name, then the zone body.
# Announces the zone name (p[2]) and yields the body's value (p[3]).
def p_zoneset(p):
    'zone_set : ZONETOK quotename zonecontent'
    # One formatted string gives the same space-separated text as the original
    # comma-separated print statement, and is valid on Python 2 and 3.
    print("complete zone for %s found" % (p[2],))
    p[0] = p[3]

# Grammar rule: a zone body is `{ zonestatements } ;`.  The value of the
# enclosed zonestatements (p[2]) becomes the value of the body.
def p_zonecontent(p):
    'zonecontent : OBRACE zonestatements EBRACE SEMICOLON'
    p[0] = p[2]

# Grammar rule: a FILENAME token between two QUOTE tokens.  The value is the
# FILENAME's value (p[2]), i.e. the text without the surrounding quotes.
def p_quotename(p):
    'quotename : QUOTE FILENAME QUOTE'
    p[0] = p[2]

# Grammar rule: zero or more `zonestatement ;` items (left recursion).  For
# the three-symbol alternative the result is the value of the newest
# zonestatement; for the `empty` alternative p[0] is left untouched.
def p_zonestatements(p):
    '''zonestatements : empty
                                        | zonestatements zonestatement SEMICOLON
    '''
    if len(p) != 4:
        return
    p[0] = p[2]


# Grammar rule: a zone statement is either a generic run of `statements` or
# the `file` keyword followed by a quoted name.  Only the `file` form has an
# action: the file name becomes the value and is reported on stdout.
def p_zonestatement(p):
    '''zonestatement : statements
                                    | FILETOK quotename
    '''
    if p[1]=='file':
        p[0] = p[2]
        # One formatted string gives the same space-separated text as the
        # original comma-separated print statement, and is valid on Python 2
        # and 3.
        print("a zonefile name %s was encountered" % (p[2],))

# Grammar rule: a nested block `{ zonestatements } ;`.  The right-hand side is
# the same as zonecontent's, but no value is assigned to p[0] here.
def p_block(p):
    'block : OBRACE zonestatements EBRACE SEMICOLON'

# Grammar rule: zero or more `statement` items (left recursion).  The
# docstring is the production text; no semantic action is attached.
def p_statements(p):
    '''statements : empty
                            | statements statement 
    '''

# Grammar rule: a single statement is a WORD token, a nested block, or a
# quoted name.  The docstring is the production text; no semantic action is
# attached.
def p_statement(p):
    '''statement : WORD
                            | block
                            | quotename'''

# Error rule for syntax errors.  The argument is accepted but not inspected;
# a fixed message is printed.
def p_error(p):
    # Parenthesised single-argument print: same output on Python 2, and also
    # valid syntax on Python 3.
    print("Syntax error in input!")

# Grammar rule with an empty right-hand side; other rules reference `empty`
# to express an optional / zero-length alternative.  No action is taken.
def p_empty(p):
    'empty :'
    pass

# Build the lexer
lexer = lex.lex()

# Build the parser
parser = yacc.yacc()

# Read the whole configuration file.  The with-block closes the handle as soon
# as the text has been read; the original left the file open for the rest of
# the run.
with open('test2.conf') as file_object:
    s = file_object.read()
# Echo the raw input before parsing it (parenthesised print is valid on both
# Python 2 and Python 3).
print(s)

# lexer.input(s)

# while True:
#     tok = lexer.token()
#     if not tok: break
#     print tok

# Parse the text and show the value of the start symbol.
result = parser.parse(s)
print(result)

# 深度阅读

GNU YACC (Bison) 带有一个非常不错的info文件（.info），它是非常好的YACC语法文档，只是其中仅提到了一次Lex，其余内容都很好。可以使用Emacs阅读info文件，或者使用非常不错的工具pinfo。

Flex有一个不错的用户手册，如果你已经理解Flex是做什么的，它还是非常有用的。

读完了这个Lex和YACC介绍，你可能想找到更多的信息。虽然以下的书我一本都没看过，不过听说不错：

Bison - The Yacc-Compatible Parser Generator
Lex&Yacc
Compilers: Principles, Techniques, and Tools

Thomas Niemann 写了一篇文档，讨论如何使用Lex和YACC写一个编译器和计算器。

Usenet新闻组comp.compilers也非常有用，不过请记住，那里的人并不是专门的技术支持人员；在发帖之前，请先阅读他们的相关页面，特别是FAQ。

Lex-A Lexical Analyzer Generator[4]，M.E.Lesk and E.Schmidt，最原始的论文。

Yacc: Yet Another Compiler-Compiler[5]

# 参考资料

以下推荐部分参考资料。比以上内容略复杂的PLY示例项目[3]中的calc.py很适合作为进一步学习的资料。

[1] Lex&YACC HOWTO 中文翻译

[2] PLY文档中文翻译

[3] PLY示例项目

[4] Lex-A Lexical Analyzer Generator，M.E.Lesk and E.Schmidt

[5] Yacc: Yet Another Compiler-Compiler

Name		Name	Last commit message	Last commit date
Latest commit History 2 Commits
README.md		README.md
calc_class.py		calc_class.py
calclex.py		calclex.py
exam1.py		exam1.py
exam2.py		exam2.py
exam3.py		exam3.py
exam3lex.py		exam3lex.py
exam3lex.pyc		exam3lex.pyc
exam4.py		exam4.py
exam5.py		exam5.py
parser.out		parser.out
parsetab.py		parsetab.py
parsetab.pyc		parsetab.pyc
test.conf		test.conf
test2.conf		test2.conf

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

README.md

README.md

calc_class.py

calc_class.py

calclex.py

calclex.py

exam1.py

exam1.py

exam2.py

exam2.py

exam3.py

exam3.py

exam3lex.py

exam3lex.py

exam3lex.pyc

exam3lex.pyc

exam4.py

exam4.py

exam5.py

exam5.py

parser.out

parser.out

parsetab.py

parsetab.py

parsetab.pyc

parsetab.pyc

test.conf

test.conf

test2.conf

test2.conf

Repository files navigation

About

Releases

Packages

Languages

BLKStone/ply-simple-demo

Folders and files

Latest commit

History

Repository files navigation

About

Resources

Stars

Watchers

Forks

Languages