In [1]:
from pypeg2 import *


class Type(Keyword):
    grammar = Enum( K("int"), K("long") )
    
class Parameter(object):
    grammar = attr("typing", Type), blank, name()
    
class Parameters(Namespace):
    grammar = optional(csl(Parameter))
    

class Instruction(str):
    def heading(self, parser):
        return "/* on level " + str(parser.indention_level) + " */", endl

    grammar = heading, word, ";", endl
    
block = "{", endl, maybe_some(indent(Instruction)), "}", endl

class Function(List):
    grammar = attr("typing", Type), blank, name(), "(", attr("parms", Parameters), ")", endl, block


In [2]:
f = parse("int f(int a, long b) { do_this; do_that; }", Function)

In [3]:
f.typing

Type('int')

In [18]:
from pypeg2 import *

#tools

class Digit:
    grammar = re.compile(r"[0-9]")

class Alpha:
    grammar = re.compile(r"[a-zA-Z]")
    
class HexDig:
    grammar = [Digit, "A", "B", "C", "D", "E", "F"]
    
class SubDelimiter(Keyword):
    grammar = Enum(K("!"), K("$"), K("&"), K("'"), K("("), K(")"), K("*"), K("+"), K(","), K(";"), K("="))
    
class Unreserved:
    grammar = [Alpha, Digit, "-", ".", "_", "~"]
    
class PctEncoded:
    grammar = "%", HexDig, HexDig
    
class PChar:
    grammar = [Unreserved, PctEncoded, SubDelimiter, ":", "@"]
#endtools

class Method(Keyword):
    grammar = Enum(K("GET"), K("HEAD"), K("POST"), K("PUT"), K("DELETE"), K("TRACE"), K("CONNECT"), K("OPTIONS"))
    
    
class DecOctet:
    grammar = [Digit, (re.compile(r"[1-9]"), Digit), ("1", 2, Digit), ("2", re.compile(r"[0-4]"), Digit), ("25", re.compile(r"[0-5]"))]
    
class IPAddress:
    grammar = attr("ip_field_1", DecOctet), ".", attr("ip_field_2", DecOctet), ".", attr("ip_field_3", DecOctet), ".", attr("ip_field_4", DecOctet)
    
class RegName:
    grammar = some([Unreserved, PctEncoded, SubDelimiter])
    
class UserInfo:
    grammar = some([Unreserved, PctEncoded, SubDelimiter, ":"])
    
class Host:
    grammar = [attr("ip_address", IPAddress), attr("regular_name", RegName)]
    
class Port:
    grammar = some(Digit)
    
class Segment:
    grammar = maybe_some(PChar)
    
class Path(List):
    grammar = some("/", Segment)
    
class Authority:
    grammar = optional(attr("userinfo", UserInfo), "@"), attr("host", Host), optional(":", attr("port", Port))

class HierPart:
    grammar = [("//", attr("authority", Authority), optional(attr("path", Path))), ("/", attr("path", Path))]
    
class Scheme:
    grammar = Alpha, maybe_some([Alpha, Digit, "+", "-", "."])
    
class URI(str):
    grammar = optional(attr("scheme", Scheme), ":"), attr("hier-part", HierPart)
    #, optional(("?", attr("query", Query))), optionnal(("#", attr("fragment", Fragment)))

class HTTPVersion:
    grammar = "HTTP/", some(Digit), ".", some(Digit)
    
class ReqLine:
    grammar = attr("method", Method), blank, attr("request_uri", URI), blank, attr("http_version", HTTPVersion), endl

class MessageHeader:
    grammar = attr("field_name", word), ":", blank, attr("field_value", word) 

class Request(List):
    grammar = attr("req_line", ReqLine), endl, maybe_some(attr("message_header", MessageHeader), endl)
    

In [19]:
to_parse = \
"GET /hello.htm \
Host: tutorialspoint \
"

f = parse(to_parse, Request)

SyntaxError: expecting one of [('//', Attribute(name='authority', thing=<class '__main__.Authority'>, subtype=None), (0, Attribute(name='path', thing=<class '__main__.Path'>, subtype=None))), ('/', Attribute(name='path', thing=<class '__main__.Path'>, subtype=None))] (<string>, line 1)

In [50]:
f.header.param

'tutorialspoint'

In [16]:
class Method(Keyword):
    grammar = Enum(K("GET"), K("HEAD"), K("POST"), K("PUT"), K("DELETE"), K("TRACE"), K("CONNECT"), K("OPTIONS"))

class Digit:
    grammar = re.compile(r"[0-9]")

class SubDelimiter(Keyword):
    grammar = Enum(K("!"), K("$"), K("&"), K("'"), K("("), K(")"), K("*"), K("+"), K(","), K(";"), K("="))

# Alpha | Digit 
#   <=> [a-zA-Z0-9_] 
#   <=> \w
class Unreserved:
    grammar = re.compile(r"\w"), "-", ".", "_", "~"

# Pour host, on ignore pct-encoded
class RegularName:
    grammar = maybe_some([Unreserved, SubDelimiter]) 

#Je decide de skip pct-encoded pour l'instant
class PChar:
    grammar = [Unreserved, SubDelimiter, ":", "@"]

#Start Authority
class Fragment:
    grammar = maybe_some([PChar, "/", "?"])

class DecOctet:
    grammar = [Digit, ([1-9], Digit), ("1", Digit, Digit), ("2", [0-4], Digit), ("25", [0-5])]

# Not considering IPv6 adresses for the moment
class IPAddr:
    grammar = DecOctet, ".", DecOctet, ".", DecOctet, ".", DecOctet

#test syntax ?
class Host:
    grammar = attr("identifier", [IPAddr, RegularName])

class Port:
    grammar = maybe_some(Digit)

class Auth:
    grammar = Host, maybe_some(":", Port)
# end Authority

#start Path
class Segment:
    grammar = maybe_some(PChar)
class Path:
    grammar = Segment, maybe_some("/", Segment)
#end Path

class HierPart:
    grammar = "//", Auth, Path


#TODO: changer cette expreg (marche pas)
class RequestURI:
    grammar = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")

class HttpVer:
    grammar = "HTTP", "/", some(Digit), ".", some(Digit)

class ReqLine:
    grammar = attr("method", Method), whitespace, attr("request_uri", RequestURI), whitespace, attr("http_version", HttpVer), endl

#DEBUG
class Header:
    grammar = re.compile(r"header")

class MessageBody:
    grammar = re.compile(r"body")

class Request:
    grammar = attr("req_line", ReqLine), maybe_some(attr("header", Header), endl), endl, optional(attr("mess_body", MessageBody))


In [17]:
request = """GET /hello.htm
header
mess_body """

f = parse(request, ReqLine)
f.method

SyntaxError: expecting match on (?m)\s+ (<string>)