Skip to content
Permalink
Browse files

Первое приближение

  • Loading branch information
ilovb committed May 17, 2014
1 parent 377599d commit ce1b2e0c34891cbe25d59ef8506fc5ca969697f3
Showing with 28 additions and 122 deletions.
  1. +28 −122 cp_lexer_pygments.py
@@ -1,24 +1,13 @@
class Modula2Lexer(RegexLexer):
"""
For `Modula-2 <http://www.modula2.org/>`_ source code.
For `Component Pascal <http://www.oberon.ch/pdf/CP-Lang.pdf>`_ source code.
Additional options that determine which keywords are highlighted:
`pim`
Select PIM Modula-2 dialect (default: True).
`iso`
Select ISO Modula-2 dialect (default: False).
`objm2`
Select Objective Modula-2 dialect (default: False).
`gm2ext`
Also highlight GNU extensions (default: False).
.. versionadded:: 1.3
.. versionadded:: 0.0
"""
name = 'Modula-2'
aliases = ['modula2', 'm2']
filenames = ['*.def', '*.mod']
mimetypes = ['text/x-modula2']
name = 'Component Pascal'
aliases = ['ComponentPascal', 'cp', 'CP']
filenames = ['*.cp', '*.cps']
mimetypes = ['text/plain']

flags = re.MULTILINE | re.DOTALL

@@ -31,14 +20,11 @@ class Modula2Lexer(RegexLexer):
(r'([a-zA-Z_\$][\w\$]*)', Name),
],
'numliterals': [
(r'[01]+B', Number.Binary), # binary number (ObjM2)
(r'[0-7]+B', Number.Oct), # octal number (PIM + ISO)
(r'[0-7]+C', Number.Oct), # char code (PIM + ISO)
(r'[0-9A-F]+C', Number.Hex), # char code (ObjM2)
(r'[0-9A-F]+H', Number.Hex), # hexadecimal number
(r'[0-9A-F]+X', Number.Hex), # char code
(r'[0-9A-F]+[HL]', Number.Hex), # hexadecimal number
(r'[0-9]+\.[0-9]+E[+-][0-9]+', Number.Float), # real number
(r'[0-9]+\.[0-9]+', Number.Float), # real number
(r'[0-9]+', Number.Integer), # decimal whole number
(r'[0-9]+\.[0-9]+', Number.Float), # real number
(r'[0-9]+', Number.Integer), # decimal whole number
],
'strings': [
(r"'(\\\\|\\'|[^'])*'", String), # single quoted string
@@ -47,23 +33,18 @@ class Modula2Lexer(RegexLexer):
'operators': [
(r'[*/+=#~&<>\^-]', Operator),
(r':=', Operator), # assignment
(r'@', Operator), # pointer deref (ISO)
(r'^', Operator), # pointer deref
(r'\.\.', Operator), # ellipsis or range
(r'`', Operator), # Smalltalk message (ObjM2)
(r'::', Operator), # type conversion (ObjM2)
],
'punctuation': [
(r'[\(\)\[\]{},.:;|]', Punctuation),
],
'comments': [
(r'//.*?\n', Comment.Single), # ObjM2
(r'/\*(.*?)\*/', Comment.Multiline), # ObjM2
(r'\(\*([^\$].*?)\*\)', Comment.Multiline),
# TO DO: nesting of (* ... *) comments
],
'pragmas': [
(r'\(\*\$(.*?)\*\)', Comment.Preproc), # PIM
(r'<\*(.*?)\*>', Comment.Preproc), # ISO + ObjM2
# ???
],
'root': [
include('whitespace'),
@@ -77,104 +58,29 @@ class Modula2Lexer(RegexLexer):
]
}

pim_reserved_words = [
# 40 reserved words
'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION',
'DIV', 'DO', 'ELSE', 'ELSIF', 'END', 'EXIT', 'EXPORT', 'FOR',
'FROM', 'IF', 'IMPLEMENTATION', 'IMPORT', 'IN', 'LOOP', 'MOD',
'MODULE', 'NOT', 'OF', 'OR', 'POINTER', 'PROCEDURE', 'QUALIFIED',
'RECORD', 'REPEAT', 'RETURN', 'SET', 'THEN', 'TO', 'TYPE',
'UNTIL', 'VAR', 'WHILE', 'WITH',
]

pim_pervasives = [
# 31 pervasives
'ABS', 'BITSET', 'BOOLEAN', 'CAP', 'CARDINAL', 'CHAR', 'CHR', 'DEC',
'DISPOSE', 'EXCL', 'FALSE', 'FLOAT', 'HALT', 'HIGH', 'INC', 'INCL',
'INTEGER', 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEW', 'NIL', 'ODD',
'ORD', 'PROC', 'REAL', 'SIZE', 'TRUE', 'TRUNC', 'VAL',
]

iso_reserved_words = [
# 46 reserved words
'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV',
'DO', 'ELSE', 'ELSIF', 'END', 'EXCEPT', 'EXIT', 'EXPORT', 'FINALLY',
'FOR', 'FORWARD', 'FROM', 'IF', 'IMPLEMENTATION', 'IMPORT', 'IN',
'LOOP', 'MOD', 'MODULE', 'NOT', 'OF', 'OR', 'PACKEDSET', 'POINTER',
'PROCEDURE', 'QUALIFIED', 'RECORD', 'REPEAT', 'REM', 'RETRY',
'RETURN', 'SET', 'THEN', 'TO', 'TYPE', 'UNTIL', 'VAR', 'WHILE',
'WITH',
]

iso_pervasives = [
# 42 pervasives
'ABS', 'BITSET', 'BOOLEAN', 'CAP', 'CARDINAL', 'CHAR', 'CHR', 'CMPLX',
'COMPLEX', 'DEC', 'DISPOSE', 'EXCL', 'FALSE', 'FLOAT', 'HALT', 'HIGH',
'IM', 'INC', 'INCL', 'INT', 'INTEGER', 'INTERRUPTIBLE', 'LENGTH',
'LFLOAT', 'LONGCOMPLEX', 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEW',
'NIL', 'ODD', 'ORD', 'PROC', 'PROTECTION', 'RE', 'REAL', 'SIZE',
'TRUE', 'TRUNC', 'UNINTERRUBTIBLE', 'VAL',
]

objm2_reserved_words = [
# base language, 42 reserved words
'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV',
'DO', 'ELSE', 'ELSIF', 'END', 'ENUM', 'EXIT', 'FOR', 'FROM', 'IF',
'IMMUTABLE', 'IMPLEMENTATION', 'IMPORT', 'IN', 'IS', 'LOOP', 'MOD',
'MODULE', 'NOT', 'OF', 'OPAQUE', 'OR', 'POINTER', 'PROCEDURE',
'RECORD', 'REPEAT', 'RETURN', 'SET', 'THEN', 'TO', 'TYPE',
'UNTIL', 'VAR', 'VARIADIC', 'WHILE',
# OO extensions, 16 reserved words
'BYCOPY', 'BYREF', 'CLASS', 'CONTINUE', 'CRITICAL', 'INOUT', 'METHOD',
'ON', 'OPTIONAL', 'OUT', 'PRIVATE', 'PROTECTED', 'PROTOCOL', 'PUBLIC',
'SUPER', 'TRY',
]

objm2_pervasives = [
# base language, 38 pervasives
'ABS', 'BITSET', 'BOOLEAN', 'CARDINAL', 'CHAR', 'CHR', 'DISPOSE',
'FALSE', 'HALT', 'HIGH', 'INTEGER', 'INRANGE', 'LENGTH', 'LONGCARD',
'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEG', 'NEW', 'NEXTV', 'NIL',
'OCTET', 'ODD', 'ORD', 'PRED', 'PROC', 'READ', 'REAL', 'SUCC', 'TMAX',
'TMIN', 'TRUE', 'TSIZE', 'UNICHAR', 'VAL', 'WRITE', 'WRITEF',
# OO extensions, 3 pervasives
'OBJECT', 'NO', 'YES',
]

gnu_reserved_words = [
# 10 additional reserved words
'ASM', '__ATTRIBUTE__', '__BUILTIN__', '__COLUMN__', '__DATE__',
'__FILE__', '__FUNCTION__', '__LINE__', '__MODULE__', 'VOLATILE',
cp_reserved_words = [
# 36 reserved words
'ABSTRACT', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CLOSE', 'CONST', 'DIV',
'DO', 'ELSE', 'ELSIF', 'EMPTY', 'END', 'EXIT', 'EXTENSIBLE', 'FOR',
'IF', 'IMPORT', 'IN', 'IS', 'LIMITED', 'LOOP', 'MOD', 'MODULE', 'NIL',
'OF', 'OR', 'OUT', 'RETURN', 'THEN', 'TO', 'TYPE', 'UNTIL', 'VAR',
'WHILE', 'WITH',
]

gnu_pervasives = [
# 21 identifiers, actually from pseudo-module SYSTEM
# but we will highlight them as if they were pervasives
'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16',
'CARDINAL32', 'CARDINAL64', 'COMPLEX32', 'COMPLEX64', 'COMPLEX96',
'COMPLEX128', 'INTEGER8', 'INTEGER16', 'INTEGER32', 'INTEGER64',
'REAL8', 'REAL16', 'REAL32', 'REAL96', 'REAL128', 'THROW',
cp_pervasives = [
# 36 pervasives
'ABS', 'ANYPTR', 'ANYREC', 'ASH', 'ASSERT', 'BITS', 'BOOLEAN', 'BYTE',
'CAP', 'CHAR', 'CHR', 'DEC', 'ENTIER', 'EXCL', 'HALT', 'INC', 'INCL',
'INF', 'INTEGER', 'FALSE', 'LEN', 'LONG', 'LONGINT', 'MAX', 'MIN',
'NEW', 'ODD', 'ORD', 'REAL', 'SET', 'SHORT', 'SHORTCHAR', 'SHORTINT',
'SHORTREAL', 'SIZE', 'TRUE',
]

def __init__(self, **options):
self.reserved_words = set()
self.pervasives = set()
# ISO Modula-2
if get_bool_opt(options, 'iso', False):
self.reserved_words.update(self.iso_reserved_words)
self.pervasives.update(self.iso_pervasives)
# Objective Modula-2
elif get_bool_opt(options, 'objm2', False):
self.reserved_words.update(self.objm2_reserved_words)
self.pervasives.update(self.objm2_pervasives)
# PIM Modula-2 (DEFAULT)
else:
self.reserved_words.update(self.pim_reserved_words)
self.pervasives.update(self.pim_pervasives)
# GNU extensions
if get_bool_opt(options, 'gm2ext', False):
self.reserved_words.update(self.gnu_reserved_words)
self.pervasives.update(self.gnu_pervasives)
self.reserved_words.update(self.cp_reserved_words)
self.pervasives.update(self.cp_pervasives)
# initialise
RegexLexer.__init__(self, **options)

0 comments on commit ce1b2e0

Please sign in to comment.
You can’t perform that action at this time.