-
Notifications
You must be signed in to change notification settings - Fork 62
/
parse.py
executable file
·766 lines (636 loc) · 27.5 KB
/
parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
import sys
import os
import re
from operator import attrgetter
class LibrarySource(object):
    """Holds info on all the library source code"""

    class SourceFile(object):
        """Info for an individual source file"""

        class SectionFinder(object):
            """Match a section within a source file

            Subclasses provide the start_re and end_re patterns and a
            finish() method, which is called when the end of the section
            has been matched.
            """

            def __init__(self, source_file):
                self.match = None
                self.line_number = 0
                self.lines = []
                self.source_file = source_file

            def check_for_end(self, line):
                """Finish the section and return True if line ends it"""
                if self.end_re.search(line):
                    self.finish()
                    # Start a new list rather than clearing the old one,
                    # as finish() hands the old list to the object it
                    # creates
                    self.lines = []
                    return True
                return False

            def check_for_start(self, line_number, line):
                """Record the match and return True if line starts a section"""
                match = self.start_re.search(line)
                if match:
                    self.match = match
                    self.line_number = line_number
                    self.lines.append(line)
                    return True
                return False

        class LineFinder(object):
            """Match a line within a source file

            Subclasses provide the line_re pattern and an add() method,
            which is called with the match object and line number.
            """

            def __init__(self, source_file):
                self.source_file = source_file

            def check_match(self, line, line_number):
                """Call add() if line matches line_re"""
                match = self.line_re.search(line)
                if match:
                    self.add(match, line_number)

        class SubroutineFinder(SectionFinder):
            """Find subroutines and store them in source_file.subroutines"""

            start_re = re.compile(
                r'^\s*(RECURSIVE\s+)?SUBROUTINE\s+([A-Z0-9_]+)\(',
                re.IGNORECASE)
            end_re = re.compile(r'^\s*END\s*SUBROUTINE', re.IGNORECASE)

            def finish(self):
                name = self.match.group(2)
                self.source_file.subroutines[name] = Subroutine(
                    name, self.line_number, self.lines, self.source_file)

        class InterfaceFinder(SectionFinder):
            """Find interfaces and store them in source_file.interfaces"""

            start_re = re.compile(
                r'^\s*INTERFACE\s+([A-Z0-9_]+)',
                re.IGNORECASE)
            end_re = re.compile(r'^\s*END\s*INTERFACE', re.IGNORECASE)

            def finish(self):
                name = self.match.group(1)
                self.source_file.interfaces[name] = Interface(
                    name, self.line_number, self.lines, self.source_file)

        class TypeFinder(SectionFinder):
            """Find type definitions and store them in source_file.types"""

            start_re = re.compile(r'^\s*TYPE\s+([A-Z0-9_]+)', re.IGNORECASE)
            end_re = re.compile(r'^\s*END\s*TYPE', re.IGNORECASE)

            def finish(self):
                name = self.match.group(1)
                self.source_file.types[name] = Type(
                    name, self.line_number, self.lines, self.source_file)

        class PublicFinder(LineFinder):
            """Find PUBLIC statements and record the listed symbols"""

            line_re = re.compile(
                r'^\s*PUBLIC\s*:*\s*([A-Z0-9_,\s]+)',
                re.IGNORECASE)

            def add(self, match, line_number):
                for symbol in match.group(1).split(','):
                    self.source_file.public.add(symbol.strip())

        class ConstantFinder(LineFinder):
            """Find integer constant definitions with an assigned value"""

            line_re = re.compile(
                r'^\s*INTEGER\([A-Z0-9\(\),_\s]+::\s*'
                r'([A-Z0-9_]+)\s*=\s*([A-Z0-9_\-\.]+)[^!]*(!<.*$)?',
                re.IGNORECASE)

            def add(self, match, line_number):
                name = match.group(1)
                assignment = match.group(2)
                if match.group(3) is None:
                    doxy = ''
                else:
                    # Drop the leading "!<" from the trailing comment
                    doxy = match.group(3)[2:].strip()
                self.source_file.constants[name] = Constant(
                    name, line_number, assignment, doxy)

        class DoxygenGroupingFinder(LineFinder):
            """Find Doxygen grouping comment lines"""

            # match at least one whitespace character before the ! to make
            # sure we don't get stuff from the file header
            line_re = re.compile(
                r'^\s+!\s*>\s*(\\(addtogroup|brief|see)|@[\{\}])(.*$)',
                re.IGNORECASE)

            def add(self, match, line_number):
                line = match.group(1)
                if match.group(3) is not None:
                    line += match.group(3)
                self.source_file.doxygen_groupings.append(
                    DoxygenGrouping(line_number, line))

        def __init__(self, source_file, params_only=False):
            """Initialise SourceFile object

            Arguments:
            source_file -- Path to the source file
            params_only -- If True, only constants are collected and the
                           source lines are not kept
            """
            self.file_path = source_file
            self.public = IdentifierSet()
            self.doxygen_groupings = []
            self.interfaces = IdentifierDict()
            self.subroutines = IdentifierDict()
            self.constants = IdentifierDict()
            self.types = IdentifierDict()
            self.parse_file(params_only)

        def parse_file(self, params_only=False):
            """Run through file once, getting everything we'll need"""
            # Use a context manager so the file handle is always closed
            with open(self.file_path, 'r') as source:
                source_lines = _join_lines(source.read()).splitlines()
            if not params_only:
                # only keep the source_lines if we need them
                self.source_lines = source_lines

            # Set the things we want to find
            line_finders = [self.ConstantFinder(self)]
            section_finders = []
            if not params_only:
                line_finders.extend((
                    self.PublicFinder(self),
                    self.DoxygenGroupingFinder(self)))
                section_finders.extend((
                    self.SubroutineFinder(self),
                    self.InterfaceFinder(self),
                    self.TypeFinder(self)))

            # Find them
            current_section = None
            for (line_number, line) in enumerate(source_lines):
                if current_section is not None:
                    # Inside a section, lines are only collected; the line
                    # finders are not run on them
                    current_section.lines.append(line)
                    if current_section.check_for_end(line):
                        current_section = None
                else:
                    for line_finder in line_finders:
                        line_finder.check_match(line, line_number)
                    for section in section_finders:
                        if section.check_for_start(line_number, line):
                            current_section = section
                            break

    def __init__(self, cm_path):
        """Load library information from source files

        Arguments:
        cm_path -- Path to OpenCMISS iron directory
        """
        self.lib_source = self.SourceFile(
            os.sep.join((cm_path, 'src', 'opencmiss_iron.f90')))
        cm_source_path = cm_path + os.sep + 'src'
        source_files = [
            cm_source_path + os.sep + file_name
            for file_name in os.listdir(cm_source_path)
            if file_name.endswith('.f90') and
            file_name != 'opencmiss_iron.f90']
        self.sources = [
            self.SourceFile(source, params_only=True)
            for source in source_files]
        self.resolve_constants()

        # Get all public types, constants and routines to include
        # Store all objects to be output in a dictionary with the line number
        # as the key
        public_objects = {}
        for t in self.lib_source.types.values():
            if t.name in self.lib_source.public:
                public_objects[t.line_number] = t
        for const in self.lib_source.constants.values():
            if const.name in self.lib_source.public:
                public_objects[const.line_number] = const

        self.public_subroutines = [
            routine
            for routine in self.lib_source.subroutines.values()
            if routine.name in self.lib_source.public]
        for interface in self.lib_source.interfaces.values():
            if interface.name in self.lib_source.public:
                self.public_subroutines += [
                    self.lib_source.subroutines[routine]
                    for routine in interface.get_subroutines()]
        self.public_subroutines = sorted(
            self.public_subroutines, key=attrgetter('name'))

        # Remove cmfe...TypesCopy routines, as these are only used within the
        # C bindings. Also remove cmfe_GeneratedMesh_SurfaceGet for now as it
        # takes an allocatable array but will be removed soon anyways.
        self.public_subroutines = [
            routine for routine in self.public_subroutines
            if not (routine.name.startswith('cmfe_GeneratedMesh_SurfaceGet')
                    or routine.name.endswith('TypesCopy'))]

        self.unbound_routines = []
        for routine in self.public_subroutines:
            routine.get_parameters()
            owner_class = routine.get_class()
            if owner_class is not None:
                try:
                    owner_type = self.lib_source.types[owner_class]
                    owner_type.methods.append(routine)
                except KeyError:
                    sys.stderr.write("Warning: Couldn't find matching class "
                                     "for routine %s\n" % routine.name)
            else:
                self.unbound_routines.append(routine)

        for routine in self.public_subroutines:
            public_objects[routine.line_number] = routine
        for doxygen_grouping in self.lib_source.doxygen_groupings:
            public_objects[doxygen_grouping.line_number] = doxygen_grouping

        # Everything to be output, in the order it appears in the source
        self.ordered_objects = [public_objects[k]
                                for k in sorted(public_objects.keys())]

    def resolve_constants(self):
        """Go through all public constants and work out their actual values"""
        for pub in self.lib_source.constants:
            if pub in self.lib_source.public:
                self.get_constant_value(pub)

    def get_constant_value(self, constant):
        """Get the actual value for a constant from the source files

        Follows the chain of assignments through the other source files
        until a constant with a resolved value is found. A warning is
        written to stderr if the chain ends, or loops back on itself,
        before a value is found.

        Arguments:
        constant -- Name of the constant to get the value for
        """
        target = self.lib_source.constants[constant]
        assignment = target.assignment
        visited = set()
        while not target.resolved:
            # Stop if we've already followed this name, otherwise a
            # circular chain of assignments would never terminate
            if assignment.lower() in visited:
                break
            visited.add(assignment.lower())
            for source in self.sources:
                if assignment in source.constants:
                    found = source.constants[assignment]
                    if found.resolved:
                        target.value = found.value
                        target.resolved = True
                    else:
                        # Follow the chain and search all sources again
                        assignment = found.assignment
                    break
            else:
                # Not defined in any source file (or there are no source
                # files at all), so there is nothing more to follow
                break
        if not target.resolved:
            sys.stderr.write("Warning: Couldn't resolve constant value: %s\n"
                             % constant)

    def group_constants(self):
        """Group constants into enums using the Doxygen groupings

        Returns a tuple of (enums, ungrouped_constants)
        """
        enums = []
        enum_dict = {}
        ungrouped_constants = []
        current_enum = None
        for o in self.ordered_objects:
            if isinstance(o, DoxygenGrouping):
                if o.type == 'group':
                    # Strip CMISS/OpenCMISS prefix from some constant
                    # group names
                    if o.group.startswith('CMISS'):
                        group_name = o.group[5:]
                    elif o.group.startswith('OpenCMISS'):
                        group_name = o.group[9:]
                    else:
                        group_name = o.group
                    if group_name in enum_dict:
                        current_enum = enum_dict[group_name]
                    else:
                        current_enum = Enum(group_name)
                        enum_dict[group_name] = current_enum
                elif o.type == 'brief':
                    # A brief outside of any group has nothing to describe
                    if current_enum is not None:
                        current_enum.comment = o.brief
                elif o.type == 'close':
                    # A group that is re-opened later is only listed once
                    if (current_enum is not None and
                            len(current_enum.constants) > 0 and
                            current_enum not in enums):
                        enums.append(current_enum)
                    current_enum = None
            elif isinstance(o, Constant):
                if current_enum is not None:
                    current_enum.constants.append(o)
                else:
                    ungrouped_constants.append(o)
        if current_enum is not None:
            sys.stderr.write("Error: Didn't match a closing group "
                             "for Doxygen groupings\n")
        return (enums, ungrouped_constants)
class CodeObject(object):
    """Base class for any line or section of code"""

    def __init__(self, name, line_number):
        self.name = name
        self.line_number = line_number

    def _get_comments(self):
        """Sets the comment_lines property

        This is a list of comment lines above this section or line of code,
        with the leading "!>" removed. Requires the source_file attribute
        to have been set, as the lines are read from its source_lines.
        """
        self.comment_lines = []
        source_lines = self.source_file.source_lines
        line_num = self.line_number - 1
        # Stop at the top of the file, a negative index would otherwise
        # wrap around and start reading lines from the end of the file
        while line_num >= 0:
            stripped = source_lines[line_num].strip()
            if not stripped.startswith('!>'):
                break
            self.comment_lines.append(stripped[2:].strip())
            line_num -= 1
        # Lines were collected bottom to top
        self.comment_lines.reverse()
class Constant(CodeObject):
    """A constant definition found in a source file"""

    def __init__(self, name, line_number, assignment, comment):
        """Initialise Constant

        Extra arguments:
        assignment -- Value or another variable assigned to this variable
        comment -- Contents of the doxygen comment describing
                   the constant

        The constant is marked as resolved, and its value set, if the
        assignment can be read as an integer or failing that a float.
        """
        super(Constant, self).__init__(name, line_number)
        self.assignment = assignment
        self.comment = comment
        # Assume the assignment refers to another variable until one of
        # the numeric conversions succeeds
        self.value = None
        self.resolved = False
        for convert in (int, float):
            try:
                number = convert(self.assignment)
            except ValueError:
                continue
            self.value = number
            self.resolved = True
            break
class Interface(CodeObject):
    """Information on an interface"""

    def __init__(self, name, line_number, lines, source_file):
        """Initialise an interface

        Arguments:
        name -- Interface name
        line_number -- Line number where the interface starts
        lines -- Contents of interface as a list of lines
        source_file -- Source file containing the interface
        """
        super(Interface, self).__init__(name, line_number)
        self.lines = lines
        self.source = source_file

    def get_subroutines(self):
        """Find the subroutines for an interface

        Choose the one with the highest number if there are options. This
        corresponds to the routine that takes array parameters

        The interface attribute of each chosen subroutine is set to this
        interface. Raises a KeyError if a chosen subroutine isn't found
        in the source file.

        Returns a list of subroutine names
        """
        all_subroutines = []
        routine_re = re.compile(
            r'^\s*MODULE PROCEDURE ([A-Z0-9_]+)', re.IGNORECASE)
        varying_string_re1 = re.compile(
            r'VSC*(Obj|Number|)[0-9]*$', re.IGNORECASE)
        varying_string_re2 = re.compile(
            r'VSC*(Obj|Number|Region|Interface|)*$', re.IGNORECASE)

        for line in self.lines:
            match = routine_re.search(line)
            if not match:
                continue
            routine_name = match.group(1)
            # Don't include routines using varying_string parameters
            if (varying_string_re1.search(routine_name) or
                    varying_string_re2.search(routine_name)):
                continue
            all_subroutines.append(routine_name)

        subroutines = self._get_array_routines(all_subroutines)

        for routine in subroutines:
            try:
                self.source.subroutines[routine].interface = self
            except KeyError:
                raise KeyError("Couldn't find subroutine %s for interface %s" %
                               (routine, self.name))

        return subroutines

    def _get_array_routines(self, routine_list):
        """Return a list of the routines that take array parameters if there
        is an option between passing an array or a scalar. All other routines
        are also returned.

        Arguments:
        routine_list -- List of subroutine names
        """
        # Group routines depending on their name, minus any number indicating
        # whether they take a scalar or array
        routine_groups = {}
        for routine in routine_list:
            routine_group = re.sub(r'\d', '0', routine)
            routine_groups.setdefault(routine_group, []).append(routine)

        routines = []
        for group in routine_groups.keys():
            max_number = -1
            array_routine = None
            for routine in routine_groups[group]:
                try:
                    number = int(
                        ''.join([c for c in routine if str.isdigit(c)]))
                except ValueError:
                    # no number in the name, only one routine in group
                    array_routine = routine
                    continue
                if number > max_number:
                    # Remember the highest number seen so far, so that a
                    # later routine with a lower number doesn't replace it
                    max_number = number
                    array_routine = routine
            routines.append(array_routine)

        return routines
class Subroutine(CodeObject):
    """Store information for a subroutine"""

    def __init__(self, name, line_number, lines, source_file):
        """Initialise a subroutine

        Arguments:
        name -- Subroutine name
        line_number -- Line number where the subroutine starts
        lines -- Contents of the subroutine as a list of lines
        source_file -- Source file containing the subroutine
        """
        super(Subroutine, self).__init__(name, line_number)
        self.lines = lines
        self.source_file = source_file
        self.parameters = None
        self.interface = None
        self.self_idx = -1
        self._get_comments()

    def get_parameters(self):
        """Get details of the subroutine parameters

        Sets the Subroutines parameters property as a list of all parameters,
        excluding the Err parameter.

        Raises a ValueError if the first line of the subroutine can't be
        read, and a RuntimeError if the declaration of a parameter can't
        be found.
        """

        def filter_match(string):
            if string is None:
                return ''
            else:
                return string.strip()

        self.parameters = []

        match = re.search(
            r'^\s*(RECURSIVE\s+)?SUBROUTINE\s+'
            r'([A-Z0-9_]+)\(([A-Z0-9_,\*\s]*)\)',
            self.lines[0],
            re.IGNORECASE)
        if not match:
            raise ValueError(
                "Could not read subroutine line:\n  %s" % self.lines[0])

        # Splitting an empty argument list gives a single empty string,
        # which isn't a parameter, so skip blank entries
        parameters = [
            p.strip() for p in match.group(3).split(',') if p.strip()]

        # Fortran identifiers are case insensitive, so look for the Err
        # parameter whatever case it is written in
        for (index, param) in enumerate(parameters):
            if param.lower() == 'err':
                del parameters[index]
                break
        else:
            sys.stderr.write("Warning: Routine doesn't take Err parameter:"
                             "%s\n" % self.name)

        for param in parameters:
            param_pattern = r"""
                # parameter type at start of line, followed by possible type
                # info, eg DP or SP in brackets
                ^\s*([A-Z_]+\s*(\(([A-Z_=,\*0-9]+)\))?)
                # extra specifications such as intent or pointer
                \s*([A-Z0-9\s_\(\):,\s]+)?\s*
                ::
                # Allow for other parameters to be included on the same line
                [A-Z_,\s\(\):]*
                # Before parameter name
                [,\s:]
                # Parameter name
                %s\b
                # Array dimensions if present
                (\(([0-9,:]+)\))?
                # Doxygen comment
                [^!]*(!<(.*)$)?
                """ % re.escape(param)
            param_re = re.compile(param_pattern, re.IGNORECASE | re.VERBOSE)

            for line in self.lines:
                match = param_re.search(line)
                if match:
                    param_type = match.group(1)
                    (type_params, extra_stuff, array, comment) = (
                        filter_match(match.group(i)) for i in (3, 4, 6, 8))
                    self.parameters.append(
                        Parameter(param, self, param_type, type_params,
                                  extra_stuff, array, comment))
                    break
            else:
                # No line of the subroutine declares this parameter
                raise RuntimeError("Couldn't find parameter %s "
                                   "for subroutine %s" % (param, self.name))

    def get_class(self):
        """Work out if this routine is a method of a class

        Sets the self_idx attribute

        Uses the routine name, which will be in the form Object_Method
        if this is a method of a class. The same naming is also used
        for user number routines so we have to check the parameter
        type is correct.

        get_parameters must have been called first. Returns the type name
        of the parameter holding the object, or None if this routine isn't
        a method of a class.
        """
        if len(self.parameters) == 0:
            return None

        if self.name.count('_') > 1:
            # Type name eg. = cmfe_Basis
            # Sometimes the type name has an extra bit at the end, eg
            # cmfe_FieldMLIO, but routines are named
            # cmfe_FieldML_OutputCreate, so we check if the parameter type
            # name starts with the routine type name
            routine_type_name = '_'.join(self.name.split('_')[0:2])

            # Object parameter is either first or last, it is last if this
            # is a Create or CreateStart routine, otherwise it is first
            first = self.parameters[0]
            if first.var_type == Parameter.CUSTOM_TYPE:
                param_type_name = first.type_name
                if param_type_name.startswith(routine_type_name):
                    self.self_idx = 0
                    return param_type_name

            # Some stuff like cmfe_FieldML_OutputCreate has the "self" object
            # as the last parameter, check for these here:
            last = self.parameters[-1]
            if (last.var_type == Parameter.CUSTOM_TYPE and
                    self.name.find('Create') > -1):
                param_type_name = last.type_name
                if param_type_name.startswith(routine_type_name):
                    self.self_idx = len(self.parameters) - 1
                    return param_type_name

        return None
class Parameter(object):
    """Details of a single subroutine parameter"""

    # Enumeration of the kinds of parameter that are recognised
    (INTEGER,
     FLOAT,
     DOUBLE,
     CHARACTER,
     LOGICAL,
     CUSTOM_TYPE) = range(6)

    def __init__(self, name, routine, param_type, type_params, extra_stuff,
                 array, comment):
        """Initialise a parameter

        Arguments:
        name -- Parameter name
        routine -- The subroutine this parameter belongs to
        param_type -- Type string from the parameter declaration
        type_params -- Contents of the brackets after the type, eg "DP"
                       for a real
        extra_stuff -- Further properties listed after the type, such as
                       the intent
        array -- Array dimensions given after the parameter name, or an
                 empty string if there are none
        comment -- The doxygen comment after the parameter
        """
        self.name = name
        self.routine = routine
        self.comment = comment
        self.type_name = None
        self.pointer = False

        declared_intent = None
        if extra_stuff != '':
            intent_match = re.search(
                r'INTENT\(([A-Z]+)\)?', extra_stuff, re.IGNORECASE)
            if intent_match is not None:
                declared_intent = intent_match.group(1)

            if 'DIMENSION' in extra_stuff:
                sys.stderr.write("Warning: Ignoring DIMENSION specification "
                                 "on parameter %s of routine %s\n" %
                                 (self.name, routine.name))
                sys.stderr.write("         Using DIMENSION goes against "
                                 "the OpenCMISS style guidelines.\n")

            if 'POINTER' in extra_stuff:
                self.pointer = True

        # Fall back to INOUT when the declaration gives no intent
        if declared_intent is not None:
            self.intent = declared_intent
        else:
            self.intent = 'INOUT'
            sys.stderr.write("Warning: No intent for parameter %s of "
                             "routine %s\n" % (self.name, routine.name))

        # Split up the array dimensions and count how many of them have
        # a variable size
        if array == '':
            dimensions = []
        else:
            dimensions = [dim.strip() for dim in array.split(',')]
        self.array_spec = dimensions
        self.array_dims = len(dimensions)
        self.required_sizes = dimensions.count(':')

        # Classify the parameter from the start of its type string
        declared_type = param_type.upper()
        if declared_type.startswith('INTEGER'):
            self.var_type = Parameter.INTEGER
        elif declared_type.startswith('REAL'):
            if type_params == 'DP':
                self.var_type = Parameter.DOUBLE
            else:
                self.var_type = Parameter.FLOAT
        elif declared_type.startswith('CHARACTER'):
            self.var_type = Parameter.CHARACTER
            # A string counts as an extra array dimension, a 1D array of
            # strings in Fortran is a 2D array of chars in C
            self.array_spec.append(':')
            self.array_dims += 1
            self.required_sizes += 1
        elif declared_type.startswith('LOGICAL'):
            self.var_type = Parameter.LOGICAL
        elif declared_type.startswith('TYPE'):
            self.var_type = Parameter.CUSTOM_TYPE
            self.type_name = type_params
        else:
            sys.stderr.write("Error: Unknown type %s for routine %s\n" %
                             (declared_type, routine.name))
            self.var_type = None
            self.type_name = declared_type
class Type(CodeObject):
    """A Fortran type definition found in a source file"""

    def __init__(self, name, line_number, lines, source_file):
        """Initialise type

        Arguments:
        name -- Type name
        line_number -- Line number in source where this is defined
        lines -- Contents of lines where this type is defined
        source_file -- Source file containing the type
        """
        super(Type, self).__init__(name, line_number)
        # Routines are appended to this list by other code
        self.methods = []
        self.source_file = source_file
        self.lines = lines
        # Needs source_file to be set, as the comments are read from it
        self._get_comments()
class DoxygenGrouping(object):
    """Store a line used for grouping in Doxygen"""

    def __init__(self, line_number, line):
        r"""Initialise a Doxygen grouping line

        Arguments:
        line_number -- Line number in source where the line is found
        line -- The grouping command and any text following it

        The type attribute is set to 'see', 'group', 'brief', 'open' or
        'close' depending on the command found, or None if there isn't one.
        The group attribute is also set for 'group' lines and the brief
        attribute for 'brief' lines.
        """
        self.line_number = line_number
        self.line = line.strip()
        if line.find(r'\see') > -1:
            self.type = 'see'
        elif line.find(r'\addtogroup') > -1:
            self.type = 'group'
            self.group = self._group_name(line)
        elif line.find(r'\brief') > -1:
            self.type = 'brief'
            self.brief = line[line.find(r'\brief') + len(r'\brief'):].strip()
        elif line.find(r'@{') > -1:
            self.type = 'open'
        elif line.find(r'@}') > -1:
            self.type = 'close'
        else:
            self.type = None

    @staticmethod
    def _group_name(line):
        r"""Return the group name from an \addtogroup line

        This is the word following the OpenCMISS_ prefix if the line has
        one, otherwise the word following \addtogroup. An empty string is
        returned if no name is given.
        """
        prefix = 'OpenCMISS_'
        prefix_start = line.find(prefix)
        if prefix_start > -1:
            remainder = line[prefix_start + len(prefix):]
        else:
            # Without the prefix, find returns -1 and can't be used to
            # work out where the name starts
            command = r'\addtogroup'
            remainder = line[line.find(command) + len(command):]
        words = remainder.split()
        if words:
            return words[0]
        return ''
class Enum(object):
    """A named group of constants"""

    def __init__(self, name):
        """Create an empty group

        Arguments:
        name -- Name of the group
        """
        # The comment and constants are filled in by other code
        self.comment = ''
        self.constants = []
        self.name = name
class UnsupportedParameterError(Exception):
    """Exception type for an unsupported parameter"""
def _join_lines(source):
"""Remove Fortran line continuations"""
return re.sub(r'[\t ]*&[\t ]*[\r\n]+[\t ]*&[\t ]*', ' ', source)
class IdentifierDict(dict):
    """Dictionary used to store Fortran identifiers, to allow
    getting items with case insensitivity

    Keys are stored as given. Lookups with [], "in" and get() first try
    the key as given and then fall back to comparing it with the stored
    keys ignoring case.
    """

    def _stored_key(self, key):
        """Return the stored key matching key, ignoring case

        Raises a KeyError if there is no matching key
        """
        if dict.__contains__(self, key):
            return key
        try:
            wanted = key.lower()
        except AttributeError:
            # Not a string, so there is no other case to try
            raise KeyError(key)
        for ikey in self:
            if ikey.lower() == wanted:
                return ikey
        raise KeyError(key)

    def __getitem__(self, key):
        return dict.__getitem__(self, self._stored_key(key))

    def __contains__(self, key):
        # Keep membership tests consistent with item lookup
        try:
            self._stored_key(key)
        except KeyError:
            return False
        return True

    def get(self, key, default=None):
        """Return the value for key ignoring case, or default if missing"""
        try:
            return self[key]
        except KeyError:
            return default
class IdentifierSet(set):
    """Set of Fortran identifiers, where adding an item and checking
    for an item both ignore case"""

    def __contains__(self, val):
        # Items are stored in lower case by add
        lowered = val.lower()
        return super(IdentifierSet, self).__contains__(lowered)

    def add(self, val):
        lowered = val.lower()
        super(IdentifierSet, self).add(lowered)