-
Notifications
You must be signed in to change notification settings - Fork 215
/
compiler.py
3542 lines (3008 loc) · 160 KB
/
compiler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# ====== Legal notices
#
#
# Copyright 2014 - 2018 Jacques de Hooge, GEATEC engineering, www.geatec.com
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import os.path
import sys
import ast
import re
import copy
import datetime
import math
import traceback
import io
import subprocess
import shlex
import shutil
import tokenize
import collections
import json
from contextlib import contextmanager, ExitStack
from org.transcrypt import utils, sourcemaps, minify, static_check, type_check
# Not referenced in the visible part of this file -- presumably a flag tracking generation inside an if-statement, TODO confirm
inIf = False
# Sentinels that Module.parse stores in its list of source lines, in place of an ecom c.q. noecom pragma line,
# to switch the conversion of executable comments into normal code on c.q. off from that line onward
ecom = True
noecom = False
# [name, default value] pairs -- presumably the keyword arguments of the dataclass decorator, TODO confirm against its use further down
dataClassDefaultArgTuple = (['init', True], ['repr', True], ['eq', True], ['order', False], ['unsafe_hash', False], ['frozen', False])
'''
All files required for deployment are placed in subdirectory __target__ of the application.
Each module has an unambiguous dotted path, always from one of the module roots, never relative.
Dotted paths are translated to dotted filenames.
The __runtime__ module is just another Python module with lots of JS code inside __pragma__ ('js', '{}', include...) fragments,
namely the __core__ and __builtin__ parts.
Sourcemaps are generated per module.
There's no need for a link with other modules.
Since import paths are static, names of minified JS files simply end on .js just like non-minified files, so not on .min.js.
Sourcemaps are named <module name>.map.
'''
class Program:
    '''
    One compilation run: derives source and target paths from the command line arguments,
    provides the runtime module and the main module (and, through them, all imported modules)
    and finally saves the project metadata of this run.
    '''

    def __init__ (
        self,
        moduleSearchDirs,   # All possible roots of the module path, the latter uniquely denoted by the dotted module name
        symbols,            # Set of symbols either passed on the command line, deduced from command line switches etc.
        envir               # Data about run / compilation environment
    ):
        '''
        Compile the program denoted by utils.commandArgs.source.

        Errors that occur while providing modules are handed to utils.enhanceException.
        '''
        utils.setProgram (self)

        self.moduleSearchDirs = moduleSearchDirs
        self.symbols = symbols
        self.envir = envir
        self.javascriptVersion = int (utils.commandArgs.esv) if utils.commandArgs.esv else 6

        self.moduleDict = {}    # Administration of all modules that play a role in this program
        self.importStack = []   # Pending imports, enables showing load sequence in case a module cannot be loaded

        # Set paths
        self.sourcePrepath = os.path.abspath (utils.commandArgs.source) .replace ('\\', '/')
        self.sourceDir = '/'.join (self.sourcePrepath.split ('/') [ : -1])
        self.mainModuleName = self.sourcePrepath.split ('/') [-1]

        if utils.commandArgs.outdir:
            if os.path.isabs (utils.commandArgs.outdir):
                self.targetDir = utils.commandArgs.outdir.replace ('\\', '/')
            else:
                self.targetDir = f'{self.sourceDir}/{utils.commandArgs.outdir}'.replace ('\\', '/')
        else:
            self.targetDir = f'{self.sourceDir}/__target__'.replace ('\\', '/')

        self.projectPath = f'{self.targetDir}/{self.mainModuleName}.project'

        # Load the most recent project metadata
        # A missing, unreadable or corrupt project file just means there's no usable history,
        # but KeyboardInterrupt and SystemExit should not be swallowed here
        try:
            with open (self.projectPath, 'r') as projectFile:
                project = json.load (projectFile)
        except Exception:
            project = {}

        # Valid JSON that isn't an object (e.g. null or a list) is unusable as metadata as well
        if not isinstance (project, dict):
            project = {}

        # Reset everything in case of a build or a command args change
        self.optionsChanged = utils.commandArgs.projectOptions != project.get ('options')
        if utils.commandArgs.build or self.optionsChanged:
            shutil.rmtree (self.targetDir, ignore_errors = True)

        try:
            # Provide runtime module since it's always needed but never imported explicitly
            self.runtimeModuleName = 'org.transcrypt.__runtime__'
            self.searchedModulePaths = []   # Report only failure of searching runtime, so clear any history
            self.provide (self.runtimeModuleName)

            # Provide main module and, with that, all other modules recursively
            self.searchedModulePaths = []   # Report only failure of searching for main, so clear any history
            self.provide (self.mainModuleName, '__main__')
        except Exception as exception:
            utils.enhanceException (    # If it was an Error, don't change it, otherwise make it one (??? Just to be sure?)
                exception,
                message = f'\n\t{exception}'
            )

        # Finally, save the run info
        project = {
            'options': utils.commandArgs.projectOptions,
            'modules': [{'source': module.sourcePath, 'target': module.targetPath} for module in self.moduleDict.values ()],
        }
        with utils.create (self.projectPath) as projectFile:
            json.dump (project, projectFile)

    def provide (self, moduleName, __moduleName__ = None, filter = None):
        '''
        Return the Module registered under moduleName, constructing it if it isn't in self.moduleDict yet.

        __moduleName__ and filter are only used when a new Module is constructed and are passed to it unchanged.
        '''
        # moduleName may contain dots if it's imported, but it'll have the same name in every import

        if moduleName in self.moduleDict:   # Find out if module is already provided
            return self.moduleDict [moduleName]
        else:                               # If not, provide by loading or compiling
            # This may fail legally if filteredModuleName ends on a name of something in a module, rather than of the module itself
            return Module (self, moduleName, __moduleName__, filter)
class Module:
def __init__ (self, program, name, __name__, filter):
    '''
    Locate the module, register it with the program and obtain up to date target code for it.

    program:    the Program this module belongs to
    name:       dotted module name, used as key in program.moduleDict
    __name__:   value for self.__name__, self.name is used if it's empty
    filter:     alias filter of the importing module, passed on to findPaths

    Target code is regenerated in case of a build, changed options, a missing target or a source newer than its target,
    otherwise the existing target file is loaded.
    After that all modules imported by the target code are provided via the program.
    An exception is raised by findPaths if the module can't be found.
    '''
    self.program = program
    self.name = name
    self.__name__ = __name__ if __name__ else self.name

    # Try to find module, exception if fails
    self.findPaths (filter)

    # Remember names of module being under compilation and line nrs of current import
    # Used for error reports
    # Note that JavaScript-only modules will leave lineNr None if they import something
    # This is since there's no explicit import location in such modules
    # Only add a module to the importStack if it's at least found by findPaths, otherwise it has no sourcePath to report
    self.program.importStack.append ([self, None])

    # Register that module is found
    self.program.moduleDict [self.name] = self

    # Create sourcemapper, if only for cleaning dir after previous run
    self.sourceMapper = sourcemaps.SourceMapper (
        self.name,
        self.program.targetDir,
        not utils.commandArgs.nomin,
        utils.commandArgs.dmap
    )

    # Generate, copy or load target code and symbols
    if (
        utils.commandArgs.build or self.program.optionsChanged
        or
        not os.path.isfile (self.targetPath) or os.path.getmtime (self.sourcePath) > os.path.getmtime (self.targetPath)
    ):
        # If it's a build rather than a make, or the target doesn't exist or the target is obsolete

        if self.isJavascriptOnly:
            # Digest source JavaScript and copy to target location
            self.loadJavascript ()

            # JavaScript-only, so annotations are pointless, so it's ok to strip
            javascriptDigest = utils.digestJavascript (self.targetCode, self.program.symbols, not utils.commandArgs.dnostrip, False)
        else:
            # Perform static typecheck on source code
            if utils.commandArgs.dstat:
                try:
                    type_check.run (self.sourcePath)
                except Exception:
                    utils.log (True, 'Validating: {} and dependencies\n\tInternal error in static typing validator\n', self.sourcePath)

            # Construct parse tree
            self.parse ()
            if utils.commandArgs.dtree:
                self.dumpTree ()

            # Perform lightweight static check on parse tree
            if utils.commandArgs.dcheck:
                try:
                    static_check.run (self.sourcePath, self.parseTree)
                except Exception:
                    utils.log (True, 'Checking: {}\n\tInternal error in lightweight consistency checker, remainder of module skipped\n', self.sourcePath)

            # Generate JavaScript code and sourcemap from parse tree
            self.generateJavascriptAndPrettyMap ()

            # Generated code, may have annotations so don't strip comments, and there are no non-annotation comments to strip anyhow, neither are there any strip pragma's
            javascriptDigest = utils.digestJavascript (self.targetCode, self.program.symbols, False, self.generator.allowDebugMap)

        # Write target code
        utils.log (True, 'Saving target code in: {}\n', self.targetPath)
        filePath = self.targetPath if utils.commandArgs.nomin else self.prettyTargetPath
        with utils.create (filePath) as aFile:
            aFile.write (self.targetCode)

        # Minify target code
        if not utils.commandArgs.nomin:
            utils.log (True, 'Saving minified target code in: {}\n', self.targetPath)
            minify.run (
                self.program.targetDir,
                self.prettyTargetName,
                self.targetName,
                mapFileName = self.shrinkMapName if utils.commandArgs.map else None,
            )

            # The shrink map only exists if the minifier was run with a map file name
            if utils.commandArgs.map:
                if self.isJavascriptOnly:
                    if os.path.isfile (self.mapPath):
                        os.remove (self.mapPath)
                    os.rename (self.shrinkMapPath, self.mapPath)
                else:
                    self.sourceMapper.generateMultilevelMap ()

        # Append map reference to target file, which may be minified or not
        with open (self.targetPath, 'a') as targetFile:
            targetFile.write (self.mapRef)
    else:
        # If it's a make rather than a build and the target exists, load it, beautify it if needed and run through digestJavascript for obtaining symbols
        # Read via a context manager, so the file is closed right away rather than whenever the garbage collector gets to it
        with open (self.targetPath, 'r') as targetFile:
            self.targetCode = targetFile.read ()
        javascriptDigest = utils.digestJavascript (self.targetCode, self.program.symbols, True, False, refuseIfAppearsMinified = True)

        if not javascriptDigest:
            minify.run (
                self.program.targetDir,
                self.targetName,
                self.prettyTargetName,
                prettify = True,
            )
            with open (self.prettyTargetPath, 'r') as prettyTargetFile:
                self.prettyTargetCode = prettyTargetFile.read ()
            javascriptDigest = utils.digestJavascript (self.prettyTargetCode, self.program.symbols, True, False)    # Prettified, so doesn't start with '/'!

    self.targetCode = javascriptDigest.digestedCode
    self.importedModuleNames = javascriptDigest.importedModuleNames
    self.exportedNames = javascriptDigest.exportedNames

    for importedModuleName in self.importedModuleNames:
        # Unfiltered hyphens allowed, since we may be in a JavaScript-only part of the module hierarchy
        # Also these imports cannot legally fail, since the digested JavaScript code already has unambiguous imports
        # If the JavaScript module was just generated from a Python module, it will already be in the module dictionary
        self.program.searchedModulePaths = []
        self.program.provide (importedModuleName)

    # Remove eventual intermediate files
    utils.tryRemove (self.prettyTargetPath)
    utils.tryRemove (self.shrinkMapPath)
    utils.tryRemove (self.prettyMapPath)

    # Module not under compilation anymore, so pop it
    self.program.importStack.pop ()
def findPaths (self, filter):
# Filter to get hyphens and/or dots in name if a suitable alias is defined
# The filter function, and with it the active aliases, are passed by the importing module
rawRelSourceSlug = self.name.replace ('.', '/')
relSourceSlug = filter (rawRelSourceSlug) if filter and utils.commandArgs.alimod else rawRelSourceSlug
'''
# BEGIN DEBUGGING CODE
print ()
print ('Raw slug :', rawRelSourceSlug)
print ('Cooked slug:', relSourceSlug)
print ()
# END DEBUGGING CODE
'''
for searchDir in self.program.moduleSearchDirs:
# Find source slugs
sourceSlug = f'{searchDir}/{relSourceSlug}'
if os.path.isdir (sourceSlug):
self.sourceDir = sourceSlug
self.sourcePrename = '__init__'
else:
self.sourceDir, self.sourcePrename = sourceSlug.rsplit ('/', 1)
self.sourcePrepath = f'{self.sourceDir}/{self.sourcePrename}'
self.pythonSourcePath = f'{self.sourcePrepath}.py'
self.javascriptSourcePath = f'{self.sourcePrepath}.js'
# Find target slugs
self.targetPrepath = f'{self.program.targetDir}/{self.name}'
self.targetName = f'{self.name}.js'
self.targetPath = f'{self.targetPrepath}.js'
self.prettyTargetName = f'{self.name}.pretty.js'
self.prettyTargetPath = f'{self.targetPrepath}.pretty.js'
self.importRelPath = f'./{self.name}.js'
self.treePath = f'{self.targetPrepath}.tree'
self.mapPath = f'{self.targetPrepath}.map'
self.prettyMapPath = f'{self.targetPrepath}.shrink.map'
self.shrinkMapName = f'{self.name}.shrink.map'
self.shrinkMapPath = f'{self.targetPrepath}.shrink.map'
self.mapSourcePath = f'{self.targetPrepath}.py'
self.mapRef = f'\n//# sourceMappingURL={self.name}.map'
# If module exists
if os.path.isfile (self.pythonSourcePath) or os.path.isfile (self.javascriptSourcePath):
# Check if it's a JavaScript-only module
self.isJavascriptOnly = os.path.isfile (self.javascriptSourcePath) and not os.path.isfile (self.pythonSourcePath)
# Set more paths (tree, sourcemap, ...)
# (To do)
self.sourcePath = self.javascriptSourcePath if self.isJavascriptOnly else self.pythonSourcePath
break
# Remember all fruitless paths to give a decent error report if module isn't found
# Note that these aren't all searched paths for a particular module,
# since the difference between an module and a facility inside a module isn't always known a priori
self.program.searchedModulePaths.extend ([self.pythonSourcePath, self.javascriptSourcePath])
else:
# If even the target can't be loaded then there's a problem with this module, root or not
# However, loading a module is allowed to fail (see self.revisit_ImportFrom)
# In that case this error is swallowed, but searchedModulePath is retained,
# because searching in the swallowing except branch may also fail and should mention ALL searched paths
raise utils.Error (
message = '\n\tImport error, can\'t find any of:\n\t\t{}\n'.format ('\n\t\t'. join (self.program.searchedModulePaths))
)
def generateJavascriptAndPrettyMap (self):
    '''
    Run the code generator on this module and assemble its target fragments into self.targetCode.

    If a source map or annotations are asked for, each target line carries an appended source line number, that is split off here.
    If a source map is asked for, the source line numbers are kept in self.sourceLineNrs,
    the pretty map is generated and saved and the source file is copied to the target location.
    Lines consisting of only a semicolon are left out in all cases.
    '''
    utils.log (False, 'Generating code for module: {}\n', self.targetPath)

    # Generate target fragments
    self.generator = Generator (self)
    joinedFragments = ''.join (self.generator.targetFragments)

    mapWanted = utils.commandArgs.map

    if mapWanted or utils.commandArgs.anno:
        # In both cases the generator has instrumented the target fragments by appending line numbers
        # N.B. __pragma__ ('noanno') will be too late to prevent instrumenting of the main module's first line
        # In that case if no source maps are required either, the appended line numbers simply won't be used

        # Source line nrs are only remembered if a map is to be generated, so not if only annotated JavaScript is needed
        if mapWanted:
            self.sourceLineNrs = []

        targetLines = []
        nrLength = sourcemaps.lineNrLength

        for instrumentedLine in joinedFragments.split ('\n'):
            # Separate the appended line number, e.g. the string '000014', from the bare line and turn it into an integer, e.g. 14
            nrString = instrumentedLine [-nrLength : ]
            sourceLineNr = int ('1' + nrString) - sourcemaps.maxNrOfSourceLinesPerModule
            bareLine = instrumentedLine [ : -nrLength]

            # Empty statements are skipped, together with their line number info
            if bareLine.strip () == ';':
                continue

            # Prepend annotation comment if required
            if self.generator.allowDebugMap:
                bareLine = '/* {} */ {}'.format (nrString, bareLine)

            targetLines.append (bareLine)
            if mapWanted:
                self.sourceLineNrs.append (sourceLineNr)

        # Generate per module sourcemap and copy sourcefile to target location
        if mapWanted:
            utils.log (False, 'Saving source map in: {}\n', self.mapPath)
            self.sourceMapper.generateAndSavePrettyMap (self.sourceLineNrs)
            shutil.copyfile (self.sourcePath, self.mapSourcePath)
    else:
        # No maps or annotations needed, so no line numbers to split off, only empty statements to skip
        targetLines = [bareLine for bareLine in joinedFragments.split ('\n') if bareLine.strip () != ';']

    self.targetCode = '\n'.join (targetLines)
def loadJavascript (self):
with tokenize.open (self.sourcePath) as sourceFile:
self.targetCode = sourceFile.read ()
def parse (self):
    '''
    Read the Python source of this module, preprocess its pragma comments and executable comments and construct the parse tree.

    Sets self.sourceCode to utils.extraLines followed by the contents of the source file
    and self.parseTree to the AST of the preprocessed source code.
    Each node in the tree, except the root, gets a parentNode attribute referring to its parent.
    A SyntaxError is handed to utils.enhanceException, together with its line number and an excerpt marking the fault location.
    '''
    def pragmasFromComments (sourceCode):
        # This function turns comment-like pragma's into regular ones, both for multi-line and single-line pragma's
        # It changes rather than regenerates the sourcecode, since tokenize/untokenize will mess up formatting
        # Single line pragma's are always comment-like and will be turned into multi-line function-like pragma's
        # Also in this function executable comments are converted to normal code

        # Tokenize the source code, to be able to recognize comments easily
        tokens = tokenize.tokenize (io.BytesIO (sourceCode.encode ('utf-8')) .readline)

        # Store all line indices of comment-like pragma's, multi-line and single-line in separate lists
        pragmaCommentLineIndices = []
        shortPragmaCommentLineIndices = []
        ecomPragmaLineIndices = []
        noecomPragmaLineIndices = []
        pragmaIndex = -1000     # Far enough back to never be 2 tokens before a real token
        for tokenIndex, (tokenType, tokenString, startRowColumn, endRowColumn, logicalLine) in enumerate (tokens):
            if tokenType == tokenize.COMMENT:
                strippedComment = tokenString [1 : ] .lstrip ()
                if strippedComment.startswith ('__pragma__'):
                    # Remember line index of multi-line pragma, like: # __pragma__ (...
                    pragmaCommentLineIndices.append (startRowColumn [0] - 1)
                elif strippedComment.replace (' ', '') .replace ('\t', '') .startswith ('__:'):
                    # Remember line index of single-line pragma, like: <some code> # __: ...
                    shortPragmaCommentLineIndices.append (startRowColumn [0] - 1)
            if tokenType == tokenize.NAME and tokenString == '__pragma__':
                pragmaIndex = tokenIndex
            if tokenIndex - pragmaIndex == 2:
                # Second token after the __pragma__ name, so past the opening parenthesis, strip its quotes
                pragmaKind = tokenString [1:-1]
                if pragmaKind == 'ecom':
                    ecomPragmaLineIndices.append (startRowColumn [0] - 1)
                elif pragmaKind == 'noecom':
                    noecomPragmaLineIndices.append (startRowColumn [0] - 1)

        # Convert original, non-tokenized sourcecode to a list of lines
        sourceLines = sourceCode.split ('\n')

        # Use line indices of multi-line function-like ecom / noecom pragma's to transform these lines into executable comment switches
        for ecomPragmaLineIndex in ecomPragmaLineIndices:
            sourceLines [ecomPragmaLineIndex] = ecom
        for noecomPragmaLineIndex in noecomPragmaLineIndices:
            sourceLines [noecomPragmaLineIndex] = noecom

        # Use line indices of multi-line comment-like pragma singles to transform these into function-like pragma singles (which often turn out to be part of a matching pair)
        allowExecutableComments = utils.commandArgs.ecom
        for pragmaCommentLineIndex in pragmaCommentLineIndices:
            indentation, separator, tail = sourceLines [pragmaCommentLineIndex] .partition ('#')
            pragma, separator, comment = tail.partition ('#')
            pragma = pragma.replace (' ', '') .replace ('\t', '')

            # Turn appropriate lines into executable comment switches
            if "('ecom')" in pragma or '("ecom")' in pragma:
                allowExecutableComments = True
                sourceLines [pragmaCommentLineIndex] = ecom
            elif "('noecom')" in pragma or '("noecom")' in pragma:
                allowExecutableComments = False
                sourceLines [pragmaCommentLineIndex] = noecom
            else:
                sourceLines [pragmaCommentLineIndex] = indentation + tail.lstrip ()

        # Use line indices of single-line comment-like pragma's to transform these into function-like pragma pairs
        for shortPragmaCommentLineIndex in shortPragmaCommentLineIndices:
            head, tail = sourceLines [shortPragmaCommentLineIndex] .rsplit ('#', 1)
            strippedHead = head.lstrip ()
            indent = head [ : len (head) - len (strippedHead)]
            pragmaName = tail.replace (' ', '') .replace ('\t', '') [3:]

            # Turn appropriate lines into executable comment switches
            # The switch has to replace the line holding the short pragma itself,
            # not the line that the previous loop happened to visit last
            if pragmaName == 'ecom':
                sourceLines [shortPragmaCommentLineIndex] = ecom
            elif pragmaName == 'noecom':
                sourceLines [shortPragmaCommentLineIndex] = noecom
            elif pragmaName.startswith ('no'):
                sourceLines [shortPragmaCommentLineIndex] = '{}__pragma__ (\'{}\'); {}; __pragma__ (\'{}\')' .format (indent, pragmaName, head, pragmaName [2:])    # Correct!
            else:
                sourceLines [shortPragmaCommentLineIndex] = '{}__pragma__ (\'{}\'); {}; __pragma__ (\'no{}\')' .format (indent, pragmaName, head, pragmaName)

        # Switch executable comments on c.q. off and turn executable comments into normal code lines for Transcrypt (as opposed to CPython)
        uncommentedSourceLines = []
        for sourceLine in sourceLines:
            if sourceLine == ecom:
                allowExecutableComments = True
            elif sourceLine == noecom:
                allowExecutableComments = False
            elif allowExecutableComments:
                lStrippedSourceLine = sourceLine.lstrip ()
                # Lines that open or close an executable multi-line comment are dropped, a leading #? is removed
                if not lStrippedSourceLine [:4] in {"'''?", "?'''", '"""?', '?"""'}:
                    uncommentedSourceLines.append (sourceLine.replace ('#?', '', 1) if lStrippedSourceLine.startswith ('#?') else sourceLine)
            else:
                uncommentedSourceLines.append (sourceLine)

        # Return joined lines, to be used for parsing
        return '\n'.join (uncommentedSourceLines)

    try:
        utils.log (False, 'Parsing module: {}\n', self.sourcePath)

        with tokenize.open (self.sourcePath) as sourceFile:
            self.sourceCode = utils.extraLines + sourceFile.read ()

        self.parseTree = ast.parse (pragmasFromComments (self.sourceCode))

        # Give each node a reference to its parent
        for node in ast.walk (self.parseTree):
            for childNode in ast.iter_child_nodes (node):
                childNode.parentNode = node
    except SyntaxError as syntaxError:
        utils.enhanceException (
            syntaxError,
            lineNr = syntaxError.lineno,
            message = (
                    '\n\t{} [<-SYNTAX FAULT] {}'.format (
                        syntaxError.text [:syntaxError.offset].lstrip (),
                        syntaxError.text [syntaxError.offset:].rstrip ()
                    )
                if syntaxError.text else
                    syntaxError.args [0]
            )
        )
def dumpTree (self):
    '''
    Write a textual, tab-indented dump of self.parseTree to the file at self.treePath.

    Each line holds the field name and type name of a tree item, followed by its value if it's neither an AST node nor a list.
    The fragments and the resulting text are kept in self.treeFragments and self.textTree.
    '''
    utils.log (False, 'Dumping syntax tree for module: {}\n', self.sourcePath)

    fragments = self.treeFragments = []

    def dump (label, item, depth):
        fragments.append ('\n{0}{1}: {2} '.format (depth * '\t', label, type (item).__name__ ))

        if isinstance (item, ast.AST):
            # Node, so descend into its fields
            for fieldName, fieldValue in ast.iter_fields (item):
                dump (fieldName, fieldValue, depth + 1)
        elif isinstance (item, list):
            # List, so descend into its elements
            for member in item:
                dump ('element', member, depth + 1)
        else:
            # Leaf, so show its value
            fragments.append ('= {0}'.format (item))

    dump ('file', self.parseTree, 0)

    # Leave out the newline that the very first fragment starts with
    self.textTree = ''.join (fragments) [1:]

    with utils.create (self.treePath) as treeFile:
        treeFile.write (self.textTree)
class Generator (ast.NodeVisitor):
# Terms like parent, child, ancestor and descendant refer to the parse tree here, not to inheritance
def __init__ (self, module):
    '''
    Initialize the code generation state for one module and generate its target fragments right away.

    module: the Module being compiled, its parseTree is visited and its sourcePath and program.envir are read

    The generated fragments end up in self.targetFragments.
    Any exception during the visit is handed to utils.enhanceException, together with the current source line number.
    A utils.Error is raised if self.tempIndices isn't empty after generation.
    '''
    self.module = module

    # Output administration, emit inserts new fragments at fragmentIndex
    self.targetFragments = []
    self.fragmentIndex = 0
    self.indentLevel = 0
    self.scopes = []            # Stack of scopes, pushed by inscope and popped by descope
    self.importHeads = set ()
    self.importHoistMemos = []
    self.allOwnNames = set ()
    self.allImportedNames = set ()
    self.expectingNonOverloadedLhsIndex = False
    self.lineNr = 1             # Reported as line number if code generation fails
    self.propertyAccessorList = []
    self.mergeList = []

    # Identifier aliases, tried successively by filterId
    self.aliases = [
# START predef_aliases
                                                    # Format: ('<Python source identifier>', '<JavaScript target identifier>')
                                                    ('js_and', 'and'),
        ('arguments', 'py_arguments'),              ('js_arguments', 'arguments'),
        ('case', 'py_case'),
        ('clear', 'py_clear'),                      ('js_clear', 'clear'),
                                                    ('js_conjugate', 'conjugate'),
        ('default', 'py_default'),
        ('del', 'py_del'),                          ('js_del', 'del'),
        ('false', 'py_false'),
                                                    ('js_from', 'from'),
        ('get', 'py_get'),                          ('js_get', 'get'),
                                                    ('js_global', 'global'),
        ('Infinity', 'py_Infinity'),                ('js_Infinity', 'Infinity'),
        ('is', 'py_is'),                            ('js_is', 'is'),
        ('isNaN', 'py_isNaN'),                      ('js_isNaN', 'isNaN'),
        ('iter', 'py_iter'),                        ('js_iter', 'iter'),
        ('items', 'py_items'),                      ('js_items', 'items'),
        ('keys', 'py_keys'),                        ('js_keys', 'keys'),
        ('name', 'py_name'),                        ('js_name', 'name'),
        ('NaN', 'py_NaN'),                          ('js_NaN', 'NaN'),
        ('new', 'py_new'),
        ('next', 'py_next'),                        ('js_next', 'next'),
                                                    ('js_not', 'not'),
                                                    ('js_or', 'or'),
        ('pop', 'py_pop'),                          ('js_pop', 'pop'),
        ('popitem', 'py_popitem'),                  ('js_popitem', 'popitem'),
        ('replace', 'py_replace'),                  ('js_replace', 'replace'),
        ('selector', 'py_selector'),                ('js_selector', 'selector'),
        ('sort', 'py_sort'),                        ('js_sort', 'sort'),
        ('split', 'py_split'),                      ('js_split', 'split'),
        ('switch', 'py_switch'),
        ('type', 'py_metatype'),                    ('js_type', 'type'),    # Only for the type metaclass, the type operator is dealt with separately in visit_Call
        ('TypeError', 'py_TypeError'),              ('js_TypeError', 'TypeError'),
        ('update', 'py_update'),                    ('js_update', 'update'),
        ('values', 'py_values'),                    ('js_values', 'values'),
        ('reversed', 'py_reversed'),                ('js_reversed', 'reversed'),
        ('setdefault', 'py_setdefault'),            ('js_setdefault', 'setdefault'),
                                                    ('js_super', 'super'),
        ('true', 'py_true'),
        ('undefined', 'py_undefined'),              ('js_undefined', 'undefined'),
# END predef_aliases
    ]

    self.idFiltering = True     # If False, filterId returns identifiers unchanged

    self.tempIndices = {}       # Has to be empty again after generation, see the check at the end
    self.skippedTemps = set ()
    self.stubsName = 'org.{}.stubs.'.format (self.module.program.envir.transpiler_name)

    # JavaScript spelling of the Python constants None, True and False
    self.nameConsts = {
        None: 'null',
        True: 'true',
        False: 'false'
    }

    '''
The precendences explicitly given as integers in the list below are JavaScript precedences as specified by:
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence .
Python precedences are implicitly present in branch ordering of the AST generated by CPython's parser.
'''

    # Maps AST operator type to (JavaScript operator, JavaScript precedence), visitSubExpr uses the precedence to decide upon parentheses
    self.operators = {
        # Highest precendence

        ast.Not: ('!', 16),
        ast.Invert: ('~', 16),
        ast.UAdd: ('+', 16),
        ast.USub: ('-', 16),

        ast.Pow: (None, 15),        # Dealt with separately

        ast.Mult: ('*', 14),
        ast.MatMult: (None, 14),    # Dealt with separately
        ast.Div: ('/', 14),
        ast.FloorDiv: (None, 14),   # Dealt with separately
        ast.Mod: ('%', 14),         # Used only for JavaScript %, which differs from Python %

        ast.Add: ('+', 13),
        ast.Sub: ('-', 13),

        ast.LShift: ('<<', 12),
        ast.RShift: ('>>', 12),

        ast.Lt: ('<', 11),
        ast.LtE: ('<=', 11),
        ast.Gt: ('>', 11),
        ast.GtE: ('>=', 11),
        ast.In: (None, 11),         # Dealt with separately
        ast.NotIn: (None, 11),      # Dealt with separately

        ast.Eq: ('==', 10),
        ast.NotEq: ('!=', 10),
        ast.Is: ('===', 10),        # Not really, but closest for now
        ast.IsNot: ('!==', 10),     # Not really, but closest for now

        ast.BitAnd: ('&', 9),
        ast.BitOr: ('|', 8),
        ast.BitXor: ('^', 7),
        ast.And: ('&&', 6),
        ast.Or: ('||', 5)

        # Lowest precedence
    }

    # Code generation switches, initialized from the command line arguments
    self.allowKeywordArgs = utils.commandArgs.kwargs
    self.allowOperatorOverloading = utils.commandArgs.opov
    self.allowConversionToIterable = utils.commandArgs.iconv
    self.allowConversionToTruthValue = utils.commandArgs.tconv
    self.allowKeyCheck = utils.commandArgs.keycheck
    self.allowDebugMap = utils.commandArgs.anno and not self.module.sourcePath.endswith ('.js')   # No annotation comments for .js sources
    self.allowDocAttribs = utils.commandArgs.docat
    self.allowGlobals = utils.commandArgs.xglobs
    self.allowJavaScriptIter = False
    self.allowJavaScriptCall = utils.commandArgs.jscall
    self.allowJavaScriptKeys = utils.commandArgs.jskeys
    self.allowJavaScriptMod = utils.commandArgs.jsmod
    self.allowMemoizeCalls = utils.commandArgs.fcall

    # Both have to be true for emitComma to emit anything
    self.noskipCodeGeneration = True
    self.conditionalCodeGeneration = True

    self.stripTuple = False     # For speed, tuples are translated to bare JavaScript arrays if they're just indices. Will autoreset.
    self.stripTuples = False    # For correctness, tuples are translated to bare JavaScript arrays if they are assignment target in a JavaScript 6 for-loop. Will not autoreset.
    self.replaceSend = False

    # Generate the target fragments by visiting the whole parse tree
    try:
        self.visit (module.parseTree)
        self.targetFragments.append (self.lineNrString)  # Last target fragment doesn't have a '\n' to replace in the emit method

    except Exception as exception:
        # print (traceback.format_exc ()) # Keep at hand for debugging purposes

        utils.enhanceException (
            exception,
            lineNr = self.lineNr
        )

    # Temporaries left behind indicate a fault in the code generator itself
    if self.tempIndices:
        raise utils.Error (
            message = '\n\tTemporary variables leak in code generator: {}'.format (self.tempIndices)
        )
def visitSubExpr (self, node, child):
def getPriority (exprNode):
if type (exprNode) in (ast.BinOp, ast.BoolOp):
return self.operators [type (exprNode.op)][1]
elif type (exprNode) == ast.Compare:
return self.operators [type (exprNode.ops [0])][1] # All ops have same priority
elif type (exprNode) == ast.Yield:
return -1000000
else:
return 1000000 # No need for parenthesis
if getPriority (child) <= getPriority (node):
self.emit ('(')
self.visit (child)
self.emit (')')
else:
self.visit (child)
def filterId (self, qualifiedId): # Convention: only called at emission time or file name fabrication time
if not self.idFiltering or (qualifiedId.startswith ('__') and qualifiedId.endswith ('__')):
# Leave system dunder names unchanged
return qualifiedId
else:
# Filter the rest, trying all aliases sucessively
for alias in self.aliases:
# Replace non-adjacent and odd adjacent matches, turning __<alias [0]>__ into =<alias [1]>=
qualifiedId = re.sub (
fr'(^|(?P<preDunder>__)|(?<=[./])){alias [0]}((?P<postDunder>__)|(?=[./])|$)',
lambda matchObject: (
('=' if matchObject.group ('preDunder') else '') +
alias [1] +
('=' if matchObject.group ('postDunder') else '')
),
qualifiedId
)
# Replace all remaining matches
qualifiedId = re.sub (
fr'(^|(?<=[./=])){alias [0]}((?=[./=])|$)',
alias [1],
qualifiedId
)
# Take out all occurences of temporary =, leave non-matching __ unchanged)
return qualifiedId.replace ('=', '')
def tabs (self, indentLevel = None):
    '''Return a string of indentLevel tab characters.

    If indentLevel is omitted (None), the current self.indentLevel is used.
    An explicit level of 0 yields the empty string.
    '''
    if indentLevel is None: # Identity test, since 0 is a valid explicit level
        indentLevel = self.indentLevel
    return indentLevel * '\t'
def emit (self, fragment, *formatter):
    '''Insert a piece of target code at self.fragmentIndex and advance that index.

    fragment is a str.format template, formatter holds its positional
    arguments, so literal braces in fragment have to be doubled.
    If emission starts on a new line, a fragment with the current
    indentation is inserted first. Newlines embedded in the template (all
    but a trailing one) are followed by the current indentation, and each
    newline in the formatted result is preceded by self.lineNrString.
    An empty fragment is accepted and results in an empty target fragment.
    '''
    if (    # If at the start of a new line
        not self.targetFragments or                                             # It may be the first line
        self.targetFragments [self.fragmentIndex - 1] .endswith ('\n')          # It may be a new line but not the first line
    ):
        self.targetFragments.insert (self.fragmentIndex, self.tabs ())
        self.fragmentIndex += 1

    # There may be \n's embedded in the fragment, only those get indentation appended
    # Slicing with [-1:] rather than indexing with [-1] keeps an empty fragment from raising an IndexError
    fragment = fragment [:-1] .replace ('\n', '\n' + self.tabs ()) + fragment [-1:]

    # Indentation was added before formatting, so newlines brought in by the formatter arguments are not indented
    self.targetFragments.insert (
        self.fragmentIndex,
        fragment.format (*formatter) .replace ('\n', self.lineNrString + '\n')
    )
    self.fragmentIndex += 1
def indent (self):
    '''Raise the current indentation level, as used by tabs (), by one.'''
    self.indentLevel = self.indentLevel + 1
def dedent (self):
    '''Lower the current indentation level, as used by tabs (), by one.'''
    self.indentLevel = self.indentLevel - 1
def inscope (self, node):
    '''Push a fresh scope record for node onto self.scopes.

    Called at visiting modules, classes and functions.
    The record starts out with an empty set of nonlocals and with
    containsYield switched off.
    '''
    enteredScope = utils.Any (
        node = node,
        nonlocals = set (),
        containsYield = False
    )
    self.scopes.append (enteredScope)
def descope (self):
    '''Drop the innermost scope record from self.scopes.'''
    del self.scopes [-1]
def getScope (self, *nodeTypes):
    '''Return the innermost scope, optionally restricted to certain node types.

    Without arguments the last entry of self.scopes is returned.
    With arguments the innermost scope whose node is exactly of one of the
    given types is returned, or None if there is no such scope.
    '''
    if not nodeTypes:
        return self.scopes [-1]

    return next (
        (scope for scope in reversed (self.scopes) if type (scope.node) in nodeTypes),
        None
    )
def getAdjacentClassScopes (self, inMethod = False):
    '''Return an iterator over the innermost unbroken chain of class scopes, outermost first.

    Needed to fix destructuring assignment in nested classes and to make super () work.
    The latter needs inMethod, since supported use of super () is directly or
    indirectly enclosed in a method body. With inMethod set, the function scopes
    at the innermost end of self.scopes are skipped before the chain is collected.
    '''
    functionNodeTypes = (ast.FunctionDef, ast.AsyncFunctionDef)
    skippingFunctions = inMethod
    innermostFirst = []

    # Work backward until finding an interruption in the chain
    for scope in reversed (self.scopes):
        nodeType = type (scope.node)

        if skippingFunctions and nodeType in functionNodeTypes:
            continue
        skippingFunctions = False   # Only the innermost run of function scopes is skipped

        if nodeType != ast.ClassDef:
            break

        innermostFirst.append (scope)

    return reversed (innermostFirst)
def emitComma (self, index, blank = True):
    '''Emit a separating comma in front of every item but the first.

    Nothing is emitted for index 0, nor if code generation is currently
    skipped or conditionally switched off. With blank set, the comma is
    followed by a space.
    '''
    if not (self.noskipCodeGeneration and self.conditionalCodeGeneration and index):
        return

    separator = ', ' if blank else ','
    self.emit (separator)
def emitBeginTruthy (self):
    '''Emit the opening of a __t__ (...) call, if conversion to truth value is allowed.'''
    if not self.allowConversionToTruthValue:
        return
    self.emit ('__t__ (')
def emitEndTruthy (self):
    '''Emit the closing parenthesis of a __t__ (...) call, if conversion to truth value is allowed.'''
    if not self.allowConversionToTruthValue:
        return
    self.emit (')')
def adaptLineNrString (self, node = None, offset = 0):
    '''Recompute self.lineNrString, the line number text that emit () puts in front of each newline.

    The string is empty unless source maps or annotations were requested on
    the command line. Otherwise it is derived from the line number of node,
    from the cached self.lineNr if node carries none, or from line 1 if no
    node is given, in each case shifted by offset.
    '''
    if not (utils.commandArgs.map or utils.commandArgs.anno):
        # __pragma__ ('noanno') isn't enough to get here and to later on take the 'no stripping' shortcut
        # This is because in the main module the first line will already have been instrumented
        # So in that case each line is instrumented and instrumentation will be stripped later on
        self.lineNrString = ''
        return

    # Under these conditions, appended line numbers will be stripped later, so they have to be there
    if not node:
        baseLineNr = 1
    elif hasattr (node, 'lineno'):
        baseLineNr = node.lineno    # Use new line number
    else:
        baseLineNr = self.lineNr    # Use 'cached' line number

    lineNr = baseLineNr + offset

    # Adding the maximum and dropping the first character of the sum yields the string put in front of newlines
    self.lineNrString = str (sourcemaps.maxNrOfSourceLinesPerModule + lineNr) [1 : ]
def isCommentString (self, statement):
    '''Tell whether statement is an expression statement consisting of just a string constant.'''
    if not isinstance (statement, ast.Expr):
        return False

    expression = statement.value
    return isinstance (expression, ast.Constant) and type (expression.value) == str
def emitBody (self, body):
    '''Visit each statement in body, terminating each one with ';' and a newline.

    Statements that are bare string constants (see isCommentString) are left out.
    '''
    for statement in body:
        if not self.isCommentString (statement):
            self.visit (statement)
            self.emit (';\n')
def emitSubscriptAssign (self, target, value, emitPathIndices = lambda: None):
    '''Emit an assignment of value to the subscripted target.

    target              The ast.Subscript node that is assigned to.
    value               The node whose translation is assigned.
    emitPathIndices     Callable without arguments, called right after value
                        has been visited, except for slice assignment, where
                        it isn't called at all.

    Slice targets result in a __setslice__ call and tuple indices in a
    .__setitem__ method call. Other indices result in a call to the free
    function __setitem__ if operator overloading is allowed and in a plain
    indexed assignment otherwise.
    '''
    def emitSlicePart (partNode, absentText):
        # Emit one of lower, upper and step, or its stand-in if absent, followed by a separator
        if partNode is None:
            self.emit (absentText)
        else:
            self.visit (partNode)
        self.emit (', ')

    if type (target.slice) == ast.Slice:
        if self.allowOperatorOverloading:
            self.emit ('__setslice__ (')    # Free function tries .__setitem__ (overload) and .__setslice__ (native)
            self.visit (target.value)
            self.emit (', ')
        else:
            self.visit (target.value)
            self.emit ('.__setslice__ (')

        emitSlicePart (target.slice.lower, '0')
        emitSlicePart (target.slice.upper, 'null')
        emitSlicePart (target.slice.step, 'null')   # Must be null rather than 1, see Array.prototype.__setslice__

        self.visit (value)
        self.emit (')')

    elif type (target.slice) == ast.Tuple:  # Always overloaded
        self.visit (target.value)
        self.emit ('.__setitem__ (')        # Method call on the translated target, not the free function
        self.stripTuple = True
        self.visit (target.slice)
        self.emit (', ')
        self.visit (value)
        emitPathIndices ()
        self.emit (')')

    elif self.allowOperatorOverloading:     # Possibly overloaded LHS index dealt with here, is special case
        self.emit ('__setitem__ (')         # Free function tries .__setitem__ (overload) and [] (native)
        self.visit (target.value)
        self.emit (', ')
        self.visit (target.slice)
        self.emit (', ')
        self.visit (value)
        emitPathIndices ()
        self.emit (')')

    else:                                   # Non-overloaded LHS index just dealt with by visit_Subscript
                                            # which is called indirectly here
        self.expectingNonOverloadedLhsIndex = True
        self.visit (target)
        self.emit (' = ')
        self.visit (value)
        emitPathIndices ()
def nextTemp (self, name):
    '''Claim the next temporary variable for name and return its identifier.

    The index kept in self.tempIndices starts at 0 for a name that isn't in
    use yet and is raised by one otherwise.
    '''
    self.tempIndices [name] = self.tempIndices [name] + 1 if name in self.tempIndices else 0
    return self.getTemp (name)
def skipTemp (self, name):
    '''Claim the next temporary variable for name and register it in self.skippedTemps.'''
    skipped = self.skippedTemps
    skipped.add (self.nextTemp (name))
def skippedTemp (self, name):
    '''Tell whether the current temporary variable for name is registered in self.skippedTemps.'''
    currentTemp = self.getTemp (name)
    return currentTemp in self.skippedTemps
def getTemp (self, name):
    '''Return the identifier of the current temporary variable for name, or None if there isn't any.

    The identifier consists of name and its current index, enclosed in double underscores.
    '''
    if name not in self.tempIndices:
        return None
    return '__{}{}__'.format (name, self.tempIndices [name])
def prevTemp (self, name):
    '''Release the current temporary variable for name.

    The variable is taken out of self.skippedTemps if it is in there. The
    index for name is lowered by one, and name is removed from
    self.tempIndices altogether once its index drops below 0.
    '''
    releasedTemp = self.getTemp (name)
    self.skippedTemps.discard (releasedTemp)

    loweredIndex = self.tempIndices [name] - 1
    if loweredIndex < 0:
        del self.tempIndices [name]
    else:
        self.tempIndices [name] = loweredIndex
def useModule (self, name):
    '''Have the program provide the module called name and return the result.

    The current line number is stored in the top entry of the program's
    import stack before the module is requested.
    '''
    program = self.module.program

    # Remember line nr of import statement for the error report
    # Must be done first because providing the module can generate a healthy exception
    currentImport = program.importStack [-1]
    currentImport [1] = self.lineNr

    return program.provide (name, filter = self.filterId)
def isCall (self, node, name):
    '''Tell whether node is a call of a function referred to by the bare identifier name.'''
    if type (node) != ast.Call:
        return False

    callee = node.func
    return type (callee) == ast.Name and callee.id == name
def getPragmaFromExpr (self, node):
    '''Return the argument nodes of a __pragma__ (...) call that makes up an expression statement.

    None is returned if node isn't such a statement.
    '''
    if type (node) == ast.Expr and self.isCall (node.value, '__pragma__'):
        return node.value.args
    return None
def getPragmaFromIf (self, node):
    '''Return the argument nodes of a __pragma__ (...) call that is the test of an if statement.

    None is returned if node isn't such a statement.
    '''
    if type (node) == ast.If and self.isCall (node.test, '__pragma__'):
        return node.test.args
    return None
def visit (self, node):   # Overrides visit () method of parent ast.NodeVisitor
    '''Visit node, dealing with the pragmas that switch code generation on and off.

    Pragmas handled here:
        if __pragma__ ('defined', <symbol>, ...):   Emit the body only if at least one symbol is defined
        __pragma__ ('skip') / ('noskip')            Switch code generation off / on
        __pragma__ ('ifdef', <expr>)                Generate code only if the value of <expr> is a defined symbol
        __pragma__ ('ifndef', <expr>)               Generate code only if the value of <expr> is not a defined symbol
        __pragma__ ('else')                         Invert conditional code generation
        __pragma__ ('endif')                        Switch conditional code generation on again

    Any node that isn't an if with a pragma as test is passed on to the visit
    method of the parent class, provided that code generation is switched on.
    The body of such an if is emitted via emitBody instead.

    The pragma arguments are read through the value attribute of ast.Constant,
    since its alias s is deprecated.
    '''
    try:
        # Adapt self.lineNr to each visited node
        # The lineNr is used in line number annotations and in error reports
        # In case of hoisting the line number of the source code will have to be remembered until the hoist is dealt with
        self.lineNr = node.lineno
    except AttributeError:
        pass    # Node carries no line number, stick to the one remembered last

    pragmaInIf = self.getPragmaFromIf (node)
    pragmaInExpr = self.getPragmaFromExpr (node)

    if pragmaInIf:
        if pragmaInIf [0] .value == 'defined':
            definedInIf = any (
                symbol.value in self.module.program.symbols
                for symbol in pragmaInIf [1:]
            )
        # NOTE(review): for any other pragma used as if-test, definedInIf stays unbound
        # and the check below raises an UnboundLocalError, confirm which behaviour is wanted

    elif pragmaInExpr:
        pragmaName = pragmaInExpr [0] .value

        if pragmaName == 'skip':
            self.noskipCodeGeneration = False
        elif pragmaName == 'noskip':
            self.noskipCodeGeneration = True

        if pragmaName in ('ifdef', 'ifndef'):
            # NOTE: The argument taken from the source that is being compiled is evaluated at compile time,
            # with empty globals and only __envir__ as local, so it runs arbitrary expressions from that source
            definedInExpr = eval (
                compile (
                    ast.Expression (pragmaInExpr [1]),
                    '<string>',
                    'eval'
                ),
                {},
                {'__envir__': self.module.program.envir}
            ) in self.module.program.symbols

        if pragmaName == 'ifdef':
            self.conditionalCodeGeneration = definedInExpr
        elif pragmaName == 'ifndef':
            self.conditionalCodeGeneration = not definedInExpr
        elif pragmaName == 'else':
            self.conditionalCodeGeneration = not self.conditionalCodeGeneration
        elif pragmaName == 'endif':
            self.conditionalCodeGeneration = True

    if self.noskipCodeGeneration and self.conditionalCodeGeneration:
        if pragmaInIf:
            if definedInIf:
                self.emitBody (node.body)
        else:
            super () .visit (node)
def visit_arg (self, node):
    '''Emit the name of a formal parameter, after passing it through filterId.'''
    filteredName = self.filterId (node.arg)
    self.emit (filteredName)
def visit_arguments (self, node): # Visited for def's, not for calls
self.emit ('(')
for index, arg in enumerate (node.args):
self.emitComma (index)
self.visit (arg)
# If there's a vararg or a kwarg, no formal parameter is emitted for it, it's just retrieved in the body
# so def f (a, b=3, *x, c, d=4, **y, e, f = 5) generates function f (a, b, c, d, e, f), since x and y are never passed in positionally
self.emit (') {{\n')