# split

本 notebook 是编译原理课程项目中负责基本块切分的部分：读入中间代码，切分基本块并建立各块的后继关系，随后计算每个基本块的 def/use 集合与活跃变量（in/out）集合。

## 需要的库

- 正则表达式 re
- 序列化 json
- 深拷贝 copy

In [59]:
import re
import json
import copy

## 读入json

In [49]:
# Path of the VM program (a JSON file) to analyse, relative to this notebook.
filename = "../examples/vm_src02.json"

# JSON text is UTF-8; pin the encoding so loading does not depend on the
# platform's default locale encoding.
with open(filename, "r", encoding="utf-8") as fp:
    vm_src = json.load(fp)

# `table` is looked up by symbol/label name (later cells read the "val" field
# of its entries); `code` is the list of instruction strings.
table = vm_src["table"]
code = vm_src["code"]

# Comparison operators mapped to their Python implementation.
cond_f = {
    "==": lambda x, y: x == y,
    "!=": lambda x, y: x != y,
    ">=": lambda x, y: x >= y,
    "<=": lambda x, y: x <= y,
    ">": lambda x, y: x > y,
    "<": lambda x, y: x < y,
}

# Binary arithmetic operators; note that "/" is floor division (`//`).
arith_f = {
    "+": lambda x, y: x + y,
    "-": lambda x, y: x - y,
    "*": lambda x, y: x * y,
    "/": lambda x, y: x // y,
    "%": lambda x, y: x % y,
}

# Keyword/operator tokens of the VM language.
# NOTE(review): presumably the tokens that must not be treated as variable
# names -- confirm against the code that consumes this set.
reserved_word = {"HALT", "=", "+", "-", "*", "/", "%", "?",
                 ":", "!:", ">", "<", ">=", "<=", "==", "!=", "[", "]"}

# Backward-compatible alias: the set was originally published under this
# misspelled name, so keep it available for any code that still uses it.
eserved_word = reserved_word

# Matches a token consisting only of decimal digits (an unsigned integer literal).
reg = re.compile('^[0-9]+$')

## 开始切分基本块

先看一眼代码

In [7]:
# Dump every instruction prefixed with its right-aligned index, so the
# instruction indices used as jump targets can be located by eye.
for idx in range(len(code)):
    print("%3d : %s" % (idx, code[idx]))

  0 : I = M - 1
  1 : J = N
  2 : T_1 = N
  3 : V = A [ T_1 ]
  4 : I = I + 1
  5 : T_2 = I
  6 : T_3 = A [ T_2 ]
  7 : ? V > T_3 : 4
  8 : J = J - 1
  9 : T_4 = J
 10 : T_5 = A [ T_4 ]
 11 : ? T_5 > V : 8
 12 : ? J <= I : 22
 13 : T_6 = I
 14 : X = A [ T_6 ]
 15 : T_7 = I
 16 : T_8 = J
 17 : T_9 = A [ T_8 ]
 18 : A [ T_7 ] = T_9
 19 : T_10 = J
 20 : A [ T_10 ] = X
 21 : !: 4
 22 : T_11 = I
 23 : X = A [ T_11 ]
 24 : T_12 = J
 25 : T_13 = N
 26 : T_14 = A [ T_13 ]
 27 : A [ T_12 ] = T_14
 28 : T_15 = N
 29 : A [ T_15 ] = X
 30 : HALT


In [9]:
table = vm_src["table"]
code = vm_src["code"]

# A line is a jump when its first token is "?" (conditional) or "!:"
# (unconditional). For each jump record:
#   * the destination: the last token is a label resolved through `table`
#   * the index of the instruction that directly follows the jump
jump_targets = []
after_jump = []
for pos, instr in enumerate(code):
    tokens = instr.split(" ")
    if tokens[0] in ("?", "!:"):
        jump_targets.append(table[tokens[-1]]["val"])
        after_jump.append(pos + 1)
print(jump_targets)
print(after_jump)

[4, 8, 22, 4]
[8, 12, 13, 22]


In [32]:
# Block boundaries: the program start, every jump destination, every
# instruction that follows a jump, and the end of the program (one past the
# last instruction). Duplicates are dropped and the result is ordered.
split_points = sorted(set([0, len(code)]) | set(jump_targets) | set(after_jump))
print(split_points)

# Consecutive boundaries delimit one block; store it as an inclusive
# (first_line, last_line) pair.
blocks_linenum = [
    (start, stop - 1)
    for start, stop in zip(split_points, split_points[1:])
]

# Map the first line of each block to the block's index.
intro_block = {}
for block_no, span in enumerate(blocks_linenum):
    intro_block[span[0]] = block_no

print(blocks_linenum)
print(intro_block)

[0, 4, 8, 12, 13, 22, 31]
[(0, 3), (4, 7), (8, 11), (12, 12), (13, 21), (22, 30)]
{0: 0, 4: 1, 8: 2, 12: 3, 13: 4, 22: 5}


In [29]:
# Show the program again, this time grouped by basic block; line numbers are
# the original instruction indices.
for block_no, (first, last) in enumerate(blocks_linenum):
    print("#BLOCK %3d" % block_no)
    for lineno in range(first, last + 1):
        print("%3d : %s" % (lineno, code[lineno]))
    print()
        

#BLOCK   0
  0 : I = M - 1
  1 : J = N
  2 : T_1 = N
  3 : V = A [ T_1 ]

#BLOCK   1
  4 : I = I + 1
  5 : T_2 = I
  6 : T_3 = A [ T_2 ]
  7 : ? V > T_3 : 4

#BLOCK   2
  8 : J = J - 1
  9 : T_4 = J
 10 : T_5 = A [ T_4 ]
 11 : ? T_5 > V : 8

#BLOCK   3
 12 : ? J <= I : 22

#BLOCK   4
 13 : T_6 = I
 14 : X = A [ T_6 ]
 15 : T_7 = I
 16 : T_8 = J
 17 : T_9 = A [ T_8 ]
 18 : A [ T_7 ] = T_9
 19 : T_10 = J
 20 : A [ T_10 ] = X
 21 : !: 4

#BLOCK   5
 22 : T_11 = I
 23 : X = A [ T_11 ]
 24 : T_12 = J
 25 : T_13 = N
 26 : T_14 = A [ T_13 ]
 27 : A [ T_12 ] = T_14
 28 : T_15 = N
 29 : A [ T_15 ] = X
 30 : HALT



In [46]:
# Build the block table.
#   blocks["summary"]["total_blocks"] : number of basic blocks
#   blocks["blocks"][n]               : record of block n with
#       "line_num" : inclusive (first_line, last_line) pair
#       "next"     : (successor 0, successor 1); successor 0 is the
#                    fall-through block or the destination of an unconditional
#                    jump, successor 1 the destination of a conditional jump;
#                    None means "no such successor"
#       "code"     : the instruction strings of the block
blocks = {"summary": {"total_blocks": len(blocks_linenum)}, "blocks": {}}
for n, b in enumerate(blocks_linenum):
    # Only the last instruction of a block decides where control goes next.
    last_tokens = code[b[1]].split(" ")

    # Falling off the end of the final block leaves the program, so it must
    # not point at a block index that does not exist.
    fallthrough = n + 1 if n + 1 < len(blocks_linenum) else None

    nxt = [None, None]
    if last_tokens[0] == "!:":
        # Unconditional jump: the label is the last token, as in the scan
        # that collected the jump targets.
        nxt[0] = intro_block[table[last_tokens[-1]]["val"]]
    elif last_tokens[0] == "?":
        # Conditional jump: taken branch plus fall-through.
        nxt[1] = intro_block[table[last_tokens[-1]]["val"]]
        nxt[0] = fallthrough
    elif code[b[1]] == "HALT":
        # Program end: no successors.
        pass
    else:
        nxt[0] = fallthrough
    blocks["blocks"][n] = {
        "line_num": b,
        "next": tuple(nxt),
        "code": code[b[0]:b[1] + 1],
    }
blocks

{'summary': {'total_blocks': 6},
 'blocks': {0: {'line_num': (0, 3),
   'next': (1, None),
   'code': ['I = M - 1', 'J = N', 'T_1 = N', 'V = A [ T_1 ]']},
  1: {'line_num': (4, 7),
   'next': (2, 1),
   'code': ['I = I + 1', 'T_2 = I', 'T_3 = A [ T_2 ]', '? V > T_3 : 4']},
  2: {'line_num': (8, 11),
   'next': (3, 2),
   'code': ['J = J - 1', 'T_4 = J', 'T_5 = A [ T_4 ]', '? T_5 > V : 8']},
  3: {'line_num': (12, 12), 'next': (4, 5), 'code': ['? J <= I : 22']},
  4: {'line_num': (13, 21),
   'next': (1, None),
   'code': ['T_6 = I',
    'X = A [ T_6 ]',
    'T_7 = I',
    'T_8 = J',
    'T_9 = A [ T_8 ]',
    'A [ T_7 ] = T_9',
    'T_10 = J',
    'A [ T_10 ] = X',
    '!: 4']},
  5: {'line_num': (22, 30),
   'next': (None, None),
   'code': ['T_11 = I',
    'X = A [ T_11 ]',
    'T_12 = J',
    'T_13 = N',
    'T_14 = A [ T_13 ]',
    'A [ T_12 ] = T_14',
    'T_15 = N',
    'A [ T_15 ] = X',
    'HALT']}}}

In [62]:
def _block_def_use(lines):
    """Compute the (defd, used) variable sets of one basic block.

    ``used`` holds the names read in the block before the block assigns them;
    ``defd`` holds the names assigned in the block before the block reads them.
    Tokens made only of digits (integer literals) are not variables and are
    never recorded.

    Recognised statement shapes (tokens separated by single spaces):
        x = y            copy
        x = y op z       arithmetic, ``op`` being a key of ``arith_f``
        x = A [ i ]      array load
        A [ i ] = x      array store
        ? x op y : L     conditional jump
    Any other statement contributes nothing.

    An array store overwrites a single element, so it does not define (kill)
    the array ``A``; it is recorded as a use of ``A``, ``i`` and ``x``.

    Parameters:
        lines: iterable of instruction strings belonging to one block.

    Returns:
        tuple ``(defd, used)`` of two sets of variable names.
    """
    used, defd = set(), set()

    def note(sym, target):
        # Only the first occurrence of a name in the block decides its class.
        if sym not in used and sym not in defd and reg.match(sym) is None:
            target.add(sym)

    for stmt in lines:
        symbols = stmt.split(" ")
        if "=" in symbols:
            if len(symbols) == 3:
                # x = y
                note(symbols[2], used)
                note(symbols[0], defd)
            elif symbols[3] == "]":
                # A [ i ] = x : the array stays live, nothing is killed
                note(symbols[0], used)
                note(symbols[2], used)
                note(symbols[5], used)
            elif symbols[3] == "[" or symbols[3] in arith_f:
                # x = A [ i ]   or   x = y op z
                note(symbols[2], used)
                note(symbols[4], used)
                note(symbols[0], defd)
        elif "?" in symbols:
            # ? x op y : L
            note(symbols[1], used)
            note(symbols[3], used)
    return defd, used


# Attach the def/use sets to every block and initialise the (still empty)
# live-in / live-out sets.
for n, b in blocks["blocks"].items():
    defd, used = _block_def_use(b["code"])
    b["defd"] = defd
    b["used"] = used
    b["in"] = set()
    b["out"] = set()
    print(n, b["defd"], b["used"])

0 {'I', 'J', 'V', 'T_1'} {'N', 'M', 'A'}
1 {'T_2', 'T_3'} {'I', 'A', 'V'}
2 {'T_5', 'T_4'} {'V', 'A', 'J'}
3 set() {'I', 'J'}
4 {'T_6', 'T_8', 'T_7', 'X', 'T_10', 'T_9'} {'I', 'A', 'J'}
5 {'T_12', 'T_13', 'T_14', 'X', 'T_15', 'T_11'} {'I', 'A', 'N', 'J'}


In [75]:
# Live-variable analysis by fixed-point iteration:
#     out[B] = union of in[S] over the successors S of B
#     in[B]  = used[B] | (out[B] - defd[B])
# Passes over all blocks are repeated until no "in" set changes.
flag = True   # True while the previous pass changed at least one "in" set
i = 0         # pass counter, printed once per pass
while flag:
    flag = False
    i += 1
    print(i)

    for n, b in blocks["blocks"].items():
        # The stored sets are only ever replaced, never mutated in place, so
        # keeping a reference to the old "in" set is enough for the comparison.
        inset_ori = b["in"]
        # Work on a copy so that the in-place unions below leave the stored
        # "out" set untouched until it is replaced.
        outset = set(b["out"])
        for succ in b["next"]:
            if succ is not None:
                outset |= blocks["blocks"][succ]["in"]
        inset = b["used"] | (outset - b["defd"])
        b["in"] = inset
        b["out"] = outset
        if inset != inset_ori:
            flag = True

1
2
3
4
5


In [76]:
# Show the live-in and live-out set of every block.
for block_id in blocks["blocks"]:
    info = blocks["blocks"][block_id]
    print(block_id, info["in"], info["out"])

0 {'N', 'M', 'A'} {'I', 'N', 'V', 'A', 'J'}
1 {'I', 'N', 'V', 'A', 'J'} {'I', 'N', 'V', 'A', 'J'}
2 {'I', 'N', 'V', 'A', 'J'} {'I', 'N', 'V', 'A', 'J'}
3 {'I', 'N', 'J', 'V', 'A'} {'I', 'N', 'V', 'A', 'J'}
4 {'I', 'N', 'V', 'A', 'J'} {'I', 'N', 'V', 'A', 'J'}
5 {'I', 'A', 'N', 'J'} set()
