Skip to content

Commit

Permalink
Instruction prefetching for subroutines
Browse files Browse the repository at this point in the history
  • Loading branch information
caryan committed May 12, 2016
1 parent a44cf4a commit b9e9a9e
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 35 deletions.
37 changes: 21 additions & 16 deletions QGL/Compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from . import ControlFlow
from . import BlockLabel


def map_logical_to_physical(wires):
# construct a mapping of physical channels to lists of logical channels
# (there will be more than one logical channel if multiple logical
Expand Down Expand Up @@ -233,10 +234,14 @@ def collect_specializations(seqs):
Collects function definitions for all targets of Call instructions
'''
targets = [x.target for x in flatten(seqs) if isinstance(x, ControlFlow.Call)]
funcDefs = []
for target in set(targets):
funcDefs += ControlFlow.qfunction_specialization(target)
return funcDefs
funcs = []
done = []
#Manually keep track of done (instead of `set`) to keep calling order
for target in targets:
if target not in done:
funcs.append( ControlFlow.qfunction_specialization(target) )
done.append(target)
return funcs

def compile_to_hardware(seqs, fileName, suffix='', qgl2=False):
'''
Expand All @@ -256,7 +261,7 @@ def compile_to_hardware(seqs, fileName, suffix='', qgl2=False):
PatternUtils.add_slave_trigger(seqs, ChannelLibrary.channelLib['slaveTrig'])

# find channel set at top level to account for individual sequence channel variability
channels = set([])
channels = set()
for seq in seqs:
channels |= find_unique_channels(seq)

Expand Down Expand Up @@ -328,7 +333,7 @@ def compile_to_hardware(seqs, fileName, suffix='', qgl2=False):
# Return the filenames we wrote
return fileList

def compile_sequences(seqs, channels=None, qgl2=False):
def compile_sequences(seqs, channels=set(), qgl2=False):
'''
Main function to convert sequences to miniLL's and waveform libraries.
'''
Expand All @@ -343,12 +348,13 @@ def compile_sequences(seqs, channels=None, qgl2=False):
seqs[-1].append(ControlFlow.Goto(BlockLabel.label(seqs[0])))
logger.debug("Appending a Goto at end to loop")

# inject function definitions prior to sequences
funcDefs = collect_specializations(seqs)
if funcDefs:
# inject GOTO to jump over definitions
funcDefs.insert(0, ControlFlow.Goto(BlockLabel.label(seqs[0])))
seqs.insert(0, funcDefs)
# append function specialization to sequences
subroutines = collect_specializations(seqs)
seqs += subroutines

#expand the channel definitions for anything defined in subroutines
for func in subroutines:
channels |= find_unique_channels(subroutines)

# use seqs[0] as prototype in case we were not given a set of channels
wires = compile_sequence(seqs[0], channels)
Expand All @@ -359,9 +365,8 @@ def compile_sequences(seqs, channels=None, qgl2=False):
wires = compile_sequence(seq, channels)
for chan in wireSeqs.keys():
wireSeqs[chan].append(wires[chan])

#Print a message so that we know how many sequences there are for the experiment
print('Compiled {} sequences.'.format(len(seqs)))
print('Compiled {} sequences.'.format(len(seqs) - len(subroutines)))

# Debugging:
if logger.isEnabledFor(logging.DEBUG):
Expand Down Expand Up @@ -450,12 +455,12 @@ def propagate_node_frame_to_edges(wires, chan, frameChange):
return wires

def find_unique_channels(seq):
    '''
    Collect the set of channels referenced by the steps of a sequence.

    Every element yielded by flatten(seq) is inspected. Elements without a
    `channel` attribute are ignored. A `channel` that is a single
    Channels.Channel is added directly; anything else is iterated and each of
    its members is added (presumably a tuple/list of channels -- confirm
    against the pulse types that carry multiple channels).

    Returns a new set; the input sequence is not modified.
    '''
    channels = set()
    for step in flatten(seq):
        if not hasattr(step, 'channel'):
            continue
        if isinstance(step.channel, Channels.Channel):
            channels.add(step.channel)
        else:
            #not a single Channel, so treat it as an iterable of channels
            channels.update(step.channel)
    return channels
Expand Down
106 changes: 87 additions & 19 deletions QGL/drivers/APS2Pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,15 @@
limitations under the License.
'''

import h5py
import os
import numpy as np
import logging
from warnings import warn
from copy import copy
from itertools import zip_longest

import h5py
import numpy as np

from QGL import Compiler, ControlFlow, BlockLabel, PatternUtils
from QGL.PatternUtils import hash_pulse, flatten

Expand Down Expand Up @@ -51,6 +55,8 @@
SYNC = 0x9
MODULATION = 0xA
LOADCMP = 0xB
PREFETCH = 0xC
NOP = 0XF

# WFM/MARKER op codes
PLAY = 0x0
Expand Down Expand Up @@ -128,9 +134,10 @@ def __repr__(self):

def __str__(self):

opCodes = ["WFM", "MARKER", "WAIT", "LOAD", "REPEAT", "CMP", "GOTO", "CALL", "RET", "SYNC", "MODULATION", "LOADCMP"]
opCodes = ["WFM", "MARKER", "WAIT", "LOAD", "REPEAT", "CMP", "GOTO", "CALL",
"RET", "SYNC", "MODULATION", "LOADCMP", "PREFETCH", "NOP", "NOP", "NOP"]

out = "{0}: ".format(self.label) if self.label else ""
out = "{0} ".format(self.label) if self.label else ""

instrOpCode = (self.header >> 4) & 0xf
out += opCodes[instrOpCode]
Expand All @@ -146,7 +153,7 @@ def __str__(self):
out += "write=0 | "

if self.target:
out += str(self.target) + "/"
out += " {}".format(self.target)

if instrOpCode == WFM:
wfOpCode = (self.payload >> 46) & 0x3
Expand Down Expand Up @@ -183,7 +190,7 @@ def __str__(self):
out += " | " + cmpCodes[cmpCode]
out += ", mask = {}".format(self.payload & 0xff)

elif (instrOpCode == GOTO) or (instrOpCode == CALL) or (instrOpCode == RET) or (instrOpCode == REPEAT):
elif any([instrOpCode == op for op in [GOTO, CALL, RET, REPEAT, PREFETCH]]):
out += " | target addr = {}".format(self.payload & 2**26-1)

elif instrOpCode == LOAD:
Expand Down Expand Up @@ -281,6 +288,12 @@ def Load(count, label=None):
def Repeat(addr, label=None):
    '''
    Build the REPEAT command for `addr`.

    `addr` and the optional `label` are handed straight to Command together
    with the REPEAT op code; the resulting command is returned unchanged.
    '''
    repeat_cmd = Command(REPEAT, addr, label=label)
    return repeat_cmd

def Prefetch(addr, label=None):
    '''
    Build the PREFETCH command for `addr`.

    `addr` and the optional `label` are handed to Command together with the
    PREFETCH op code. The label is forwarded in the same way as the sibling
    command constructors (e.g. Repeat) so that a caller supplying one does
    not have it silently discarded.
    '''
    return Command(PREFETCH, addr, label=label)

def NoOp():
    '''
    Return the instruction obtained by unflattening an all-ones 64-bit word.

    NOTE(review): presumably the top nibble of that word lands in the op code
    field and so decodes as NOP (0xF) -- confirm against Instruction.unflatten.
    '''
    all_ones_word = (1 << 64) - 1
    return Instruction.unflatten(all_ones_word)

def preprocess(seqs, shapeLib):
seqs = PatternUtils.convert_lengths_to_samples(seqs, SAMPLING_RATE, ADDRESS_UNIT)
wfLib = build_waveforms(seqs, shapeLib)
Expand Down Expand Up @@ -498,14 +511,16 @@ def create_seq_instructions(seqs, offsets):
cmpTable = {'==': EQUAL, '!=': NOTEQUAL, '>': GREATERTHAN, '<': LESSTHAN}

# always start with SYNC (stealing label from beginning of sequence)
if isinstance(seqs[0][0], BlockLabel.BlockLabel):
label = seqs[0][0]
timeTuples.pop(0)
indexes[0] += 1
else:
label = None
instructions = [Sync(label=label)]
# unless it is a subroutine (a trailing Return in the sequence is used as the tell)
label = None
instructions = []
if not isinstance(seqs[0][-1], ControlFlow.Return):
if isinstance(seqs[0][0], BlockLabel.BlockLabel):
label = seqs[0][0]
timeTuples.pop(0)
indexes[0] += 1
instructions.append( Sync(label=label) )
label = None

while len(timeTuples) > 0:
#pop off all entries that have the same time
Expand Down Expand Up @@ -613,17 +628,70 @@ def find_and_pop_entries(predicate):
def _group_subroutines_in_cache_lines(subroutines, cache_line_length):
    '''
    Lay subroutines out back to back, padding with NoOps so that a subroutine
    shorter than a cache line is not split across two lines.

    Returns (instrs, cache_line): the concatenated instruction list and a dict
    mapping each subroutine's entry label (label of its first instruction) to
    the label found at the start of the cache line it was placed in.
    '''
    instrs = []
    cache_line = {}
    line_label = None
    #position inside the current cache line; always kept in [0, cache_line_length)
    offset = 0
    for sub in subroutines:
        #Don't unnecessarily split across a cache line
        if (offset + len(sub) > cache_line_length) and (len(sub) < cache_line_length):
            instrs += [NoOp()] * (cache_line_length - offset)
            offset = 0
        if offset == 0:
            line_label = sub[0].label
        cache_line[sub[0].label] = line_label
        instrs += sub
        #wrap so that landing exactly on a boundary starts a fresh line
        #NOTE(review): a subroutine of a full line or more still only records
        #the label of the line it starts in
        offset = (offset + len(sub)) % cache_line_length
    return instrs, cache_line


def _inject_prefetches(instructions, subroutine_cache_line):
    '''
    Return a copy of `instructions` with Prefetch commands inserted in front
    of each WAIT for the cache lines needed by the CALLs that follow it.

    Raises RuntimeError if the calls between two WAITs need more than 8
    distinct cache lines; raises KeyError if a CALL target has no entry in
    `subroutine_cache_line`.
    '''
    wait_idx = [idx for idx, instr in enumerate(instructions) if (instr.header >> 4) == WAIT]
    wait_idx.append(len(instructions))
    #everything before the first WAIT is copied untouched
    with_prefetch = instructions[:wait_idx[0]]
    last_prefetch = None
    for start, stop in zip(wait_idx[:-1], wait_idx[1:]):
        segment = instructions[start:stop]
        needed_lines = {subroutine_cache_line[instr.target]
                        for instr in segment if (instr.header >> 4) == CALL}
        if len(needed_lines) > 8:
            raise RuntimeError("Unable to prefetch more than 8 cache lines")
        for needed_line in needed_lines:
            #skip the prefetch if that line was the most recent one requested
            if needed_line != last_prefetch:
                with_prefetch.append(Prefetch(needed_line))
                last_prefetch = needed_line
        with_prefetch += segment
    return with_prefetch


def create_instr_data(seqs, offsets):
    '''
    Constructs the complete instruction data vector, and does basic checks for validity.

    Subroutines will be placed at least 8 cache lines away from sequences and aligned to cache line

    The per-channel sequence lists in `seqs` are walked in lockstep (shorter
    ones are filled with empty sequences) and each group is converted with
    create_seq_instructions(group, offsets). The first group whose final
    instruction is a RET marks the start of the subroutines; everything from
    there on is laid out in cache lines after the main program.

    Returns a numpy uint64 array of the flattened instructions.
    Raises AssertionError if the instruction count reaches MAX_NUM_INSTRUCTIONS.
    '''
    logger = logging.getLogger(__name__)
    logger.debug('')

    CACHE_LINE_LENGTH = 128

    seq_instrs = [create_seq_instructions(list(seq), offsets)
                  for seq in zip_longest(*seqs, fillvalue=[])]

    #concatenate instructions
    instructions = []
    #explicit marker rather than reusing the loop counter: a subroutine sitting
    #in the very last slot must not be mistaken for "no subroutines"
    subroutines_start = len(seq_instrs)
    for ct, seq in enumerate(seq_instrs):
        #Use last instruction as return to mark start of subroutines
        if seq and (seq[-1].header >> 4) == RET:
            subroutines_start = ct
            break
        instructions += seq

    subroutines = seq_instrs[subroutines_start:]
    if subroutines:
        #if we have any subroutines then group in cache lines
        subroutine_instrs, subroutine_cache_line = _group_subroutines_in_cache_lines(
            subroutines, CACHE_LINE_LENGTH)
        logger.debug("Placed {} subroutines into {} cache lines".format(len(subroutines), len(subroutine_instrs) // CACHE_LINE_LENGTH))

        #inject prefetch commands before waits
        instructions = _inject_prefetches(instructions, subroutine_cache_line)

        #pad out instruction vector to ensure circular cache never loads a subroutine:
        #seven full lines plus whatever is needed to reach the next line boundary
        pad_instrs = 7*CACHE_LINE_LENGTH + (CACHE_LINE_LENGTH - (len(instructions) % CACHE_LINE_LENGTH))
        instructions += [NoOp()]*pad_instrs

        instructions += subroutine_instrs

    resolve_symbols(instructions)

    #make sure the program ends by jumping back to the first instruction
    if not instructions or instructions[-1] != Goto(0):
        instructions.append(Goto(0))

    assert len(instructions) < MAX_NUM_INSTRUCTIONS, 'Oops! too many instructions: {0}'.format(len(instructions))
    data = np.array([instr.flatten() for instr in instructions], dtype=np.uint64)
    return data
Expand Down

0 comments on commit b9e9a9e

Please sign in to comment.