# Generating Architectures

In [1]:
# Baseline architecture template: a YAML document with str.format placeholders
# (_globalBufferSize, _sizeX, _sizeY, _regFileSize). Literal YAML braces are
# doubled ({{ }}) so that str.format emits them as single braces.
# NOTE(review): both `pattern` and `architectureNamePattern` are rebound by the
# later cell In [27], and the generation loop in this notebook passes
# _inputBufferSize/_weightBufferSize/_outputBufferSize/_bufferSize rather than
# _regFileSize, so this version cannot be formatted by that loop as written.
# Looks like a superseded variant -- confirm before relying on it.
pattern = """architecture:
  version: 0.4
  nodes:
  - !Container
    name: System
    attributes:
      technology: "40nm"
      global_cycle_seconds: 1e-9

  - !Component
    name: MainMemory
    class: DRAM
    attributes:
      width: 256
      datawidth: 8

  - !Component
    name: GlobalBuffer
    class: SRAM
    attributes:
      depth: {_globalBufferSize}
      width: 64
      datawidth: 8

  - !Container
    name: PE
    spatial: {{meshX: {_sizeX}, meshY: {_sizeY}}}

  - !Component
    name: RegisterFile
    class: regfile
    attributes:
      depth: {_regFileSize}
      width: 8
      datawidth: 8

  - !Component
    name: MACC
    class: intmac
    attributes:
      datawidth: 8"""

# Output path template for the baseline variant; one file per
# (_sizeX, _sizeY, _globalBufferSize, _regFileSize) combination.
architectureNamePattern = "customArchitectures/{_sizeX}_{_sizeY}_{_globalBufferSize}_{_regFileSize}.yaml"

In [27]:
# Architecture template used by the generation loop. It is a YAML document
# (its own comments describe an "Eyeriss accelerator") with str.format
# placeholders:
#   _globalBufferSize                  -> depth of shared_glb
#   _sizeX / _sizeY                    -> meshX of PE_column / meshY of PE
#   _inputBufferSize, _weightBufferSize, _outputBufferSize
#                                      -> depth of ifmap_spad / weights_spad / psum_spad
# Literal YAML braces are doubled ({{ }}) so that str.format emits them as
# single braces. Everything inside the triple quotes is runtime text.
pattern = """architecture:
  # ============================================================
  # Architecture Description
  # ============================================================
  version: 0.4
  nodes: # Top-level is hierarchical
  - !Container # Top-level system
    name: system
    attributes:
      technology: "32nm"
      global_cycle_seconds: 1e-9
  
  - !Component # DRAM main memory
    name: DRAM
    class: DRAM
    attributes:
      type: "LPDDR4"
      width: 64
      datawidth: 8

  - !Container # Eyeriss accelerator
    name: eyeriss

  - !Component # Global buffer for inputs & outputs
    name: shared_glb
    class: smartbuffer_SRAM
    attributes:
      depth: {_globalBufferSize}
      width: 64
      n_banks: 32
      datawidth: 8
      read_bandwidth: 16
      write_bandwidth: 16
    constraints:
      dataspace: {{keep: [Inputs, Outputs], bypass: [Weights]}}

  - !Container # Each column of PEs produces a different psum row
    name: PE_column
    spatial: {{meshX: {_sizeX}}}
    constraints:
      spatial:
        permutation: [N, C, P, R, S, Q, M]
        factors: [N=1, C=1, P=1, R=1, S=1]
        split: 7

  - !Container # Each PE in the column receives a different filter row
    name: PE
    spatial: {{meshY: {_sizeY}}}
    constraints:
      spatial:
        split: 4
        permutation: [N, P, Q, R, S, C, M]
        factors: [N=1, P=1, Q=1, R=1]

  - !Parallel # Input/Output/Weight scratchpads in parallel
    nodes:
    - !Component # Input scratchpad
      name: ifmap_spad
      class: smartbuffer_RF
      attributes:
        depth: {_inputBufferSize}
        width: 16
        datawidth: 8
        read_bandwidth: 2
        write_bandwidth: 2
      constraints:
        dataspace: {{keep: [Inputs]}}
        temporal:
          permutation: [N, M, C, P, Q, R, S]
          factors: [N=1, M=1, C=1, P=1, Q=1, R=1, S=1]

    - !Component # Weight scratchpad
      name: weights_spad
      class: smartbuffer_RF
      attributes:
        depth: {_weightBufferSize}
        width: 16
        datawidth: 8
        read_bandwidth: 2
        write_bandwidth: 2
      constraints:
        dataspace: {{keep: [Weights]}}
        temporal:
          permutation: [N, M, P, Q, S, C, R]
          factors: [N=1, M=1, P=1, Q=1, S=1]

    - !Component # Output scratchpad
      name: psum_spad
      class: smartbuffer_RF
      attributes:
        depth: {_outputBufferSize}
        width: 16
        update_fifo_depth: 2
        datawidth: 16
        read_bandwidth: 2
        write_bandwidth: 2
      constraints:
        dataspace: {{keep: [Outputs]}}
        temporal:
          permutation: [N, C, P, Q, R, S, M] 
          factors: [N=1, C=1, R=1, S=1, P=1, Q=1]

  - !Component # MAC unit
    name: mac
    class: intmac
    attributes:
      multiplier_width: 8
      adder_width: 16"""

# Output path template: one file per (_sizeX, _sizeY, _globalBufferSize,
# _bufferSize) combination; a single _bufferSize names the scratchpad depth.
# NOTE(review): files land under "customArchitectures/", while the results
# cell reads from "customArchitectures4/" -- confirm which directory is intended.
architectureNamePattern = "customArchitectures/{_sizeX}_{_sizeY}_{_globalBufferSize}_{_bufferSize}.yaml"

In [28]:
# Axes of the design-space sweep consumed by the generation loop.

# (meshX, meshY) PE-array shapes; every shape multiplies out to 256.
peSizes = [(columns, 256 // columns) for columns in (4, 8, 16, 32, 64)]

# Global buffer depths: the powers of two from 1024 up to 32768.
globalBufferSizes = [1024 << step for step in range(6)]

# Depths applied to the per-PE scratchpads.
bufferSizes = [2, 4, 8, 16, 24, 32, 64, 128, 192, 256]

In [29]:
import os

# Write one architecture YAML per (PE mesh shape, global buffer depth,
# scratchpad depth) combination. The same scratchpad depth is applied to the
# input, weight and output buffers of the template.
for (sizeX, sizeY) in peSizes:
    for globalBufferSize in globalBufferSizes:
        for bufferSize in bufferSizes:
            architecture = pattern.format(_sizeX = sizeX,
                                            _sizeY = sizeY,
                                            _globalBufferSize = globalBufferSize,
                                            _inputBufferSize = bufferSize,
                                            _weightBufferSize = bufferSize,
                                            _outputBufferSize = bufferSize)
            fileName = architectureNamePattern.format(_sizeX = sizeX,
                                            _sizeY = sizeY,
                                            _globalBufferSize = globalBufferSize,
                                            _bufferSize = bufferSize)
            # Create the target directory on demand so a fresh checkout does
            # not fail with FileNotFoundError on the first write.
            targetDirectory = os.path.dirname(fileName)
            if targetDirectory:
                os.makedirs(targetDirectory, exist_ok=True)
            # The context manager closes the handle even if write() raises.
            with open(fileName, "w") as f:
                f.write(architecture)

# Calculating Results

In [None]:
import os
import timeloopfe.v4 as tl
from joblib import Parallel, delayed
from tqdm.auto import tqdm
# Layer-shape files to evaluate. Alternative selections kept for reference:
#   vgg16: layers = ["00", "02", "04", "07", "10"]
#   area:  layers = ["00"]
layers = ["00", "01", "06", "11", "16"]  # resnet18

# Every path below is resolved against the kernel's current working directory.
THIS_SCRIPT_DIR = os.getcwd()
print(THIS_SCRIPT_DIR)

outputDirectory = os.path.join(THIS_SCRIPT_DIR, "outputs4/")
architectureDirectory = os.path.join(THIS_SCRIPT_DIR, "customArchitectures4/")
# Alternative architecture set:
#   architectureDirectory = os.path.join(THIS_SCRIPT_DIR, "customArchitectures3Input/")

def run_mapper_with_spec(file, layer):
    """Run the mapper for one architecture file and one layer.

    Args:
        file: Name of an architecture file inside ``architectureDirectory``;
            its last five characters (the ".yaml" suffix) are dropped to name
            the output folder.
        layer: Layer identifier; selects
            ``example_designs/layer_shapes/resnet18/<layer>.yaml``.

    The run is skipped when the output directory for this (file, layer) pair
    already exists, so re-running the cell only fills in missing results.
    """
    filename = file[:-5]
    resultDirectory = os.path.join(THIS_SCRIPT_DIR, outputDirectory + filename + "/" + layer)

    # An existing directory is treated as "already computed".
    if os.path.isdir(resultDirectory):
        return

    specificationInputs = [
        os.path.join(architectureDirectory, file),
        os.path.join("components/*.yaml"),
        os.path.join(THIS_SCRIPT_DIR, "example_designs/layer_shapes/resnet18/" + layer + ".yaml"),
        os.path.join(THIS_SCRIPT_DIR, "mapper.yaml"),
    ]
    spec = tl.Specification.from_yaml_files(*specificationInputs)
    tl.call_mapper(spec, output_dir=resultDirectory)

# Collect the architecture files to evaluate. Jupyter may create a
# ".ipynb_checkpoints" folder inside the directory; filter it out rather than
# calling list.remove(), which raises ValueError when the entry is absent.
filenames = [
    name for name in os.listdir(architectureDirectory)
    if name != ".ipynb_checkpoints"
]

# Each (architecture file, layer) pair is one independent mapper run.
unique_combinations = [(file, layer) for file in filenames for layer in layers]

Parallel(n_jobs=8)(
  delayed(run_mapper_with_spec)(file, layer) for file, layer in tqdm(unique_combinations)
)

In [18]:
def parseStatsFile(file):
    """Read the summary metrics from a mapper stats text file.

    The file must contain a line that is exactly "Summary Stats". The values
    are taken from fixed positions after that header (offsets +3 to +6):
    utilization, cycles, energy and EDP, each written as "<label>: <value>".

    Args:
        file: Path of the stats file to read.

    Returns:
        dict with keys "utilization" (fraction, i.e. the percentage divided by
        100), "cycles" (int), "energy" (float, unit suffix dropped) and
        "edp" (float).

    Raises:
        ValueError: if the "Summary Stats" header is missing or a value cannot
            be converted to a number.
    """
    with open(file) as f:
        lines = [line.rstrip() for line in f]
    idx = lines.index("Summary Stats")

    def valueAfterColon(line):
        # Take everything after the first ':' instead of slicing at a
        # hard-coded column, so the label width no longer matters.
        return line.partition(":")[2].strip()

    # Reported as a percentage, e.g. "4.79%"; convert to a fraction.
    utilization = float(valueAfterColon(lines[idx+3]).strip('%')) / 100

    cycles = int(valueAfterColon(lines[idx+4]))

    # Energy carries a trailing unit, e.g. "254.95 uJ"; keep the number only.
    energyFields = valueAfterColon(lines[idx+5]).split()
    if not energyFields:
        raise ValueError("missing energy value in line: " + repr(lines[idx+5]))
    energy = float(energyFields[0])

    # float() understands scientific notation directly, including negative
    # exponents ("2.50e-01") and plain decimals, which splitting on "e+"
    # could not handle.
    edp = float(valueAfterColon(lines[idx+6]))

    return {"utilization": utilization, "cycles": cycles, "energy": energy, "edp": edp}

# Smoke-test the parser on one mapper result and print each metric as a
# (name, value) tuple.
sampleStatsPath = os.path.join(THIS_SCRIPT_DIR, "outputs4/simpleOSMaxDim64x64/11/timeloop-mapper.stats.txt")
stats = parseStatsFile(sampleStatsPath)
for metric in stats.items():
    print(metric)

('utilization', 0.0479)
('cycles', 589824)
('energy', 254.95)
('edp', 150.0)
