In [22]:
from transformers import AutoConfig
import os

# Directory that holds one sub-directory per model checkpoint/config.
# The SIM_MODEL_DIR environment variable overrides it so the notebook can run
# on another machine without editing code; the default is the original path.
MODEL_DIR = os.environ.get('SIM_MODEL_DIR', '/home/xuechenhao/hugginface')

# Model registry: key is the sub-directory name under MODEL_DIR that is handed
# to AutoConfig.from_pretrained; value is the variable name used as the header
# of the printed workload list (e.g. "llama7b = [").
config_list = {
    "llama-7b-meta": 'llama7b',
    "llama-13b-meta": 'llama13b',
    "llama-30b-meta": 'llama30b',
    "llama-65b-meta": 'llama65b',
    "opt-6.7b": 'opt6b',
    "opt-13b": 'opt13b',
    "opt-30b": 'opt30b',
    "opt-66b": 'opt66b',
}

def generate_sim_workload(config_name, num_layers=1):
    """Build, print and return the linear-layer workload for one model.

    The model config is loaded with ``AutoConfig.from_pretrained`` from
    ``MODEL_DIR/config_name``. Only linear layers are counted (following
    olive): the fused QKV projection, the attention output projection and
    the two feed-forward projections.

    Each op is a list ``[[S, in], [out, in], [S, out], [], [], 4, 1]`` where
    ``S`` is ``config.max_position_embeddings``. The three shapes are
    activation, weight and output, in that order.
    NOTE(review): the meaning of the trailing ``[], [], 4, 1`` fields is
    defined by the consumer of this workload and is not visible here.

    The workload is also printed as a Python list literal whose variable
    name is ``config_list[config_name]``, so it can be pasted elsewhere.

    Args:
        config_name: Key of ``config_list``; also the sub-directory name
            under ``MODEL_DIR``. Names containing ``'llama'`` read
            ``config.intermediate_size``; all others read ``config.ffn_dim``.
        num_layers: How many times the four ops are repeated. Defaults to 1
            (a single transformer block). Pass ``None`` to use
            ``config.num_hidden_layers``.

    Returns:
        A list with ``4 * num_layers`` ops. Every op is a distinct list
        object, so callers may mutate one layer without affecting others.

    Raises:
        KeyError: If ``config_name`` is not a key of ``config_list``.
    """
    config_path = os.path.join(MODEL_DIR, config_name)
    config = AutoConfig.from_pretrained(config_path)

    h = config.hidden_size
    S = config.max_position_embeddings

    if 'llama' in config_name:
        h_ = config.intermediate_size
        # First FFN matmul is twice as wide for llama
        # (presumably gate and up projections fused -- TODO confirm).
        fc1_out = 2 * h_
    else:
        h_ = config.ffn_dim
        fc1_out = h_

    if num_layers is None:
        num_layers = config.num_hidden_layers

    def linear_op(in_features, out_features):
        # s, in; out, in; s, out
        return [
            [S, in_features],
            [out_features, in_features],
            [S, out_features],
            [], [], 4, 1,
        ]

    # they only count linear in olive, so do we
    workload = []
    for _ in range(num_layers):
        # Ops are rebuilt on every iteration so layers never share list objects.
        workload.append(linear_op(h, h * 3))    # qkvproj
        workload.append(linear_op(h, h))        # oproj
        workload.append(linear_op(h, fc1_out))  # fc1
        workload.append(linear_op(h_, h))       # fc2

    # print workload op by op
    print(f"{config_list[config_name]} = [")
    for op in workload:
        print(op, ",")
    print(']\n')

    return workload


In [23]:
# Print the single-block workload of every registered model, in registry order.
for config_name in config_list.keys():
    generate_sim_workload(config_name)

llama7b = [
[[2048, 4096], [12288, 4096], [2048, 12288], [], [], 4, 1] ,
[[2048, 4096], [4096, 4096], [2048, 4096], [], [], 4, 1] ,
[[2048, 4096], [22016, 4096], [2048, 22016], [], [], 4, 1] ,
[[2048, 11008], [4096, 11008], [2048, 4096], [], [], 4, 1] ,
]

llama13b = [
[[2048, 5120], [15360, 5120], [2048, 15360], [], [], 4, 1] ,
[[2048, 5120], [5120, 5120], [2048, 5120], [], [], 4, 1] ,
[[2048, 5120], [27648, 5120], [2048, 27648], [], [], 4, 1] ,
[[2048, 13824], [5120, 13824], [2048, 5120], [], [], 4, 1] ,
]

llama30b = [
[[2048, 6656], [19968, 6656], [2048, 19968], [], [], 4, 1] ,
[[2048, 6656], [6656, 6656], [2048, 6656], [], [], 4, 1] ,
[[2048, 6656], [35840, 6656], [2048, 35840], [], [], 4, 1] ,
[[2048, 17920], [6656, 17920], [2048, 6656], [], [], 4, 1] ,
]

llama65b = [
[[2048, 8192], [24576, 8192], [2048, 24576], [], [], 4, 1] ,
[[2048, 8192], [8192, 8192], [2048, 8192], [], [], 4, 1] ,
[[2048, 8192], [44032, 8192], [2048, 44032], [], [], 4, 1] ,
[[2048, 22016], [8192, 22016], [2