# Checkpoint 与性能评估
使用 checkpoint 进行性能评估在学术界和工业界都有广泛应用。为了生成 checkpoint，我们首先需要准备 NEMU 环境，并得到 GCPT restorer。

本节中会用到一些与 `xs-env/env.sh` 不同的路径和常量。为了方便使用，我们创建了 `07-checkpoint-env.sh` 脚本，并在本节中使用它来设置环境变量，您可以运行下面的单元格查看这些环境变量。在后续的单元格中，我们会将该脚本的输出重定向到 `/dev/null` 以减少干扰。

In [None]:
%%bash
# Load the checkpoint-specific environment; the output is left visible here so
# the variables set by the script can be inspected.
source 07-checkpoint-env.sh

In [None]:
%%bash
# Load the checkpoint-specific environment; its output is discarded to keep
# the cell output clean.
source 07-checkpoint-env.sh >/dev/null

# Fetch the git submodules of the NEMU repository.
cd ${NEMU_HOME}
git submodule update --init

# Build simpoint
cd ${NEMU_HOME}/resource/simpoint/simpoint_repo
make clean
make

# Build NEMU using the riscv64-xs-cpt_defconfig configuration
cd ${NEMU_HOME}
make clean
make riscv64-xs-cpt_defconfig
make -j8

# Build the GCPT restorer from scratch: remove the previous output directory
# (GCPT_PATH), then build into it with ${WORKLOAD}.bin from PAYLOAD_PATH passed
# as the payload and the riscv64-linux-gnu- cross toolchain.
cd ${NEMU_HOME}/resource/gcpt_restore
rm -rf ${GCPT_PATH}
make -C ${NEMU_HOME}/resource/gcpt_restore/ O=${GCPT_PATH} GCPT_PAYLOAD_PATH=${PAYLOAD_PATH}/${WORKLOAD}.bin CROSS_COMPILE=riscv64-linux-gnu-

接下来，我们需要使用 NEMU 运行要进行切片的程序，来收集程序行为。

In [None]:
%%bash
# Load the checkpoint-specific environment; its output is discarded.
source 07-checkpoint-env.sh >/dev/null

# Start from a clean result directory so stale results are not mixed in.
rm -rf $RESULT_PATH

# Logs of this step are kept under $LOG_PATH/profiling.
_LOG_PATH=$LOG_PATH/profiling
mkdir -p $_LOG_PATH

# Use the GCPT as the image
# -D: results are written under RESULT_PATH
# -w: the actual workload to load is WORKLOAD
# -C: run NEMU with the profiling configuration
# -b: presumably batch (non-interactive) mode -- confirm against NEMU's options
# --simpoint-profile / --cpt-interval: collect the simpoint profile with the
#   interval given by CHECKPOINT_INTERVAL
# stdout and stderr are each piped through tee, which saves copies to
# ${WORKLOAD}-out.txt and ${WORKLOAD}-err.txt in the log directory.
$NEMU ${GCPT} \
    -D ${RESULT_PATH} \
    -w ${WORKLOAD} \
    -C profiling \
    -b \
    --simpoint-profile \
    --cpt-interval ${CHECKPOINT_INTERVAL} \
    > >(tee ${_LOG_PATH}/${WORKLOAD}-out.txt) 2> >(tee ${_LOG_PATH}/${WORKLOAD}-err.txt)


进而，使用 simpoint 对采集到的程序行为进行聚类分析。

In [None]:
%%bash
# Load the checkpoint-specific environment; its output is discarded.
source 07-checkpoint-env.sh >/dev/null

# Clustering output for this workload goes to $RESULT_PATH/cluster/$WORKLOAD.
CLUSTER=${RESULT_PATH}/cluster/${WORKLOAD}
mkdir -p ${CLUSTER}

# Two random seeds: the first six characters of the cksum output computed over
# the first 20 "lines" read from /dev/urandom.
random1=`head -20 /dev/urandom | cksum | cut -c 1-6`
random2=`head -20 /dev/urandom | cksum | cut -c 1-6`

# Logs of this step are kept under $LOG_PATH/cluster.
_LOG_PATH=$LOG_PATH/cluster
mkdir -p $_LOG_PATH

# Run simpoint on the gzipped profile (simpoint_bbv.gz) of the workload found
# under PROFILING_RESULT_PATH. The selected simulation points are saved to
# simpoints0 and their weights to weights0 in the cluster directory; the two
# random values above are passed as the -seedkm and -seedproj seeds.
# stdout and stderr are each piped through tee, which saves copies to
# ${WORKLOAD}-out.txt and ${WORKLOAD}-err.txt in the log directory.
$SIMPOINT \
    -loadFVFile ${PROFILING_RESULT_PATH}/${WORKLOAD}/simpoint_bbv.gz \
    -saveSimpoints ${CLUSTER}/simpoints0 \
    -saveSimpointWeights ${CLUSTER}/weights0 \
    -inputVectorsGzipped \
    -maxK 3 \
    -numInitSeeds 2 \
    -iters 1000 \
    -seedkm ${random1} \
    -seedproj ${random2} \
    > >(tee ${_LOG_PATH}/${WORKLOAD}-out.txt) 2> >(tee ${_LOG_PATH}/${WORKLOAD}-err.txt) 


最后，使用 NEMU 重新运行需要采样的程序片段，生成 checkpoint。

checkpoint 文件内包括需要执行的程序段，也包括 checkpoint 起始位置时的内存状态和处理器体系结构状态（通用寄存器堆，CSR）。

In [None]:
%%bash
# Load the checkpoint-specific environment; its output is discarded.
source 07-checkpoint-env.sh >/dev/null

# CLUSTER points at the parent cluster directory (the previous step wrote its
# results into a per-workload sub-directory of it).
CLUSTER=${RESULT_PATH}/cluster
# Logs of this step are kept under $LOG_PATH/checkpoint.
_LOG_PATH=${LOG_PATH}/checkpoint
mkdir -p ${_LOG_PATH}

# Re-run the workload with the checkpoint configuration (-C checkpoint), using
# the GCPT as the image and the same --cpt-interval as in the profiling step.
# -S: presumably the directory holding the simpoint results -- confirm against
#   NEMU's options
# stdout and stderr are each piped through tee, which saves copies to
# ${WORKLOAD}-out.txt and ${WORKLOAD}-err.txt in the log directory.
$NEMU ${GCPT} \
    -D ${RESULT_PATH} \
    -w ${WORKLOAD} \
    -C checkpoint \
    -b \
    -S ${CLUSTER} \
    --cpt-interval ${CHECKPOINT_INTERVAL} \
    > >(tee ${_LOG_PATH}/${WORKLOAD}-out.txt) 2> >(tee ${_LOG_PATH}/${WORKLOAD}-err.txt)


我们可以使用 emu 运行一下采集到的 checkpoint，看看效果。

emu 检测到文件是 gzip 压缩的 checkpoint 时，会自动进行解压缩，并从 checkpoint 恢复内存状态和体系结构状态。

In [None]:
%%bash
# Load the checkpoint-specific environment; its output is discarded.
source 07-checkpoint-env.sh >/dev/null

# Pick one generated checkpoint: the last file reported by find whose name
# matches "*_.gz" under the workload's checkpoint directory.
CHECKPOINT=$(find ${RESULT_PATH}/checkpoint/${WORKLOAD} -type f -name "*_.gz" | tail -1)

# Run emu with the checkpoint as its image (-i), the NEMU interpreter shared
# object passed via --diff, and a limit of 50000 cycles; stderr is discarded.
${READY2RUN_HOME}/emu \
    -i ${CHECKPOINT} \
    --diff ${NOOP_HOME}/ready-to-run/riscv64-nemu-interpreter-so \
    --max-cycles=50000 \
    2>/dev/null


In [None]:
import os
import re
import json
from pathlib import Path
from itertools import product

# Benchmark name lists.
# NOTE(review): the names look like SPEC CPU2006 floating-point workloads --
# confirm before relying on that.
app_list = [
    "bwaves", "gamess_cytosine", "gamess_gradient", "gamess_triazolium",
    "milc", "zeusmp", "gromacs", "cactusADM", "leslie3d", "namd", "dealII",
    "soplex_pds-50", "soplex_ref", "povray", "calculix", "GemsFDTD", "tonto",
    "lbm", "wrf", "sphinx3"
]

# All SPEC 2017 workload names known to this notebook.
spec_2017_list = [
    "bwaves_1", "bwaves_2", "bwaves_3", "bwaves_4", "cactuBSSN", "namd",
    "parest", "povray", "lbm", "wrf", "blender", "cam4", "imagick", "nab",
    "fotonik3d", "roms", "perlbench_diff", "perlbench_spam", "perlbench_split",
    "gcc_pp_O2", "gcc_pp_O3", "gcc_ref32_O3", "gcc_ref32_O5", "gcc_small_O3",
    "mcf", "omnetpp", "xalancbmk", "x264_pass1", "x264_pass2", "x264_seek",
    "deepsjeng", "leela", "exchange2", "xz_cld", "xz_combined", "xz_cpu2006"
]

# The integer subset of spec_2017_list.
spec2017_int_list = [
    "perlbench_diff", "perlbench_spam", "perlbench_split", "gcc_pp_O2",
    "gcc_pp_O3", "gcc_ref32_O3", "gcc_ref32_O5", "gcc_small_O3", "mcf",
    "omnetpp", "xalancbmk", "x264_pass1", "x264_pass2", "x264_seek",
    "deepsjeng", "leela", "exchange2", "xz_cld", "xz_combined", "xz_cpu2006"
]

# Everything in spec_2017_list that is not an integer workload. Filtering the
# list (instead of subtracting sets) keeps the order of spec_2017_list, so the
# result is identical from run to run regardless of string hash randomization.
spec2017_fp_list = [
    app for app in spec_2017_list if app not in spec2017_int_list
]


def profiling_instrs(profiling_log, spec_app, using_new_script=False):
    """Read the total guest instruction count from a profiling log.

    The log is scanned for the first line containing
    ``total guest instructions = <number>``; the number may use ``,`` as a
    thousands separator and may or may not be followed by an ANSI colour
    reset sequence.

    Args:
        profiling_log: Directory that holds the profiling logs.
        spec_app: Name of the workload whose log should be read.
        using_new_script: When true, read
            ``<profiling_log>/<spec_app>/profiling.out.log``; otherwise read
            ``<profiling_log>/<spec_app>-out.txt``.

    Returns:
        The instruction count as a string of digits (separators removed), or
        the integer ``0`` when the log has no such line.

    Raises:
        OSError: If the log file cannot be opened.
    """
    # Capture only the digit/comma run, so the match does not depend on a
    # trailing escape sequence being present in the log line.
    pattern = re.compile(r"total guest instructions = ([\d,]+)")

    if using_new_script:
        path = os.path.join(profiling_log, spec_app, "profiling.out.log")
    else:
        path = os.path.join(profiling_log, "{}-out.txt".format(spec_app))

    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            found = pattern.search(line)
            if found:
                return found.group(1).replace(",", "")
    return 0


def cluster_weight(cluster_path, spec_app):
    """Map each simulation point of a workload to the weight of its cluster.

    Reads ``<cluster_path>/<spec_app>/weights0`` (lines of
    ``<weight> <cluster id>``) and ``<cluster_path>/<spec_app>/simpoints0``
    (lines of ``<point> <cluster id>``) and joins them on the cluster id.
    Blank lines in either file are ignored.

    Args:
        cluster_path: Directory that holds one sub-directory per workload.
        spec_app: Name of the workload sub-directory.

    Returns:
        A dict mapping each point (string) to its weight (string), or to
        ``None`` when the point's cluster id has no entry in ``weights0``.

    Raises:
        OSError: If either file cannot be opened.
        ValueError: If a non-blank line does not consist of exactly two
            whitespace-separated fields.
    """
    weights_path = f"{cluster_path}/{spec_app}/weights0"
    simpoints_path = f"{cluster_path}/{spec_app}/simpoints0"

    weights = {}
    with open(weights_path, "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate blank lines such as a trailing empty line.
                continue
            weight, cluster_id = fields
            weights[cluster_id] = weight

    points = {}
    with open(simpoints_path, "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue
            point, cluster_id = fields
            points[point] = weights.get(cluster_id)

    return points


def per_checkpoint_generate_json(profiling_log, cluster_path, app_list,
                                 target_path):
    """Write a JSON summary of instruction counts and simpoint weights.

    For every workload in ``app_list`` the summary holds an ``"insts"`` entry
    (from ``profiling_instrs``) and a ``"points"`` entry (from
    ``cluster_weight``). The whole summary is written to ``target_path`` as a
    single JSON object keyed by workload name.
    """
    summary = {}
    for workload in app_list:
        instruction_count = profiling_instrs(profiling_log, workload)
        point_weights = cluster_weight(cluster_path, workload)
        summary[workload] = {
            "insts": instruction_count,
            'points': point_weights,
        }

    with open(os.path.join(target_path), "w") as out:
        out.write(json.dumps(summary))


def per_checkpoint_generate_worklist(cpt_path, target_path):
    """Write a worklist file describing the entries found under ``cpt_path``.

    Every entry located one level below a sub-directory of ``cpt_path``
    produces one line ``<name> <relative path> 0 0 20 20`` in ``target_path``,
    where the relative path is the entry's path with ``cpt_path + "/"``
    removed and the name is that relative path with its first ``/`` replaced
    by ``_``.
    """
    prefix = cpt_path + "/"

    # First level: only directories are considered.
    workload_dirs = [
        entry.path for entry in os.scandir(prefix) if entry.is_dir()
    ]
    # Second level: every entry is taken, whatever its type.
    cpt_entries = [
        child.path
        for workload_dir in workload_dirs
        for child in os.scandir(workload_dir)
    ]

    with open(target_path, "w") as out:
        for full_path in cpt_entries:
            relative = full_path.replace(prefix, "")
            label = relative.replace('/', "_", 1)
            print("{} {} 0 0 20 20".format(label, relative), file=out)


def generate_result_list(base_path, times, ids):
    """Build, print and return the list of result-location records.

    One record is produced for every combination in the product of
    ``range(ids[n], times[n])`` for ``n`` in 0..2. The combination itself is
    not used, so all records carry the same paths below ``base_path``:
    the cluster results, the profiling logs, the checkpoint directory, and
    the JSON and worklist files inside the checkpoint directory.
    """

    def make_entry():
        # A new dict per combination, so the records are independent objects.
        return {
            "cl_res": os.path.join(base_path, "result", "cluster"),
            "profiling_log": os.path.join(base_path, "logs", "profiling"),
            "checkpoint_path": os.path.join(base_path, "result", "checkpoint"),
            "json_path": os.path.join(base_path, "result", "checkpoint",
                                      "cluster.json"),
            "list_path": os.path.join(base_path, "result", "checkpoint",
                                      "checkpoint.lst"),
        }

    axes = [range(ids[n], times[n]) for n in range(3)]
    result_list = [make_entry() for _ in product(*axes)]

    print("Result list:")
    print(json.dumps(result_list, indent=2, separators=(",", ": ")))
    return result_list



def dump_result(base_path, spec_app_list, times, ids):
    """Generate the JSON summary and the worklist for every result record.

    The records come from ``generate_result_list(base_path, times, ids)``;
    for each one the JSON summary is written first, then the worklist file.
    """
    for entry in generate_result_list(base_path, times, ids):
        per_checkpoint_generate_json(
            entry["profiling_log"],
            entry["cl_res"],
            spec_app_list,
            entry["json_path"],
        )
        per_checkpoint_generate_worklist(
            entry["checkpoint_path"],
            entry["list_path"],
        )


# NOTE: these values should be kept the same as those in 07-checkpoint-env.sh
# Workloads to summarize.
spec_list=["stream_100000"]
# Results and logs are looked up below ./07-checkpoint in the current
# working directory.
base_path = os.path.join(os.getcwd(), "07-checkpoint")
# generate_result_list iterates over range(ids[n], times[n]) for each of the
# three positions, so these values yield exactly one result record.
times = [1, 1, 1]
ids = [0, 0, 0]

# Write the JSON summary and the checkpoint worklist.
dump_result(base_path, spec_list, times, ids)