In [6]:
#!/usr/bin/env python3

import os
import json
import shutil
import subprocess


目前：对于RTOS的WCET静态分析方法已有较多的研究成果，但是这些分析工具大部分仅支持单核系统。LLVMTA利用LLVM IR进行指令级时序和缓存行为的分析。
- gcc也是驱动程序，由它驱动将cc、as、ld等程序；
- LLVM（*.ll，*.bc）类似gcc，有自己的优化器;需要用户控制输入/输出；

TAM是基于LLVM的多核实时系统WCET静态分析工具。


In [None]:
# ROOT_DIR="/workspaces/llvmta/testcases/singletest"
# for _f in os.listdir(ROOT_DIR):
#     _t = os.path.join(ROOT_DIR, _f)
#     if os.path.exists(os.path.join(_t, 'ChangeLog.txt')):
#         print(_t)
#         subprocess.run(["python3",
#                         os.path.join(ABS_PATH, "run.py"), 
#                         f"-C{_t}",
#                         f"-H{_t}",
#                         f"-W{os.path.join(_t, 'work')}",
#                         ])
#         # result = subprocess.run(["python", "script.py"], capture_output=True, text=True)

# S0. Set Environment Variables

In [None]:
# os.path.dirname(os.path.abspath(__file__) )
with open('/home/fyj/Desktop/data/WCET_Tool/_OSData.json', 'r') as _f:
# with open('/home/fyj/Desktop/data/WCET_Tool/_TacleData.json', 'r') as _f:
    for _i, _ds in json.load(_f).items():
        print(f"{_i}")
        for _d in _ds:
            print(f"\t{_d}")

## 0.1.1 tacle-bench

In [None]:
PROJECTTYPE='app'
PROJECTNAME='lift'

TACLE_BENCH_DIR='/home/fyj/Desktop/data/1_WCETSpace/tacle-bench'

FILE_DIR=os.path.join(TACLE_BENCH_DIR, 'bench.json')
SOUR_DIR=os.path.join(TACLE_BENCH_DIR, 'bench', PROJECTTYPE, PROJECTNAME)

def __path_code_ana(_s, _d):
    if _d:
        return ' '.join(os.path.join(_s, __dx) for __dx in _d) 
    else: 
        return _s

def __path_head_ana(_h, _d):
    if _d:
        return ' '.join(os.path.join(_h, __dx) for __dx in _d) 
    else: 
        return _h

with open(FILE_DIR, 'r') as _f:
    _d = json.load(_f)
    # 1. code addr
    CODE_DIRS = __path_code_ana(SOUR_DIR, _d[PROJECTTYPE][PROJECTNAME]['c'].split())

    # 2. head addr
    HEAD_DIRS=__path_head_ana(SOUR_DIR, _d[PROJECTTYPE][PROJECTNAME]['h'].split())

## 0.1.2 Nuttx 

In [23]:
# "/home/fyj/Desktop/data/WCET_Tool/temp_dir
# "/home/fyj/Desktop/data/WCET_Tool/pthread_create
# NUTTX_HOME="/home/fyj/Desktop/data/1_WCETSpace/nuttxspace"
NUTTX_HOME="/home/fyj/Desktop/data/1_WCETSpace/vela-opensource"

# 1. code addr
CODE_FILE = [
    os.path.join(NUTTX_HOME, "nuttx/sched/pthread/pthread_create.c"),
]

# 2. head addr
HEAD_DIRS = [
    os.path.join(NUTTX_HOME, "nuttx/sched"),
    #   task/task.h
    os.path.join(NUTTX_HOME, "nuttx/include"),
    #   string.h
    #   stdbool.h
    #   sys/types.h
    #   nuttx/config.h
    # os.path.join(NUTTX_HOME, "nuttx/sched/pthread"),
    # os.path.join(NUTTX_HOME, "nuttx/arch/xtensa/include")
]

## 0.2 Work Path

In [None]:
# 1. work path
WORK_DIR="/home/fyj/Desktop/data/WCET_Tool/WorkSpace"
if os.path.exists(WORK_DIR):
    shutil.rmtree(WORK_DIR)
os.mkdir(WORK_DIR)

# 2. comp path
COMP_DIR=os.path.join(WORK_DIR, "Compile")
if os.path.exists(COMP_DIR):
    shutil.rmtree(COMP_DIR)
os.mkdir(COMP_DIR)

# 3. anal path
ANAL_DIR=os.path.join(WORK_DIR, "Analysis")
if os.path.exists(ANAL_DIR):
    shutil.rmtree(ANAL_DIR)
os.mkdir(ANAL_DIR)

# 4. entry point
# ENTRYPOINT="main"
# ENTRYPOINT="pthread_start"
ENTRYPOINT="nx_pthread_create"

# 5. Core data
COREINFORMATION=os.path.join(WORK_DIR, "CoreInfo.json")

print(f"WORK_DIR:\n\t{WORK_DIR}")
print(f"COMP_DIR:\n\t{COMP_DIR}")
print(f"ANAL_DIR:\n\t{ANAL_DIR}")
print(f"CODE_FILE:\n\t{CODE_FILE}")
# print(f"CODE_DIRS:{'\n\t'.join(CODE_DIRS.split())}")
print(f"HEAD_DIRS:\n\t{HEAD_DIRS}")
# print(f"HEAD_DIRS:{'\n\t'.join(HEAD_DIRS.split())}")

print(f"ENTRYPOINT:\n\t{ENTRYPOINT}")
print(f"COREINFORMATION:\n\t{COREINFORMATION}")

## 0.3 CoreInformation

In [22]:
with open(COREINFORMATION, 'w') as _f:
    json.dump([
        {
            "core": 0,
            "tasks": [
                {
                    "function": ENTRYPOINT
                }
            ]
        }
    ], _f, indent=4)


# S1. Basic Compile(生成IR)

## 1. clang compile:
- *.c -> *.ll

In [None]:
CLANG_INSTRUCT=[
    # (1) instruct;
    "clang",
    "-w",                          # 禁止所有警告信息。
    "-S",                          # clang生成汇编代码（.s 文件而不是可执行文件），结合-emit-llvm选项会生成LLVM IR文件（.ll 文件）。
    "-gline-tables-only",          # 生成最小的调试信息：生成仅包含行号信息的调试信息（最小的调试信息），仅包含源代码行号映射，而不包含完整的调试信息。
    "-O0",                         # 禁用优化（生成未优化的代码，便于调试）。
    "-Xclang -disable-O0-optnone", # 用于在-O0级禁用所有优化（clang前端的选项）。
    "-fno-builtin",                # 禁用内置函数优化。编译器不使用内置函数（如__builtin_memcpy等）优化，仅使用标准库函数。            
    # "-target arm",                 # 指定目标架构(arm / aarch64-linux-gnu / -target=riscv64 )，告诉编译器生成针对ARM架构的代码。
    # "-march=armv4t",               # 指定目标架构版本为(-march=armv8/armv4t/rv64gc/armv7-a)。这会影响生成的代码的指令集和特性。
    "-target aarch64", 
    "-march=armv8-a", 
    # -mcpu=cortex-a7 / arm7tdmi
    "-mfloat-abi=hard",            # 使用硬(软)浮点ABI，浮点运算将直接使用硬(软)件浮点指令，而不是软件浮点。
    # "-D__AVR__ -D__AVR_ATmega128__"
    "-emit-llvm",                  # 生成LLVM中间表示(IR)文件。生成LLVM中间表示（IR）文件。这个选项告诉clang生成.ll 文件，而不是汇编/可执行文件。
    # "-fno-discard-value-names",
    
    # (2) code addr;
    ' '.join(CODE_FILE),
    # ' '.join(f"{_cd}/*.c" for _cd in CODE_DIRS)
    # for _sd in CODE_DIRS.split(","): if not Path(_sd).exists() and not Path(_sd).is_dir():

    # (3) head addr;
    ' '.join(f"-I {_hd}" for _hd in HEAD_DIRS),
    # for _hd in HEAD_DIRS.split(","): if not Path(_hd).exists() and not Path(_hd).is_dir():

    # (4) no print;
    # "> /dev/null 2>&1"
]

print('\n\t '.join(CLANG_INSTRUCT))

clang
	 -w
	 -S
	 -gline-tables-only
	 -O0
	 -Xclang -disable-O0-optnone
	 -fno-builtin
	 -target aarch64
	 -march=armv8-a
	 -mfloat-abi=hard
	 -emit-llvm
	 /home/fyj/Desktop/data/1_WCETSpace/nuttxspace/nuttx/sched/pthread/pthread_create.c
	 -I /home/fyj/Desktop/data/1_WCETSpace/nuttxspace/nuttx/sched -I /home/fyj/Desktop/data/1_WCETSpace/nuttxspace/nuttx/include


In [34]:
os.chdir(COMP_DIR)
if os.system(' '.join(CLANG_INSTRUCT)) != 0:
    exit(1)

## 2. llvm link(链接): 
llvm-link 把多个 .ll 合并 [llvm-link *.ll s → *unoptimized.ll]

- 输ru项:
    - $(ls ./*.ll)

- 输出项:
    - -o optimized.ll: 指定输出文件的名称为 optimized.ll。


In [35]:
os.chdir(COMP_DIR)
if os.system("llvm-link *.ll -o unoptimized.ll") != 0:
    exit(1)

## 3. opt 优化:
- → optimized.ll

### 循环展开优化（Loop Unrolling）

对于固定次数的**`do-while`**循环，可通过**部分展开**消除循环结构：

- **编译选项**：使用**`funroll-loops`**或**`#pragma unroll`**指令触发循环展开。
- **效果**：将循环体复制多次并减少迭代次数，降低LLVMTA的分析复杂度。

**利用LLVM Pass优化循环结构——执行优化Pass（LoopRotate ）**LLVM内置的Pass可自动调整循环结构：

**循环旋转（Loop Rotation）：**把do-while转换成更规范的while，将判断条件前移，利于后续优化和分析。

- **`LoopRotate Pass`**：将**`do-while`**转换为更规范的循环形式。例如，通过循环旋转**（Loop Rotation）**将条件判断移至循环体前，生成类似**`while`**的结构。


In [36]:
OPT_INSTRUCT = [
    "opt",              # opt是LLVM的优化工具，用于对LLVM中间表示（IR）文件进行各种优化。它可以加载和运行LLVM Passes（优化模块）。
    # (1) 输入项;
    "-S unoptimized.ll",   # 输入一个未优化的LLVM模块文件。unoptimized.ll为一个未优化的LLVM模块文件；

    # (2) 优化选项
    # "-S",               # opt输出优化后的结果为LLVM IR文件（.ll 文件），而不是默认的二进制.bc文件。
    # "-lowerswitch",
    "-mem2reg",         # 将内存访问转换为寄存器访问          这个Pass会将alloca指令（堆栈分配）转换为寄存器，从而提高代码的性能。
    "-indvars",         # 优化循环中的归纳变量               这个Pass会简化循环中的变量计算，减少冗余操作。
    "-loop-simplify",   # 简化循环结构(LoopSimplify Pass)： 这个Pass会将复杂的循环结构转换为更简单的形式，便于后续优化。# 其插入Preheader、单一Backedge、唯一Exit Block，消除异常控制流，使循环结构标准化，便于Loop Pass应用。分离循环头（Header）和入口块（Preheader），消除复杂的控制流分支，提升LLVMTA的可分析性。
    "-instcombine",     # 合并冗余指令                      这个Pass会查找并合并重复或冗余的指令，减少指令数量。
    "-globaldce",       # 全局死代码消除                    这个Pass会移除未使用的全局变量和函数。
    "-dce",             # 死代码消除                       这个Pass会移除未使用的指令。
    # "-dot-cfg",         # 生成控制流图（CFG）的 .dot 文件。每个函数的控制流图将保存为一个 .dot 文件，例如 .main.dot。和LLVMTA分析功能无关。去掉它不会影响分析结果。
    # "-licm",
    # "-loop-rotate",
    # "-loop-unroll",
    # "-unroll-loops",   # 启用循环展开（Loop Unrolling）优化，默认依据上面-unroll-count值控制展开。
    # "-unroll-count=2", # 设置循环展开的次数（手动指定为 2 次），即循环体被复制两遍。
    # "-verify",

    # (3) 输出项
    "-o optimized.ll",  # 指定输出文件的名称为 optimized.ll。

    # (4) no print;
    # "> /dev/null 2>&1"
]

print(' '.join(OPT_INSTRUCT))

opt -S unoptimized.ll -mem2reg -indvars -loop-simplify -instcombine -globaldce -dce -o optimized.ll


In [37]:
os.chdir(COMP_DIR)
if os.system(' '.join(OPT_INSTRUCT)) != 0:
    exit(1)

## 4. 输出CFG图:
%0 = call i32 asm sideeffect "mov $0, sp\0A", "=r"() #4, !dbg !32, !srcloc !33

In [None]:
# dot -Tpng ."$entrypoint".dot -o "$entrypoint".png
os.chdir(COMP_DIR)
if os.system(f"dot -Tpng .{ENTRYPOINT}.dot -o {ENTRYPOINT}.png") != 0:
    exit(1)

Error: dot: can't open .nx_pthread_create.dot


## 5. Vertify 编译检测；

In [None]:
# llc optimized.ll -filetype=obj -o optimized.o
os.chdir(COMP_DIR)
if os.system(f"llc optimized.ll -filetype=obj -o optimized.o") != 0:
    exit(1)

# S2. Analysis(分析)

测试代码readme：

- regressionTest：Check whether a change in llvmta has changed the analysis results on any testcase (regression)；

- checkForAsserts：Check whether llvmta crashed on any of the testcases；

- cleanBuildDirs：Remove all (temporary) result files that a llvmta-run can produce；

- eval：**Runs the evaluation for all testcases for the given parameters setting

- determineNeededCtx：A util script that helps to determine the minimal context sensitivity needed to precisely keep track of the stack pointer in the value analysis of LLVMTA;

- Benchmarks:Contains all our benchmarks, ordered into folders.

- runTestcase:Script for running a single testcase inside LLVMTA.

## 0. Initial:

In [None]:
# cp "$COMP_DIR/optimized.ll" "$ANAL_DIR/optimized.ll"
# shutil.copy('WCET.json', out_dir)
shutil.copy(os.path.join(COMP_DIR, "optimized.ll"), 
            os.path.join(ANAL_DIR, "optimized.ll"))

'/home/fyj/Desktop/data/WCET_Tool/WorkSpace/Analysis/optimized.ll'

## 1. parampter and instruct config

In [None]:
MEM_Latency=100     # 150

L1I_Assoc=8
L1I_Nsets=128       # 64
L1I_Linesize=64

L1D_Assoc=8
L1D_Nsets=128       # 64
L1D_Linesize=64

L2C_Assoc=8
L2C_Nsets=1024      # 64
L2C_Linesize=64

# --mem_latency 150
LLVMTA_INSTRUCT = [
    "llvmta",
    # (1) basice
    "-disable-tail-calls", 
    "-float-abi=hard", 
    # "-march=armv8-a",
    "-mattr=-neon,+vfp2", 
    "-O0",
    # "-O0-disable-O0-optnone",
    # "-mcpu=arm7tdmi"

    # 一、 LLVMTA General Options:
    # "--ta-ana-type=timing "                  # 1.***()
    #"--ta-anainfo-policy=precompall "         # 2.
    f"--ta-analysis-entry-point={ENTRYPOINT}", # 3.  Run WCET Analysis itself 针对每个入口函数:
    #"--ta-comp-type= "                        # 4.  会报错放弃  #  icache,dcache,dramrefresh,sharedbusblocking
    #"--ta-compana-joint-ilp=true "            # 5.  "--ta-compana-joint-ilp "
    #"--ta-enable-fixpoint-checks=true " # 6.
    #"--ta-enable-muarchjoin=true "      # 7.
    #"--ta-follow-localwc=<value> "      # 8.  选择要专门跟踪的本地最坏情况
    #"--ta-metric-wcep=<value> "         # 9.  在最坏情况时间路径上需要最大化的指标
    #"--ta-metric-max=<value> "          # 10. 除时间外需要最大化的指标
    #"--ta-pathana-type=<value> "        # 11. 选择路径分析类型，默认为'simpleilp'（简单ILP解决方案）
    # "--ta-pathana-type=graphilp",
    # "--ta-quiet "                        # 12. 静默模式(不报告进度，不转储详细分析信息) 
    #"--ta-restart-after-external"       # 13. 外部函数调用后重置微架构状态(默认false)
    #"--ta-start-address-code=<uint>"    # 14. 二进制文件中代码段的起始地址
    "--ta-strict=false",                 # 15. 启用严格模式(遇到未知情况时断言，默认true， true可能报错)；
    #"--ta-ucb-clever=true"              # 16. 启用更精确处理空间局部性的 UCB 模式 (默认false)；
    # 1. 求解器相关；
    # "--ta-lpsolver=gurobi",              # 1.1 路径分析中的LP求解器选择
    "--ta-lpsolver-effort=maximal",      # 1.2 LP求解器求解精度
    # 2. 输出文件：
    # --ta-dumpb-vcg-graph                    # 2.1 以VCG格式转储状态图(默认为.dot格式)
    # --ta-output-unknown-extfuncs            # 2.2 输出外部函数注解模板文件(默认false)
    # --ta-output-unknown-loops               # 2.3 输出循环注解文件(默认false)
    # 3. 输入文件：
    # --ta-loop-bounds-file=<string>          # 3.1.包含循环界限的文件路径
    # --ta-loop-lowerbounds-file=<string>     # 3.2.包含循环下界的文件路径
    # --ta-extfunc-annotation-file=<string>   # 3.3.外部函数注解文件路径
    # 4. Cache的持久性配置: elementwise; conditionalmust;
    "--ta-icache-persistence=conditionalmust",
    "--ta-dcache-persistence=conditionalmust",
    "--ta-l2cache-persistence=conditionalmust",
    # "--ta-l2cache-persistence=elementwise",
    # 5. 抢占：
    # --ta-preemption-dcache-budget=<uint>              # 5.1 抢占可能导致的数据缓存未命中额外数量(默认0)
    # --ta-preemption-icache-budget=<uint>              # 5.2 抢占可能导致的指令缓存未命中额外数量(默认0)
    # --ta-preemptive                                   # 5.3 被分析程序是否可被抢占(默认false)
    # --ta-calculate-slope-interference-curve=<value>   # 5.4. 计算干扰响应曲线的线性近似斜率
    # --ta-compositional-base-bound=<value>             # 5.5.启用组合基础界限计算
    # --ta-dump-interference-response-curve=<value>     # 5.6.转储干扰响应曲线
    # 6. 寄存器地址：
    # --ta-initial-link-register=<uint>                 # 6.1 初始链接寄存器值(默认0x00000000)
    # --ta-initial-stack-pointer=<uint>                 # 6.2 初始栈指针(默认0x08000000)

    # 二、 Context Sensitivity Options:
    # --ta-loop-peel=<uint>                             # 1. 指定在跟踪分区期间要区分的初始循环迭代次数
    # --ta-num-callee-tokens=<int>                      # 2. 调用时区分上下文的被调用方令牌最大数量
    "--ta-num-callsite-tokens=3",                       # 3. 调用时区分上下文的调用点令牌最大数量   # 指定调用点令牌的数量。 这影响分析工具如何处理函数调用。
    # --ta-num-loop-tokens=<int>                        # 4. 循环相关令牌的最大数量

    # 三、 Hardware Platform Configuration Options:
    # 1. L2缓存延迟(默认4，传输单个字需要5周期)
    f"--ta-mem-latency={MEM_Latency}",
    # --ta-L2-latency=<uint>
    # --ta-bgmem-type=<value>         # “sram，simpledram”
    # --ta-dram-refresh-latency=<uint>
    # --ta-mem-per-word-latency=<uint>
    # 2. 指定内存类型为分离缓存（separate caches）。 这影响分析工具如何模拟内存访问。
    "--ta-memory-type=separatecaches",    # none，single，separatecaches，priv-instr-spm-data-shared
    # 3. 指定微架构类型为顺序执行（inorder/outoforder）。 这影响分析工具如何模拟指令的执行顺序。 
    "--ta-muarch-type=outoforder",        # fixedlatency，pret，inorder，strictlyinorder，outoforder(inorder/outoforder)
    # --ta-num-concurrent-cores=<uint>                      # none，roundRobin
    # --ta-stall-on=<value>                                 # dramrefresh，busblocking
    # 4. 指定调用点令牌的数量。 这影响分析工具如何处理函数调用。
    "--ta-unblock-stores=true",

    # 四、 Cache Configuration:
    "--ta-dcache-write-back=true", 
    "--ta-dcache-write-allocate=true", 
    # f"--ta-Dcache-latency={args.l1_latency}",
    # --ta-dcache-replpol=<value>     # lru，fifo，alwayshit，alwaysmiss
    f"--ta-dcache-assoc={L1D_Assoc}",
    f"--ta-dcache-nsets={L1D_Nsets}",
    f"--ta-dcache-linesize={L1D_Linesize}",
    # f"--ta-Icache-latency={args.l1_latency}",
    # --ta-icache-replpol=<value>     # lru，fifo，alwayshit，alwaysmiss
    f"--ta-icache-assoc={L1I_Assoc}",
    f"--ta-icache-nsets={L1I_Nsets}",
    f"--ta-icache-linesize={L1I_Linesize}",
    # f"--ta-L2-latency={args.l2_latency}",
    # --ta-l2cache-replpol=<value>    # lru，fifo，alwayshit，alwaysmiss
    f"--ta-l2cache-assoc={L2C_Assoc}",
    f"--ta-l2cache-nsets={L2C_Nsets}",
    f"--ta-l2cache-linesize={L2C_Linesize}",

    # 五、Writeback Analysis:
    # --ta-assume-clean-cache 
    # --ta-dfs-persistence
    # --ta-dirtiness-analysis
    # --ta-statically-refute-writebacks
    # --ta-wb-bound=<value>             # none，store，dfs

    # 六、Array-aware Cache Analysis:
    "--ta-array-analysis=True",
    # --ta-array-must=<value>           # none,conflict-set-intersect,conflict-set-union,conflict-powerset 
    # --ta-array-persis=<value>         # none,setwise

    # 七、Multi-Core Corunner-sensitive Analysis:
    # --ta-access-cycles-joinable-partition-size=<uint>                     - Size of the partitions that are used to decide if two upper bounds on the number of access cycles should be joined. Only has an effect for the arrival curve calculation. By default, all bounds are joined. A value of 1 means only join identical bounds. A value of X means only join bounds that result in the same number if they are both integer divided by X.
    # --ta-arrival-curve-calculation-method=<value>                         - Method used for the calculation of the arrival curve values used in co-runner-sensitive analysis. (default 'ilpbased')
    #   =programGranularity                                                 -   Based on a minimum period and an upper bound on the number of accesses per program run. (potentially less precise)
    #   =ilpbased                                                           -   Based on an ILP implicit path enumeration.
    # --ta-arrival-curve-call-site-get-inner-edges-method=<value>           - Which method to use to detect the inner edges of call sites in the calculation of values on an arrival curve. (default 'insideProgramRuns')
    #   =all                                                                -   all
    #   =insideProgramRuns                                                  -   insideProgramRuns
    #   =betweenInOutReachableSimple                                        -   betweenInOutReachableSimple
    #   =betweenInOutReachableSimplePlus                                    -   betweenInOutReachableSimplePlus
    #   =betweenInOutReachableDetailed                                      -   betweenInOutReachableDetailed
    # --ta-arrival-curve-ilp-objective=<value>                              - Use one of two orthogonal objectives. Or use a combined version that is at least as precise as each variant, but potentially more precise than each one. (default 'variant1')
    #   =variant1                                                           -   Sum up upper bound on event occurrences for each edge.
    #   =variant2                                                           -   Sum up upper bound on event occurrences for each edge that is not first or the last of the path. For every remaining clock cycle of the time window, assume an event.
    #   =combined                                                           -   For each ILP valuation, use the minimum of above two variants as objective.
    # --ta-arrival-curve-ilp-time-limit=<number>                            - The time limit in seconds for one arrival curve value calculation via ILP. 0.0 is the default value. 0.0 means there is no time limit. Note that we strongly recommend using a time limit for the calculation of arrival curve values as it is significantly more complex than the calculation of per-execution-run event bounds.
    # --ta-arrival-curve-loop-get-inner-edges-method=<value>                - Which method to use to detect the inner edges of loops in the calculation of values on an arrival curve. (default 'insideProgramRuns')
    #   =all                                                                -   all
    #   =insideProgramRuns                                                  -   insideProgramRuns
    #   =betweenInOutReachableSimple                                        -   betweenInOutReachableSimple
    #   =betweenInOutReachableSimplePlus                                    -   betweenInOutReachableSimplePlus
    #   =betweenInOutReachableDetailed                                      -   betweenInOutReachableDetailed
    # --ta-blocking-joinable-partition-size=<uint>                          - Size of the partitions that are used to decide if two lower blocking bounds should be joined. Only has an effect in combination with -ta-shared-memory-blocking-type=roundrobin+UBconcurrentaccesscycles. By default, all lower blocking bounds are joined. A value of 1 means only join identical bounds. A value of X means only join bounds that result in the same number if they are both integer divided by X.
    # --ta-co-runner-sensitive                                              - Enables the co-runner-sensitive analysis. By default, is is disabled (false).
    # --ta-co-runner-sensitive-analysis-fixed-point-type=<value>            - Determines which type of fixed point the iterative co-runner-sensitive analysis pursues (default 'greatest')
    #   =least                                                              -   Obtain least fixed point.
    #   =greatest                                                           -   Obtain greatest fixed point.
    # --ta-co-runner-sensitive-dumped-blocked-cycles-scaled-to-accesses     - Dump the impact of the number of concurrently interfering accesses on the WCET bound instead of the concurrently interfering access cycles (default false).
    # --ta-co-runner-sensitive-no-arrival-curve-values                      - Do not perform an arrival curve value calculation in co-runner-sensitive analysis. (default false)
    # --ta-co-runner-sensitive-no-wcet-bound                                - Do not perform a WCET bound calculation in co-runner-sensitive analysis. A value true is not allowed if a relative period is specified. (default false)
    # --ta-conc-acc-cyc-bounds=<string>                                     - Expects a list of natural numbers with semicolon as delimeter. The number of elements in the list must be a multiple of -ta-num-concurrent-cores and must not be 0.
    # --ta-program-period=<uint>                                            - How many cycles at least have to pass between two starts of the program under a periodic scheduling. The default value 0 means that no periodic scheduling is assumed to lower bound the time between two program starts.
    # --ta-program-period-rel=<number>                                      - value >= 1 means period is value * wcetAssumingMaxInterference. value < 1 means period is wcetAssumingNoInterference + value * (wcetAssumingMaxInterference - wcetAssumingNoInterference). Negative factor values are OK. If the resulting absolute period would be negative, it is assumed as 0. If the relative period is specified, it overwrites a possible absolute period.
    # --ta-program-period-rel-eval-wrt-wcet-ignoring-interference           - If this is true, the relative period is multiplied with a wcet bound ignoring the interference to obtain the absolute period.
    # --ta-program-period-subpath-method                                    - Use the subpath method for more precision when taking into account the program period.
    # --ta-until-iteration-measurement=<int>                                - Perform dedicated measurements from the start until the end of the iteration for as many iterations as this parameter states. Note that there is also an iteration 0. A negative value disables this dedicated measurements. This parameter is ignored in co-runner-insensitive analysis.

    # 八、 TODo:
    # --Timing anomaly analysis, state splitting                            - (default 'T')
    # --core=<uint>                                                         - The core for the analysis (default '0')
    # --parallel-programs                                                   - Parallel Program Analysis
    # --ta-multicore-type=<value> # :多核分析;：liangy、none
    #"--ta-multicore-type=none "
    f"--core-info={COREINFORMATION}",
    f"--core-numbers=1",
    "--shared-cache-persistence-analysis=false",

    # (*)
    "-debug-only=", 
    # -debug-only=all 
    # --debug-entry-values
    
    # (1) input file
    "optimized.ll",
]

print('\t\n '.join(LLVMTA_INSTRUCT))

## 2 Output File;

### (a) 外部函数信息(extfuncs)
使用llvmta分析外部函数信息。
输出外部函数注解模板文件(默认false)。
--ta-output-unknown-extfuncs

In [None]:
# llvmta $LLVMTA_INSTRUCT --ta-output-unknown-extfuncs=true optimized.ll > /dev/null 2>&1
os.chdir(ANAL_DIR)
if os.system(' '.join(LLVMTA_INSTRUCT + ["--ta-output-unknown-extfuncs=true", 
                                         # "> /dev/null 2>&1"
                                         ])) != 0:
    exit(1)

Unknown arch: 3
llvmta: /home/fyj/Desktop/data/1_WCETSpace/TAM/lib/LLVMPasses/AsmDumpAndCheckPass.cpp:53: TimingAnalysisPass::AsmDumpAndCheckPass::AsmDumpAndCheckPass(llvm::TargetMachine&): Assertion `0 && "not implemented"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: llvmta -disable-tail-calls -float-abi=hard -O0 --ta-analysis-entry-point=nx_pthread_create --ta-strict=false --ta-lpsolver-effort=maximal --ta-icache-persistence=conditionalmust --ta-dcache-persistence=conditionalmust --ta-l2cache-persistence=conditionalmust --ta-num-callsite-tokens=3 --ta-mem-latency=100 --ta-memory-type=separatecaches --ta-muarch-type=outoforder --ta-unblock-stores=true --ta-dcache-write-back=true --ta-dcache-write-allocate=true --ta-dcache-assoc=8 --ta-dcache-nsets=128 --ta-dcache-linesize=64 --ta-icache-assoc=8 --ta-icache-nsets=128 --ta-icache-linesize=64 --ta-l2cache-assoc=8 --ta-l2cache-nsets=1

### (b) 循环信息(loops)
使用llvmta分析循环信息。
输出循环注解文件(默认false)。
--ta-output-unknown-loops

In [None]:
# llvmta $LLVMTA_INSTRUCT --ta-output-unknown-loops=true optimized.ll > /dev/null 2>&1
os.chdir(ANAL_DIR)
if os.system(' '.join(LLVMTA_INSTRUCT + ["--ta-output-unknown-loops", 
                                         # "> /dev/null 2>&1"
                                         ])) != 0:
    exit(1)

Unknown arch: 3
llvmta: /home/fyj/Desktop/data/1_WCETSpace/TAM/lib/LLVMPasses/AsmDumpAndCheckPass.cpp:53: TimingAnalysisPass::AsmDumpAndCheckPass::AsmDumpAndCheckPass(llvm::TargetMachine&): Assertion `0 && "not implemented"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: llvmta -disable-tail-calls -float-abi=hard -O0 --ta-analysis-entry-point=pthread_start --ta-strict=false --ta-lpsolver-effort=maximal --ta-icache-persistence=conditionalmust --ta-dcache-persistence=conditionalmust --ta-l2cache-persistence=conditionalmust --ta-num-callsite-tokens=3 --ta-mem-latency=100 --ta-memory-type=separatecaches --ta-muarch-type=outoforder --ta-unblock-stores=true --ta-dcache-write-back=true --ta-dcache-write-allocate=true --ta-dcache-assoc=8 --ta-dcache-nsets=128 --ta-dcache-linesize=64 --ta-icache-assoc=8 --ta-icache-nsets=128 --ta-icache-linesize=64 --ta-l2cache-assoc=8 --ta-l2cache-nsets=1024 

### (c) vcg

In [None]:
# llvmta $LLVMTA_INSTRUCT --ta-dumpb-vcg-graph optimized.ll > /dev/null 2>&1
# if os.system(' '.join(_llvm_comm + ["--ta-dumpb-vcg-graph", "optimized.ll", "> /dev/null 2>&1"])) != 0:
#     exit(1)

## 3. Loop Bound Update;

In [None]:
# cp -f LoopAnnotations.csv ${SOUR_DIR}
# pushd "$SOUR_DIR" > /dev/null
#     cp -f LoopAnnotations.csv LLoopAnnotations.csv
#     python3 /workspaces/llvmta/testcases/quickGetBound.py -s "./"   # > /dev/null 2>&1
#     mv -f LoopAnnotations.csv $ANAL_DIR
#     mv -f LLoopAnnotations.csv $ANAL_DIR
# popd > /dev/null

shutil.copy(os.path.join(ANAL_DIR, "LoopAnnotations.csv"), 
            # os.path.join(CODE_DIRS[0], "LoopAnnotations.csv"))
            os.path.join(CODE_DIRS.split()[0], "LoopAnnotations.csv"))

os.chdir(CODE_DIRS.split()[0])
shutil.copy("LoopAnnotations.csv", "LLoopAnnotations.csv")
subprocess.run(["python3", 
                "/home/fyj/Desktop/data/WCET_Tool/quickGetBound.py", 
                # "/workspaces/llvmta/testcases/quickGetBound.py", 
                f"-s./" ])

# p = subprocess.Popen(['python3', 'quickGetBound.py', '-s', f'{str(src_dir)}',], 
#                         env = os.environ.copy(), cwd = pwd,
#                         stdout = subprocess.PIPE,
#                         stderr = subprocess.PIPE)
# p.wait()

# shutil.move("LoopAnnotations.csv", os.path.join(ANAL_DIR, "LoopAnnotations.csv"))
# shutil.move("LLoopAnnotations.csv", os.path.join(ANAL_DIR, "LLoopAnnotations.csv"))

# shutil.copy('LoopAnnotations.csv', out_dir / 'LoopAnnotations.csv')
# shutil.copy('LoopAnnotations.csv', 'LLoopAnnotations.csv')

# with open('LoopAnnotations.csv', 'r') as f:
#     data = f.readlines()
#     output_data = list()
#     for i in range(len(data)):
#         if i != 0 and data[i].startswith('#'):
#             continue
#         output_data.append(data[i])

# with open('LoopAnnotations.csv', 'w') as f:
#     f.writelines(output_data)

# shutil.copy('LoopAnnotations.csv', src_dir / 'LoopAnnotations.csv')
# shutil.copy('LoopAnnotations.csv', src_dir / 'LLoopAnnotations.csv')
# shutil.copy('LoopAnnotations.csv', out_dir / 'LoopAnnotations.csv')
# shutil.copy('LoopAnnotations.csv', out_dir / 'LLoopAnnotations.csv')

# with open('LLoopAnnotations.csv', 'r') as f:
#   data = f.readlines()

python3: can't open file '/workspaces/llvmta/testcases/quickGetBound.py': [Errno 2] No such file or directory


CompletedProcess(args=['python3', '/workspaces/llvmta/testcases/quickGetBound.py', '-s./'], returncode=2)

## 4. WCET Analysis

In [None]:
# bound=$(llvmta $LLVMTA_INSTRUCT \
#     --ta-loop-bounds-file=LoopAnnotations.csv \
#     --ta-loop-lowerbounds-file=LLoopAnnotations.csv \
#     --ta-restart-after-external \
#     --ta-extfunc-annotation-file=ExtFuncAnnotations.csv \
#     -debug-only= \
#     optimized.ll \
# 2>&1 | grep "Calculated Timing Bound" | awk '{print $NF}')
# echo "Bound:$bound"
os.chdir(ANAL_DIR)
if os.system(' '.join(LLVMTA_INSTRUCT + ["--ta-loop-bounds-file=LoopAnnotations.csv",
                                         "--ta-loop-lowerbounds-file=LLoopAnnotations.csv",
                                         "--ta-restart-after-external",
                                         "--ta-extfunc-annotation-file=ExtFuncAnnotations.csv"])) != 0:
    exit(1)


## 5. 输出AEG图

In [None]:
# sed 's/|->/maps to/g' StateGraph_Time.dot > AEG.dot
# dot -Tpng AEG.dot -o output.png
# dot -Tpng StateGraph_Time.dot -o output.png    # _BUFFX

# S3. 总编译

编译与执行可执行文件

./S3_TCompile.sh $PROJECT 
./run.sh \
  /workspaces/llvmta/workspace/test \
  /workspaces/llvmta/workspace/test/code \
  /workspaces/llvmta/workspace/test/head

In [None]:
# (1) .ll转成 .bc
# llvm-as optimized.ll -o /dev/null
# llvm-as optimized.ll -o optimized.bc

# (2) .bc 编译成 .o
# 指定 ARM 架构
# llc                                               optimized.bc -filetype=obj -o optimized.o
# llc -march=arm                    -float-abi=hard optimized.bc -filetype=obj -o optimized.o
# llc -march=arm -mattr=-neon,+vfp2 -float-abi=hard optimized.bc -filetype=obj -o optimized.o
# llc -march=arm -mattr=+v4t,+vfp2  -float-abi=hard optimized.bc -filetype=obj -o optimized.o

# (3) 链接.o → .elf.o 生成可执行文件
# arm-linux-gnueabihf-gcc optimized.o -static -o $last_part
# arm-linux-gnueabihf-gcc optimized.o -static -o /workspaces/llvmta/testcases/workspace/ret/optimized
# arm-linux-gnueabihf-gcc optimized.o -static -o /workspaces/llvmta/testcases/workspace/ret/$last_part

# arm-linux-gnueabihf-gcc optimized.o -static -o optimized
# arm-linux-gnueabihf-gcc optimized.o -static -o /workspaces/llvmta/testcases/workspace/ret/$PRO_NAME
# arm-linux-gnueabihf-gcc optimized.o         -o /workspaces/llvmta/testcases/workspace/ret/$PRO_NAME

# ld optimized.o -o optimized.elf   # 用交叉gcc，不用ld）
# aarch64-linux-gnu-gcc -O0 -fno-builtin "$SOUR_DIR"/*.c    -o /workspaces/llvmta/testcases/workspace/ret/$PRO_NAME
# aarch64-linux-gnu-gcc                         optimized.o -o /workspaces/llvmta/testcases/workspace/ret/$PRO_NAME
# clang --target=aarch64-linux-gnu -fuse-ld=lld optimized.o -o /workspaces/llvmta/testcases/workspace/ret/$PRO_NAME

# S4. 处理并输出到ll文件

In [None]:
# while read line; do
#     if [[ $line =~ ^[[:space:]]*%0 ]]; then
#         echo "\t$line"
#         IFS=', '
#         read -ra parts <<< "$line"
#         unset IFS
#         for item in "${parts[@]}"; do
#             echo "$item"
#             if [[ $item =~ ^!([0-9]+) ]]; then
#                 # echo "数字是："${BASH_REMATCH[1]}
#                 sed -i '/^'$item'/d' optimized.ll
#             fi
#         done
#     fi
# done < optimized.ll
# sed -i '/^[[:space:]]*%0/d' optimized.ll

# S5. 如果指定 --llvmta-build-mif：

Convert the compiled binary to a FPGA compatible .mif file    
    
    - 编译成 .s → .o

    - 链接生成 binary


In [None]:
# if [[ "$build_mif" = true ]]; then
#   First ensure that all the required compiled files exist
# 	ensure_mif_prereqs
# 	cp "$MIF_UTILS/init.o" "$MIF_UTILS/hex_to_mif/hex2mif" "$MIF_UTILS/minimal.ld" .
# 	arm-none-eabi-as "${llvmta_entry[0]}/optimized.s" -o optimized.o
# 	arm-none-eabi-ld -Tminimal.ld  init.o optimized.o -o final_binary -static -L/usr/lib/gcc/arm-none-eabi/7.2.0 -lgcc
# 	arm-none-eabi-objcopy -O ihex --reverse-bytes=4 final_binary final_binary.hex
# 	"$MIF_UTILS"/hex_to_mif/hex2mif final_binary
# fi

# S6. 执行结束后cleanup把WORKDIR移动为TESTCASE_DIR/build
测试可执行文件(Excution test)

In [None]:
# if [ -f a.out ]; then
#     $EXEC ./a.out &>/dev/null
#     RETURNVALUE=$(echo $?)
#     if [ $RETURNVALUE -eq 0 ]; then
#         printf "passed. \n"
#         ((PASS++))
#     else
#         printf "failed (wrong return value $RETURNVALUE). \n"
#         ((FAIL_EXEC++))
#     fi
# else
#     printf "failed (compiled with errors/warnings). \n"
#     ((FAIL_COMP++))
# fi