In [1]:
import re
import os
import numpy as np
import glob
from colorama import Fore, Back, Style
from tqdm import tqdm
import pandas as pd
from subprocess import Popen, PIPE, call, check_call
import datetime
import shutil
from datetime import datetime
import time

from help_functions import replace_gpr #user def
from lut import tests #user def 


## General defines, paths etc

In [2]:
# Simulation directories are taken from the environment (KeyError if unset).
# A trailing path separator is enforced on transcript_path because file names
# are appended to it by plain string concatenation (e.g. transcript_path + 'ram.srec');
# os.path.join(p, '') leaves p untouched when it already ends with a separator.
transcript_path = os.path.join(os.environ['NOELV'], '')
grlib_path = os.environ['GRLIB']

# NOTE(review): machine-specific absolute path to the riscv-dv checkout.
riscv_dv = os.path.join("/home/jonathanjonsson/MasterThesis/riscv-dv/", '')
test_number = 3  # index into the `tests` table imported from lut
print("Running the test: {}".format(tests[test_number]))
now = datetime.now()
# Date stamp (YYYY-MM-DD); the riscv-dv output directory is addressed as "out_<date>".
current_time = now.strftime("%Y-%m-%d")

Running the test: riscv_loop_test


## Generate RISCV-DV tests and run OVPsim
- Tests that work: 0
- Tests that sometimes time out: 1, 2, 10
- Tests that fail due to an add instruction after **mret**: 3, 4
- Tests that don't run at all: 5, 6 (never enter the program after running the bootloader)
- Test 7 seems to fail outright
- Tests 8 and 9 may not be applicable, since ebreak is handled differently by OVPsim and NOEL-V

In [3]:
# Run riscv-dv (run.py) for the selected test with OVPsim as the ISS.
param = "python3 run.py --custom_target target/rv64_noelv/ --iss ovpsim --simulator riviera --isa rv64gc --mabi lp64 --test "+tests[test_number]+" --iterations 1"
process = Popen('cd '+riscv_dv+' && source ~/.bashrc && module load aldec/riviera/2021.10 &&'+param, shell=True, stdout=PIPE, stderr=PIPE)
# communicate() blocks until the command has finished and drains both pipes.
stdout, stderr = process.communicate()
# Tool output is not guaranteed to be pure ASCII; replace undecodable bytes
# instead of aborting the cell with a UnicodeDecodeError.
error = stderr.decode('ascii', errors='replace')

if "You do not have a valid license to run Riviera-PRO" in error:
    raise RuntimeError("License not found for Riviera PRO")

# A "Timeout" message on stderr is taken to mean that OVPsim hit its timeout.
timeout = "Timeout" in error
print("OVPsim timed out" if timeout else "No timeout")

no_instr_timeout = 1000000  # presumably the instruction count at which OVPsim times out -- TODO confirm
clock_period = 10 #ns
ipc_compensate = 1.2 #compensate with 20% extra simulation time in case ipc != 1

# Simulator command scripts: run to completion, or for a bounded time when
# OVPsim timed out (instructions * clock period * IPC compensation, in ns).
no_timeout_arg = ['run -all\n', 'quit\n', '\n']
timeout_arg = ['run '+str(int(no_instr_timeout*clock_period*ipc_compensate))+'ns\n', 'quit\n', '\n']

# The with-block closes the file; no explicit close() is needed afterwards.
with open(grlib_path+"/bin/runvsim.do","w") as f:
    f.writelines(timeout_arg if timeout else no_timeout_arg)
    

OVPsim timed out


## Run RTL simulation
### For now only consider tests generated today
- How do we break simulation if we have a timeout in OVPsim?
    - It seems like execution time scales quite well to number of instructions*clock_period, add 20% extra sim time to compensate for IPC differences

In [4]:
# Locate the S-record image(s) generated for the selected test.
srec_tests = glob.glob(riscv_dv+"out_"+current_time+"/asm_test/"+tests[test_number]+"*.srec")
print(srec_tests)
if not srec_tests:
    # Fail with a clear message instead of an IndexError on srec_tests[0].
    raise RuntimeError("No .srec file found for test "+tests[test_number]+" in "+riscv_dv+"out_"+current_time+"/asm_test/")
srec_file = srec_tests[0]
shutil.copy(srec_file, transcript_path+'ram.srec') #copy the current test to the sim dir

#launch sim
process = Popen('cd '+transcript_path+' && source ~/.bashrc && module add mentor/questasim/2021.3 && make sim-run', shell=True, stdout=PIPE, stderr=PIPE)
stdout, stderr = process.communicate()
# The run is reported as failed when the "IU in error mode" halt message is
# absent from the simulator output.
if "# ** Failure: *** IU in error mode, simulation halted ***" not in stdout.decode('ascii', errors='replace'):
    print("RTL Simulation failed")

rtl_log_dir = riscv_dv+"out_"+current_time+"/rtl_log/"
os.makedirs(rtl_log_dir, exist_ok=True)
# Digits directly in front of the ".srec" suffix of the file that was simulated
# (empty string when the file name carries no index).
test_index = re.search(r"\d*(?=\.srec$)", srec_file).group(0)
shutil.copy(transcript_path+"transcript", rtl_log_dir+tests[test_number]+"_"+test_index+".log")

['/home/jonathanjonsson/MasterThesis/riscv-dv/out_2022-02-23/asm_test/riscv_loop_test_0.srec']
RTL Simulation failed


'/home/jonathanjonsson/MasterThesis/riscv-dv/out_2022-02-23/rtl_log/riscv_loop_test_0.log'

## Parse RTL transcript and create rtl_log

In [5]:
# Only transcript lines matching this pattern are parsed. All patterns below are
# raw strings so that "\s", "\d", ... are not treated as (invalid) string escapes.
lookup = re.compile(r"#\s*\d*\s*ns\s*:\sC\d I\d : \d*\s*\[\d\] @0x[0-9a-fA-F]{16} \(0x[0-9a-fA-F]{4,8}\)\s*.*")
match = 0
with open(transcript_path+"transcript") as f:
    transcript = f.readlines()

#regex to extract content relevant to rtl sim
error_re = re.compile("[eE]rror | [wW]arning | [fF]ailure")
pc_csr_status_tval_re = re.compile("[0-9a-fA-F]{16}|X{16}")
# Hex digits between "(0x" and ")". The previous class [a0-z9] was really the
# range '0'..'z'; `lookup` already guarantees hex digits here.
binary_re = re.compile(r"(?<=(\(0x))[0-9a-fA-F]{4,8}(?=\))")
instruction_re = re.compile(r"\w{2,8}(?=(\s\w{2,3},))|(\w*\.[a-z])|nop|mret|ecall|fence|unknown instruction|ebreak")
instr_str_re = re.compile(r"\w*\s\w*,\s\w*,\s-?\w*(?=\s*W)|\w*(?=\s*W)|\w*\s\w*,\s\w*(?=\s*W)") #ugly
gpr_re = re.compile(r"(?<=([a-z]{2}\s))\w{2,3}(?=,)")
op1_re = re.compile(r"(?<=,\s)\w{1,8}((?=,)|(?=\s*W))|0\(\w{2,3}\)")
op2_re = re.compile(r"(\w*,\s\w*,\s-?\w*)") #use split to get op2, don't have a nicer regex atm
csr_re = re.compile(r"(?<=\[)\w{2,3}(?=\s?=)")
mode_re = re.compile(r"(?<=PRV\[)\d*")


# One list per extracted field; every parsed line appends one entry to each.
pc = []
binary = []
instr = []
instr_str = []
gpr = []
csr = []
gpr_val = []
op_0 = []
op_1 = []
status = []
tval = []
mode = []

for ln,line in tqdm(enumerate(transcript)):
    if not lookup.search(line): #skip lines that do not conform to the trace format
        continue
    pc_i = pc_csr_status_tval_re.search(line)
    if pc_i is None:
        print(line)
        break
    pc.append(pc_i.group(0))
    binary.append(binary_re.search(line).group(0))
    instruction = instruction_re.search(line)
    if instruction is None:
        raise RuntimeError("Instruction not present in RegEx\n"+line)
    instr.append(instruction.group(0))
    instr_str.append(instr_str_re.search(line).group(0))
    # Destination register and operands are optional: store "" when absent.
    match3 = gpr_re.search(line)
    gpr.append(match3.group(0) if match3 else "")
    match4 = op1_re.search(line)
    op_0.append(match4.group(0) if match4 else "")
    match5 = op2_re.search(line)
    op_1.append(match5.group(0).split(", ")[2] if match5 else "")
    match6 = csr_re.search(line)
    if match6 is None:
        print(line)
        raise RuntimeError("CSR RegEx not complete")
    csr.append(match6.group(0))

    # All 16-digit fields of the line; the first is the PC, the following three
    # are stored as GPR value, status and tval.
    match7 = pc_csr_status_tval_re.findall(line)
    if len(match7) < 4:
        # Previously this case printed debug output and then died with an
        # IndexError on match7[3]; report it explicitly instead.
        print(match7)
        print(line)
        raise RuntimeError("Expected 4 16-digit fields (pc, gpr value, status, tval), found {}".format(len(match7)))
    gpr_val.append(match7[1])
    status.append(match7[2])
    tval.append(match7[3])
    mode.append(mode_re.search(line).group(0))



columns = ['pc','instr','gpr','csr','binary','mode','instr_str','operand','pad']
rtl_log = pd.DataFrame(columns=columns)
rtl_log[columns[0]] = pc
rtl_log[columns[1]] = instr
rtl_log[columns[2]] = pd.Series(gpr) +":"+ pd.Series(gpr_val)
rtl_log[columns[3]] = csr
rtl_log[columns[4]] = binary
rtl_log[columns[5]] = mode
rtl_log[columns[6]] = instr_str
rtl_log[columns[7]] = pd.Series(gpr)+","+pd.Series(op_0)+","+pd.Series(op_1)
# 'pad' is left unfilled; status and tval are collected but not stored in the frame.
BOOTLOADER_LEN = 41 #41 is where the bootloader ends
rtl_log = rtl_log[BOOTLOADER_LEN:].reset_index(drop=True)
def replace_gpr(pd_series,gpr,re_gpr):
    """Rename one register in a series of "<register>:<value>" strings.

    NOTE(review): this definition shadows the `replace_gpr` imported from
    help_functions at the top of the notebook.

    Parameters
    ----------
    pd_series : iterable of str
        Entries of the form "<register>:<value>"; the register part may be empty.
    gpr : str
        Register name to replace (compared against the text before the first ':').
    re_gpr : str
        Replacement register name.

    Returns
    -------
    pandas.Series
        New series with a default integer index and the same number of entries.
        Entries whose register part equals `gpr` get it replaced by `re_gpr`;
        all other entries are returned unchanged. Entries without a ':' are
        passed through instead of raising IndexError, and text after a second
        ':' is preserved rather than dropped.
    """
    def _rename(entry):
        # partition() splits on the first ':' only, keeping the rest intact.
        name, sep, value = entry.partition(":")
        if sep and len(name) > 0 and name == gpr:
            return re_gpr + sep + value
        return entry

    return pd.Series([_rename(entry) for entry in pd_series])
# Rename 'fp' to 's0' in the gpr column so register naming stays consistent.
rtl_log['gpr'] = replace_gpr(rtl_log['gpr'], 'fp', 's0')
# Disabled workaround for gaislers addi injection after mret (would remove all of those lines):
#rtl_log = rtl_log[~((rtl_log['instr'] == "addi") & (rtl_log.shift(1)['instr'] == "mret"))].reset_index(drop=True)


940657it [02:01, 7767.24it/s]
100%|██████████| 937064/937064 [00:00<00:00, 1069965.57it/s]


## Convert OVPsimlog to csv

In [6]:
# Convert every OVPsim log of the selected test to a trace CSV next to the log.
ovpsim_dir = riscv_dv+"out_"+current_time+"/ovpsim_sim/"
logs = glob.glob(ovpsim_dir+tests[test_number]+"*.log")
for log in logs:
    # splitext() removes only the ".log" suffix; str.strip(".log") would strip
    # any of the characters '.', 'l', 'o', 'g' from both ends of the path.
    csv_path = os.path.splitext(log)[0]+".csv"
    # Argument list without a shell: paths are passed verbatim.
    process = Popen(["python3", riscv_dv+"scripts/ovpsim_log_to_trace_csv.py",
                     "--dont_truncate_after_first_ecall",
                     "--log", log, "--csv", csv_path],
                    stdout=PIPE, stderr=PIPE)
    stdout, stderr = process.communicate()
    if process.returncode != 0:
        # Report converter failures instead of silently reading a stale or missing CSV.
        print("ovpsim_log_to_trace_csv.py failed for {}:\n{}".format(log, stderr.decode('ascii', errors='replace')))
csvs = glob.glob(ovpsim_dir+tests[test_number]+"*.csv")
if not csvs:
    raise RuntimeError("No OVPsim trace csv found in "+ovpsim_dir+" for test "+tests[test_number])
sim_log = pd.read_csv(csvs[0])
sim_log['mode'] = sim_log['mode'].astype('Int64')
sim_log['gpr'] = sim_log['gpr'].astype('str') #missing values become the string "nan"
if(not timeout):
    sim_log = sim_log.truncate(after=len(sim_log)-3) #exit sequence slightly different between the rtl_sim and ovpsim

## Compare series to see if run was successful

In [134]:
# This limit is somewhat arbitrary, not sure what best course of action is for different length logs.
# We can expect some difference in length when OVPsim timeouts since the runtime is then arbitrarily set, however, the rtl_log should be almost the same length
# as that of the sim_log.
MAX_LEN_DIFF = 1000

# abs() must wrap the difference itself. The previous form,
# abs(len(sim_log)-len(rtl_log) < 1000), took abs() of a boolean and therefore
# never rejected the case where rtl_log was the longer log.
len_diff = abs(len(sim_log)-len(rtl_log))
if len_diff >= MAX_LEN_DIFF:
    raise RuntimeError("Lengths of series differs with "+str(len_diff)+" elements")

# Trim the longer log so both frames cover the same rows.
if(len(sim_log) < len(rtl_log)):
    rtl_log=rtl_log.truncate(after=len(sim_log)-1)
    print("Truncating rtl_log to {} elements to meet size of sim_log".format(len(rtl_log)))
elif(len(sim_log) == len(rtl_log)):
    print("Both logs are equally sized")
else:
    sim_log=sim_log.truncate(after=len(rtl_log)-1)
    print("Truncating sim_log to {} elements to meet size of rtl_log".format(len(rtl_log)))


# A row counts as a GPR match when sim_log has no gpr entry ("nan") or when
# both logs hold the same "<register>:<value>" string.
matches = len(sim_log[(sim_log.gpr != "nan") & (rtl_log.gpr == sim_log.gpr)])
nan_count = len(sim_log[sim_log.gpr == "nan"])

tot_count = matches + nan_count
gpr_match = (tot_count == len(rtl_log.gpr))
binary_match = rtl_log.binary.eq(sim_log.binary).all()
pc_match = rtl_log.pc.eq(sim_log.pc).all()

if(gpr_match and binary_match and pc_match):
    print("GPR, Binary and PC contents match")
else:
    print("PC match: {}\nBinary match: {}\nGPR match: {} ".format(pc_match,binary_match,gpr_match))

Both logs are equally sized
PC match: True
Binary match: True
GPR match: False 


In [157]:
# Display rows 190-204 (positional slice) of the RTL log.
rtl_log.iloc[190:205]

Unnamed: 0,pc,instr,gpr,csr,binary,mode,instr_str,operand,pad
190,00000000000001bc,addi,a5:0000000a00000800,a5,80078793,3,"addi a5, a5, -2048","a5,a5,-2048",
191,00000000000001c0,csrrw,x0:0000000000000000,x0,30079073,3,"csrrw x0, mstatus, a5","x0,mstatus,a5",
192,00000000000001c4,addi,a5:0000000000000000,a5,4781,3,"addi a5, x0, 0","a5,x0,0",
193,00000000000001c6,csrrw,x0:0000000000000000,x0,30479073,3,"csrrw x0, mie, a5","x0,mie,a5",
194,00000000000001ca,lui,a5:0000000000008000,a5,67a1,3,"lui a5, 0x00008","a5,0x00008,",
195,00000000000001cc,addiw,a5:0000000000008003,a5,278d,3,"addiw a5, a5, 3","a5,a5,3",
196,00000000000001ce,slli,a5:00000002000c0000,a5,07ca,3,"slli a5, a5, 18","a5,a5,18",
197,00000000000001d0,csrrw,x0:0000000000000000,x0,10079073,3,"csrrw x0, sstatus, a5","x0,sstatus,a5",
198,00000000000001d4,addi,a5:0000000000000000,a5,4781,3,"addi a5, x0, 0","a5,x0,0",
199,00000000000001d6,csrrw,x0:0000000000000000,x0,10479073,3,"csrrw x0, sie, a5","x0,sie,a5",


In [79]:
# First five rows where the program counters of the two logs differ,
# printed for the OVPsim log first and then for the RTL log.
pc_mismatch = rtl_log.pc.ne(sim_log.pc)
shown_columns = ['pc','gpr','instr','operand','binary']
print(sim_log.loc[pc_mismatch, shown_columns].head(5))
print("\n")
print(rtl_log.loc[pc_mismatch, shown_columns].head(5))


                   pc                  gpr  instr         operand    binary
201  0000000000002000                  nan  csrrw  s1,mscratch,s1  340494f3
202  0000000000002004  s1:000000000003fcf8    add      s1,tp,zero  000204b3
203  0000000000002008  s1:000000000003fc00   addi      s1,s1,-248  f0848493
204  000000000000200c                  nan     sd         ra,s1,8  0014b423
205  0000000000002010                  nan     sd        sp,s1,16  0024b823


                   pc                  gpr  instr         operand    binary
201  00000000000001de  x0:0000000000000000   addi         x0,x0,1      0004
202  0000000000002000  s1:0000000000000000  csrrw  s1,mscratch,s1  340494f3
203  0000000000002004  s1:000000000003fcf8    add        s1,tp,x0  000204b3
204  0000000000002008  s1:000000000003fc00   addi      s1,s1,-248  f0848493
205  000000000000200c  ra:0000000000000000     sd            ra,,  0014b423


## Why addi x0,x0,1 in rtl_log but not in OVPsim? mret seems to be handled differently?
- Line 194 is different in rtl_log
- riscv_loop_test 18e feb
- repeats for many tests, something misconfigured or a bug?

In [81]:
# PC-mismatch flags of the first five rows (not used further in this cell).
sort = rtl_log.pc.ne(sim_log.pc).head(5)
# Rows 185-209 of both logs, side by side for manual inspection.
context_columns = ['pc','gpr','instr','operand','binary']
print(sim_log.iloc[185:210][context_columns])
print("\n")
print(rtl_log.iloc[185:210][context_columns])

                   pc                  gpr    instr          operand    binary
185  00000000000001ac                  nan      and         a5,a5,t1  0067f7b3
186  00000000000001b0                  nan    csrrs     zero,satp,a5  1807a073
187  00000000000001b4  a5:0000000000a00000      lui         a5,0xa00  00a007b7
188  00000000000001b8  a5:0000000000a00001  c.addiw             a5,1      2785
189  00000000000001ba  a5:0000000a00001000   c.slli           a5,0xc      07b2
190  00000000000001bc  a5:0000000a00000800     addi      a5,a5,-2048  80078793
191  00000000000001c0                  nan    csrrw  zero,mstatus,a5  30079073
192  00000000000001c4  a5:0000000000000000     c.li             a5,0      4781
193  00000000000001c6                  nan    csrrw      zero,mie,a5  30479073
194  00000000000001ca  a5:0000000000008000    c.lui           a5,0x8      67a1
195  00000000000001cc  a5:0000000000008003  c.addiw             a5,3      278d
196  00000000000001ce  a5:00000002000c0000   c.slli 

In [207]:
# Rows 195-202 of both logs around the first divergence.
print(sim_log.iloc[195:203])
print(rtl_log.iloc[195:203])
#different on line 201, why?
#EBREAK DOESN'T WORK

                   pc    instr                  gpr  \
195  00000000000001e2  c.addiw  s3:0000000000008001   
196  00000000000001e4   c.slli  s3:0000000200040000   
197  00000000000001e6    csrrw                  nan   
198  00000000000001ea     c.li  s3:0000000000000000   
199  00000000000001ec    csrrw                  nan   
200  00000000000001f0     mret                  nan   
201  0000000000002000      jal                  nan   
202  0000000000002040    csrrw                  nan   

                                                   csr    binary  mode  \
195                                                NaN      2985     3   
196                                                NaN      09ca     3   
197  sstatus:0000000200040000;mstatus:0000000a00140800  10099073     3   
198                                                NaN      4981     3   
199                                                NaN  10499073     3   
200  mstatus:0000000a00140080;mstatus:0000000a00140...  3020