In [1]:
import json
import pandas as pd
import numpy as np
import re

In [2]:
## Testing with the first two instructions from the file
#decoded_instructions = []
#for hex_instr in hex_content[:1]:
#    for index, line in df_sorted.iterrows():
#        #print(line['bit_ranges'])
#        #line['bit_ranges']
#    hex_instr = hex_instr.strip()  # Removing newline characters
#    print(hex_to_bin(hex_instr))
#    #decoded_instructions.append(decode_instruction(hex_instr))
#
#decoded_instructions  # Display the decoded results

In [3]:
def hex_to_bin(hex_str):
    return (bin(int(hex_str, 16))[2:])

def decode_instruction(hex_instr):
    bin_instr = hex_to_bin(hex_instr) 
    opcode = bin_instr[-7:]
    instr_type = df[df['instruction']].get(opcode, 'Unknown')
    return bin_instr, opcode, instr_type

In [4]:
def parse_range(s: str):
    # Check if the string is in the 'X..Y=Z' format
    if '..' in s:
        # Use regex to extract X, Y, and Z
        match = re.match(r'(\d+)\.\.(\d+)=(-?0x[\da-fA-F]+|-?\d+)', s)
        if match:
            X = int(match.group(1))
            Y = int(match.group(2))
            Z = match.group(3)
            # Check if Z is hexadecimal, convert to binary if it is
            if Z.startswith('0x') or Z.startswith('-0x'):
                Z = hex_to_bin(Z)
            else:
                Z = bin(int(Z))[2:]  # Convert to binary if not hex
            return [X, Y, Z]
    else:
        # Use regex to extract X and Z in the 'X=Z' format
        match = re.match(r'(\d+)=(-?0x[\da-fA-F]+|-?\d+)', s)
        if match:
            X = int(match.group(1))
            Z = match.group(2)
            # Check if Z is hexadecimal, convert to binary if it is
            if Z.startswith('0x') or Z.startswith('-0x'):
                Z = hex_to_bin(Z)
            else:
                Z = bin(int(Z))[2:]  # Convert to binary if not hex
            return [X, Z]

In [5]:
json_file = './output_opcodes.json'

In [6]:
df = pd.read_json(json_file)
df.head()

Unnamed: 0,instruction,bit_ranges,extension
0,pseudo_op-slli,"[[31..25=0, 14..12=1, 6..2=0x04, 1..0=3]]",[rv32_i]
1,pseudo_op-srli,"[[31..25=0, 14..12=5, 6..2=0x04, 1..0=3]]",[rv32_i]
2,pseudo_op-srai,"[[31..25=32, 14..12=5, 6..2=0x04, 1..0=3]]",[rv32_i]
3,pseudo_op-slli_rv32,"[[31..25=0, 14..12=1, 6..2=0x04, 1..0=3]]",[rv32_i]
4,pseudo_op-srli_rv32,"[[31..25=0, 14..12=5, 6..2=0x04, 1..0=3]]",[rv32_i]


In [7]:
df['bit_ranges'] = df['bit_ranges'].apply(lambda x: np.array(x).ravel())

In [8]:
df[df['instruction'] == 'addi']

Unnamed: 0,instruction,bit_ranges,extension
95,addi,"[14..12=0, 6..2=0x04, 1..0=3]",[rv_i]


In [9]:
df_sorted = df.sort_values(by='bit_ranges',key = lambda x: x.apply(len), ascending= False)

In [10]:
df_sorted['bit_ranges'] = df_sorted['bit_ranges'].apply(lambda x: [parse_range(y) for y in x])
df_sorted.head(5)

Unnamed: 0,instruction,bit_ranges,extension
7,pseudo_op-unzip,"[[31, 25, 100], [24, 20, 1111], [14, 12, 101],...","[rv32_zbkb, rv32_zk, rv32_zkn, rv32_zks]"
6,pseudo_op-zip,"[[31, 25, 100], [24, 20, 1111], [14, 12, 1], [...","[rv32_zbkb, rv32_zk, rv32_zkn, rv32_zks]"
176,pseudo_op-brev8,"[[31, 20, 11010000111], [14, 12, 101], [6, 2, ...","[rv_zbkb, rv_zk, rv_zkn, rv_zks]"
48,pseudo_op-rev8,"[[31, 20, 11010111000], [14, 12, 101], [6, 0, ...","[rv64_zbb, rv64_zbkb, rv64_zk, rv64_zkn, rv64_..."
112,pseudo_op-pause,"[[27, 24, 1], [23, 20, 0], [19, 15, 0], [14, 1...",[rv_i]


In [11]:
df_sorted[df_sorted['instruction'] == 'addi']

Unnamed: 0,instruction,bit_ranges,extension
95,addi,"[[14, 12, 0], [6, 2, 100], [1, 0, 11]]",[rv_i]


In [12]:
file_path = './tests/memory/000-addi.hex'
with open(file_path, 'r') as file:
    hex_content = file.readlines()

In [13]:
df_sorted[df_sorted['instruction'] == 'addi']['bit_ranges'].to_list()[0][-1:]

[[1, 0, '11']]

In [14]:
hex_content[0].strip()

'00500593'

In [15]:
df_sorted[df_sorted['instruction'] == 'sw']

Unnamed: 0,instruction,bit_ranges,extension
94,sw,"[[14, 12, 10], [6, 2, 1000], [1, 0, 11]]",[rv_i]


In [16]:
df_sorted.head()

Unnamed: 0,instruction,bit_ranges,extension
7,pseudo_op-unzip,"[[31, 25, 100], [24, 20, 1111], [14, 12, 101],...","[rv32_zbkb, rv32_zk, rv32_zkn, rv32_zks]"
6,pseudo_op-zip,"[[31, 25, 100], [24, 20, 1111], [14, 12, 1], [...","[rv32_zbkb, rv32_zk, rv32_zkn, rv32_zks]"
176,pseudo_op-brev8,"[[31, 20, 11010000111], [14, 12, 101], [6, 2, ...","[rv_zbkb, rv_zk, rv_zkn, rv_zks]"
48,pseudo_op-rev8,"[[31, 20, 11010111000], [14, 12, 101], [6, 0, ...","[rv64_zbb, rv64_zbkb, rv64_zk, rv64_zkn, rv64_..."
112,pseudo_op-pause,"[[27, 24, 1], [23, 20, 0], [19, 15, 0], [14, 1...",[rv_i]


In [17]:
file_path = './tests/memory/001-add.hex'
with open(file_path, 'r') as file:
    hex_content = file.readlines()

In [30]:
instructs = []
for line in hex_content:
    line = line.strip()
    line = hex_to_bin(line)[::-1]
    for index, row in df_sorted.iterrows():
        #print(f"Instruction:{row['instruction']}")
        count = len(row['bit_ranges'])
        #print(row)
        #print(count)
        for rg in row['bit_ranges']:
            if len(rg) == 3:
                end = rg[0]
                start = rg[1]
                value = rg[2]
                inter = line[start:end+1]
                #print(f'start {start} end {end}: {value} - {inter}')
                if (start > len(line)) or (end > len(line)):
                    continue
                if int(inter[::-1],2) != int(value,2):
                    continue
                count = count-1
            else:
                position = rg[0]
                value = rg[1]
                if (position >= len(line)):
                    continue
                if line[position] != value:
                    continue
                count = count-1
            if count == 0:
                instructs.append(row)
                break
        if count == 0:
            break
print(instructs)

[instruction                                      addi
bit_ranges     [[14, 12, 0], [6, 2, 100], [1, 0, 11]]
extension                                      [rv_i]
Name: 95, dtype: object, instruction            fence
bit_ranges     [[14, 12, 0]]
extension             [rv_i]
Name: 111, dtype: object, instruction                                          sw
bit_ranges     [[14, 12, 10], [6, 2, 1000], [1, 0, 11]]
extension                                        [rv_i]
Name: 94, dtype: object]


In [37]:
extenss = [i['extension'][0].split("_", 1)[1].upper() for i in instructs]

In [38]:
extenss

['I', 'I', 'I']

In [20]:
line = hex_content[0].strip()
line = hex_to_bin(line)
line = line[::-1]
print(line)
for rg in df_sorted[df_sorted['instruction'] == 'addi']['bit_ranges'].to_list()[0]:
    if len(rg) == 3:
        end = rg[0]
        start = rg[1]
        value = rg[2]
        print(f"{start} {end}: {value}")        
        inter = line[start:end+1]
        print(inter)
        if int(inter[::-1],2) == int(value,2):
            print("YES")
        
    else:
        position = rg[0]
        value = rg[1]
        print('no')

11001001101000000000101
12 14: 0
000
YES
2 6: 100
00100
YES
0 1: 11
11
YES


In [55]:
perfils = pd.read_csv('./profile_mapping.csv',index_col='extension')

In [56]:
perfils.head()

Unnamed: 0_level_0,RVI20,RVA20 \n(64 only) – U,RVA20 \n(64 only) – S,RVA22 \n(64 only) – U,RVA22 \n(64 only) – S,RVI23 – U,RVA23 (64 only) – U,RVA23 (64 only) – S,RVB23 (64 only) – U,RVB23 (64 only) – S
extension,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
A,True,True,True,True,True,True,True,True,True,True
B,False,False,False,True,True,True,True,True,True,True
C,True,True,True,True,True,True,True,True,True,True
D,True,True,True,True,True,True,True,True,True,True
F,True,True,True,True,True,True,True,True,True,True


In [61]:
perfils.index.to_list()

['A',
 'B',
 'C',
 'D',
 'F',
 'H',
 'M',
 'N',
 'Q',
 'P',
 'RERI',
 'RV32E',
 'RV32I',
 'RV64E',
 'RV64I',
 'S',
 'Sddbltrp',
 'Sdext',
 'Sdtrig',
 'Shcounterenw',
 'Shgatpa',
 'Shtvala',
 'Shvsatpa',
 'Shvstvala',
 'Shvstvecd',
 'Sm1p11',
 'Sm1p12',
 'Sm1p13',
 'Smaia',
 'Smclic',
 'Smcdeleg',
 'Smcntrpmf',
 'Smcsrind',
 'Smdbltrp',
 'Smepmp',
 'Smmpm',
 'Smrntt',
 'Smnpm',
 'Smpmpmt',
 'Smrnmi',
 'Smstateen',
 'Smctr',
 'Ss1p11',
 'Ss1p12',
 'Ss1p13',
 'Ssaia',
 'Ssccfg',
 'Ssccptr',
 'Sscntrcfg',
 'Sscofpmf',
 'Sscounterenw',
 'Sscsrind',
 'Ssctr',
 'Ssdbltrp',
 'Ssdtso',
 'Sshpmcfg',
 'Ssnpm',
 'Ssptead',
 'Sspm',
 'Ssqosid',
 'Ssstateeen',
 'Ssstrict',
 'Sstc',
 'Sstvala',
 'Sstvecd',
 'Sstvecv',
 'Ssu32xl',
 'Ssu64xl',
 'Ssube',
 'Supm',
 'Sv32',
 'Sv39',
 'Sv48',
 'Sv57',
 'Svade',
 'Svadu',
 'Svbare',
 'Svinval',
 'Svnapot',
 'Svpbmt',
 'Svukte',
 'Svvptc',
 'U',
 'V',
 'Za128rs',
 'Za64rs',
 'Zaamo',
 'Zabha',
 'Zacas',
 'Zalrsc',
 'Zama16b',
 'Zawrs',
 'Zba',
 'Zbb',
 'Zbc'

In [None]:
profiles_with_all_extensions = filtered_df.all(axis=0)  # Check columns with all True for required extensions
matching_profiles = profiles_with_all_extensions[profiles_with_all_extensions].index