In [1]:
import json
import pandas as pd
import numpy as np
import re

In [3]:
def hex_to_bin(hex_str):
    """Convert a hexadecimal string to its binary digit string.

    Args:
        hex_str: Text accepted by ``int(hex_str, 16)``; an optional ``0x``
            prefix and a leading ``-`` sign are allowed.

    Returns:
        The binary digits without a ``0b`` prefix and without leading zeros
        (``'0'`` for zero). Negative inputs keep their sign, e.g. ``'-101'``.

    Raises:
        ValueError: If ``hex_str`` is not valid hexadecimal (including ``''``).
    """
    # format(..., 'b') never emits the '0b' prefix, so a leading '-' cannot
    # shift a slice and leave a stray 'b' in the result as bin(...)[2:] did.
    return format(int(hex_str, 16), 'b')

# Major opcode (instruction bits 6..0) -> base instruction format letter for
# the RV32I/RV64I integer opcodes. Anything else is reported as 'Unknown'.
# NOTE(review): the original looked the opcode up with df[df['instruction']],
# which indexes columns by instruction names and raises KeyError; this table
# is the presumed intent of "instr_type" - confirm.
_OPCODE_FORMATS = {
    '0110111': 'U',  # LUI
    '0010111': 'U',  # AUIPC
    '1101111': 'J',  # JAL
    '1100111': 'I',  # JALR
    '1100011': 'B',  # BRANCH
    '0000011': 'I',  # LOAD
    '0100011': 'S',  # STORE
    '0010011': 'I',  # OP-IMM
    '0110011': 'R',  # OP
    '0001111': 'I',  # MISC-MEM
    '1110011': 'I',  # SYSTEM
    '0011011': 'I',  # OP-IMM-32
    '0111011': 'R',  # OP-32
}


def decode_instruction(hex_instr):
    """Split a hexadecimal instruction word into binary text, opcode and format.

    Args:
        hex_instr: Instruction word as a hexadecimal string.

    Returns:
        A tuple ``(bin_instr, opcode, instr_type)`` where ``bin_instr`` is the
        result of ``hex_to_bin(hex_instr)``, ``opcode`` is the 7 least
        significant bits as a string, and ``instr_type`` is the format letter
        from ``_OPCODE_FORMATS`` or ``'Unknown'``.

    Raises:
        ValueError: If ``hex_instr`` is not valid hexadecimal.
    """
    bin_instr = hex_to_bin(hex_instr)
    # hex_to_bin drops leading zeros, so pad before slicing: a word such as
    # 0x13 must yield opcode '0010011', not the 5-character '10011'.
    opcode = bin_instr.zfill(7)[-7:]
    instr_type = _OPCODE_FORMATS.get(opcode, 'Unknown')
    return bin_instr, opcode, instr_type

In [4]:
def _to_binary_digits(literal):
    """Render a decimal or ``0x``-prefixed literal as a binary digit string."""
    base = 16 if literal.startswith(('0x', '-0x')) else 10
    # format(..., 'b') keeps a leading '-' intact, whereas bin(...)[2:] turned
    # '-5' into the unusable 'b101'.
    return format(int(literal, base), 'b')


def parse_range(s: str):
    """Parse a bit constraint written as ``'X..Y=Z'`` or ``'X=Z'``.

    ``Z`` may be decimal or ``0x``-prefixed hexadecimal, optionally negative.

    Args:
        s: The constraint text, e.g. ``'14..12=0'``, ``'6..2=0x04'`` or ``'25=1'``.

    Returns:
        ``[X, Y, bits]`` for the range form or ``[X, bits]`` for the single
        position form, where ``X``/``Y`` are ints and ``bits`` is ``Z`` as a
        binary digit string without leading zeros (sign kept for negative
        values). ``None`` when ``s`` does not match the expected form.
    """
    if '..' in s:
        # Range form: X..Y=Z
        match = re.match(r'(\d+)\.\.(\d+)=(-?0x[\da-fA-F]+|-?\d+)', s)
        if match:
            first, second, literal = match.groups()
            return [int(first), int(second), _to_binary_digits(literal)]
        return None

    # Single position form: X=Z
    match = re.match(r'(\d+)=(-?0x[\da-fA-F]+|-?\d+)', s)
    if match:
        position, literal = match.groups()
        return [int(position), _to_binary_digits(literal)]
    return None

## Build the instruction / opcode / extension DataFrame

In [111]:
json_file = './output_opcodes.json'

# Read the table and flatten every 'bit_ranges' cell into a 1-D array.
df = pd.read_json(json_file)
df = df.assign(bit_ranges=df['bit_ranges'].apply(lambda spec: np.array(spec).ravel()))

# Put the rows with the most bit constraints first, then parse each
# constraint string of every row with parse_range.
df_sorted = df.sort_values(
    by='bit_ranges',
    key=lambda column: column.apply(len),
    ascending=False,
)
df_sorted = df_sorted.assign(
    bit_ranges=df_sorted['bit_ranges'].apply(
        lambda specs: [parse_range(spec) for spec in specs]
    )
)

### Load a hex file

In [112]:
# Hex file whose lines are matched against the instruction table below.
file_path = './tests/memory/000-addi.hex'
with open(file_path, mode='r') as file:
    # Iterating a text file yields the same newline-terminated lines as
    # readlines(); the lines are stripped later, when they are decoded.
    hex_content = list(file)

In [113]:
def _constraint_matches(word, constraint):
    """Return True when integer ``word`` satisfies one parsed bit constraint."""
    if len(constraint) == 3:
        high, low, expected = constraint
        if high < low:
            # Malformed range: there is no field to compare.
            return False
        mask = (1 << (high - low + 1)) - 1
        return ((word >> low) & mask) == int(expected, 2)
    position, expected = constraint
    # Compare as text, like the original single-character check, so an
    # expected value that is not exactly '0' or '1' simply never matches.
    return str((word >> position) & 1) == expected


def getInstructsFromHex(hex_content, instruction_table=None):
    """Find the first matching instruction row for each hexadecimal line.

    Each non-blank line is read as one hexadecimal word and tested against the
    rows of the instruction table in table order; the first row whose bit
    constraints all hold is recorded. Lines that match no row contribute
    nothing to the result.

    Bits are extracted with integer shifts and masks, so bits above the most
    significant set bit read as 0. The earlier string-slicing version dropped
    leading zeros and then skipped every constraint touching those positions,
    so an encoding that requires zero high bits (e.g. ``31..25=0``) could
    never match.

    Args:
        hex_content: Iterable of text lines, one hexadecimal word per line.
            Surrounding whitespace is stripped and blank lines are skipped.
        instruction_table: DataFrame with a ``'bit_ranges'`` column holding,
            per row, a list of ``[high, low, bits]`` / ``[position, bits]``
            constraints (``bits`` being a binary digit string). Defaults to
            the module-level ``df_sorted``.

    Returns:
        A list of the matched table rows (pandas Series), in input order.

    Raises:
        ValueError: If a non-blank line is not valid hexadecimal.
    """
    table = df_sorted if instruction_table is None else instruction_table
    # Materialise the rows once rather than re-iterating the frame per line.
    candidates = [row for _, row in table.iterrows()]

    instructs = []
    for line in hex_content:
        line = line.strip()
        if not line:
            continue
        word = int(line, 16)
        for row in candidates:
            constraints = row['bit_ranges']
            if len(constraints) == 0:
                # A row without constraints cannot identify an instruction.
                continue
            if all(_constraint_matches(word, constraint) for constraint in constraints):
                instructs.append(row)
                break
    return instructs

In [114]:
# Match every line of the loaded hex file against the instruction table.
instructs = getInstructsFromHex(hex_content)
# Show the matched rows as plain text.
print(instructs)

[instruction                                      addi
bit_ranges     [[14, 12, 0], [6, 2, 100], [1, 0, 11]]
extension                                      [rv_i]
Name: 95, dtype: object, instruction                                          sw
bit_ranges     [[14, 12, 10], [6, 2, 1000], [1, 0, 11]]
extension                                        [rv_i]
Name: 94, dtype: object]


In [115]:
# For each matched row take its first extension entry, drop everything up to
# the first underscore (e.g. 'rv_i' -> 'i') and upper-case the remainder.
extensions = [
    row['extension'][0].split("_", 1)[1].upper()
    for row in instructs
]
print(extensions)

['I', 'I']


## Load the profile mapping

In [None]:
# Load the profile table, using its 'extension' column as the index.
perfils = pd.read_csv('./profile_mapping.csv',index_col='extension')
# Append a 'TOTAL' row holding the column-wise sum of the rows read above.
perfils.loc['TOTAL'] = perfils.sum()
# Rename the index labels RV32E -> E and RV32I -> I.
# NOTE(review): RV64I / RV64E are not renamed (an earlier variant also mapped
# 'RV64I':'I','RV64E':'E') - confirm that leaving them out is intended.
perfils = perfils.rename(index={'RV32E':'E','RV32I':'I'})

In [110]:
# Display the profile table: one row per extension label plus the 'TOTAL' row.
perfils

Unnamed: 0_level_0,RVI20,RVA20 \n(64 only) – U,RVA20 \n(64 only) – S,RVA22 \n(64 only) – U,RVA22 \n(64 only) – S,RVI23 – U,RVA23 (64 only) – U,RVA23 (64 only) – S,RVB23 (64 only) – U,RVB23 (64 only) – S
extension,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
A,1,1,1,1,1,1,1,1,1,1
B,0,0,0,1,1,1,1,1,1,1
C,1,1,1,1,1,1,1,1,1,1
D,1,1,1,1,1,1,1,1,1,1
F,1,1,1,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...
Zvksed,0,0,0,0,0,0,0,0,0,0
Zvksg,0,0,0,0,0,1,1,1,1,1
Zvksh,0,0,0,0,0,0,0,0,0,0
Zvkt,0,0,0,0,0,0,0,0,0,0


In [98]:
def fromExtensionsGetProfiles(extensions, profile_table=None):
    """Return the profiles (columns) that include every requested extension.

    Extension names are resolved against the table index by exact match
    first and case-insensitively otherwise, because callers may pass
    upper-cased names (e.g. ``'ZVKSG'``) while the index uses mixed case
    (e.g. ``'Zvksg'``). Repeated names are only looked up once.

    Args:
        extensions: Iterable of extension names.
        profile_table: DataFrame indexed by extension label with one column
            per profile and truthy cells where the profile includes the
            extension. Defaults to the module-level ``perfils``.

    Returns:
        A list of column labels whose cells are truthy for all requested
        extensions. With no extensions, every column is returned.

    Raises:
        KeyError: If an extension is not present in the table index.
    """
    table = perfils if profile_table is None else profile_table

    # Case-folded label -> actual index label; the first occurrence wins.
    labels_by_folded = {}
    for label in table.index:
        labels_by_folded.setdefault(str(label).casefold(), label)

    resolved = []
    for name in extensions:
        label = name if name in table.index else labels_by_folded.get(str(name).casefold())
        if label is None:
            raise KeyError(f"extension {name!r} not found in the profile table")
        if label not in resolved:
            resolved.append(label)

    supported = table.loc[resolved].all(axis=0)
    return supported[supported].index.to_list()

In [109]:
# Compute the matching profiles in this cell: the name used below otherwise
# exists only as a local variable inside fromExtensionsGetProfiles, so the
# lookup fails with NameError on a freshly started kernel.
profiles_with_all_extensions = fromExtensionsGetProfiles(extensions)
# 'TOTAL' row restricted to the profiles that include every extension.
perfils[profiles_with_all_extensions].loc['TOTAL']

RVI20        19
RVI23 – U    72
Name: TOTAL, dtype: int64