In [8]:
import re
import os
import json

In [3]:
def extract_opcodes(file_path):
    """
    Extracts opcodes, bit ranges, and corresponding instruction names from the given file,
    handling exceptions for lines starting with $pseudo_op.
    
    :param file_path: Path to the input file containing RISC-V opcodes and instructions.
    :return: A list of tuples with (instruction_name, bit_range, opcode, extension).
    """
    opcode_data = []

    # Regular expression to extract instruction names, bit ranges, and opcodes
    pattern = re.compile(r'(?P<instruction>\w+\.\w+|\w+)\s+.*?(?P<bit_range>\d+\.\.\d+)\s*=\s*(?P<opcode>0x[0-9A-Fa-f]+|\d+)')

    # Regular expression to match $pseudo_op lines
    pseudo_op_pattern = re.compile(r'\$pseudo_op\s+(?P<extension>[\w.]+)::(?P<base_instruction>\w+)\s+(?P<instruction>\w+)\s+(?P<args>.*)')

    # Open the file and read line by line
    with open(file_path, 'r') as file:
        for line in file:
            # Check for $pseudo_op exception
            if line.startswith('$pseudo_op'):
                # Extract extension, base instruction, and instruction name
                pseudo_match = pseudo_op_pattern.match(line)
                if pseudo_match:
                    instruction_name = pseudo_match.group('instruction')
                    extension = pseudo_match.group('extension')

                    # Normally pseudo-op instructions do not have opcodes, so store with a special mark
                    opcode_data.append((instruction_name, "pseudo-op", "N/A", extension))
                continue  # Skip further processing of this line

            # Split the line into instruction name and relevant data
            parts = line.split()
            if len(parts) < 2:
                continue  # Skip lines that don't contain enough data
            
            # Extract the instruction name (first part)
            instruction_name = parts[0]

            # Find all matches of bit ranges and opcodes in the line
            matches = pattern.findall(line)

            for match in matches:
                bit_range, opcode = match[1], match[2]
                opcode_data.append((instruction_name, bit_range, opcode, "N/A"))

    return opcode_data

In [4]:
def display_opcodes(opcode_data):
    """
    Displays the extracted opcodes in a readable format.
    
    :param opcode_data: List of tuples with (instruction_name, bit_range, opcode)
    """
    print(f"{'Instruction':<12} {'Bit Range':<10} {'Opcode':<10}")
    print('-' * 35)
    for instruction, bit_range, opcode, extension in opcode_data:
        print(f"{instruction:<12} {bit_range:<10} {opcode:<10} {extension}")

In [5]:
folder_path = './opcodes/'

# Initialize a list to store all opcode data
all_opcode_data = []

# Iterate through all files in the folder
for filename in os.listdir(folder_path):
    file_path = os.path.join(folder_path, filename)
    
    # Check if the current item is a file (ignore directories)
    if os.path.isfile(file_path):
        # Extract the part of the filename after 'rv'
        suffix = filename.split('rv', 1)[-1]
        
        # Extract opcodes and their corresponding instructions and bit ranges from the file
        opcode_data = extract_opcodes(file_path)
        
        # Modify each tuple to add the extracted suffix from the filename as a new column
        opcode_data_with_suffix = [(opcode[0], opcode[1], opcode[2], suffix) for opcode in opcode_data]
        
        # Append the modified opcode data to the combined list
        all_opcode_data.extend(opcode_data_with_suffix)

# Display the combined opcodes
display_opcodes(all_opcode_data)
#print(all_opcode_data)


Instruction  Bit Range  Opcode    
-----------------------------------
c.jal        1..0       1          32_c
c.flw        1..0       0          32_c_f
c.fsw        1..0       0          32_c_f
c.flwsp      1..0       2          32_c_f
c.fswsp      1..0       2          32_c_f
fmvh.x.d     24..20     1          32_d_zfa
fmvh.x.d     14..12     0          32_d_zfa
fmvh.x.d     6..2       0x14       32_d_zfa
fmvp.d.x     31..27     0x16       32_d_zfa
fmvp.d.x     26..25     1          32_d_zfa
fmvp.d.x     1..0       3          32_d_zfa
slli         pseudo-op  N/A        32_i
srli         pseudo-op  N/A        32_i
srai         pseudo-op  N/A        32_i
slli_rv32    pseudo-op  N/A        32_i
srli_rv32    pseudo-op  N/A        32_i
srai_rv32    pseudo-op  N/A        32_i
zip          pseudo-op  N/A        32_zbkb
unzip        pseudo-op  N/A        32_zbkb
rdcycleh     pseudo-op  N/A        32_zicntr
rdtimeh      pseudo-op  N/A        32_zicntr
rdinstreth   pseudo-op  N/A        32_zic

In [9]:
def save_to_json(opcode_data, json_file_path):
    """
    Saves the extracted opcode data to a JSON file.
    
    :param opcode_data: List of dictionaries containing opcode information.
    :param json_file_path: Path to the JSON file where data will be saved.
    """
    with open(json_file_path, 'w') as json_file:
        json.dump(opcode_data, json_file, indent=4)

# Path to save the JSON file (replace with your desired file path)
json_file_path = 'output_opcodes.json'

# Save the extracted opcode data to a JSON file
save_to_json(all_opcode_data, json_file_path)

# Optional: Print confirmation message
print(f"Opcode data successfully saved to {json_file_path}")

Opcode data successfully saved to output_opcodes.json
