# RISC-V Instruction Analyzer

This notebook analyzes RISC-V instructions from hex files and determines compatible profiles based on instruction extensions.
It provides functionality for:
- Converting hex instructions to binary
- Decoding instructions and extracting opcodes
- Matching bit patterns to identify instructions
- Finding compatible RISC-V profiles

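## Code Organization:
1. Setup and Imports - Libraries used throughout the notebook
2. Core Utilities - Basic conversion and helper functions
3. Data Loading and Preprocessing - Loading opcode definitions and parsing their bit ranges
4. Instruction Analysis - Functions for parsing and analyzing instructions
5. Profile Analysis - Functions for determining compatible profiles
6. Example Usage - End-to-end run on a sample hex file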

## 1. Setup and Imports

In [1]:
# Required imports
import json          # For reading opcode definitions
import pandas as pd  # For data manipulation
import numpy as np   # For array operations
import re           # For pattern matching

## 2. Core Utilities

In [4]:
def hex_to_bin(hex_str: str) -> str:
    """Convert a hexadecimal string to a binary string of at least 32 bits.

    Surrounding whitespace and any '0x' prefix are removed before conversion.
    Non-negative values are left-padded with zeros to 32 characters; values
    wider than 32 bits are returned unpadded and untruncated. Negative values
    (e.g. '-0x1') are encoded as 32-bit two's complement so the result always
    consists only of '0' and '1' characters.

    Args:
        hex_str (str): Hexadecimal string to convert, with or without '0x'

    Returns:
        str: Binary representation padded to 32 bits; 32 zeros when the
            input is empty after stripping

    Raises:
        ValueError: If the remaining text is not valid hexadecimal
    """
    digits = hex_str.strip().replace('0x', '')
    if not digits:
        return '0' * 32

    value = int(digits, 16)
    if value < 0:
        # bin() of a negative number yields '-0b...', which is not a usable bit
        # string; wrap into the 32-bit two's-complement encoding instead.
        value &= 0xFFFFFFFF

    # zfill pads short results up to the 32-bit RISC-V word and never truncates
    return format(value, 'b').zfill(32)

def parse_range(s: str) -> list:
    """Split a bit-constraint string into its positions and a binary value.

    Two spellings are recognised, chosen by whether the text contains '..':
    'X..Y=Z' (a span of bits) and 'X=Z' (a single bit). Z may be decimal or
    '0x'-prefixed hexadecimal, optionally with a leading '-'.

    Args:
        s (str): Constraint string to parse

    Returns:
        list: [X, Y, Z] for the span form or [X, Z] for the single-bit form,
              where X and Y are ints in the order they were written and Z is
              a binary string (from hex_to_bin for hexadecimal values,
              otherwise from bin() with its two-character prefix removed).
              None is returned when the text does not match the expected form.
    """
    if '..' in s:
        found = re.match(r'(\d+)\.\.(\d+)=(-?0x[\da-fA-F]+|-?\d+)', s)
    else:
        found = re.match(r'(\d+)=(-?0x[\da-fA-F]+|-?\d+)', s)

    if found is None:
        return None

    # Every captured group except the last is a bit position; the last is the value
    *position_texts, raw_value = found.groups()
    parsed = [int(text) for text in position_texts]

    if raw_value.startswith(('0x', '-0x')):
        bits = hex_to_bin(raw_value)
    else:
        bits = bin(int(raw_value))[2:]

    parsed.append(bits)
    return parsed

## 3. Data Loading and Preprocessing

In [5]:
# Opcode definitions are read from this JSON file
json_file = './data/output_opcodes.json'

# Read the table and collapse each bit_ranges entry into a flat 1-D array
df = pd.read_json(json_file).assign(
    bit_ranges=lambda frame: frame['bit_ranges'].apply(
        lambda nested: np.array(nested).ravel()
    )
)

# Order rows so that entries with the most bit ranges come first, then turn
# every range string into the structured form returned by parse_range
df_sorted = df.sort_values(
    by='bit_ranges',
    key=lambda column: column.apply(len),
    ascending=False,
).assign(
    bit_ranges=lambda frame: frame['bit_ranges'].apply(
        lambda specs: [parse_range(spec) for spec in specs]
    )
)

## 4. Instruction Analysis

In [12]:
def _range_matches(bits: str, rg) -> bool:
    """Check one parsed bit constraint against an LSB-first bit string.

    Args:
        bits (str): Instruction bits with the least-significant bit at index 0
        rg: ``[X, Y, value]`` (bit span) or ``[position, value]`` (single bit)
            as produced by ``parse_range``; ``None`` for an unparseable entry

    Returns:
        bool: True only when the constraint lies inside ``bits`` and the
            selected bits equal the expected value
    """
    if rg is None:
        # An unparseable constraint cannot be verified, so it never matches
        return False

    if len(rg) == 3:
        first, second, expected = rg
        # Accept the bounds in either order (e.g. 14..12 or 12..14)
        low, high = min(first, second), max(first, second)
        # Validate before slicing: an empty slice would make int('', 2) raise
        if high >= len(bits):
            return False
        field = bits[low:high + 1][::-1]  # back to MSB-first for int()
        return int(field, 2) == int(expected, 2)

    position, expected = rg
    if position >= len(bits):
        return False
    # Compare numerically: the expected value may be zero-padded (hex source)
    return int(bits[position], 2) == int(expected, 2)


def getInstructsFromHex(hex_content: list, opcode_table=None) -> list:
    """Decode each hex line into the first opcode-table row it satisfies.

    Every non-blank line is converted with ``hex_to_bin`` and reversed so that
    string index ``i`` corresponds to bit ``i``. Rows of the opcode table are
    tried in table order, and the first row whose ``bit_ranges`` constraints
    all hold is recorded for that line.

    Args:
        hex_content (list): Lines of text, one hexadecimal word per line.
            Blank lines are skipped instead of being decoded as zero.
        opcode_table: Optional DataFrame with a ``bit_ranges`` column holding
            lists of parsed constraints. Defaults to the module-level
            ``df_sorted`` when omitted.

    Returns:
        list: The matching table rows (pandas Series) in input order. Lines
            that match no row contribute nothing to the result.
    """
    if opcode_table is None:
        opcode_table = df_sorted

    instructs = []
    for line in hex_content:
        line = line.strip()
        if not line:
            # A blank line is not an instruction; decoding it as 0x00000000
            # could produce a spurious match
            continue

        bits = hex_to_bin(line)[::-1]
        for _, row in opcode_table.iterrows():
            constraints = row['bit_ranges']
            if len(constraints) == 0:
                # A row without constraints identifies nothing; keep searching
                continue
            # all() stops at the first failing constraint
            if all(_range_matches(bits, rg) for rg in constraints):
                instructs.append(row)
                break
    return instructs

## 5. Profile Analysis

In [26]:
def fromExtensionsGetProfiles(extensions: list, profiles_df) -> list:
    """Return the profiles that are marked for every requested extension.

    Args:
        extensions (list): Row labels (extension names) that must all be
            supported; repeated names are allowed
        profiles_df: Table indexed by extension name with one column per
            profile

    Returns:
        list: Column labels (profile names) whose entries are truthy in every
            selected row

    Raises:
        KeyError: If an extension is not present in the table's index
    """
    # Keep only the rows for the requested extensions
    required_rows = profiles_df.loc[extensions]

    # Collapse down the rows: one flag per profile column
    supports_all = required_rows.all(axis=0)

    # Select the profiles whose flag compares equal to True
    is_match = supports_all == True
    return is_match[is_match].index.to_list()

## 6. Example Usage

In [24]:
# Load the extension-to-profile table; the CSV's 'extension' column becomes the
# row index, leaving one column per profile.
# (presumably each cell is truthy when the profile includes that extension —
# confirm against the CSV)
perfils = pd.read_csv('./data/csv/profile_mapping.csv',index_col='extension')
# Append a 'TOTAL' row holding each column's sum. It is computed here, before
# the merged 'I'/'E' rows exist, so it still counts the four rows dropped below.
perfils.loc['TOTAL'] = perfils.sum()
# Earlier rename-based attempt, kept for reference (not executed):
#perfils = perfils.rename(index={'RV32E':'E','RV32I':'I'}) #,'RV64I':'I','RV64E':'E'
# Combine RV64I and RV32I into I, RV64E and RV32E into E using logical OR
# (any() reduces over the two selected rows, giving one flag per profile column).
# NOTE(review): the 'rv34_*' labels look like a possible typo for 'rv32_*' —
# confirm against the CSV's index before changing; .loc raises KeyError on a
# label that is not present.
perfils.loc['I'] = perfils.loc[['rv64_i', 'rv34_i']].any()
perfils.loc['E'] = perfils.loc[['rv64_e', 'rv34_e']].any()
# Drop the original rows now that they are represented by 'I' and 'E'
perfils = perfils.drop(['rv64_i', 'rv34_i', 'rv64_e', 'rv34_e'])

In [28]:
# Read the sample program, keeping one list entry per line of the file
test_file = './data/tests/memory/003-and.hex'
with open(test_file, 'r') as file:
    hex_content = list(file)

# Decode each line into the opcode-table row it matches
instructions = getInstructsFromHex(hex_content)
print("Found instructions:")
print(instructions)

# Take the first extension tag of every match, drop everything up to the
# first underscore and upper-case the remainder (e.g. rv_i -> I)
extensions = [
    tag.split("_", 1)[1].upper()
    for tag in (instr['extension'][0] for instr in instructions)
]
print("\nRequired extensions:")
print(extensions)

# Look up the profiles that cover every required extension and list them by
# their 'TOTAL' row value in ascending order
compatible_profiles = fromExtensionsGetProfiles(extensions, perfils)
print("\nCompatible profiles sorted by size (smallest first):")
print(perfils[compatible_profiles].loc['TOTAL'].sort_values())

Found instructions:
[instruction                                                 addi
bit_ranges     [[14, 12, 0], [6, 2, 0000000000000000000000000...
extension                                                 [rv_i]
Name: 95, dtype: object, instruction                                                 addi
bit_ranges     [[14, 12, 0], [6, 2, 0000000000000000000000000...
extension                                                 [rv_i]
Name: 95, dtype: object, instruction                                                  and
bit_ranges     [[31, 25, 0], [14, 12, 111], [6, 2, 0000000000...
extension                                                 [rv_i]
Name: 110, dtype: object, instruction                                                   sw
bit_ranges     [[14, 12, 10], [6, 2, 000000000000000000000000...
extension                                                 [rv_i]
Name: 94, dtype: object]

Required extensions:
['I', 'I', 'I', 'I']

Compatible profiles sorted by size (smallest first):
R