In [1]:
import fitz  # type: ignore
import re
import csv
from concurrent.futures import ThreadPoolExecutor
from colorama import init, Fore, Style # type: ignore
# Both regexes are built a single time at import so every helper reuses them.
# Hex address token: a literal "0x" followed by one or more hex digits.
address_pattern = re.compile(r'0x[0-9A-Fa-f]+', flags=re.IGNORECASE)
# Register-like token: a letter followed by at least one letter, digit or
# underscore, bounded by word breaks. Case is ignored, so lowercase words match too.
register_pattern = re.compile(r'\b[A-Z][A-Z0-9_]+\b', flags=re.IGNORECASE)

def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF as a single string.

    Args:
        pdf_path: Path handed to ``fitz.open``.

    Returns:
        The result of ``page.get_text()`` for each page, concatenated in
        page order.
    """
    doc = fitz.open(pdf_path)
    try:
        # Join once rather than growing a string page by page.
        return "".join(page.get_text() for page in doc)
    finally:
        # Release the document even when extracting a page raises.
        doc.close()

def search_address_positions(text):
    """Find every hex address in the text together with where it starts.

    Args:
        text: Text to scan with ``address_pattern``.

    Returns:
        A tuple ``(addresses, positions)`` of two parallel lists: the matched
        address strings, and the index in ``text`` at which each one begins.
    """
    addresses = []
    positions = []
    # A single scan fills both lists, so they stay aligned index by index.
    for found in address_pattern.finditer(text):
        addresses.append(found.group())
        positions.append(found.start())
    return addresses, positions

def extract_register_names_from_context(context):
    """Return every register-like token found in a context snippet, in order."""
    return [hit.group() for hit in register_pattern.finditer(context)]

def get_register_names_from_address(text, address):
    """Collect candidate register names that appear near an address in the text.

    Every case-insensitive occurrence of ``address`` is located, and the
    register-like tokens in the 25 characters either side of where it starts
    are gathered.

    Args:
        text: Text to search.
        address: Address literal to look for, e.g. ``'0x24'``.

    Returns:
        Candidate names in order of discovery; duplicates are possible.
        Empty when ``address`` is empty or never occurs in ``text``.
    """
    if not address:
        # An empty pattern would match at every position in the text.
        return []

    # The lookahead rejects hits that are only the prefix of a longer hex
    # number, so looking for 0x24 does not also pick up 0x240 or 0x24F.
    occurrence_pattern = re.compile(
        re.escape(address) + r'(?![0-9A-Fa-f])', re.IGNORECASE
    )

    register_names_list = []
    for occurrence in occurrence_pattern.finditer(text):
        context = get_context_around_address(text, occurrence.start(), context_chars=25)
        register_names_list.extend(extract_register_names_from_context(context))

    return register_names_list

def get_context_around_address(text, address_position, context_chars=25):
    """Slice out the text surrounding a position.

    The window runs from ``context_chars`` characters before
    ``address_position`` to ``context_chars`` characters after it, clamped to
    the bounds of ``text``.
    """
    window_start = address_position - context_chars
    if window_start < 0:
        window_start = 0

    window_end = address_position + context_chars
    text_length = len(text)
    if window_end > text_length:
        window_end = text_length

    return text[window_start:window_end]

def address_from_register_name(text, register_name):
    """Retrieve the address that appears next to a register name in the text.

    Each whole-word, case-insensitive occurrence of ``register_name`` is
    visited in order. The 25 characters either side of where the name starts
    are scanned for a hex address, and the first one found is returned.

    Args:
        text: Text to search.
        register_name: Register name to look up.

    Returns:
        The address string as it is written in ``text``, or ``None`` when the
        name is empty, never occurs, or has no address inside its window.
    """
    if not register_name:
        return None

    register_name_pattern = re.compile(rf'\b{re.escape(register_name)}\b', re.IGNORECASE)
    context_chars = 25
    text_length = len(text)
    hex_digits = "0123456789abcdefABCDEF"

    for name_match in register_name_pattern.finditer(text):
        window_start = max(name_match.start() - context_chars, 0)
        window_end = min(name_match.start() + context_chars, text_length)

        # Scan only the window instead of collecting every address in the
        # document and substring-testing each one against the window.
        for addr_match in address_pattern.finditer(text, window_start, window_end):
            # An address that runs up to the window edge while the text
            # continues with more hex digits was cut short; do not report it.
            cut_off = (
                addr_match.end() == window_end
                and window_end < text_length
                and text[window_end] in hex_digits
            )
            if not cut_off:
                return addr_match.group()

    return None

def verify_register_names_for_address(text, address):
    """Return the candidate register names for an address, confirmed ones first.

    Candidates are the register-like names found near occurrences of
    ``address`` in ``text``. A candidate is confirmed when looking its name up
    in the text leads back to the same address (compared case-insensitively).
    Unconfirmed candidates are still returned, after the confirmed ones, so
    callers that filter the list further keep seeing every candidate.

    Args:
        text: Text to search.
        address: Address literal to check, e.g. ``'0x24'``.

    Returns:
        A list of unique names: confirmed names in discovery order, followed
        by unconfirmed names in discovery order.
    """
    # dict.fromkeys removes duplicates while keeping discovery order, which
    # makes the result stable from run to run (set iteration order is not).
    candidates = dict.fromkeys(get_register_names_from_address(text, address))
    wanted_address = address.lower()

    confirmed = []
    unconfirmed = []
    for reg_name in candidates:
        register_address = address_from_register_name(text, reg_name)
        if register_address is not None and register_address.lower() == wanted_address:
            confirmed.append(reg_name)
        else:
            unconfirmed.append(reg_name)

    return confirmed + unconfirmed

def load_register_names_from_csv(csv_path):
    """Read the ``RegisterName`` column of a CSV file into a set of names."""
    with open(csv_path, newline='') as handle:
        return {record['RegisterName'] for record in csv.DictReader(handle)}

def filter_actual_registers(suspected_matches, actual_register_names):
    """Keep only the suspected names that are known register names.

    Order and duplicates of ``suspected_matches`` are preserved.
    """
    confirmed = []
    for candidate in suspected_matches:
        if candidate not in actual_register_names:
            continue
        confirmed.append(candidate)
    return confirmed


def print_match_details(microcontroller, csv_file, actual_matches, address_to_check):
    """Print the CSV details of the matching registers of one microcontroller.

    Reads ``csv_file`` and, for every row whose ``RegisterName`` is one of
    ``actual_matches`` and whose ``MCU`` equals ``microcontroller``
    (case-insensitively), prints the row's fields followed by a separator.

    Args:
        microcontroller: MCU name to select rows for.
        csv_file: Path of a CSV file with the columns ``RegisterName``,
            ``MCU``, ``RegisterAddress``, ``ModuleName``, ``RegisterCaption``,
            ``Bitfields`` and ``Values``.
        actual_matches: Register names to report. A single name given as a
            plain string is treated as a one-element collection.
        address_to_check: Address being looked up; printed next to the CSV
            address when the two differ.
    """
    # With a bare string, ``in`` would be a substring test ("DDR" in "DDRB"),
    # so a lone name is wrapped before the membership checks below.
    if isinstance(actual_matches, str):
        actual_matches = [actual_matches]
    wanted_names = set(actual_matches)
    wanted_mcu = microcontroller.lower()

    with open(csv_file, newline='') as file:
        reader = csv.DictReader(file)

        for row in reader:
            register_name = row['RegisterName']
            mcu_name = row['MCU'].lower()

            # Only rows for a requested register on the requested MCU are printed.
            if register_name not in wanted_names or mcu_name != wanted_mcu:
                continue

            register_address = row['RegisterAddress'].strip()

            print(Fore.GREEN + f"MCU: {row['MCU']}")
            print(Fore.GREEN + f"Module: {row['ModuleName']}")
            print(Fore.GREEN + f"Register: {row['RegisterName']}")
            print(Fore.GREEN + f"Caption: {row['RegisterCaption']}")
            # Show one address when both agree, otherwise show both.
            if address_to_check == register_address:
                print(Fore.GREEN + f"Address: {register_address}")
            else:
                print(Fore.GREEN + f"Addresses: {address_to_check}, {register_address}")
            print(Fore.GREEN + f"Bitfields: {row['Bitfields']}")
            print(Fore.GREEN + f"Values: {row['Values']}")
            print(Fore.RED + "*" * 80)  # Separator for readability



# Extract the PDF text once; every lookup below works on this string.
pdf_path = 'C:/Users/Yanni/Desktop/328pFULL.PDF'
text = extract_text_from_pdf(pdf_path)
csv_file = "C:/Users/Yanni/Desktop/Logics Project/DETAILED_AVR.csv"

# Collect the register names that appear near the address in the PDF text.
address_to_check = '0x24'  # Replace with the address you want to check
suspected_matches = verify_register_names_for_address(text, address_to_check)

# Load the known register names from CSV.
csv_path = 'C:/Users/Yanni/Desktop/Logics Project/ALL_AVR_REGISTERS.csv'
microcontroller = "atmega328p"
actual_register_names = load_register_names_from_csv(csv_path)

# Drop candidates that are not real register names.
actual_matches = filter_actual_registers(suspected_matches, actual_register_names)

print(Fore.BLUE + f"Possible Register(s) For address {address_to_check}:\n")
for match in actual_matches:
    # Pass a one-element list: the callee tests names with ``in``, and a bare
    # string would also accept any register name that is a substring of it.
    print_match_details(microcontroller, csv_file, [match], address_to_check)
    


[34mPossible Register(s) For address 0x24:

[32mMCU: ATmega328P
[32mModule: TC8
[32mRegister: TCCR0A
[32mCaption: Timer/Counter  Control Register A
[32mAddresses: 0x24, 0x44
[32mBitfields: Bit 7: COM0A - Compare Output Mode, Phase Correct PWM Mode; Bit 5: COM0B - Compare Output Mode, Fast PWm; Bit 1: WGM0 - Waveform Generation Mode
[32mValues: No Values
[31m********************************************************************************
[32mMCU: ATmega328P
[32mModule: PORT
[32mRegister: DDRB
[32mCaption: Port B Data Direction Register
[32mAddress: 0x24
[32mBitfields: 
[32mValues: No Values
[31m********************************************************************************
