In [3]:
import csv

def _column_letters_to_index(col_letter):
    """
    Converts a spreadsheet-style column label to a 0-based column index.

    'A' -> 0, 'Z' -> 25, 'AA' -> 26, 'AB' -> 27, and so on (case-insensitive).

    Args:
        col_letter (str): The column label, made up only of the letters A-Z.

    Returns:
        int: The 0-based column index, or None if the label is not a non-empty
            string of ASCII letters.
    """
    if not isinstance(col_letter, str) or not col_letter:
        return None
    index = 0
    for char in col_letter:
        # Check the raw character (not its upper-cased form) so that characters
        # whose upper-case expands to ASCII letters (e.g. 'ß' -> 'SS') are rejected.
        if not ('A' <= char <= 'Z' or 'a' <= char <= 'z'):
            return None
        # Bijective base-26: every letter is a "digit" worth 1..26.
        index = index * 26 + (ord(char.upper()) - ord('A') + 1)
    return index - 1


def get_cell_byte_value(file_path, row_num, col_letter):
    """
    Reads a CSV file and returns the UTF-8 byte value of a specific cell.

    The file is read as UTF-8 text and parsed with csv.reader; the selected
    cell's text is then encoded back to UTF-8. Every failure is reported with
    a printed message and signalled by returning None; no exception escapes.

    Args:
        file_path (str): The path to the CSV file.
        row_num (int): The row number of the cell (1-based).
        col_letter (str): The spreadsheet-style column label of the cell
            (e.g., 'A', 'B', 'Z', 'AA'); case-insensitive.

    Returns:
        bytes: The byte value of the cell, or None if the column label is
            invalid, the cell is not found, or there's an error reading the file.
    """
    col_index = _column_letters_to_index(col_letter)
    if col_index is None:
        # Without this check a non-letter label would yield a negative index
        # and silently select a cell counted from the end of the row.
        print(f"Error: Invalid column letter '{col_letter}'.")
        return None

    try:
        # Rows are 1-based, so a row number below 1 can never match; report it
        # without scanning the whole file.
        if row_num < 1:
            print(f"Error: Row {row_num} not found in '{file_path}'.")
            return None

        with open(file_path, 'r', newline='', encoding='utf-8') as csvfile:
            for current_row_num, row in enumerate(csv.reader(csvfile), start=1):
                if current_row_num != row_num:
                    continue
                if col_index < len(row):
                    return row[col_index].encode('utf-8')
                print(f"Error: Column '{col_letter}' not found in row {row_num} of '{file_path}'.")
                return None
        print(f"Error: Row {row_num} not found in '{file_path}'.")
        return None
    except FileNotFoundError:
        print(f"Error: File not found at '{file_path}'.")
        return None
    except Exception as e:
        # Deliberately broad: the documented contract is "None on any error"
        # (decode errors, malformed CSV, bad argument types, permission errors).
        print(f"An error occurred: {e}")
        return None

# Define the file paths and cell locations
file1_path = r'/home/aricept094/mydata/endometriosis/demographic.csv'
file2_path = r'/home/aricept094/mydata/endometriosis/endometrioma.csv'
file1_row = 1010
file1_col = 'C'
file2_row = 162
file2_col = 'B'

# Get the byte values of the specified cells. A None result means the lookup
# failed and get_cell_byte_value has already printed the reason.
cell1_bytes = get_cell_byte_value(file1_path, file1_row, file1_col)
cell2_bytes = get_cell_byte_value(file2_path, file2_row, file2_col)

# Compare the byte values
if cell1_bytes is not None and cell2_bytes is not None:
    print(f"Byte value of cell {file1_col}{file1_row} in '{file1_path}': {cell1_bytes}")
    print(f"Byte value of cell {file2_col}{file2_row} in '{file2_path}': {cell2_bytes}")

    if cell1_bytes == cell2_bytes:
        print("The byte values of the two cells are identical.")
    else:
        print("The byte values of the two cells are NOT identical.")
        # Further investigation: Check for leading/trailing whitespace or invisible characters.
        # The bytes were produced by encoding each cell's text as UTF-8, so decoding
        # them recovers exactly the strings that were read; there is no need to open
        # and parse the files a second time. It also means the strings necessarily
        # differ here, because the bytes differ.
        cell1_str = cell1_bytes.decode('utf-8')
        cell2_str = cell2_bytes.decode('utf-8')

        # !r keeps the surrounding quotes but escapes non-printable characters
        # (e.g. zero-width spaces, control characters) so they become visible.
        print(f"String value of cell {file1_col}{file1_row}: {cell1_str!r}")
        print(f"String value of cell {file2_col}{file2_row}: {cell2_str!r}")
        print("The string values are also different.")

        if cell1_str.strip() == cell2_str.strip():
            print("The string values are the same after removing leading/trailing whitespace.")
        else:
            print("The string values are different even after stripping whitespace.")
            # Check for individual character differences
            print("Character-by-character comparison:")
            for position in range(max(len(cell1_str), len(cell2_str))):
                # None marks "past the end of this string" and cannot collide
                # with any real character.
                char1 = cell1_str[position] if position < len(cell1_str) else None
                char2 = cell2_str[position] if position < len(cell2_str) else None
                if char1 == char2:
                    continue
                shown1 = '<missing>' if char1 is None else char1
                shown2 = '<missing>' if char2 is None else char2
                ord1 = '' if char1 is None else ord(char1)
                ord2 = '' if char2 is None else ord(char2)
                print(f"Difference at position {position}: '{shown1}' (ord={ord1}) vs '{shown2}' (ord={ord2})")

Byte value of cell C1010 in '/home/aricept094/mydata/endometriosis/demographic.csv': b'780777794'
Byte value of cell B162 in '/home/aricept094/mydata/endometriosis/endometrioma.csv': b'780777794'
The byte values of the two cells are identical.
