In [3]:
import os
import re
import pandas as pd

# Input location: the workbook's file name and the folder that contains it
filename = 'prediction_regionprops.xlsx'
directory = 'path/to/your/directory'

# Count the distinct values found in one column of a DataFrame
def count_unique_values(dataframe, column):
    """Return how many distinct values ``dataframe[column]`` holds.

    The count is the length of the array returned by ``unique()`` on the
    selected column.
    """
    return len(dataframe[column].unique())
    
# Build the workbook's full path from the configured folder and file name
excel_file_path = os.path.join(directory, filename)

# Read the workbook into a DataFrame using the openpyxl engine
df = pd.read_excel(excel_file_path, engine='openpyxl')

# Pick columns by position rather than by header: position 0 is the first
# column and position 7 is the eighth
first_column, eighth_column = df.iloc[:, 0], df.iloc[:, 7]

# Assemble the two-column working table from the selected columns
result_df = pd.DataFrame({'Image': first_column, 'Label': eighth_column})

# Function to extract the numeric image number from the 'Image' column
def extract_image_number(path):
    """Return the first integer found in the file name of *path*.

    Args:
        path: Value taken from the 'Image' column. Strings, bytes and
            ``os.PathLike`` objects are treated as paths. Any other value
            (for example a numeric or empty spreadsheet cell) is converted
            with ``str()`` instead of raising ``TypeError``.

    Returns:
        The first run of digits in the final path component as an ``int``,
        or ``None`` when that component contains no digits.
    """
    try:
        raw_path = os.fspath(path)
    except TypeError:
        # os.path.basename rejects non-path values (int, float, None, ...),
        # so fall back to their text form before splitting the path.
        raw_path = str(path)

    base_name = os.path.basename(raw_path)
    # str() keeps bytes paths searchable with a text pattern.
    first_number = re.search(r'\d+', str(base_name))
    return int(first_number.group()) if first_number else None

# Replace each 'Image' entry with the image number parsed from it
result_df['Image'] = result_df['Image'].apply(extract_image_number)

# Rows per image: this is the total reported for each image
image_counts = result_df['Image'].value_counts()

# Rows per image whose label equals 1
count_ones = result_df.loc[result_df['Label'] == 1, 'Image'].value_counts()

# Rows per image whose label equals 2
count_twos = result_df.loc[result_df['Label'] == 2, 'Image'].value_counts()

# Summary table, one row per image. The label counts are reindexed onto the
# totals' index so every image gets a row, with 0 where a label never occurs.
final_df = pd.DataFrame(
    {
        'Image Name': image_counts.index,
        'Total Number of Nuclei': image_counts.values,
        'Nuclei In': count_ones.reindex(image_counts.index, fill_value=0),
        'Nuclei Out': count_twos.reindex(image_counts.index, fill_value=0),
    }
)

# Share of label-1 rows among all rows of the image, as a percentage
in_fraction = final_df['Nuclei In'] / final_df['Total Number of Nuclei']
final_df['Fusion Index (%)'] = in_fraction * 100
del in_fraction  # temporary only; keep the module namespace as before

# The output workbook is written to the same folder as the input
output_filename = 'Fusion Index.xlsx'
output_file_path = os.path.join(directory, output_filename)

# Write the summary to a 'Results' sheet without the DataFrame index
final_df.to_excel(output_file_path, sheet_name='Results', index=False)

# Report where the file was written
print(f"The DataFrame has been exported to '{output_file_path}'")

The DataFrame has been exported to '/home/benjamin.lair/Bureau/Fusion Index.xlsx'
