In [2]:
import pandas as pd
import os

def process_annotations(file_path, sheet_name, threshold):
    """
    Flag frames whose optical-flow score exceeds a threshold and back-fill
    the annotations of every other frame from the next frame in the sheet.

    A frame is flagged (``'annotation needed' == 1``) when its
    'Optical Flow Score' is strictly greater than ``threshold``. Every
    unflagged frame receives, for each annotation column, the value of the
    row directly below it; because rows are resolved bottom-up, a run of
    unflagged frames ends up with the values of the nearest flagged frame
    (or of the last row) that follows it.

    Parameters:
    file_path (str): Path to the Excel file.
    sheet_name (str): Name of the sheet in the Excel file.
    threshold (float): Threshold for the dissimilarity score.

    Returns:
    pd.DataFrame: The sheet with back-filled annotation columns and an added
        'annotation needed' column (1 = flagged, 0 = filled from a later frame).
    dict: For each annotation column, the fraction of rows whose back-filled
        value equals the value originally in the sheet. Cells that are blank
        both before and after count as equal. NaN for an empty sheet.
    int: Number of frames flagged as needing annotation.

    Raises:
    KeyError: If the sheet has no 'Optical Flow Score' column.
    """

    # Load the data from the specified sheet
    data = pd.read_excel(file_path, sheet_name=sheet_name)

    # Annotation columns are selected by position: everything except the first
    # column and the last two.
    # NOTE(review): assumes the sheet layout is <frame id>, <annotations...>,
    # followed by exactly two non-annotation columns - confirm against the file.
    original_annotations = data.iloc[:, 1:-2].copy()
    annotation_cols = list(original_annotations.columns)

    # 1 where the score is strictly above the threshold, otherwise 0
    # (a missing score compares False and is therefore left at 0).
    data['annotation needed'] = (data['Optical Flow Score'] > threshold).astype('int64')

    n_rows = len(data)
    if n_rows > 1 and annotation_cols:
        # Rows that keep their own annotation: every flagged row, plus the last
        # row, which has no later row to copy from. Note that the last row is
        # kept even when it is not flagged, so it is not included in the
        # returned count in that case.
        keep = data['annotation needed'].ne(0).reset_index(drop=True)
        keep.iloc[-1] = True

        # For each row, the position of the nearest kept row at or below it.
        positions = pd.Series(range(n_rows))
        source = positions.where(keep).bfill().astype('int64').to_numpy()

        # One positional gather per column instead of one scalar write per cell.
        for col in annotation_cols:
            data[col] = data[col].to_numpy()[source]

    # Fraction of rows per column whose value survived the back-fill unchanged
    accuracies = {}
    for col in annotation_cols:
        before = original_annotations[col]
        after = data[col]
        # NaN != NaN under ==, so blank cells that stay blank are matched explicitly.
        unchanged = (before == after) | (before.isna() & after.isna())
        accuracies[col] = unchanged.sum() / n_rows if n_rows else float('nan')

    # Count the number of frames that need annotation (plain int, as documented)
    num_frames_to_annotate = int(data['annotation needed'].sum())

    return data, accuracies, num_frames_to_annotate


In [4]:
# Example usage: run the pipeline on one sheet, save the result, report a summary
file_path = 'C://Users//dulce//OneDrive//Documentos//Tese mestrado//Excel//VID75_optical_flow_scores.xlsx'  # Replace with your file path
sheet_name = 'anat'  # Replace with the name of your sheet
threshold = 40 # Set your desired threshold
updated_data, accuracy, num_frames_to_annotate = process_annotations(
    file_path=file_path,
    sheet_name=sheet_name,
    threshold=threshold,
)

# Build the output path by inserting the sheet name and threshold
# between the input file's stem and its extension
file_name, file_extension = os.path.splitext(file_path)
output_file_path = file_name + f"_{sheet_name}_accuracy_{threshold}" + file_extension

# Write the processed sheet next to the input file (row index omitted)
updated_data.to_excel(output_file_path, index=False)

# Summarise the run, one item per line
print(
    f'File path: {file_path}',
    f'Sheet Name: {sheet_name}',
    f'Accuracy: {accuracy}',
    f'Threshold: {threshold}',
    f'Number of frames needing annotation: {num_frames_to_annotate}',
    f'Updated file saved as: {output_file_path}',
    sep='\n',
)

File path: C://Users//dulce//OneDrive//Documentos//Tese mestrado//Excel//VID75_optical_flow_scores.xlsx
Sheet Name: anat
Accuracy: {' 0:gallbladder': 0.9464656964656964, ' 1:cystic_plate': 0.9994802494802495, ' 2:cystic_duct': 0.9901247401247402, ' 3:cystic_artery': 0.9916839916839917, ' 4:cystic_pedicle': 1.0, ' 5:blood_vessel': 1.0, ' 6:fluid': 1.0, ' 7:abdominal_wall_cavity': 1.0, ' 8:liver': 1.0, ' 9:adhesion': 1.0, ' 10:omentum': 0.9994802494802495, ' 11:peritoneum': 1.0, ' 12:gut': 0.9942827442827443}
Threshold: 40
Number of frames needing annotation: 697
Updated file saved as: C://Users//dulce//OneDrive//Documentos//Tese mestrado//Excel//VID75_optical_flow_scores_anat_accuracy_40.xlsx
