In [1]:
import pandas as pd
import os

def process_annotations(file_path, sheet_name, threshold):
    """
    Simulate threshold-based annotation reduction for one sheet of frame data.

    Frames whose 'Optical Flow Score' is strictly greater than ``threshold``
    are flagged as needing a fresh annotation. Every other frame (except the
    first row) takes over the annotation values of the frame before it, so
    values propagate from the most recent flagged frame, or from the first
    row when no frame has been flagged yet. The propagated annotations are
    then compared with the originals.

    The sheet layout is positional: the first column (assumed to be the frame
    number) and the last column (assumed to be the Optical Flow Score) are
    skipped, and every column in between is treated as an annotation column.
    A column named 'Optical Flow Score' must exist.

    Parameters:
    file_path (str): Path to the Excel file.
    sheet_name (str): Name of the sheet in the Excel file.
    threshold (float): Threshold for the dissimilarity score.

    Returns:
    A tuple of six items, in this order:
    pd.DataFrame: The sheet with carried-forward annotations and an added
        'annotation needed' column (1 = flagged, 0 = carried forward).
    dict: Accuracy per annotation column, i.e. the fraction of all rows whose
        carried-forward value equals the original value.
    dict: Per column, the sum of the column's values over the flagged frames
        (for 0/1 annotations, presumably the number of flagged frames that
        show the structure).
    int: Total number of flagged frames.
    dict: Number Needed to Annotate (NNA) per column; same computation as the
        third item.
    dict: Per column, that sum as a percentage of the flagged frames
        (0.0 when no frame is flagged).

    Notes:
    - The first row is never overwritten, even when it is not flagged, and in
      that case it is not counted among the frames needing annotation.
    - Accuracy uses element-wise ``==``; missing values never compare equal,
      so a NaN annotation always counts as a mismatch.
    """

    # Load the data from the specified sheet
    data = pd.read_excel(file_path, sheet_name=sheet_name)

    # Snapshot the ground-truth annotations before anything is overwritten
    original_annotations = data.iloc[:, 1:-1].copy()
    annotation_columns = original_annotations.columns

    # Flag frames whose score exceeds the threshold (1 = needs annotation)
    data['annotation needed'] = 0
    data.loc[data['Optical Flow Score'] > threshold, 'annotation needed'] = 1

    # Carry annotations forward. For every row, find the position of the row
    # it ultimately copies from: itself when flagged (or when it is the first
    # row), otherwise the closest earlier such row. Gathering by position
    # replaces a row-by-row scalar loop and, unlike a value-based forward
    # fill, also copies missing values from a flagged frame unchanged.
    positions = pd.Series(range(len(data)), index=data.index)
    is_source = data['annotation needed'].eq(1) | positions.eq(0)
    source_positions = positions.where(is_source).ffill().astype(int).to_numpy()
    for col in annotation_columns:
        # Per-column assignment keeps each column's dtype intact
        data[col] = data[col].to_numpy()[source_positions]

    # The flag column is now last, so this slice also picks up the score
    # column; intersecting with the original columns drops it again.
    updated_annotations = data.iloc[:, 1:-1]
    matching_columns = updated_annotations.columns.intersection(original_annotations.columns)
    updated_annotations = updated_annotations[matching_columns]
    original_annotations = original_annotations[matching_columns]

    # Calculate metrics per column
    accuracies = {}
    frames_to_annotate_per_column = {}
    number_needed_to_annotate = {}
    percent_reduced_with_annotation = {}

    # "Reduced" set = only the frames flagged for annotation
    reduced_frames = data[data['annotation needed'] == 1]
    total_reduced_frames = len(reduced_frames)
    total_annotations = len(data)

    for col in matching_columns:
        correct_annotations = (original_annotations[col] == updated_annotations[col]).sum()
        accuracies[col] = correct_annotations / total_annotations

        # Sum of the column over the reduced set; reported under two names
        annotated_in_reduced = reduced_frames[col].sum()
        frames_to_annotate_per_column[col] = annotated_in_reduced
        number_needed_to_annotate[col] = annotated_in_reduced

        # % of reduced frames with annotation of the structure
        if total_reduced_frames > 0:
            percent_reduced_with_annotation[col] = annotated_in_reduced / total_reduced_frames * 100
        else:
            percent_reduced_with_annotation[col] = 0.0

    # Count the total number of frames that need annotation
    num_frames_to_annotate = data['annotation needed'].sum()

    return data, accuracies, frames_to_annotate_per_column, num_frames_to_annotate, number_needed_to_annotate, percent_reduced_with_annotation


In [9]:
import os
import pandas as pd

# Per-video settings. Every workbook sits in the same folder and uses the same
# sheet; only the video number and the optical-flow threshold differ, so the
# list of settings is generated from a compact table.
_EXCEL_DIR = "C://Users//dulce//OneDrive//Documentos//Tese mestrado//Excel"
_SHEET_NAME = "anat"

# (video number, threshold) pairs, in processing order
_VIDEO_THRESHOLDS = [
    (1, 40), (2, 60), (4, 50), (5, 50), (6, 60), (8, 35),
    (10, 45), (12, 40), (13, 45), (14, 30), (15, 50), (18, 50),
    (22, 50), (23, 50), (25, 50), (26, 60), (27, 45), (29, 40),
    (31, 50), (32, 40), (35, 50), (36, 60), (40, 80), (42, 70),
    (43, 40), (47, 40), (48, 50), (49, 38), (50, 50), (51, 40),
    (52, 38), (56, 55), (57, 50), (60, 60), (62, 50), (65, 70),
    (66, 70), (68, 70), (70, 40), (73, 40), (74, 40), (75, 50),
    # Add additional videos here
]

# One settings dict per video: workbook path, sheet name and threshold
files_to_process = [
    {
        "path": f"{_EXCEL_DIR}//VID{_video:02d}_optical_flow_scores.xlsx",
        "sheet_name": _SHEET_NAME,
        "threshold": _threshold,
    }
    for _video, _threshold in _VIDEO_THRESHOLDS
]

# Collect one summary row per successfully processed video
results = []

# Loop through each file
for file_info in files_to_process:
    file_path = file_info["path"]
    sheet_name = file_info["sheet_name"]
    threshold = file_info["threshold"]
    # Row label: the workbook's file name without directory or extension
    vid_number = os.path.splitext(os.path.basename(file_path))[0]

    try:
        (
            processed_data,
            accuracies,
            frames_to_annotate_per_column,
            num_frames_to_annotate,
            number_needed_to_annotate,
            percent_reduced_with_annotation,
        ) = process_annotations(file_path, sheet_name, threshold)

        row = {"VID number": vid_number}

        for col, accuracy in accuracies.items():
            # Keep only the text after the last ':' as the structure name.
            # str() guards against a header that was read as a non-string,
            # which would otherwise raise and drop the whole video below.
            structure = str(col).split(":")[-1]
            row[f"{structure}_accuracy"] = accuracy
            row[f"{structure}_num_total"] = number_needed_to_annotate[col]
            row[f"{structure}_percent_reduced_with_annotation"] = percent_reduced_with_annotation[col]

        # Unweighted mean of the per-column accuracies (0 when there are none)
        col_count = len(accuracies)
        total_accuracy = sum(accuracies.values())
        row["Total accuracy"] = total_accuracy / col_count if col_count else 0
        # Number of frames flagged as needing annotation for this video
        row["total of frames"] = num_frames_to_annotate

        results.append(row)

    except Exception as e:
        # Best effort: report the failure and continue with the next video;
        # a failed video is simply absent from the output table.
        print(f"Error processing {file_path}: {e}")

# Convert to DataFrame and save to Excel
results_df = pd.DataFrame(results)
output_path = r"C://Users//dulce//OneDrive//Documentos//Tese mestrado//Excel//aggregated_metrics_custom.xlsx"
results_df.to_excel(output_path, index=False)
print(f"Results saved to: {output_path}")


Results saved to: C://Users//dulce//OneDrive//Documentos//Tese mestrado//Excel//aggregated_metrics_custom.xlsx
