## 1. Non-specific error prediction

### 1.1. Results table (all models)

In [7]:
#Create a single dataframe that contains all the results

#Each row of the dataframe corresponds to one (input data type, model) pair
#The columns will be Input, Model Name, F1 (Window), Accuracy (Window), Jaccard (Window), F1 (Frame), Accuracy (Frame), Jaccard (Frame), Training Time (s), Inference Rate (ms per frame/window)

import pandas as pd
import os

#Columns of the combined results table, in the order they are written to disk
RESULT_COLUMNS = [
    'Input',
    'Model Name',
    'F1 (Window)',
    'Accuracy (Window)',
    'Jaccard (Window)',
    'F1 (Frame)',
    'Accuracy (Frame)',
    'Jaccard (Frame)',
    'Training Time (s)',
    'Inference Rate (ms per frame/window)'
]

#Metric columns as they are named inside the per-model summary CSVs
METRIC_NAMES = ['F1', 'Accuracy', 'Jaccard']
#Raw CSV columns that are replaced by the renamed columns above
RAW_COLUMNS = METRIC_NAMES + ['Train Time', 'Inference Rate']

frequency = 5
results_folder = '../results/'
inputs = ['video', 'kinematics', 'multimodal']

#Folder name of each model -> name shown in the results table
model_names_dict = {'SimpleCNN': 'CNN',
                    'SimpleLSTM': 'LSTM',
                    'Siamese_CNN': 'Siamese CNN',
                    'Siamese_LSTM': 'Siamese LSTM',
                    'TeCNo': 'TeCNo',
                    'TransSVNet': 'TransSVNet',
                    'COG': 'COG'}

#Whether each model predicts per window or per frame
model_frame_or_window = {'SimpleCNN': 'window',
                         'SimpleLSTM': 'window',
                         'Siamese_CNN': 'window',
                         'Siamese_LSTM': 'window',
                         'TeCNo': 'frame',
                         'TransSVNet': 'frame',
                         'COG': 'frame'}

csv_filename = 'LOSO_summary_df.csv'
#Only read for frame-level models: their predictions were windowed for a fair comparison
csv_filename_2 = 'LOSO_summary_df_window.csv'


def load_model_summary(model_folder, input_name, model_name):
    """
    Build the results-table rows for one (input, model) pair.

    Args:
        model_folder (str): Folder expected to contain the summary CSV(s) of the model.
        input_name (str): Input data type, stored in the 'Input' column.
        model_name (str): Key of model_names_dict / model_frame_or_window.
    Returns:
        pd.DataFrame or None: Rows to append to the results table, or None when
        the main summary CSV does not exist.
    """
    csv_file = os.path.join(model_folder, csv_filename)
    if not os.path.exists(csv_file):
        return None

    is_frame_model = model_frame_or_window[model_name] == 'frame'
    native_level = 'Frame' if is_frame_model else 'Window'

    summary = pd.read_csv(csv_file)

    #The training time is read from the first row; the reported metrics come from
    #the remaining rows (presumably train vs. test rows - confirm against the CSV writer)
    training_time = summary.iloc[0]['Train Time']
    metric_rows = summary.iloc[1:].reset_index(drop=True)

    #Keep any additional CSV columns, replace the raw metric columns by the renamed ones
    entry = metric_rows.drop(columns=RAW_COLUMNS)
    entry['Input'] = input_name
    entry['Model Name'] = model_names_dict[model_name]
    for metric in METRIC_NAMES:
        entry[f'{metric} ({native_level})'] = metric_rows[metric].astype(str)
    entry['Training Time (s)'] = training_time
    entry['Inference Rate (ms per frame/window)'] = metric_rows['Inference Rate'].astype(str)

    if is_frame_model:
        #Window-level metrics of frame-level models live in a second CSV
        csv_file_2 = os.path.join(model_folder, csv_filename_2)
        if os.path.exists(csv_file_2):
            window_summary = pd.read_csv(csv_file_2)
            for metric in METRIC_NAMES:
                #Aligned on the row index, i.e. row k of the window CSV goes to row k of entry
                entry[f'{metric} (Window)'] = window_summary[metric].astype(str)
        else:
            #Previously this case raised FileNotFoundError and aborted the whole table
            print(f'Missing window-level summary for {model_name}: {csv_file_2}')
            for metric in METRIC_NAMES:
                entry[f'{metric} (Window)'] = pd.NA
    else:
        #Window-level models have no frame-level predictions
        for metric in METRIC_NAMES:
            entry[f'{metric} (Frame)'] = pd.NA

    return entry


#Collect one small dataframe per (input, model) pair and concatenate once at the end
summaries = []

for input_name in inputs:  #not named `input`, which would shadow the builtin

    print(f'Processing input: {input_name}')
    input_results_folder = os.path.join(results_folder, input_name, f'{frequency}Hz')
    if not os.path.exists(input_results_folder):
        continue

    for model_name in model_names_dict:
        print(f'Processing model: {model_name}')
        model_folder = os.path.join(input_results_folder, model_name)
        if not os.path.exists(model_folder):
            continue

        model_summary = load_model_summary(model_folder, input_name, model_name)
        if model_summary is not None:
            summaries.append(model_summary)

if summaries:
    results_df = pd.concat(summaries, ignore_index=True)
    #Known columns first (fixed order), followed by any extra columns found in the CSVs
    extra_columns = [column for column in results_df.columns if column not in RESULT_COLUMNS]
    results_df = results_df.reindex(columns=RESULT_COLUMNS + extra_columns)
else:
    #Nothing found on disk: keep an empty table with the expected columns
    results_df = pd.DataFrame(columns=RESULT_COLUMNS)

display(results_df)

#Save the results dataframe to a CSV file
output_csv = 'LOSO_summary_results_all.csv'
file_path = os.path.join(results_folder, output_csv)
results_df.to_csv(file_path, index=False)

Processing input: video
Processing model: SimpleCNN
Processing model: SimpleLSTM
Processing model: Siamese_CNN
Processing model: Siamese_LSTM
Processing model: TeCNo
Processing model: TransSVNet
Processing model: COG
Processing input: kinematics
Processing model: SimpleCNN
Processing model: SimpleLSTM
Processing model: Siamese_CNN
Processing model: Siamese_LSTM
Processing model: TeCNo
Processing model: TransSVNet
Processing model: COG
Processing input: multimodal
Processing model: SimpleCNN
Processing model: SimpleLSTM
Processing model: Siamese_CNN
Processing model: Siamese_LSTM
Processing model: TeCNo
Processing model: TransSVNet
Processing model: COG


Unnamed: 0,Input,Model Name,F1 (Window),Accuracy (Window),Jaccard (Window),F1 (Frame),Accuracy (Frame),Jaccard (Frame),Training Time (s),Inference Rate (ms per frame/window)
0,video,CNN,0.711 ± 0.047,0.676 ± 0.029,0.554 ± 0.058,,,,0.31 ± 0.01,0.86 ± 0.02
1,video,LSTM,0.670 ± 0.041,0.673 ± 0.024,0.537 ± 0.045,,,,0.95 ± 0.05,0.93 ± 0.06
2,video,Siamese CNN,0.674 ± 0.038,0.661 ± 0.028,0.510 ± 0.045,,,,4.41 ± 0.18,1.32 ± 0.01
3,video,Siamese LSTM,0.701 ± 0.021,0.667 ± 0.016,0.540 ± 0.025,,,,6.04 ± 0.10,1.65 ± 0.02
4,video,TeCNo,0.709 ± 0.042,0.673 ± 0.026,0.551 ± 0.052,0.684 ± 0.038,0.669 ± 0.025,0.521 ± 0.045,2.38 ± 0.12,67.95 ± 13.71
5,video,TransSVNet,0.681 ± 0.064,0.647 ± 0.046,0.520 ± 0.075,0.662 ± 0.058,0.647 ± 0.038,0.497 ± 0.067,15.35 ± 0.44,1.60 ± 0.20
6,video,COG,0.706 ± 0.057,0.666 ± 0.031,0.548 ± 0.066,0.687 ± 0.051,0.667 ± 0.025,0.525 ± 0.058,25.07 ± 0.56,1.95 ± 0.25
7,kinematics,CNN,0.700 ± 0.039,0.631 ± 0.033,0.539 ± 0.046,,,,0.33 ± 0.01,0.98 ± 0.06
8,kinematics,LSTM,0.69 ± 0.06,0.63 ± 0.02,0.54 ± 0.07,,,,0.30 ± 0.02,1.17 ± 0.07
9,kinematics,Siamese CNN,0.63 ± 0.03,0.58 ± 0.02,0.46 ± 0.04,,,,3.61 ± 0.04,1.42 ± 0.01


In [3]:
#Save a second CSV that keeps only the window-level metrics
output_csv_window = 'LOSO_summary_results_window.csv'
file_path_window = os.path.join(results_folder, output_csv_window)
#Drop the frame-level metric columns; every other column is written unchanged
results_df_window = results_df.drop(columns=['F1 (Frame)', 'Accuracy (Frame)', 'Jaccard (Frame)'])
results_df_window.to_csv(file_path_window, index=False)
#Report the file that was actually written (this used to print file_path, the "all results" CSV)
print(f'Results saved to {file_path_window}')

Results saved to ../results/LOSO_summary_results_all.csv


In [4]:
#Frame-level export: keep only the models that produce frame-level predictions
output_csv_frame = 'LOSO_summary_results_frame.csv'
file_path_frame = os.path.join(results_folder, output_csv_frame)

#Columns that are not part of the frame-level table
dropped_columns = ['F1 (Window)', 'Accuracy (Window)', 'Jaccard (Window)', 'Inference Rate (ms per frame/window)']
#Models without frame-level predictions (CNN, LSTM, Siamese CNN, Siamese LSTM)
window_only_models = ['CNN', 'LSTM', 'Siamese CNN', 'Siamese LSTM']

is_frame_model = ~results_df['Model Name'].isin(window_only_models)
results_df_frame = results_df.loc[is_frame_model].drop(columns=dropped_columns)

display(results_df_frame)
results_df_frame.to_csv(file_path_frame, index=False)
print(f'Results saved to {file_path_frame}')

Unnamed: 0,Input,Model Name,F1 (Frame),Accuracy (Frame),Jaccard (Frame),Training Time (s)
4,video,TeCNo,0.684 ± 0.038,0.669 ± 0.025,0.521 ± 0.045,2.38 ± 0.12
5,video,TransSVNet,0.662 ± 0.058,0.647 ± 0.038,0.497 ± 0.067,15.35 ± 0.44
6,video,COG,0.660 ± 0.054,0.607 ± 0.035,0.495 ± 0.063,25.48 ± 1.26
11,kinematics,TeCNo,0.679 ± 0.030,0.669 ± 0.029,0.514 ± 0.035,2.19 ± 0.04
12,kinematics,TransSVNet,0.630 ± 0.026,0.611 ± 0.011,0.461 ± 0.027,15.15 ± 0.33
13,kinematics,COG,0.535 ± 0.079,0.559 ± 0.063,0.369 ± 0.073,24.08 ± 0.50
18,multimodal,TeCNo,0.678 ± 0.039,0.668 ± 0.023,0.514 ± 0.046,2.49 ± 0.05
19,multimodal,TransSVNet,0.679 ± 0.041,0.655 ± 0.022,0.515 ± 0.048,15.49 ± 0.31
20,multimodal,COG,0.634 ± 0.050,0.530 ± 0.059,0.467 ± 0.052,25.14 ± 1.14


Results saved to ../results/LOSO_summary_results_frame.csv


### 1.2. Compute specific error prediction performance with best non-specific models

#### a. SimpleCNN + video data

In [27]:
import mlflow
# Use the MLflow tracking server at this local address
# (presumably the server that logged the runs retrieved below - confirm it is running)
mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")

import os
import sys
# Add the parent of the current working directory to sys.path so that the MED
# imports below can be resolved (assumes the notebook is run from a subfolder of the project root)
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

from MED.modeling.modeling_utils import retrieve_results_mlflow, window_predictions
from MED.dataset.dataset_utils import compute_window_size_stride
from sklearn.metrics import f1_score, accuracy_score, jaccard_score
import numpy as np
import pandas as pd

def _format_weighted_mean_std(scores: list, weights: list) -> str:
    """Return 'mean ± std' (3 decimals) of per-fold scores, weighted by the fold sizes."""
    scores = np.asarray(scores, dtype=float)
    mean = np.average(scores, weights=weights)
    std = np.sqrt(np.average((scores - mean) ** 2, weights=weights))
    return f"{mean:.3f} ± {std:.3f}"


def compute_metrics(error_dict: dict,
                    all_labels: np.ndarray, 
                    all_preds: np.ndarray,
                    outs: list,
                    all_gest_labels: np.ndarray = None,
                    all_subjects: np.ndarray = None,
                    frame2window=False,
                    frequency: int = 5) -> pd.DataFrame:

    """
    Compute error-type specific F1, Accuracy and Jaccard scores across folds.

    For every entry of error_dict, the predictions of each fold are scored against
    the label column of that error type. The per-fold scores are then summarised as
    a mean and standard deviation weighted by the number of samples in each fold.

    Args:
        error_dict (dict): Maps an error name to the column position of its labels.
        all_labels: Indexed by fold name; all_labels[out] must convert to a 2-D array
            from which column error_dict[error] is taken.
        all_preds: Indexed by fold name; all_preds[out] holds the predictions of the
            fold. The same predictions are compared against every error column.
        outs (list): Fold names to evaluate.
        all_gest_labels: Indexed by fold name; gestures passed to window_predictions.
            Required when frame2window is True.
        all_subjects: Indexed by fold name; subjects passed to window_predictions.
            Required when frame2window is True.
        frame2window (bool): If True, predictions and labels are first passed through
            window_predictions and the metrics are computed on its output.
        frequency (int): Frequency given to compute_window_size_stride to obtain the
            window size and stride. Only used when frame2window is True.
    Returns:
        pd.DataFrame: One row per error name with the columns 'F1', 'Accuracy' and
        'Jaccard', each formatted as 'mean ± std'.
    Raises:
        ValueError: If frame2window is True but all_gest_labels or all_subjects is None.
    """

    if frame2window:
        if all_gest_labels is None or all_subjects is None:
            raise ValueError("all_gest_labels and all_subjects are required when frame2window=True")
        window_size, stride = compute_window_size_stride(frequency=frequency)
        row_order = list(error_dict)
    else:
        #Historical row order of the frame-level table: 'global' is not part of the
        #initial index and is therefore appended after the other error types
        row_order = [error for error in error_dict if error != 'global']

    results_df = pd.DataFrame(columns=['F1', 'Accuracy', 'Jaccard'], index=row_order)

    for error, error_position in error_dict.items():

        fold_scores = {'F1': [], 'Accuracy': [], 'Jaccard': []}
        samples = []

        #Compute F1, Accuracy and Jaccard for each fold
        for out in outs:

            #Labels of this error type only
            labels_specific = np.array(all_labels[out])[:, error_position]
            preds = np.array(all_preds[out])

            if frame2window:
                #Window predictions and labels (the returned gestures/subjects are not needed here)
                preds, labels_specific, _, _ = window_predictions(predictions=preds,
                    e_labels=labels_specific,
                    gestures=np.array(all_gest_labels[out]),
                    subjects=np.array(all_subjects[out]),
                    window_size=window_size, stride=stride)
                preds = np.array(preds)
                labels_specific = np.array(labels_specific)

            fold_scores['F1'].append(f1_score(labels_specific, preds, average='binary'))
            fold_scores['Accuracy'].append(accuracy_score(labels_specific, preds))
            fold_scores['Jaccard'].append(jaccard_score(labels_specific, preds, average='binary'))
            #Fold weight = number of scored samples (windows when frame2window is True)
            samples.append(len(labels_specific))

        #Fill df
        for metric, scores in fold_scores.items():
            results_df.loc[error, metric] = _format_weighted_mean_std(scores, samples)

    return results_df

In [12]:
#a. SimpleCNN + video data

#Column position of each error type in the label vectors ('global' maps to the last column)
error_dict = dict(Out_Of_View=0, Needle_Drop=1, Multiple_Attempts=2, Needle_Position=3)
error_dict['global'] = -1

run_id_video = "705d4490b0a642e1a9c231fdc0eb3bdf"
outs = [f'{fold}Out' for fold in range(1, 6)]
setting = 'LOSO'

exp_kwargs = dict(
    model_name="SimpleCNN",
    dataset_type="frame",
    compute_from_str=True,
    save_local=False,
)

#Retrieve results for video data
(video_f1_train, video_f1_test,
 video_acc_train, video_acc_test,
 video_jaccard_train, video_jaccard_test,
 video_cm_train, video_cm_test,
 test_all_preds_video, test_all_probs_video,
 test_all_labels_video, test_all_labels_specific_video,
 test_all_gest_labels_video, test_all_subjects_video) = retrieve_results_mlflow(
    outs=outs,
    setting=setting,
    exp_kwargs=exp_kwargs,
    run_id=run_id_video,
)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

In [13]:
#Per fold, count the windows labelled with more than one specific error type
for out in outs:
    fold_labels = test_all_labels_video[out]
    print(len(fold_labels), len(test_all_labels_specific_video[out]))

    counter = 0
    counter_out_of_view = 0
    for window_labels in fold_labels:
        #All entries except the last one are summed; skip windows with at most one of them set
        if not (window_labels[:-1].sum() > 1):
            continue
        counter += 1
        #Out Of View is only counted among the multi-error windows
        if window_labels[error_dict['Out_Of_View']] == 1:
            counter_out_of_view += 1

    print(f'Total number of windows with more than 1 specific error type: {counter}')
    print(f'Total number of windows with Out Of View error: {counter_out_of_view}')
    print('\n')

1078 1078
Total number of windows with more than 1 specific error type: 112
Total number of windows with Out Of View error: 35


776 776
Total number of windows with more than 1 specific error type: 59
Total number of windows with Out Of View error: 34


888 888
Total number of windows with more than 1 specific error type: 87
Total number of windows with Out Of View error: 56


870 870
Total number of windows with more than 1 specific error type: 48
Total number of windows with Out Of View error: 19


640 640
Total number of windows with more than 1 specific error type: 30
Total number of windows with Out Of View error: 17




In [14]:
#Error-type specific metrics of the SimpleCNN video run
results_df_video = compute_metrics(
    error_dict=error_dict,
    all_labels=test_all_labels_video,
    all_preds=test_all_preds_video,
    outs=outs,
)
display(results_df_video)

  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_l

Unnamed: 0,F1,Accuracy,Jaccard
Out_Of_View,0.391 ± 0.045,0.557 ± 0.064,0.244 ± 0.036
Needle_Drop,0.025 ± 0.018,0.452 ± 0.062,0.013 ± 0.009
Multiple_Attempts,0.541 ± 0.061,0.566 ± 0.016,0.373 ± 0.058
Needle_Position,0.214 ± 0.069,0.499 ± 0.034,0.122 ± 0.044
global,0.711 ± 0.047,0.676 ± 0.029,0.554 ± 0.058


#### b. SimpleCNN + kinematics data

In [15]:
#b. SimpleCNN + kinematics data
run_id_kinematics = "f4c962c1b73045d2aa1860a16abbac17"

#Retrieve results for kinematics data (same folds, setting and exp_kwargs as the video run)
(kinematics_f1_train, kinematics_f1_test,
 kinematics_acc_train, kinematics_acc_test,
 kinematics_jaccard_train, kinematics_jaccard_test,
 kinematics_cm_train, kinematics_cm_test,
 test_all_preds_kinematics, test_all_probs_kinematics,
 test_all_labels_kinematics, test_all_labels_specific_kinematics,
 test_all_gest_labels_kinematics, test_all_subjects_kinematics) = retrieve_results_mlflow(
    outs=outs,
    setting=setting,
    exp_kwargs=exp_kwargs,
    run_id=run_id_kinematics,
)

#Error-type specific metrics of the kinematics run
results_df_kinematics = compute_metrics(
    error_dict=error_dict,
    all_labels=test_all_labels_kinematics,
    all_preds=test_all_preds_kinematics,
    outs=outs,
)
display(results_df_kinematics)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_labels[out])
  labels_specific = np.array(all_l

Unnamed: 0,F1,Accuracy,Jaccard
Out_Of_View,0.325 ± 0.032,0.443 ± 0.056,0.195 ± 0.023
Needle_Drop,0.018 ± 0.014,0.350 ± 0.048,0.009 ± 0.007
Multiple_Attempts,0.560 ± 0.050,0.539 ± 0.024,0.391 ± 0.050
Needle_Position,0.170 ± 0.056,0.387 ± 0.032,0.094 ± 0.034
global,0.700 ± 0.039,0.631 ± 0.033,0.539 ± 0.046


#### c. COG + multimodal data

In [28]:
#c. COG + multimodal data

run_id = "fc159a057aba46ce81547822fdb282b0"
outs = [f'{fold}Out' for fold in range(1, 6)]
setting = 'LOSO'
exp_kwargs = dict(
    model_name="COG",
    dataset_type="frame",
    compute_from_str=False,
    save_local=False,
)

#Column position of each error type in the label vectors ('global' maps to the last column)
error_dict = dict(Out_Of_View=0, Needle_Drop=1, Multiple_Attempts=2, Needle_Position=3)
error_dict['global'] = -1

#Retrieve results for multimodal data
(multimodal_f1_train, multimodal_f1_test,
 multimodal_acc_train, multimodal_acc_test,
 multimodal_jaccard_train, multimodal_jaccard_test,
 multimodal_cm_train, multimodal_cm_test,
 test_all_preds_multimodal, test_all_probs_multimodal,
 test_all_labels_multimodal, test_all_labels_specific_multimodal,
 test_all_gest_labels_multimodal, test_all_subjects_multimodal) = retrieve_results_mlflow(
    outs=outs,
    setting=setting,
    exp_kwargs=exp_kwargs,
    run_id=run_id,
)

#Arguments shared by the frame-level and the window-level evaluation
shared_metric_args = dict(
    error_dict=error_dict,
    all_labels=test_all_labels_multimodal,
    all_preds=test_all_preds_multimodal,
    outs=outs,
)

#Frame-level predictions
results_df_multimodal_frame = compute_metrics(**shared_metric_args, frame2window=False)

#Window-level predictions
results_df_multimodal_window = compute_metrics(
    **shared_metric_args,
    all_gest_labels=test_all_gest_labels_multimodal,
    all_subjects=test_all_subjects_multimodal,
    frame2window=True,
)

display(results_df_multimodal_frame)
display(results_df_multimodal_window)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

  predictions_windows = np.array(predictions_windows)
  e_labels_windows = np.array(e_labels_windows)
  predictions_windows = np.array(predictions_windows)
  e_labels_windows = np.array(e_labels_windows)
  predictions_windows = np.array(predictions_windows)
  e_labels_windows = np.array(e_labels_windows)
  predictions_windows = np.array(predictions_windows)
  e_labels_windows = np.array(e_labels_windows)
  predictions_windows = np.array(predictions_windows)
  e_labels_windows = np.array(e_labels_windows)
  predictions_windows = np.array(predictions_windows)
  e_labels_windows = np.array(e_labels_windows)
  predictions_windows = np.array(predictions_windows)
  e_labels_windows = np.array(e_labels_windows)
  predictions_windows = np.array(predictions_windows)
  e_labels_windows = np.array(e_labels_windows)
  predictions_windows = np.array(predictions_windows)
  e_labels_windows = np.array(e_labels_windows)
  predictions_windows = np.array(predictions_windows)
  e_labels_windows = np.arra

Unnamed: 0,F1,Accuracy,Jaccard
Out_Of_View,0.365 ± 0.038,0.512 ± 0.054,0.224 ± 0.029
Needle_Drop,0.018 ± 0.014,0.408 ± 0.044,0.009 ± 0.007
Multiple_Attempts,0.531 ± 0.062,0.560 ± 0.026,0.364 ± 0.057
Needle_Position,0.207 ± 0.057,0.461 ± 0.039,0.117 ± 0.036
global,0.709 ± 0.033,0.672 ± 0.027,0.550 ± 0.040


Unnamed: 0,F1,Accuracy,Jaccard
Out_Of_View,0.357 ± 0.039,0.484 ± 0.055,0.218 ± 0.029
Needle_Drop,0.020 ± 0.015,0.376 ± 0.042,0.010 ± 0.008
Multiple_Attempts,0.573 ± 0.053,0.562 ± 0.025,0.403 ± 0.051
Needle_Position,0.208 ± 0.066,0.434 ± 0.040,0.118 ± 0.042
global,0.733 ± 0.033,0.679 ± 0.029,0.580 ± 0.042
