In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, cohen_kappa_score, accuracy_score, f1_score
from sklearn.metrics import precision_score, recall_score, classification_report


In [24]:
list_of_summaries = []

for sub_no in range(1,129):
    data = pd.read_csv(f'Dataset_clean_for_jupyter\\sub-{sub_no}\\eeg\\sub-{sub_no}_majority_aiPSG_aiHB_events.tsv', sep='\t')
    # print(data.info()) # for checking null values
    # Calculate simple agreement percentages
    agreement_human_psg = np.mean(data['majority'] == data['ai_psg']) * 100
    agreement_human_hb = np.mean(data['majority'] == data['ai_hb']) * 100
    agreement_psg_hb = np.mean(data['ai_psg'] == data['ai_hb']) * 100
    # Calculate Cohen's Kappa for both devices vs human labels
    kappa_psg = cohen_kappa_score(data['majority'], data['ai_psg'])
    kappa_hb = cohen_kappa_score(data['majority'], data['ai_hb'])
    kappa_hb_psg = cohen_kappa_score(data['ai_psg'], data['ai_hb'])
    # Analyze how well each device captures transitions between sleep stages
    def analyze_transitions(true_labels, pred_labels):
        transitions_true = sum(1 for i in range(1, len(true_labels)) if true_labels[i] != true_labels[i-1])
        transitions_pred = sum(1 for i in range(1, len(pred_labels)) if pred_labels[i] != pred_labels[i-1])
        
        # Calculate detected transitions
        detected_transitions = 0
        for i in range(1, len(true_labels)):
            if true_labels[i] != true_labels[i-1] and pred_labels[i] != pred_labels[i-1]:
                detected_transitions += 1
        
        transition_recall = detected_transitions / transitions_true if transitions_true > 0 else 0
        
        return {
            'true_transitions': transitions_true,
            'predicted_transitions': transitions_pred,
            'detected_transitions': detected_transitions,
            'transition_recall': transition_recall
        }
    
    psg_transitions = analyze_transitions(data['majority'].values, data['ai_psg'].values)
    hb_transitions = analyze_transitions(data['majority'].values, data['ai_hb'].values)
    hb_psg_transitions = analyze_transitions(data['ai_psg'].values, data['ai_hb'].values)

    # Create a summary table of all metrics
    summary = pd.DataFrame({
        'Metric': [
            'Overall Agreement (%)', 
            'Cohen\'s Kappa',
            'Transition Detection Rate'
        ],
        'PSG': [
            agreement_human_psg,
            kappa_psg,
            psg_transitions['transition_recall']
        ],
        'Headband': [
            agreement_human_hb,
            kappa_hb,
            hb_transitions['transition_recall']
        ],
        'HB_PSG': [
            agreement_psg_hb,
            kappa_hb,
            hb_psg_transitions['transition_recall']
        ]
    })
    
    df_name = f"summary_{sub_no}"
    globals()[df_name] = summary
    list_of_summaries.append(summary)



In [25]:
list_of_summaries[19]

Unnamed: 0,Metric,PSG,Headband,HB_PSG
0,Overall Agreement (%),88.358779,84.160305,83.778626
1,Cohen's Kappa,0.804323,0.735611,0.735611
2,Transition Detection Rate,0.390476,0.333333,0.247191


In [42]:
import pandas as pd
import numpy as np
import os
import glob

# Create a function to combine all summary matrices
def combine_summary_matrices(summary_list=None):
    """
    Combine multiple summary comparison matrices into one final matrix.
    
    Returns:
    --------
    pandas.DataFrame
        Combined summary matrix with average values
    """
    all_summaries = []
    
    # If list of dataframes is provided
    if summary_list is not None:
        all_summaries = summary_list
    else:
        raise ValueError("No summary matrices found to combine")
        
    # Initialize the combined matrix with the same structure
    metrics = all_summaries[0]['Metric'].tolist()
    combined_summary = pd.DataFrame({
        'Metric': metrics,
        'PSG': np.zeros(len(metrics)),
        'Headband': np.zeros(len(metrics)),
        'HB_PSG': np.zeros(len(metrics))
    })
    
    # Calculate mean for each metric
    for metric in metrics:
        # Extract values for this metric across all summaries
        psg_values = [df.loc[df['Metric'] == metric, 'PSG'].values[0] for df in all_summaries]
        hb_values = [df.loc[df['Metric'] == metric, 'Headband'].values[0] for df in all_summaries]
        hb_psg_values = [df.loc[df['Metric'] == metric, 'HB_PSG'].values[0] for df in all_summaries]

        
        # Calculate mean
        combined_summary.loc[combined_summary['Metric'] == metric, 'PSG'] = np.mean(psg_values)
        combined_summary.loc[combined_summary['Metric'] == metric, 'Headband'] = np.mean(hb_values)
        combined_summary.loc[combined_summary['Metric'] == metric, 'HB_PSG'] = np.mean(hb_psg_values)

    
    # Calculate standard deviations
    combined_summary['PSG_STD'] = 0.0
    combined_summary['Headband_STD'] = 0.0
    combined_summary['HB_PSG_STD'] = 0.0
    
    
    for metric in metrics:
        psg_values = [df.loc[df['Metric'] == metric, 'PSG'].values[0] for df in all_summaries]
        hb_values = [df.loc[df['Metric'] == metric, 'Headband'].values[0] for df in all_summaries]
        hb_psg_values = [df.loc[df['Metric'] == metric, 'HB_PSG'].values[0] for df in all_summaries]

        combined_summary.loc[combined_summary['Metric'] == metric, 'PSG_STD'] = np.std(psg_values)
        combined_summary.loc[combined_summary['Metric'] == metric, 'Headband_STD'] = np.std(hb_values)
        combined_summary.loc[combined_summary['Metric'] == metric, 'HB_PSG_STD'] = np.std(hb_psg_values)
    
    return combined_summary



# def format_final_matrix(final_matrix):
#     """Format the final matrix for better readability"""
#     formatted = final_matrix.copy()

#     def format_percentage(x):
#         return f"{x:.2f}%"

#     def format_percentage_std(x):
#         return f"\u00B1 {x:.2f}%"

#     def format_metric(x):
#         return f"{x:.4f}"

#     def format_metric_std(x):
#         return f"\u00B1 {x:.4f}"

#     # Format percentages for Overall Agreement
#     mask_overall = formatted['Metric'] == 'Overall Agreement (%)'
#     formatted.loc[mask_overall, 'PSG'] = formatted.loc[mask_overall, 'PSG'].apply(format_percentage)
#     formatted.loc[mask_overall, 'PSG_STD'] = formatted.loc[mask_overall, 'PSG_STD'].apply(format_percentage_std)
#     formatted.loc[mask_overall, 'Headband'] = formatted.loc[mask_overall, 'Headband'].apply(format_percentage)
#     formatted.loc[mask_overall, 'Headband_STD'] = formatted.loc[mask_overall, 'Headband_STD'].apply(format_percentage_std)
#     formatted.loc[mask_overall, 'HB_PSG'] = formatted.loc[mask_overall, 'HB_PSG'].apply(format_percentage)
#     formatted.loc[mask_overall, 'HB_PSG_STD'] = formatted.loc[mask_overall, 'HB_PSG_STD'].apply(format_percentage_std)

#     # Format other metrics
#     for metric in ['Cohen\'s Kappa', 'Transition Detection Rate']:
#         mask_metric = formatted['Metric'] == metric
#         formatted.loc[mask_metric, 'PSG'] = formatted.loc[mask_metric, 'PSG'].apply(format_metric)
#         formatted.loc[mask_metric, 'PSG_STD'] = formatted.loc[mask_metric, 'PSG_STD'].apply(format_metric_std)
#         formatted.loc[mask_metric, 'Headband'] = formatted.loc[mask_metric, 'Headband'].apply(format_metric)
#         formatted.loc[mask_metric, 'Headband_STD'] = formatted.loc[mask_metric, 'Headband_STD'].apply(format_metric_std)
#         formatted.loc[mask_metric, 'HB_PSG'] = formatted.loc[mask_metric, 'HB_PSG'].apply(format_metric)
#         formatted.loc[mask_metric, 'HB_PSG_STD'] = formatted.loc[mask_metric, 'HB_PSG_STD'].apply(format_metric_std)
       
#     return formatted

def format_final_matrix(final_matrix):
    """Format the final matrix for better readability"""
    formatted = final_matrix.copy()

    def format_combined(value, std, is_percentage=False):
        """Formats value and std into a combined string."""
        if is_percentage:
            return f"{value:.2f}% \u00B1 {std:.2f}%"
        else:
            return f"{value:.4f} \u00B1 {std:.4f}"

    # Format percentages for Overall Agreement
    mask_overall = formatted['Metric'] == 'Overall Agreement (%)'
    if not mask_overall.empty:
        formatted.loc[mask_overall, 'PSG'] = formatted.loc[mask_overall, ['PSG', 'PSG_STD']].apply(
            lambda row: format_combined(row['PSG'], row['PSG_STD'], is_percentage=True), axis=1)
        formatted.loc[mask_overall, 'Headband'] = formatted.loc[mask_overall, ['Headband', 'Headband_STD']].apply(
            lambda row: format_combined(row['Headband'], row['Headband_STD'], is_percentage=True), axis=1)
        formatted.loc[mask_overall, 'HB_PSG'] = formatted.loc[mask_overall, ['HB_PSG', 'HB_PSG_STD']].apply(
            lambda row: format_combined(row['HB_PSG'], row['HB_PSG_STD'], is_percentage=True), axis=1)

    # Format other metrics
    for metric in ['Cohen\'s Kappa', 'Transition Detection Rate']:
        mask_metric = formatted['Metric'] == metric
        if not mask_metric.empty:
            formatted.loc[mask_metric, 'PSG'] = formatted.loc[mask_metric, ['PSG', 'PSG_STD']].apply(
                lambda row: format_combined(row['PSG'], row['PSG_STD']), axis=1)
            formatted.loc[mask_metric, 'Headband'] = formatted.loc[mask_metric, ['Headband', 'Headband_STD']].apply(
                lambda row: format_combined(row['Headband'], row['Headband_STD']), axis=1)
            formatted.loc[mask_metric, 'HB_PSG'] = formatted.loc[mask_metric, ['HB_PSG', 'HB_PSG_STD']].apply(
                lambda row: format_combined(row['HB_PSG'], row['HB_PSG_STD']), axis=1)

    # Drop the STD columns
    formatted = formatted.drop(columns=['PSG_STD', 'Headband_STD', 'HB_PSG_STD'])

    return formatted

In [43]:
final_matrix1 = combine_summary_matrices(summary_list=list_of_summaries)

In [44]:
final_Result = format_final_matrix(final_matrix1)

  formatted.loc[mask_overall, 'PSG'] = formatted.loc[mask_overall, ['PSG', 'PSG_STD']].apply(
  formatted.loc[mask_overall, 'Headband'] = formatted.loc[mask_overall, ['Headband', 'Headband_STD']].apply(
  formatted.loc[mask_overall, 'HB_PSG'] = formatted.loc[mask_overall, ['HB_PSG', 'HB_PSG_STD']].apply(


In [45]:
final_Result

Unnamed: 0,Metric,PSG,Headband,HB_PSG
0,Overall Agreement (%),86.63% ± 6.79%,83.02% ± 11.18%,85.35% ± 10.19%
1,Cohen's Kappa,0.7662 ± 0.1072,0.7149 ± 0.1467,0.7149 ± 0.1467
2,Transition Detection Rate,0.4059 ± 0.1179,0.3307 ± 0.1121,0.3497 ± 0.1238
