In [7]:
import pandas as pd
import numpy as np
from io import StringIO
# Compare the Best_Ind_ChP_CTC column against the best of the other four
# method columns (SIMPLE, STAPLE, CTC, Baseline) for every dataset sequence,
# and report the relative improvement in percent. Higher values are treated
# as better throughout (row-wise max / idxmax below).

# Raw table, space-separated. Two missing-value sentinels appear in it:
#   '-'   -> no Best_Ind_ChP_CTC / Segments value for that sequence
#   'N/A' -> no SIMPLE / STAPLE value for that sequence
data = '''Dataset_Seq SIMPLE STAPLE CTC Baseline Best_Ind_ChP_CTC Segments
BF-C2DL-HSC_01 0.898 0.895 0.898 0.895 0.902 80
BF-C2DL-HSC_02 0.853 0.853 0.853 0.847 0.886 48
BF-C2DL-MuSC_01 0.820 0.823 0.833 0.814 0.871 39
BF-C2DL-MuSC_02 0.784 0.784 0.784 0.776 0.836 80
DIC-C2DH-HeLa_01 0.961 0.961 0.965 0.961 0.966 98
DIC-C2DH-HeLa_02 0.961 0.961 0.965 0.952 0.966 97
Fluo-C2DL-MSC_01 0.779 0.766 0.792 0.729 0.796 112
Fluo-C2DL-MSC_02 0.762 0.746 0.817 0.762 0.837 116
Fluo-C3DH-A549_01 0.975 0.991 0.991 0.991 - -
Fluo-C3DH-A549_02 0.954 0.991 0.991 0.991 - -
Fluo-C3DL-MDA231_01 0.729 0.718 0.739 0.684 0.661 120
Fluo-C3DL-MDA231_02 0.734 0.738 0.752 0.705 0.738 103
Fluo-N2DH-GOWT1_01 0.913 0.920 0.933 0.913 0.941 144
Fluo-N2DH-GOWT1_02 0.976 0.976 0.976 0.954 0.977 121
Fluo-N2DL-HeLa_01 0.874 0.872 0.884 0.860 0.904 491
Fluo-N2DL-HeLa_02 0.901 0.898 0.909 0.891 0.925 895
Fluo-N3DH-CHO_01 0.867 0.853 0.883 0.852 0.927 115
Fluo-N3DH-CHO_02 0.933 0.928 0.931 0.920 0.957 117
Fluo-N3DH-CE_01 N/A N/A 0.705 0.745 0.785 101
Fluo-N3DH-CE_02 N/A N/A 0.711 0.725 0.771 100
Fluo-C3DH-H157_01 0.845 0.891 0.901 0.891 0.891 119
Fluo-C3DH-H157_02 0.799 0.779 0.824 0.779 0.879 99
PhC-C2DL-PSC_01 0.779 0.779 0.785 0.764 0.818 242
PhC-C2DL-PSC_02 0.780 0.780 0.784 0.779 0.819 254
PhC-C2DH-U373_01 0.949 0.948 0.951 0.947 0.953 100
PhC-C2DH-U373_02 0.884 0.882 0.888 0.880 0.897 110'''

# Columns holding the competing methods, and the column they are compared to.
OTHER_METHODS = ['SIMPLE', 'STAPLE', 'CTC', 'Baseline']
TARGET = 'Best_Ind_ChP_CTC'

# Declare both sentinels to the parser so every score column (and Segments)
# is read as a number straight away. 'N/A' is already in pandas' default NA
# list; it is listed explicitly only to document the intent.
df = pd.read_csv(StringIO(data), sep=' ', na_values=['-', 'N/A'])

# A value that is neither numeric nor a declared sentinel leaves its column as
# non-numeric; fail loudly here rather than silently coercing it to NaN.
bad_cols = [c for c in OTHER_METHODS + [TARGET, 'Segments']
            if not pd.api.types.is_numeric_dtype(df[c])]
assert not bad_cols, f"non-numeric values in columns: {bad_cols}"

# Drop sequences that have no Best_Ind_ChP_CTC value (the '-' rows). The
# remaining Segments values are all present, so they can be stored as ints.
df = df.dropna(subset=[TARGET]).astype({'Segments': 'int64'})

# Best competing value per row and the method that achieved it. NaN entries
# are skipped; on ties idxmax reports the first column in OTHER_METHODS order.
df['Max_Others'] = df[OTHER_METHODS].max(axis=1)
df['Best_Method'] = df[OTHER_METHODS].idxmax(axis=1)

# Relative improvement of the target over the best competing method, in %.
df['Improvement'] = (df[TARGET] - df['Max_Others']) / df['Max_Others'] * 100

# Largest improvement first.
df_sorted = df.sort_values('Improvement', ascending=False)

# Presentation table with rounded values.
results = pd.DataFrame({
    'Dataset': df_sorted['Dataset_Seq'],
    'Best_Ind_ChP_CTC': df_sorted[TARGET].round(3),
    'Previous_Best': df_sorted['Max_Others'].round(3),
    'Best_Method': df_sorted['Best_Method'],
    'Improvement_%': df_sorted['Improvement'].round(2)
})

# Print the table followed by a short win/loss summary.
is_better = df_sorted['Improvement'] > 0
is_worse = df_sorted['Improvement'] < 0

print("\nPerformance Analysis (sorted by improvement):")
print("="*100)
print(results.to_string(index=False))
print("\nSummary:")
print("-"*50)
print(f"Number of datasets where Best_Ind_ChP_CTC is better: {is_better.sum()}")
print(f"Number of datasets where Best_Ind_ChP_CTC is worse: {is_worse.sum()}")
# .loc avoids chained indexing; the mean is NaN when no dataset improved.
print(f"Average improvement where better: {df_sorted.loc[is_better, 'Improvement'].mean():.2f}%")



Performance Analysis (sorted by improvement):
            Dataset  Best_Ind_ChP_CTC  Previous_Best Best_Method  Improvement_%
  Fluo-C3DH-H157_02             0.879          0.824         CTC           6.67
    BF-C2DL-MuSC_02             0.836          0.784      SIMPLE           6.63
    Fluo-N3DH-CE_02             0.771          0.725    Baseline           6.34
    Fluo-N3DH-CE_01             0.785          0.745    Baseline           5.37
   Fluo-N3DH-CHO_01             0.927          0.883         CTC           4.98
    BF-C2DL-MuSC_01             0.871          0.833         CTC           4.56
    PhC-C2DL-PSC_02             0.819          0.784         CTC           4.46
    PhC-C2DL-PSC_01             0.818          0.785         CTC           4.20
     BF-C2DL-HSC_02             0.886          0.853      SIMPLE           3.87
   Fluo-N3DH-CHO_02             0.957          0.933      SIMPLE           2.57
   Fluo-C2DL-MSC_02             0.837          0.817         CTC         