In [None]:
import ast
import json

import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Read the score table used by the cells below, which access its
# 'Scores', 'Dimension' and 'Model' columns.
file_path = "scores_scaffold_nonllps.csv"
data = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
def parse_scores(json_str):
    """Parse a serialized score mapping into a Python object.

    Three strategies are tried in order:

    1. Strict JSON, so valid JSON that contains an apostrophe inside a string
       value is not corrupted by quote rewriting.
    2. A Python literal (single-quoted keys, ``True``/``False``/``None``),
       evaluated safely with ``ast.literal_eval``.
    3. The legacy approach of swapping every single quote for a double quote
       and parsing as JSON, kept so inputs that parsed before still parse.

    Args:
        json_str: Text holding the serialized scores.

    Returns:
        The decoded object (a dict for the score strings used in this notebook).

    Raises:
        json.JSONDecodeError: If none of the strategies can parse the text.
        TypeError: If ``json_str`` is not a str, bytes or bytearray.
    """
    try:
        return json.loads(json_str)
    except json.JSONDecodeError:
        pass  # Not strict JSON; fall through to the Python-literal parser.

    try:
        return ast.literal_eval(json_str)
    except (ValueError, SyntaxError, TypeError):
        # Last resort: the original quote-swapping behavior.
        return json.loads(json_str.replace("'", "\""))

# Turn each raw 'Scores' string into a dict. Values that are already dicts are
# passed through unchanged, so re-running this cell on parsed data does not fail.
data['Scores'] = data['Scores'].apply(
    lambda raw: raw if isinstance(raw, dict) else parse_scores(raw)
)
# Copy the 'prauc' entry of every score dict into its own column.
data['PR-AUC'] = data['Scores'].apply(lambda x: x['prauc'])
# Mean PR-AUC per (Dimension, Model) pair, with the group keys restored as columns.
prauc_means = data.groupby(['Dimension', 'Model'])['PR-AUC'].mean().reset_index()
prauc_means.head()

In [None]:
# Mean and standard deviation of PR-AUC for every (Dimension, Model) pair,
# with the group keys moved back into ordinary columns.
prauc_stats = (
    data
    .groupby(['Dimension', 'Model'])['PR-AUC']
    .agg(['mean', 'std'])
    .reset_index()
)

In [None]:
# Dimension is converted to strings so its values can be matched against the
# string labels in `dimension_order` and plotted as x-axis categories.
prauc_stats['Dimension'] = prauc_stats['Dimension'].astype(str)

# Left-to-right order of the input dimensions on the x-axis.
dimension_order = ['128', '64', '32', '16', '8', '4']

# Marker style per model name; names not listed here fall back to 'o'.
markers = {'nn': 'o', 'svm': 's', 'rf': '^', 'hgbc': 'D'}

fig, ax = plt.subplots(figsize=(6, 4))

unique_models = prauc_stats['Model'].unique()

# One line per model: mean PR-AUC against input dimension.
for model in unique_models:
    # Re-order this model's rows to follow dimension_order; a dimension
    # without a row for this model ends up with NaN values.
    model_data = (
        prauc_stats.loc[prauc_stats['Model'] == model]
        .set_index('Dimension')
        .reindex(dimension_order)
        .reset_index()
    )
    ax.plot(
        model_data['Dimension'],
        model_data['mean'],
        marker=markers.get(model, 'o'),
        label=model.upper(),
    )

ax.set_title('Average PR-AUC by Model and Input Dimension')
ax.set_xlabel('Input Dimension')
ax.set_ylabel('Average PR-AUC')
ax.set_xticks(dimension_order)
ax.set_xticklabels(dimension_order)
ax.legend(title='Model', loc='upper right')

fig.tight_layout()
fig.savefig("fig_S3_a.pdf")
plt.show()

In [None]:
# Read the second score table; the cells below access its
# 'Scores', 'Dimension' and 'Model' columns.
file_path = "scores_scaffold_client.csv"
data = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Turn each raw 'Scores' string into a dict. Values that are already dicts are
# passed through unchanged, so re-running this cell on parsed data does not fail.
data['Scores'] = data['Scores'].apply(
    lambda raw: raw if isinstance(raw, dict) else parse_scores(raw)
)
# Copy the 'prauc' entry of every score dict into its own column.
data['PR-AUC'] = data['Scores'].apply(lambda x: x['prauc'])
# Mean PR-AUC per (Dimension, Model) pair, with the group keys restored as columns.
prauc_means = data.groupby(['Dimension', 'Model'])['PR-AUC'].mean().reset_index()
prauc_means.head()

In [None]:
# Mean and standard deviation of PR-AUC for every (Dimension, Model) pair,
# with the group keys moved back into ordinary columns.
prauc_stats = (
    data
    .groupby(['Dimension', 'Model'])['PR-AUC']
    .agg(['mean', 'std'])
    .reset_index()
)

In [None]:
# Dimension is converted to strings so its values can be matched against the
# string labels in `dimension_order` and plotted as x-axis categories.
prauc_stats['Dimension'] = prauc_stats['Dimension'].astype(str)

# Left-to-right order of the input dimensions on the x-axis.
dimension_order = ['128', '64', '32', '16', '8', '4']

# Marker style per model name; names not listed here fall back to 'o'.
markers = {'nn': 'o', 'svm': 's', 'rf': '^', 'hgbc': 'D'}

fig, ax = plt.subplots(figsize=(6, 4))

unique_models = prauc_stats['Model'].unique()

# One line per model: mean PR-AUC against input dimension.
for model in unique_models:
    # Re-order this model's rows to follow dimension_order; a dimension
    # without a row for this model ends up with NaN values.
    model_data = (
        prauc_stats.loc[prauc_stats['Model'] == model]
        .set_index('Dimension')
        .reindex(dimension_order)
        .reset_index()
    )
    ax.plot(
        model_data['Dimension'],
        model_data['mean'],
        marker=markers.get(model, 'o'),
        label=model.upper(),
    )

ax.set_title('Average PR-AUC by Model and Input Dimension')
ax.set_xlabel('Input Dimension')
ax.set_ylabel('Average PR-AUC')
ax.set_xticks(dimension_order)
ax.set_xticklabels(dimension_order)
ax.legend(title='Model', loc='upper right')

fig.tight_layout()
fig.savefig("fig_S3_b.pdf")
plt.show()