In [None]:
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import os
from scipy.stats import spearmanr
# Column holding the runtime, and the per-tumour features it is compared with.
time_colname = 'Runtime in hours'
features = [
    'Number of clones',
    'Number of segments',
    'Ploidy',
    'Purity',
]

# The relative paths below are written for the bin/ANALYSIS directory, so step
# into it when the working directory ends with 'ALPACA-paper' (presumably the
# repository root -- confirm against the project layout).
if os.getcwd().endswith('ALPACA-paper'):
    os.chdir('bin/ANALYSIS')

# Load the input table; the plotting code indexes it by the column names above.
primary_metrics_per_tumour = pd.read_csv('../../_assets/runtime_input.csv')

# Plot runtime against each feature in a 2x2 grid of scatter plots, annotate
# every panel with its Spearman correlation, and save the figure as a PDF.

# Configure the seaborn theme in ONE call, before the figure is created.
# `sns.set` is an alias of `set_theme`, and every call resets the arguments it
# is not given to their defaults (style="darkgrid", font_scale=1). Calling it
# a second time with only `font_scale` therefore silently dropped the
# "whitegrid" style and overwrote the `font.size` rcParam set in between.
font_size = 16
sns.set(style="whitegrid", font_scale=1.3)
# Applied after the theme call so that seaborn does not overwrite it.
plt.rcParams.update({'font.size': font_size})

fig, axes = plt.subplots(2, 2, figsize=(10, 10))
for ax, feature in zip(axes.flat, features):
    sns.scatterplot(
        data=primary_metrics_per_tumour,
        x=feature,
        y=time_colname,
        ax=ax,
    )
    ax.set_ylabel('Runtime per tumour (h)')
    ax.set_xlabel(feature)

    # Rank correlation between the feature and the runtime column.
    rho, p_value = spearmanr(
        primary_metrics_per_tumour[feature],
        primary_metrics_per_tumour[time_colname],
    )
    print(f'{feature} vs {time_colname}: rho={rho}, p={p_value}')

    # Write the coefficient and its p-value in the top-left corner of the
    # panel; positions are fractions of the axes, not data coordinates.
    annotation_style = dict(
        xycoords='axes fraction',
        fontsize=font_size,
        ha='left',
        va='top',
        color='black',
    )
    ax.annotate(f'Rho = {rho:.2f}', xy=(0.05, 0.95), **annotation_style)
    ax.annotate(f'P = {p_value:.3e}', xy=(0.05, 0.9), **annotation_style)

fig.tight_layout()

fig.savefig('../../figures/Suppfig7g_runtime_vs_features.pdf', dpi=300)