In [None]:
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
import seaborn as sb

In [None]:
# Modalities are as in Supplementary Figure 5. Set INCLUDE_LATE_FUSION = True to
# also plot the combined (late-fusion) modalities.
INCLUDE_LATE_FUSION = False

# Values of the 'descriptor' column for the single modalities.
single_modalities = ['mobc_es_op', 'cp_es_op', 'ge_es_op']
# Values of the 'descriptor' column for the late-fusion combinations.
# One entry per line with an explicit trailing comma: adjacent string literals
# without a comma are silently concatenated into a single (wrong) name.
late_fusion_modalities = [
    'late_fusion_cs_ge',
    'late_fusion_cs_ge_mobc',
    'late_fusion_cs_mobc',
    'late_fusion_ge_mobc',
]
# Descriptors kept by the filtering step further down.
modalities = single_modalities + (late_fusion_modalities if INCLUDE_LATE_FUSION else [])

# Raw descriptor name -> short label shown in the plot legend.
modalities_dict = {
    'mobc_es_op': 'MO',
    'cp_es_op': 'CS',
    'ge_es_op': 'GE',
    'late_fusion_cs_ge': 'CS+GE',
    'late_fusion_cs_ge_mobc': 'CS+GE+MO',
    'late_fusion_cs_mobc': 'CS+MO',
    'late_fusion_ge_mobc': 'GE+MO',
}
# Short label -> hex colour, used as the seaborn palette.
colors_dict = {
    'MO': '#6aa84f',
    'CS': '#f1c232',
    'GE': '#4a86e8',
    'CS+GE': '#f8766d',
    'CS+GE+MO': '#7cae00',
    'CS+MO': '#00bfc4',
    'GE+MO': '#c77cff',
}

In [None]:
# Input tables: the assay matrix and the median AUC table, the latter indexed by
# assay_id.
assay_file = '../data/assay_matrix_discrete_270_assays.csv'
median_aucs_file = '../predictions/scaffold_median_AUC.csv'
assay_df = pd.read_csv(assay_file)
median_aucs_df = pd.read_csv(median_aucs_file).set_index('assay_id')

# Keep only the selected modalities (+ late fusion when enabled).
# .copy() gives an independent frame, and the relabelled column is assigned back
# explicitly: a chained `df[col].replace(..., inplace=True)` acts on a temporary
# Series and does not update the frame under pandas Copy-on-Write.
median_aucs_df = median_aucs_df[median_aucs_df['descriptor'].isin(modalities)].copy()
median_aucs_df['descriptor'] = median_aucs_df['descriptor'].replace(modalities_dict)

# Every column after the first one is treated as an assay.
assays = assay_df.columns[1:]

# Per-assay counts, computed column-wise in one pass.
# Cast with the builtin `float`: the `np.float` alias was removed in NumPy 1.24.
assay_values = assay_df[assays].astype(float)

# readouts: number of non-missing entries per assay column.
readouts_df = assay_values.notna().sum().rename("readouts").to_frame()
# hits: sum of the non-missing entries per assay column (NaNs are skipped).
hits_df = assay_values.sum().rename("hits").to_frame()

# Plain-dict views keyed by assay column name, kept for any code that still
# expects the dictionaries.
readouts = readouts_df["readouts"].to_dict()
hits = hits_df["hits"].to_dict()

# Attach both counts to every (assay, descriptor) AUC row by joining on the index.
median_aucs_df_readouts_hits_df = (
    median_aucs_df
    .merge(readouts_df, left_index=True, right_index=True)
    .merge(hits_df, left_index=True, right_index=True)
)

# Scatter plot with a fitted regression line per descriptor: AUC against the
# number of readouts, then AUC against the number of hits.
for x_column in ("readouts", "hits"):
    sb.lmplot(
        data=median_aucs_df_readouts_hits_df,
        x=x_column,
        y="auc",
        hue="descriptor",
        palette=colors_dict,
        height=9,
        aspect=1.5,
        fit_reg=True,
    )
    plt.show()

In [None]:
# Same two scatter plots without the regression fit and with a logarithmic
# x-axis.
lmplot_options = dict(
    y="auc",
    hue="descriptor",
    data=median_aucs_df_readouts_hits_df,
    height=9,
    aspect=1.5,
    palette=colors_dict,
    fit_reg=False,
)
for x_column in ("readouts", "hits"):
    grid = sb.lmplot(x=x_column, **lmplot_options)
    grid.set(xscale="log")
    plt.show()

In [None]:
# Paths of the median AUC and median EF tables, each loaded into its own frame.
median_auc = '../predictions/scaffold_median_AUC.csv'
median_ef = '../predictions/scaffold_median_EF.csv'
median_auc_df, median_ef_df = (
    pd.read_csv(csv_path) for csv_path in (median_auc, median_ef)
)

In [None]:
# Join the AUC and EF tables on (assay_id, descriptor) so each row carries both
# the AUC and the EF columns.
median_auc_ef_df = median_auc_df.merge(median_ef_df, on=['assay_id', 'descriptor'])

In [None]:
# NOTE(review): this rebinds `colors_dict`, now keyed by raw descriptor names
# rather than short labels; re-running an earlier plotting cell after this one
# will pick up this palette.
colors_dict = {
    'mobc_es_op': '#6aa84f',
    'cp_es_op': '#f1c232',
    'ge_es_op': '#4a86e8',
}
sb.set(rc={'figure.figsize': (11.7, 8.27)})
sb.set_style("white")

# One figure per (EF column, descriptor) pair: AUC on x, enrichment factor on y.
for ef_column in ("EF1%", "EF5%", "EF10%"):
    for descriptor_name in colors_dict:
        is_current = median_auc_ef_df.descriptor == descriptor_name
        sb.scatterplot(
            data=median_auc_ef_df[is_current],
            x="auc",
            y=ef_column,
            hue="descriptor",
            palette=colors_dict,
            legend=False,
        )
        plt.show()