In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from collections import defaultdict
from collections import Counter

In [None]:
def ArgparseEFFile(path, method=None):
    """Load one per-target metrics spreadsheet and tag every row with a method label.

    The sheet is read with ``pd.read_excel`` and its columns are renamed
    *positionally* to ``TARGET, AUROC, BEDROC, 0.5%EF, 1.0%EF, 5.0%EF``, so the
    file is expected to hold exactly these six columns in this order.
    Target names are converted to lower-case strings.

    Parameters
    ----------
    path : str
        Location of the Excel file.
    method : str, optional
        Label written to the ``Method`` column. When omitted, the label is the
        piece of ``path`` after its last underscore, cut at the first dot.

    Returns
    -------
    pandas.DataFrame
        The renamed table plus a constant ``Method`` column.
    """
    table = pd.read_excel(path)
    table.columns = ['TARGET', 'AUROC', 'BEDROC', '0.5%EF', '1.0%EF', '5.0%EF']
    table['TARGET'] = table['TARGET'].map(lambda raw: str(raw).lower())

    if method is not None:
        label = method
    else:
        # e.g. 'dekois_GOLD.xlsx' -> 'GOLD'
        label = path.rsplit('_', 1)[-1].split('.', 1)[0]

    table['Method'] = [label] * len(table)
    return table

def GetTargetInfo(path):
    """Read the target annotation sheet and return a target -> family table.

    Parameters
    ----------
    path : str
        Location of an Excel file that has at least the columns
        ``'Target Name'`` and ``'Classification'``.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``TARGET`` (lower-cased target name, as a string) and
        ``family`` (the ``Classification`` value, unchanged).
    """
    data_informations = pd.read_excel(path)
    # Work on an explicit copy so the column edits below never act on a
    # view of the frame that was just read.
    data_family = data_informations.loc[:, ['Target Name', 'Classification']].copy()
    data_family.columns = ['TARGET', 'family']
    # Stringify before lower-casing, exactly as ArgparseEFFile does, so a
    # numeric or missing target name does not raise AttributeError and the
    # TARGET keys of both tables are normalized the same way.
    data_family['TARGET'] = data_family['TARGET'].map(lambda x: str(x).lower())
    return data_family
def GetPandasDataEF(data):
    """Stack every enrichment-factor column of ``data`` into long format.

    Each column whose name contains ``'EF'`` contributes ``len(data)`` rows,
    in column order, paired with the matching ``Method`` values.

    Returns
    -------
    pandas.DataFrame
        Columns ``name`` (source column label), ``efs`` (the value) and
        ``method`` (copied from ``data['Method']``).
    """
    labels, values, methods = [], [], []
    for metric in (col for col in data.columns if 'EF' in col):
        labels += [metric] * len(data)
        values += list(data[metric].values)
        methods += list(data['Method'].values)
    return pd.DataFrame({'name': labels, 'efs': values, 'method': methods})
def GetPandasDataROC(data):
    """Stack every ROC-type column of ``data`` into long format.

    Each column whose name contains ``'ROC'`` (e.g. ``AUROC``, ``BEDROC``)
    contributes ``len(data)`` rows, in column order, paired with the matching
    ``Method`` values. Missing entries in the result are replaced by ``0.0``.

    Returns
    -------
    pandas.DataFrame
        Columns ``name`` (source column label), ``efs`` (the value; the column
        keeps the same name the EF variant uses) and ``method``.
    """
    labels, scores, methods = [], [], []
    for metric in (col for col in data.columns if 'ROC' in col):
        labels += [metric] * len(data)
        scores += list(data[metric].values)
        methods += list(data['Method'].values)
    stacked = pd.DataFrame({'name': labels, 'efs': scores, 'method': methods})
    return stacked.fillna(0.0)

In [None]:
# Result spreadsheets, listed in pairs: a docking program's own scores,
# followed by the same program's results with EquiScore.
data_list = [
    'dekois_AutoDock Vina.xlsx', 'dekois2_VINA_EquiScore.xlsx',
    'dekois_GOLD.xlsx', 'dekois2_GOLD_EquiScore.xlsx',
    'dekois_Surflex-Dock.xlsx', 'dekois2_SURFLEX_EquiScore.xlsx',
    'dekois_LeDock.xlsx', 'dekois2_LEDOCK_EquiScore.xlsx',
    'dekois_GLIDE SP.xlsx', 'dekois2_SP_EquiScore.xlsx',
]

# Display label for each spreadsheet above, position by position.
methods = [
    'AutoDock Vina', 'AutoDock Vina + EquiScore',
    'GOLD', 'GOLD + EquiScore',
    'Surflex-Dock', 'Surflex-Dock + EquiScore',
    'LeDock', 'LeDock + EquiScore',
    'GLIDE SP', 'GLIDE SP + EquiScore',
]

# Load every sheet with its label, then stack them into one table.
datas = []
for data_path, method in zip(data_list, methods):
    data_raw = ArgparseEFFile(data_path, method)
    datas += [data_raw]
datas = pd.concat(datas, axis=0)

# Long-format EF table plus two grouping columns derived from the label:
# the docking program (text before ' +') and a YES/NO EquiScore flag.
efs_df = GetPandasDataEF(datas)
efs_df['dock_method'] = efs_df['method'].map(
    lambda label: label.split(' +')[0] if '+' in label else label
)
efs_df['With EquiScore'] = efs_df['method'].map(
    lambda label: 'YES' if '+' in label else 'NO'
)

In [None]:
import random

# Names of seaborn's palette variants; only 'pastel' is used below.
color_styles = ['deep', 'muted', 'pastel', 'bright', 'dark', 'colorblind']

# Seed both the stdlib and NumPy global random generators.
random.seed(42)
np.random.seed(42)

current_palette = sns.color_palette('pastel')
# Overwrite the first two slots with the 6th and 5th pastel colors
# (presumably so the two hue levels in the boxplots pick these up).
current_palette[0], current_palette[1] = current_palette[5], current_palette[4]

In [None]:
# Inspect the long-format EF table (columns: name, efs, method, dock_method, With EquiScore).
efs_df

In [None]:
# Inspect the palette after its first two colors were overridden.
current_palette

In [None]:
# Boxplot of the enrichment factor per docking program, split by whether
# EquiScore was used; the white dot in each box marks the mean.
sns.set_theme(style="ticks", palette=current_palette)
figure = plt.figure(figsize = (12,8))

# Single source of truth for the EF cutoff: both the row filter and the axis
# label are built from it. Previously the filter selected '1.0%EF' while the
# label was hard-coded to 0.5%, so the figure was mislabeled.
ef_cutoff = '1.0%'
fig = sns.boxplot(x="dock_method", y="efs",hue='With EquiScore',showmeans=True,meanprops={"marker":"o",
                       "markerfacecolor":"white", 
                       "markeredgecolor":"black",
                      "markersize":"5"},
            data=efs_df[efs_df['name'] == f'{ef_cutoff}EF'])

plt.ylabel(f"Enrichment Factor({ef_cutoff})",fontsize = 20)
plt.yticks(fontsize = 16)
plt.xlabel('Method',fontsize = 20,labelpad=12)
# Trailing semicolon keeps the tick-object repr out of the cell output.
plt.xticks(fontsize = 16);

In [None]:

# Long-format AUROC/BEDROC table with the same two grouping columns as the EF
# table: the docking program (text before ' +') and a YES/NO EquiScore flag.
efs_df_ROC = GetPandasDataROC(datas)
efs_df_ROC['dock_method'] = efs_df_ROC['method'].map(
    lambda label: label.split(' +')[0] if '+' in label else label
)
efs_df_ROC['With EquiScore'] = efs_df_ROC['method'].map(
    lambda label: 'YES' if '+' in label else 'NO'
)

In [None]:
# Boxplot of one ROC-type metric per docking program, split by whether
# EquiScore was used; the white dot in each box marks the mean.
sns.set_theme(style="ticks", palette=current_palette)
figure = plt.figure(figsize = (12,8))

# Single source of truth for the plotted metric: both the row filter and the
# axis label use it. Previously the filter selected 'AUROC' while the label
# was hard-coded to "BEDROC", so the figure was mislabeled.
roc_metric = 'AUROC'
fig = sns.boxplot(x="dock_method", y="efs",hue='With EquiScore',showmeans=True,meanprops={"marker":"o",
                       "markerfacecolor":"white", 
                       "markeredgecolor":"black",
                      "markersize":"5"},
            data=efs_df_ROC[efs_df_ROC['name'] == roc_metric])

plt.ylabel(roc_metric,fontsize = 20)
plt.yticks(fontsize = 16)
plt.xlabel('Method',fontsize = 20,labelpad=12)
# Trailing semicolon keeps the tick-object repr out of the cell output.
plt.xticks(fontsize = 16);