In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.cm as cm
import matplotlib.patches as mpatches
import matplotlib.ticker as plticker
import seaborn as sns
import os
from statistics import mean
import ast
import scipy.stats as sc_stats
from math import isnan

In [2]:
def plot_PC_heatmap(df, title, name):
    """Draw ``df`` as an annotated heatmap of Pearson correlations and save it.

    The colour scale is fixed to the range 0.70-0.84 so that figures produced
    by separate calls share the same scale.

    Parameters
    ----------
    df : pandas.DataFrame
        Table of values to plot; passed unchanged to ``seaborn.heatmap``.
    title : str
        Title placed above the heatmap.
    name : str
        Output file prefix. The figure is written to
        ``name + "_performance.png"``.

    Returns
    -------
    None
        The figure is saved to disk and then closed.
    """
    # Apply the seaborn theme *before* the figure exists. Axes read their
    # styling from rcParams when they are created, so theming afterwards left
    # the first figure of a fresh kernel styled differently from later ones.
    sns.set(font_scale=2)
    fig, ax = plt.subplots(figsize=(14, 11), dpi=300)

    # Draw on the axes created above instead of relying on the implicit
    # "current axes" state.
    sns.heatmap(
        df,
        annot=True,
        fmt=".2f",
        linewidths=0.1,
        cmap='viridis_r',
        vmin=0.7,
        vmax=0.84,
        square=True,
        cbar=True,
        cbar_kws={
            'label': 'Pearson correlation coefficient',
            "ticks": [0.7, 0.72, 0.74, 0.76, 0.78, 0.8, 0.82, 0.84],
        },
        ax=ax,
    )

    ax.set_title(title, fontsize=22, weight='bold', y=1.02)
    ax.set_ylabel("")

    # Re-apply the existing tick labels with explicit size, weight and rotation.
    ax.set_xticklabels(ax.get_xmajorticklabels(), fontsize=20, rotation=90, weight="bold")
    ax.set_yticklabels(ax.get_ymajorticklabels(), fontsize=20, rotation=0, weight="bold")

    fig.tight_layout()
    fig.savefig(name+"_performance.png", transparent=False, facecolor="w", dpi=300)
    # Close this specific figure so an unrelated open figure is never the one
    # that gets closed.
    plt.close(fig)
    
def plot_RMSE_heatmap(df, title, name):
    """Draw ``df`` as an annotated heatmap of RMSE values and save it.

    The colour scale is fixed to the range 1.20-1.50 so that figures produced
    by separate calls share the same scale.

    Parameters
    ----------
    df : pandas.DataFrame
        Table of values to plot; passed unchanged to ``seaborn.heatmap``.
    title : str
        Title placed above the heatmap.
    name : str
        Output file prefix. The figure is written to
        ``name + "_RMSE_performance.png"``.

    Returns
    -------
    None
        The figure is saved to disk and then closed.
    """
    # Apply the seaborn theme *before* the figure exists. Axes read their
    # styling from rcParams when they are created, so theming afterwards left
    # the first figure of a fresh kernel styled differently from later ones.
    sns.set(font_scale=2)
    fig, ax = plt.subplots(figsize=(14, 11), dpi=300)

    # Draw on the axes created above instead of relying on the implicit
    # "current axes" state.
    sns.heatmap(
        df,
        annot=True,
        fmt=".2f",
        linewidths=0.1,
        cmap='viridis_r',
        vmin=1.2,
        vmax=1.5,
        square=True,
        cbar=True,
        cbar_kws={
            'label': 'RMSE [pK]',
            "ticks": [1.20, 1.25, 1.30, 1.35, 1.40, 1.45, 1.50],
        },
        ax=ax,
    )

    ax.set_title(title, fontsize=22, weight='bold', y=1.02)
    ax.set_ylabel("")

    # Re-apply the existing tick labels with explicit size, weight and rotation.
    ax.set_xticklabels(ax.get_xmajorticklabels(), fontsize=20, rotation=90, weight="bold")
    ax.set_yticklabels(ax.get_ymajorticklabels(), fontsize=20, rotation=0, weight="bold")

    fig.tight_layout()
    fig.savefig(name+"_RMSE_performance.png", transparent=False, facecolor="w", dpi=300)
    # Close this specific figure so an unrelated open figure is never the one
    # that gets closed.
    plt.close(fig)

In [3]:
# Load the result tables; the first CSV column becomes the row index.
data_folder = "data/"


def _read_table(filename):
    """Read one CSV from ``data_folder`` using its first column as the index."""
    return pd.read_csv(data_folder + filename, index_col=0)


# Tables passed to plot_PC_heatmap further down.
df_crystal = _read_table("casf2016_ensembles_crystal.csv")
df_docked = _read_table("casf2016_ensembles_docked.csv")
df_lig = _read_table("casf2016_ensembles_ligand.csv")
df_ens = _read_table("casf2016_multi-model_ensembles.csv")

# Matching tables passed to plot_RMSE_heatmap further down.
df_crystal_RMSE = _read_table("casf2016_ensembles_crystal_RMSE.csv")
df_docked_RMSE = _read_table("casf2016_ensembles_docked_RMSE.csv")
df_lig_RMSE = _read_table("casf2016_ensembles_ligand_RMSE.csv")
df_ens_RMSE = _read_table("casf2016_multi-model_ensembles_RMSE.csv")

In [4]:
# Disabled: these lines would reorder each RMSE table to follow the row order
# of its matching correlation table. While they stay commented out, the RMSE
# tables keep the row order they were loaded with.
# NOTE(review): confirm the CSV pairs already share the same row order.
#df_crystal_RMSE = df_crystal_RMSE.reindex(df_crystal.index)
#df_docked_RMSE = df_docked_RMSE.reindex(df_docked.index)
#df_lig_RMSE = df_lig_RMSE.reindex(df_lig.index)
#df_ens_RMSE = df_ens_RMSE.reindex(df_ens.index)
print("done")  # the only live statement in this cell

done


In [5]:
# Docked poses: one correlation figure and one RMSE figure.
docked_title = "CASF-2016 Docked Poses"
docked_prefix = "docked"
docked_columns = ["Sequence", "No Sequence"]

plot_PC_heatmap(df_docked[docked_columns], docked_title, docked_prefix)
plot_RMSE_heatmap(df_docked_RMSE[docked_columns], docked_title, docked_prefix)

In [6]:
# Crystal poses: one correlation figure and one RMSE figure.
crystal_title = "CASF-2016 Crystal Poses"
crystal_prefix = "crystal"
crystal_columns = ["Sequence", "No Sequence"]

plot_PC_heatmap(df_crystal[crystal_columns], crystal_title, crystal_prefix)
plot_RMSE_heatmap(df_crystal_RMSE[crystal_columns], crystal_title, crystal_prefix)

In [7]:
# Ligand-based models: one correlation figure and one RMSE figure.
ligand_title = "CASF-2016 Ligand-based"
ligand_prefix = "ligand_based"
ligand_columns = ["Sequence", "No Sequence"]

plot_PC_heatmap(df_lig[ligand_columns], ligand_title, ligand_prefix)
plot_RMSE_heatmap(df_lig_RMSE[ligand_columns], ligand_title, ligand_prefix)

In [8]:
# Shorter row labels for the ensemble tables, listed in the tables' row order.
new_index = [
    'Crystal All PLIGs',
    'Docked All PLIGs',
    'Crystal GATNet(PLIG)+MLPNet(ECIF)',
    'Docked GATNet(PLIG)+MLPNet(ECIF)',
    'Crystal GATNet (PLIG)+MLPNet (ECFP512)',
    'Docked GATNet (PLIG)+MLPNet (ECFP512)',
    'Crystal MLPNet (ECIF)+MLPNet (ECFP512)',
    'Docked MLPNet (ECIF)+MLPNet (ECFP512)',
]

ensemble_frames = (df_ens, df_ens_RMSE)

# Attach the labels as a column on both tables first ...
for frame in ensemble_frames:
    frame["New_name"] = new_index

# ... then promote that column to the index of each table, in place.
for frame in ensemble_frames:
    frame.set_index("New_name", inplace=True)

In [9]:
# Keep only the rows whose index label is listed in relevant_entries.
relevant_entries = [
    "Crystal All PLIGs",
    "Docked All PLIGs",
    "Crystal GATNet(PLIG)+MLPNet(ECIF)",
    "Docked GATNet(PLIG)+MLPNet(ECIF)",
    "Crystal GATNet (PLIG)+MLPNet (ECFP512)",
    "Docked GATNet (PLIG)+MLPNet (ECFP512)",
    "Crystal MLPNet (ECIF)+MLPNet (ECFP512)",
    "Docked MLPNet (ECIF)+MLPNet (ECFP512)",
]

keep_pc_rows = df_ens.index.isin(relevant_entries)
df_ens_new = df_ens.loc[keep_pc_rows]

keep_rmse_rows = df_ens_RMSE.index.isin(relevant_entries)
df_ens_RMSE_new = df_ens_RMSE.loc[keep_rmse_rows]

In [10]:
# Multi-model ensembles: single-column heatmaps for correlation and RMSE.
ensemble_title = "CASF-2016 Multi-model ensembles"
ensemble_prefix = "multi-model_ensembles"

plot_PC_heatmap(df_ens_new[["PC"]], ensemble_title, ensemble_prefix)
plot_RMSE_heatmap(df_ens_RMSE_new[["RMSE"]], ensemble_title, ensemble_prefix)