<a href="https://colab.research.google.com/github/JohannesKarwou/notebooks/blob/main/freeSolv2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [None]:
# If this notebook is opened from within the zip folder, please adjust the paths below so that they
# point to the csv files provided in the zip folder (summary_dG.csv and summary_ddG.csv).
# Each summary table is read straight from the GitHub repository into its own DataFrame.
df_ddg = pd.read_csv("https://raw.githubusercontent.com/JohannesKarwou/notebooks/main/data/summary_ddG.csv")
df_dg = pd.read_csv("https://raw.githubusercontent.com/JohannesKarwou/notebooks/main/data/summary_dG.csv")
df_hmr = pd.read_csv("https://raw.githubusercontent.com/JohannesKarwou/notebooks/main/data/summary_hmr_no_hmr.csv")

In [None]:
def bootstrap_function(x_values, y_values, n_boots=1000, ci=90, seed=None):
    """Print MAE and RMSE of two paired samples with bootstrap percentile intervals.

    Both metrics are evaluated once on the full data (the reported point
    estimate) and ``n_boots`` times on pairs resampled with replacement; the
    interval bounds are percentiles of the resampled values.

    Parameters
    ----------
    x_values, y_values : array-like
        Paired observations of equal length.
    n_boots : int, optional
        Number of bootstrap resamples (default 1000).
    ci : float, optional
        Confidence level in percent (default 90, i.e. 5th/95th percentiles).
    seed : int or None, optional
        Seed for a private ``RandomState`` so results are reproducible.
        ``None`` (default) draws from the global ``np.random`` state.

    Returns
    -------
    None
        The results are printed, not returned.
    """
    # Fall back to the global legacy generator when no seed is given, so a
    # notebook-level np.random.seed(...) call is still honoured.
    rng = np.random if seed is None else np.random.RandomState(seed)
    lower_p = (100.0 - ci) / 2.0
    upper_p = (100.0 + ci) / 2.0

    def bootstrap_metric(fct, x_values, y_values):
        """Return (point estimate, lower bound, upper bound) for ``fct``."""
        assert callable(fct)
        n_obs = len(x_values)
        bootstrapped_metric = []
        # resample (x, y) pairs with replacement to build the test distribution
        for _ in range(n_boots):
            indices = rng.choice(n_obs, size=n_obs, replace=True)
            x_selection = np.take(x_values, indices)
            y_selection = np.take(y_values, indices)
            bootstrapped_metric.append(fct(x_selection, y_selection))

        # value at or near the percentile (check the definition of percentile
        # when fewer than 100 values are available)
        lower = np.percentile(bootstrapped_metric, lower_p)
        upper = np.percentile(bootstrapped_metric, upper_p)
        # metric on the original, un-resampled data
        estimate = fct(x_values, y_values)
        return estimate, lower, upper

    def calc_rmse(x_values, y_values):
        """Root mean squared error between the two samples."""
        return np.sqrt(mean_squared_error(x_values, y_values))

    # bootstrap MAE
    mean, lower, upper = bootstrap_metric(mean_absolute_error, x_values, y_values)
    print(f'MAE:  {round(mean, 2):.2f} [{round(lower,2):.2f}, {round(upper,2):.2f}]')

    # bootstrap RMSE
    mean, lower, upper = bootstrap_metric(calc_rmse, x_values, y_values)
    print(f'RMSE:  {round(mean, 2):.2f} [{round(lower,2):.2f}, {round(upper,2):.2f}]')



In [None]:
def bootstrap_function_spearman(x_values, y_values, n_boots=1000, ci=90, seed=None):
    """Print Spearman's rank correlation with a bootstrap percentile interval.

    The coefficient is computed on the full data and on ``n_boots`` resamples
    (pairs drawn with replacement). Resamples in which either variable is
    constant have no defined correlation; they are skipped so that they cannot
    turn the interval bounds into NaN, which matters for small samples.

    Parameters
    ----------
    x_values, y_values : array-like
        Paired observations of equal length.
    n_boots : int, optional
        Number of bootstrap resamples (default 1000).
    ci : float, optional
        Confidence level in percent (default 90).
    seed : int or None, optional
        Seed for a private ``RandomState``; ``None`` (default) uses the global
        ``np.random`` state.

    Returns
    -------
    None
        The result is printed, not returned.
    """
    # Global legacy generator unless a seed is requested (keeps np.random.seed working).
    rng = np.random if seed is None else np.random.RandomState(seed)

    def bootstrap_metric(x_values, y_values):
        """Return (coefficient on full data, lower bound, upper bound)."""
        x_arr = np.asarray(x_values)
        y_arr = np.asarray(y_values)
        n_obs = len(x_arr)
        bootstrapped_metric = []
        # bootstrap metric to generate test distribution
        for _ in range(n_boots):
            indices = rng.choice(n_obs, size=n_obs, replace=True)
            x_selection = x_arr[indices]
            y_selection = y_arr[indices]
            # A constant resample has zero variance -> correlation undefined (NaN).
            if np.all(x_selection == x_selection[0]) or np.all(y_selection == y_selection[0]):
                continue
            r = scipy.stats.spearmanr(x_selection, y_selection)[0]
            if np.isfinite(r):
                bootstrapped_metric.append(r)

        if bootstrapped_metric:
            lower = np.percentile(bootstrapped_metric, (100.0 - ci) / 2.0)
            upper = np.percentile(bootstrapped_metric, (100.0 + ci) / 2.0)
        else:
            # no usable resample at all: the interval is undefined
            lower = upper = float('nan')
        # coefficient on the original, un-resampled data
        spearman = scipy.stats.spearmanr(x_values, y_values)[0]

        return spearman, lower, upper

    # bootstrap Spearman's correlation
    spear, lower, upper = bootstrap_metric(x_values, y_values)
    print(f"Spearman's correlation:  {round(spear, 2):.2f} [{round(lower,2):.2f}, {round(upper,2):.2f}]")

def bootstrap_function_pearson(x_values, y_values, n_boots=1000, ci=90, seed=None):
    """Print the Pearson correlation with a bootstrap percentile interval.

    The coefficient is computed on the full data and on ``n_boots`` resamples
    (pairs drawn with replacement). Resamples in which either variable is
    constant have no defined correlation; they are skipped so that they cannot
    turn the interval bounds into NaN, which matters for small samples.

    Parameters
    ----------
    x_values, y_values : array-like
        Paired observations of equal length.
    n_boots : int, optional
        Number of bootstrap resamples (default 1000).
    ci : float, optional
        Confidence level in percent (default 90).
    seed : int or None, optional
        Seed for a private ``RandomState``; ``None`` (default) uses the global
        ``np.random`` state.

    Returns
    -------
    None
        The result is printed, not returned.
    """
    # Global legacy generator unless a seed is requested (keeps np.random.seed working).
    rng = np.random if seed is None else np.random.RandomState(seed)

    def bootstrap_metric(x_values, y_values):
        """Return (coefficient on full data, lower bound, upper bound)."""
        x_arr = np.asarray(x_values)
        y_arr = np.asarray(y_values)
        n_obs = len(x_arr)
        bootstrapped_metric = []
        # bootstrap metric to generate test distribution
        for _ in range(n_boots):
            indices = rng.choice(n_obs, size=n_obs, replace=True)
            x_selection = x_arr[indices]
            y_selection = y_arr[indices]
            # A constant resample has zero variance -> correlation undefined (NaN).
            if np.all(x_selection == x_selection[0]) or np.all(y_selection == y_selection[0]):
                continue
            r = scipy.stats.pearsonr(x_selection, y_selection)[0]
            if np.isfinite(r):
                bootstrapped_metric.append(r)

        if bootstrapped_metric:
            lower = np.percentile(bootstrapped_metric, (100.0 - ci) / 2.0)
            upper = np.percentile(bootstrapped_metric, (100.0 + ci) / 2.0)
        else:
            # no usable resample at all: the interval is undefined
            lower = upper = float('nan')
        # coefficient on the original, un-resampled data
        pearson = scipy.stats.pearsonr(x_values, y_values)[0]

        return pearson, lower, upper

    # bootstrap Pearson correlation
    pearson, lower, upper = bootstrap_metric(x_values, y_values)
    print(f"Pearson correlation:  {round(pearson, 2):.2f} [{round(lower,2):.2f}, {round(upper,2):.2f}]")


In [None]:
def calc_scipy(x,y):
  """Print Pearson, Spearman and Kendall correlation of two paired samples.

  Parameters
  ----------
  x, y : array-like
      Paired observations of equal length.

  Each coefficient is rounded to two decimals; nothing is returned.
  """
  # Use the arguments themselves: the previous body read the notebook globals
  # ``x_values``/``y_values`` and therefore ignored whatever was passed in.
  pearson = scipy.stats.pearsonr(x, y)
  spearman = scipy.stats.spearmanr(x, y)
  kendall = scipy.stats.kendalltau(x, y)
  print(f' Pearson correlation {round(pearson[0],2)}')
  print(f' Spearmans {round(spearman[0],2)}')
  print(f' Kendall {round(kendall[0],2)}')

In [None]:
# Taken from here:
# https://stackoverflow.com/questions/33176049/how-do-you-compute-the-confidence-interval-for-pearsons-r-in-python

def pearsonr_ci(x, y, ci=90, n_boots=1000, seed=None):
    """Pearson correlation with a vectorised bootstrap percentile interval.

    Parameters
    ----------
    x, y : array-like
        Paired one-dimensional observations of equal length.
    ci : float, optional
        Confidence level in percent (default 90).
    n_boots : int, optional
        Number of bootstrap resamples (default 1000).
    seed : int or None, optional
        Seed for a private ``RandomState``; ``None`` (default) uses the global
        ``np.random`` state.

    Returns
    -------
    tuple
        ``(r, ci_low, ci_high)``, each rounded to two decimals. The bounds are
        NaN only if no resample yields a defined correlation.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    # Global legacy generator unless a seed is requested (keeps np.random.seed working).
    rng = np.random if seed is None else np.random.RandomState(seed)

    # (n_boots, n_observations) paired arrays
    rand_ixs = rng.randint(0, x.shape[0], size=(n_boots, x.shape[0]))
    x_boots = x[rand_ixs]
    y_boots = y[rand_ixs]

    # differences from mean
    x_mdiffs = x_boots - x_boots.mean(axis=1)[:, None]
    y_mdiffs = y_boots - y_boots.mean(axis=1)[:, None]

    # sums of squares
    x_ss = np.einsum('ij, ij -> i', x_mdiffs, x_mdiffs)
    y_ss = np.einsum('ij, ij -> i', y_mdiffs, y_mdiffs)

    # pearson correlations; a constant resample gives 0/0, which is dropped
    # below instead of propagating NaN into the percentiles
    with np.errstate(divide='ignore', invalid='ignore'):
        r_boots = np.einsum('ij, ij -> i', x_mdiffs, y_mdiffs) / np.sqrt(x_ss * y_ss)
    r_boots = r_boots[np.isfinite(r_boots)]

    pearson = scipy.stats.pearsonr(x, y)
    # upper and lower bounds for confidence interval
    if r_boots.size:
        ci_low = np.percentile(r_boots, (100 - ci) / 2)
        ci_high = np.percentile(r_boots, (ci + 100) / 2)
    else:
        ci_low = ci_high = float('nan')
    return round(pearson[0], 2), round(ci_low, 2), round(ci_high, 2)

## In the following cells, the calculated **dG** values are processed. The first block calculates all values (MAE, RMSE, Pearson and Spearman correlation) for the results calculated by `TRANSFORMATO` for five datasets (JNK1, 2RA0, GAL3, CDK2, TYK2). In the next block, values are calculated for pmx, then for Schroedinger FEP+ and for AMBER TI. Note that not all methods from the literature report results for every system calculated with `TRANSFORMATO`.

In [None]:
### Here the calculated dG values are processed! ###
############ FOR TRANSFORMATO RESULTS ##############

print('#### Results for TRANSFORMATO #####')

# One entry per report section: (printed label, rows of df_dg, separator first?).
# The first entry summarises all dG values; the others are the individual systems.
tf_dg_sections = [
    ('for all dG values', slice(0, 67), False),
    ('dG values for Galectin', slice(21, 29), True),
    ('dG values for CDK2', slice(39, 52), True),
    ('dG values for 2RA0', slice(29, 39), True),
    ('dG values for TYK2', slice(52, 67), True),
    ('dG values for JNK1', slice(0, 21), True),
]

for section_label, section_rows, with_separator in tf_dg_sections:
    if with_separator:
        print('#################')
    # kept under these global names on purpose: later cells and helpers read them
    x_values = np.asarray_chkfinite(df_dg["literature"][section_rows])
    y_values = np.asarray_chkfinite(df_dg['TF'][section_rows])
    print(section_label)
    bootstrap_function(x_values, y_values)
    calc_scipy(x_values,y_values)
    bootstrap_function_pearson(x_values,y_values)
    bootstrap_function_spearman(x_values,y_values)
    print(pearsonr_ci(x_values,y_values))

#### Results for TRANSFORMATO #####
for all dG values
MAE:  0.86 [0.70, 1.02]
RMSE:  1.17 [0.98, 1.36]
 Pearson correlation 0.73
 Spearmans 0.7
 Kendall 0.52
Pearson correlation:  0.73 [0.63, 0.82]
Spearman's correlation:  0.70 [0.55, 0.80]
(0.73, 0.62, 0.82)
#################
dG values for Galectin
MAE:  0.68 [0.39, 1.04]
RMSE:  0.90 [0.48, 1.23]
 Pearson correlation 0.6
 Spearmans 0.54
 Kendall 0.4
Pearson correlation:  0.60 [-0.06, 0.93]
Spearman's correlation:  0.54 [-0.19, 0.94]
(0.6, -0.07, 0.92)
#################
dG values for CDK2
MAE:  0.80 [0.44, 1.18]
RMSE:  1.12 [0.68, 1.48]
 Pearson correlation 0.61
 Spearmans 0.58
 Kendall 0.45
Pearson correlation:  0.61 [0.24, 0.86]
Spearman's correlation:  0.58 [-0.02, 0.91]
(0.61, 0.25, 0.86)
#################
dG values for 2RA0
MAE:  0.92 [0.67, 1.17]
RMSE:  1.04 [0.80, 1.26]
 Pearson correlation 0.76
 Spearmans 0.65
 Kendall 0.51
Pearson correlation:  0.76 [0.28, 0.92]
Spearman's correlation:  0.65 [0.07, 0.96]
(0.76, 0.31, 0.91)
###

In [None]:
  ############ FOR PMX RESULTS ##############

print('#### Results for PMX #####')

######## Values for the individual systems #########
# (printed label, rows of df_dg) for every system with pmx reference data
pmx_dg_sections = [
    ('dG values for Galectin', slice(21, 29)),
    ('dG values for CDK2', slice(39, 52)),
    ('dG values for TYK2', slice(52, 67)),
    ('dG values for JNK1', slice(0, 21)),
]

for section_label, section_rows in pmx_dg_sections:
    print('#################')
    # kept under these global names on purpose: later cells and helpers read them
    x_values = np.asarray_chkfinite(df_dg["literature"][section_rows])
    y_values = np.asarray_chkfinite(df_dg['pmx'][section_rows])
    print(section_label)
    bootstrap_function(x_values, y_values)
    calc_scipy(x_values,y_values)
    bootstrap_function_pearson(x_values,y_values)
    bootstrap_function_spearman(x_values,y_values)

#### Results for PMX #####
#################
dG values for Galectin
MAE:  0.43 [0.27, 0.58]
RMSE:  0.50 [0.36, 0.61]
 Pearson correlation 0.9
 Spearmans 0.78
 Kendall 0.62
Pearson correlation:  0.90 [0.61, 0.97]
Spearman's correlation:  0.78 [0.30, 1.00]
#################
dG values for CDK2
MAE:  0.89 [0.57, 1.22]
RMSE:  1.14 [0.80, 1.42]
 Pearson correlation 0.41
 Spearmans 0.63
 Kendall 0.46
Pearson correlation:  0.41 [0.08, 0.86]
Spearman's correlation:  0.63 [0.16, 0.89]
#################
dG values for TYK2
MAE:  1.61 [1.23, 2.05]
RMSE:  1.87 [1.46, 2.25]
 Pearson correlation 0.54
 Spearmans 0.46
 Kendall 0.3
Pearson correlation:  0.54 [0.12, 0.81]
Spearman's correlation:  0.46 [0.00, 0.77]
#################
dG values for JNK1
MAE:  0.57 [0.37, 0.78]
RMSE:  0.81 [0.52, 1.05]
 Pearson correlation 0.66
 Spearmans 0.77
 Kendall 0.56
Pearson correlation:  0.66 [0.48, 0.83]
Spearman's correlation:  0.77 [0.58, 0.87]


In [None]:
############ FOR FEP+ RESULTS ##############

print('#### Results for Schroedinger FEP+ #####')

######## Values for the individual systems #########
# (printed label, rows of df_dg) for every system with FEP+ reference data
fep_dg_sections = [
    ('dG values for CDK2', slice(39, 52)),
    ('dG values for TYK2', slice(52, 67)),
    ('dG values for JNK1', slice(0, 21)),
]

for section_label, section_rows in fep_dg_sections:
    print('#################')
    # kept under these global names on purpose: later cells and helpers read them
    x_values = np.asarray_chkfinite(df_dg["literature"][section_rows])
    y_values = np.asarray_chkfinite(df_dg['schroedinger'][section_rows])
    print(section_label)
    bootstrap_function(x_values, y_values)
    calc_scipy(x_values,y_values)
    bootstrap_function_pearson(x_values,y_values)
    bootstrap_function_spearman(x_values,y_values)

#### Results for Schroedinger FEP+ #####
#################
dG values for CDK2
MAE:  0.82 [0.60, 1.05]
RMSE:  0.95 [0.74, 1.14]
 Pearson correlation 0.52
 Spearmans 0.58
 Kendall 0.44
Pearson correlation:  0.52 [0.16, 0.88]
Spearman's correlation:  0.58 [0.10, 0.87]
#################
dG values for TYK2
MAE:  0.46 [0.31, 0.61]
RMSE:  0.58 [0.38, 0.77]
 Pearson correlation 0.88
 Spearmans 0.85
 Kendall 0.68
Pearson correlation:  0.88 [0.76, 0.96]
Spearman's correlation:  0.85 [0.62, 0.95]
#################
dG values for JNK1
MAE:  1.06 [0.89, 1.21]
RMSE:  1.14 [0.99, 1.30]
 Pearson correlation 0.85
 Spearmans 0.9
 Kendall 0.76
Pearson correlation:  0.85 [0.72, 0.94]
Spearman's correlation:  0.90 [0.76, 0.97]


In [None]:
############ FOR AMBER TI RESULTS ##############

print('#### Results for AMBER TI #####')

######## Values for the individual systems #########
# (printed label, rows of df_dg) for every system with AMBER TI reference data
amber_dg_sections = [
    ('dG values for CDK2', slice(39, 52)),
    ('dG values for 2RA0', slice(29, 39)),
]

for section_label, section_rows in amber_dg_sections:
    print('#################')
    # kept under these global names on purpose: later cells and helpers read them
    x_values = np.asarray_chkfinite(df_dg["literature"][section_rows])
    y_values = np.asarray_chkfinite(df_dg['AMBER TI'][section_rows])
    print(section_label)
    bootstrap_function(x_values, y_values)
    calc_scipy(x_values,y_values)
    bootstrap_function_pearson(x_values,y_values)
    bootstrap_function_spearman(x_values,y_values)



#### Results for AMBER TI #####
#################
dG values for CDK2
MAE:  0.72 [0.52, 0.92]
RMSE:  0.84 [0.62, 1.03]
 Pearson correlation 0.74
 Spearmans 0.79
 Kendall 0.59
Pearson correlation:  0.74 [0.54, 0.88]
Spearman's correlation:  0.79 [0.44, 0.93]
#################
dG values for 2RA0
MAE:  0.67 [0.35, 1.04]
RMSE:  0.96 [0.42, 1.41]
 Pearson correlation 0.83
 Spearmans 0.81
 Kendall 0.6
Pearson correlation:  0.83 [0.38, 0.97]
Spearman's correlation:  0.81 [0.43, 0.95]


# As for the dG results shown previously, the same is done for the **ddG** results of the calculated mutations. First, results are reported for all systems calculated by `TRANSFORMATO`. The same is done for each system where data is available, first for pmx, followed by the Schroedinger FEP+ results and the AMBER TI results.

In [None]:
### Here the calculated ddG values are processed! ###
############ FOR TRANSFORMATO RESULTS ##############

print('#### Results for TRANSFORMATO #####')

# One entry per report section: (printed label, rows of df_ddg, separator first?).
# The first entry summarises all ddG values; the others are the individual systems.
# The per-system labels now read "ddG" (they previously said "dG" although the
# values come from df_ddg), matching the labels used by the other ddG cells.
tf_ddg_sections = [
    ('for all ddG values', slice(0, 75), False),
    ('ddG values for Galectin', slice(0, 7), True),
    ('ddG values for CDK2', slice(7, 20), True),
    ('ddG values for 2RA0', slice(20, 31), True),
    ('ddG values for TYK2', slice(31, 46), True),
    ('ddG values for JNK1', slice(46, 75), True),
]

for section_label, section_rows, with_separator in tf_ddg_sections:
    if with_separator:
        print('#################')
    # kept under these global names on purpose: later cells and helpers read them
    x_values = np.asarray_chkfinite(df_ddg["literature"][section_rows])
    y_values = np.asarray_chkfinite(df_ddg['TF'][section_rows])
    print(section_label)
    bootstrap_function(x_values, y_values)
    calc_scipy(x_values,y_values)
    bootstrap_function_pearson(x_values,y_values)
    bootstrap_function_spearman(x_values,y_values)

#### Results for TRANSFORMATO #####
for all ddG values
MAE:  0.87 [0.73, 1.03]
RMSE:  1.18 [0.98, 1.36]
 Pearson correlation 0.57
 Spearmans 0.48
 Kendall 0.36
Pearson correlation:  0.57 [0.36, 0.71]
Spearman's correlation:  0.48 [0.29, 0.64]
#################
dG values for Galectin
MAE:  0.50 [0.31, 0.69]
RMSE:  0.58 [0.40, 0.74]
 Pearson correlation 0.76
 Spearmans 0.57
 Kendall 0.43
Pearson correlation:  0.76 [0.08, 0.94]
Spearman's correlation:  0.57 [-0.17, 0.96]
#################
dG values for CDK2
MAE:  0.80 [0.48, 1.17]
RMSE:  1.12 [0.72, 1.45]
 Pearson correlation 0.63
 Spearmans 0.59
 Kendall 0.46
Pearson correlation:  0.63 [0.28, 0.88]
Spearman's correlation:  0.59 [0.08, 0.92]
#################
dG values for 2RA0
MAE:  1.01 [0.70, 1.40]
RMSE:  1.23 [0.77, 1.69]
 Pearson correlation 0.83
 Spearmans 0.73
 Kendall 0.62
Pearson correlation:  0.83 [0.40, 0.95]
Spearman's correlation:  0.73 [0.22, 0.99]
#################
dG values for TYK2
MAE:  1.37 [0.92, 1.85]
RMSE:  1.74 [1.2

In [None]:
### Here the calculated ddG values are processed! ###
############ FOR PMX RESULTS ##############

print('#### Results for PMX #####')

######## Values for the individual systems #########

## JNK1 ###
jnk1_rows = slice(46, 75)  # rows of df_ddg used for JNK1
print('#################')
# reference values first, pmx predictions second
x_values, y_values = (
    np.asarray_chkfinite(df_ddg[column][jnk1_rows])
    for column in ("literature", 'pmx')
)
print(f'ddG values for JNK1')
bootstrap_function(x_values, y_values)
calc_scipy(x_values,y_values)
bootstrap_function_pearson(x_values,y_values)
bootstrap_function_spearman(x_values,y_values)

#### Results for PMX #####
#################
ddG values for JNK1
MAE:  0.68 [0.48, 0.88]
RMSE:  0.95 [0.67, 1.21]
 Pearson correlation 0.51
 Spearmans 0.55
 Kendall 0.4
Pearson correlation:  0.51 [0.25, 0.71]
Spearman's correlation:  0.55 [0.25, 0.77]


In [None]:
### Here the calculated ddG values are processed! ###
############ FOR Schroedinger/FEP+ RESULTS ##############

print('#### Results for Schroedinger/FEP+ #####')

######## Values for the individual systems #########
## JNK1 ###
jnk1_ddg = df_ddg[46:75]  # rows of df_ddg used for JNK1
print('#################')
# reference values vs. the Schroedinger/FEP+ predictions
x_values = np.asarray_chkfinite(jnk1_ddg["literature"])
y_values = np.asarray_chkfinite(jnk1_ddg['schroedinger'])
print(f'ddG values for JNK1')
bootstrap_function(x_values, y_values)
calc_scipy(x_values,y_values)
bootstrap_function_pearson(x_values,y_values)
bootstrap_function_spearman(x_values,y_values)

#### Results for Schroedinger/FEP+ #####
#################
ddG values for JNK1
MAE:  0.78 [0.59, 0.98]
RMSE:  1.02 [0.75, 1.27]
 Pearson correlation 0.56
 Spearmans 0.58
 Kendall 0.44
Pearson correlation:  0.56 [0.28, 0.78]
Spearman's correlation:  0.58 [0.28, 0.79]


# Results for the HMR vs. no-HMR comparison

In [None]:
# Display the table read from summary_hmr_no_hmr.csv; the cells below compare its columns.
df_hmr

Unnamed: 0,ligand,average,std,TF,std.1,Literature,Charmm_OpenMM,std.2
0,l4 -> l3,-0.57,0.18,-0.61,0.09,-0.47,-0.55,0.43
1,l6 -> l1,-1.83,0.65,-1.91,0.63,-2.66,-1.56,0.17
2,jmc_28 -> ejm_31,-1.64,0.27,-1.73,0.32,1.44,,
3,ejm_46 -> ejm_31,-0.51,0.07,-0.45,0.29,1.77,,
4,l51a -> l51bt,-0.61,0.3,-0.96,0.24,-1.45,-1.04,0.17
5,l51c -> l51d,3.87,0.56,4.03,0.66,3.36,3.4,0.3
6,1h1s -> 1h1q,1.32,0.43,0.6,0.6,3.07,1.64,0.59
7,22 -> 1h1q,-0.44,0.64,-0.29,0.55,-0.32,,
8,29 -> 1h1q,0.23,0.54,-0.22,0.56,1.7,0.8,0.74


In [None]:
## HMR vs noHMR ###
print('#################')
hmr_rows = slice(0, 9)  # the rows of df_hmr compared here
# 'average' column against the 'TF' column of the same rows
x_values, y_values = (
    np.asarray_chkfinite(df_hmr[column][hmr_rows])
    for column in ("average", 'TF')
)
print(f'ddG Values for HMR vs noHMR')
bootstrap_function(x_values, y_values)
calc_scipy(x_values,y_values)
bootstrap_function_pearson(x_values,y_values)
bootstrap_function_spearman(x_values,y_values)

#################
ddG Values for HMR vs noHMR
MAE:  0.23 [0.12, 0.36]
RMSE:  0.32 [0.17, 0.44]
 Pearson correlation 0.99
 Spearmans 1.0
 Kendall 1.0
Pearson correlation:  0.99 [0.94, 1.00]
Spearman's correlation:  1.00 [1.00, 1.00]


In [None]:
## openMM vs CHARMM/OpenMM ###
print('#################')
# Rows 2, 3 and 7 of the displayed df_hmr table have no Charmm_OpenMM value,
# so only the remaining rows are compared.
paired_rows = [0,1,4,5,6,8]
np_x_values = np.asarray(df_hmr['Charmm_OpenMM'][0:9])
np_y_values = np.asarray(df_hmr['average'][0:9])
x_values = np_x_values[paired_rows]
y_values = np_y_values[paired_rows]
print(f'ddG Values for openMM vs CHARMM/OpenMM')
bootstrap_function(x_values, y_values)
calc_scipy(x_values,y_values)
bootstrap_function_pearson(x_values,y_values)
bootstrap_function_spearman(x_values,y_values)

#################
ddG Values for openMM vs CHARMM/OpenMM
MAE:  0.35 [0.21, 0.46]
RMSE:  0.39 [0.28, 0.47]
 Pearson correlation 0.98
 Spearmans 1.0
 Kendall 1.0
Pearson correlation:  0.98 [nan, nan]




Spearman's correlation:  1.00 [1.00, 1.00]
