# Benchmarking ACE2 RBD point mutations

## Overview

This Jupyter notebook contains ......

In [1]:
import os
import pathlib
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import dataframe_image as dfi
from matplotlib.ticker import FuncFormatter

In [2]:
# Resolve the notebook's working directory and locate the shared
# 'Output_files' folder that sits two directory levels above it.
rootdir = pathlib.Path('.').resolve(strict=True)
datadir = rootdir.parents[1].joinpath('Output_files')

## ∆∆G calculation of predictors

TODO: explain what ∆∆G energies are and what they are used for. MutaBind2 and SSIPe report ∆∆G directly as their output, so no calculation was needed for them. For HADDOCK, we obtained ∆∆G by subtracting the WT binding energy from the mutant binding energy. FoldX generates a new WT value for every mutation; to calculate ∆∆G, we subtracted that WT value from the binding value of the corresponding mutant. We followed the same procedure for FoldXwater. 

### HADDOCK
∆∆G =  [Mutant HADDOCK score] - [Wild type HADDOCK score]

In [3]:
# Load the raw HADDOCK scores (the table also holds the wild-type reference row).
HADDOCK_score = pd.read_csv(datadir / 'HADDOCK_scores.csv', delimiter=',')

# Keep the wild-type row(s) aside; their score is the reference subtracted below.
wt = HADDOCK_score.loc[HADDOCK_score['mutation_type'].str.contains("WT")]
wt_score = wt['haddock-score'].values.tolist()

# Drop the WT row (indexing by mutation_type makes it addressable by label),
# restore a flat index and fix the column order.
HADDOCK_score = (
    HADDOCK_score
    .set_index("mutation_type")
    .drop(labels=['WT'], axis=0)
    .reset_index()
    [['#case_id', 'protein', 'mutation_type', 'predictor', 'haddock-score']]
)

# ddg = mutant score - WT score; the raw score column is no longer needed afterwards.
HADDOCK_score = (
    HADDOCK_score
    .assign(ddg=HADDOCK_score['haddock-score'] - wt_score)
    .drop(labels=['haddock-score'], axis=1)
)

KeyError: 'mutation_type'

### FoldX

FoldX produces wt-score for each mutation. Therefore, we will use wt-score of corresponding mutation to calculate ∆∆G.

∆∆G=∆G_MT-∆G_WT

In [None]:
# Load the FoldX scores, derive ddg = mutant score - matching WT score for every row,
# then discard the two raw score columns.
FoldX_score = (
    pd.read_csv(datadir / 'FoldX_scores.csv', delimiter=',')
    .assign(ddg=lambda scores: scores['foldx-score-mut'] - scores['foldx-score-wt'])
    .drop(labels=['foldx-score-mut', 'foldx-score-wt'], axis=1)
)


### FoldXwater

FoldX offers an option to take crystallographic water bridges into account. We built our mutations using the Crystalwater option, since the FoldX team suggested using this option in their latest COVID-19-related paper [ref].  

In [None]:
# Load the FoldXwater scores, derive ddg = mutant score - matching WT score for every row,
# then discard the two raw score columns.
FoldXwater_score = (
    pd.read_csv(datadir / 'FoldXwater_scores.csv', delimiter=',')
    .assign(ddg=lambda scores: scores['foldx-score-mut'] - scores['foldx-score-wt'])
    .drop(labels=['foldx-score-mut', 'foldx-score-wt'], axis=1)
)


### EvoEF1 

...description


In [None]:
# Load the raw EvoEF1 scores (the table also holds the wild-type reference row).
EvoEF1_score = pd.read_csv(datadir / 'EvoEF1_scores.csv', delimiter=',')

# Keep the wild-type row(s) aside; their score is the reference subtracted below.
wt = EvoEF1_score.loc[EvoEF1_score['mutation_type'].str.contains("WT")]
wt_score = wt['evoef1-score'].values.tolist()

# Drop the WT row (indexing by mutation_type makes it addressable by label),
# restore a flat index and fix the column order.
EvoEF1_score = (
    EvoEF1_score
    .set_index("mutation_type")
    .drop(labels=['WT'], axis=0)
    .reset_index()
    [['#case_id', 'protein', 'mutation_type', 'predictor', 'evoef1-score']]
)

# ddg = mutant score - WT score; the raw score column is no longer needed afterwards.
EvoEF1_score = (
    EvoEF1_score
    .assign(ddg=EvoEF1_score['evoef1-score'] - wt_score)
    .drop(labels=['evoef1-score'], axis=1)
)


### MutaBind2 and SSIPe

There is no need to calculate ∆∆G

In [None]:
# MutaBind2 and SSIPe score tables are loaded as-is; no ddg is derived in this cell.
MutaBind2_score, SSIPe_score = (
    pd.read_csv(datadir / csv_name, delimiter=',')
    for csv_name in ('MutaBind2_scores.csv', 'SSIPe_scores.csv')
)


## Metric Analysis

ne kadar metrik var biz ne yapmak istedik


### Volume change

Bu metrik u anlama gelir

In [None]:
def volume_change(df):
    """
    Return a copy of `df` with a 'volume_change' column appended.

    For every row the change is volume[mutant residue] - volume[WT residue],
    rounded to two decimals, using the vdW volume table below.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a '#case_id' column formatted as
        [WT residue][position][mutated residue], e.g. 'A111C'.

    Returns
    -------
    pandas.DataFrame
        New frame with the same rows, order and index as `df`; the input is not modified.

    Raises
    ------
    KeyError
        If the first or last character of a case id is not one of the
        20 one-letter residue codes in the table.
    """
    # vdW volume of each amino acid, keyed by one-letter code
    volume = dict(A=0.05702, R=0.58946, N=0.22972, D=0.21051, C=0.14907, Q=0.34861, E=0.32837, G=0.00279, H=0.37694, I=0.37671,
              L=0.37876, K=0.45363, M=0.38872, F=0.55298, P=0.2279, S=0.09204, T=0.19341, W=0.79351, Y=0.6115, V=0.25674)

    # first character = WT residue, last character = mutant residue
    deltas = [round(volume[case_id[-1]] - volume[case_id[0]], 2) for case_id in df['#case_id']]

    # Assign positionally. The previous pd.concat(axis=1) aligned on index labels, so any
    # frame without a default 0..n-1 index came back with extra, NaN-filled rows.
    return df.assign(volume_change=deltas)

### Hydrophobicity change

bu metrik şuanlama gelir


In [None]:
def hydrophobicity_change(df):
    """
    Return a copy of `df` with a 'hydrophobicity_change' column appended.

    For every row the change is hydrophobicity[mutant residue] - hydrophobicity[WT residue],
    rounded to two decimals, using the per-residue table below.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a '#case_id' column formatted as
        [WT residue][position][mutated residue], e.g. 'A111C'.

    Returns
    -------
    pandas.DataFrame
        New frame with the same rows, order and index as `df`; the input is not modified.

    Raises
    ------
    KeyError
        If the first or last character of a case id is not one of the
        20 one-letter residue codes in the table.
    """
    # hydrophobicity value of each amino acid, keyed by one-letter code
    hydrophobicity = dict(A=0.62, R=-2.53, N=-0.78, D=-0.90, C=0.29, Q=-0.85, E=-0.74, G=0.48, H=-0.40, I=1.38,
                L=1.06, K=-1.50, M=0.64, F=1.19, P=0.12, S=-0.18, T=-0.05, W=0.81, Y=0.26, V=1.08)

    # first character = WT residue, last character = mutant residue
    deltas = [round(hydrophobicity[case_id[-1]] - hydrophobicity[case_id[0]], 2) for case_id in df['#case_id']]

    # Assign positionally. The previous pd.concat(axis=1) aligned on index labels, so any
    # frame without a default 0..n-1 index came back with extra, NaN-filled rows.
    return df.assign(hydrophobicity_change=deltas)

### Flexibility change

Bu metrik şu anlama gelir 

In [None]:
def flexibility_change(df):
    """
    Return a copy of `df` with a 'flexibility_change' column appended.

    For every row the change is flexibility[mutant residue] - flexibility[WT residue],
    using the per-residue integer table below.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a '#case_id' column formatted as
        [WT residue][position][mutated residue], e.g. 'A111C'.

    Returns
    -------
    pandas.DataFrame
        New frame with the same rows, order and index as `df`; the input is not modified.

    Raises
    ------
    KeyError
        If the first or last character of a case id is not one of the
        20 one-letter residue codes in the table.
    """
    # flexibility value of each amino acid, keyed by one-letter code
    flexibility = dict(A=1, R=81, N=3, D=3, C=3, Q=9, E=9, G=1, H=3, I=9,
                L=9, K=81, M=27, F=3, P=2, S=3, T=3, W=3, Y=3, V=3)

    # first character = WT residue, last character = mutant residue;
    # the table holds integers, so the differences are exact and need no rounding
    deltas = [flexibility[case_id[-1]] - flexibility[case_id[0]] for case_id in df['#case_id']]

    # Assign positionally. The previous pd.concat(axis=1) aligned on index labels, so any
    # frame without a default 0..n-1 index came back with extra, NaN-filled rows.
    return df.assign(flexibility_change=deltas)


### Physicochemical Property change

Bu metrik şu anlama gelir 

In [None]:
def physicochemical_property_change(df):
    """
    Return a copy of `df` with a 'physicochem_property_change' column appended.

    Each residue is assigned one of three classes ('non-polar', 'polar', 'charge');
    the new column describes the WT -> mutant class transition with one of
    'no_change', 'polarity_gain', 'polarity_loss', 'charge_gain', 'charge_loss'.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a '#case_id' column formatted as
        [WT residue][position][mutated residue], e.g. 'A111C'.

    Returns
    -------
    pandas.DataFrame
        New frame whose rows are grouped by WT class (non-polar rows first, then polar,
        then charge), keeping the input order inside each group and the original index
        labels. This is the row order earlier versions of this function produced.
        The input frame is not modified.

    Raises
    ------
    KeyError
        If the first or last character of a case id is not one of the
        20 one-letter residue codes in the table.
    """
    # physicochemical class of each amino acid, keyed by one-letter code
    physicochemical_property = dict(A='non-polar', R='charge', N='polar', D='charge', C='non-polar', Q='polar',
                                    E='charge', G='non-polar', H='charge', I='non-polar', L='non-polar', K='charge',
                                    M='non-polar', F='non-polar', P='non-polar', S='polar', T='polar', W='non-polar',
                                    Y='polar', V='non-polar')

    # (WT class, mutant class) -> label. A plain lookup replaces the former np.select calls,
    # which mixed string choices with the implicit integer default 0 (rejected by NumPy 2)
    # and wrote into filtered slices of the frame.
    transition_label = {
        ('non-polar', 'non-polar'): 'no_change',
        ('non-polar', 'polar'): 'polarity_gain',
        ('non-polar', 'charge'): 'charge_gain',
        ('polar', 'polar'): 'no_change',
        ('polar', 'non-polar'): 'polarity_loss',
        ('polar', 'charge'): 'charge_gain',
        ('charge', 'polar'): 'charge_loss',
        ('charge', 'non-polar'): 'charge_loss',
        ('charge', 'charge'): 'no_change',
    }

    # first character = WT residue, last character = mutant residue
    wt_classes = [physicochemical_property[case_id[0]] for case_id in df['#case_id']]
    mutant_classes = [physicochemical_property[case_id[-1]] for case_id in df['#case_id']]
    labels = [transition_label[pair] for pair in zip(wt_classes, mutant_classes)]

    # Positional assignment: independent of the index labels of `df`.
    tagged = df.assign(physicochem_property_change=labels)

    # Reproduce the legacy output order; sorted() is stable, so rows keep their
    # relative order within each WT class.
    group_rank = {'non-polar': 0, 'polar': 1, 'charge': 2}
    row_order = sorted(range(len(wt_classes)), key=lambda position: group_rank[wt_classes[position]])
    return tagged.iloc[row_order]


### Success tag    

Binding-enhancing and binding-reducing mutations were classified as success or failure according to how well the prediction agrees with the experimental data.

In [None]:
# Notebook-wide pandas setting: silences SettingWithCopyWarning for every cell that runs afterwards.
pd.set_option('mode.chained_assignment', None)

def success_tag(df):
    """
    Tag every mutation with the outcome of its ddg prediction.

    Rows whose 'mutation_type' contains "E" are treated as enriched: ddg < 0 is a
    'success', ddg >= 0 a 'failure'. Rows whose 'mutation_type' contains "MD" or "RD"
    are treated as depleted: ddg > 0 is a 'success', ddg <= 0 a 'failure'.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'mutation_type' and 'ddg'.

    Returns
    -------
    pandas.DataFrame
        New frame with a 'success_tag' column: enriched rows first, then depleted rows,
        each keeping its original index label. Rows matching neither pattern are dropped.
        The input frame is not modified.
    """
    mutation_type = df['mutation_type']
    enriched = df[mutation_type.str.contains("E")]
    depleted = df[mutation_type.str.contains("MD") | mutation_type.str.contains("RD")]

    # The default is spelled out as the string '0'. That is the value rows with a missing
    # (NaN) ddg used to receive from np.select's implicit integer default, which NumPy 2
    # no longer accepts next to string choices.
    # NOTE(review): '0' is a placeholder tag - confirm whether NaN ddg rows should be kept at all.
    enriched = enriched.assign(
        success_tag=np.select(
            [enriched['ddg'] < 0, enriched['ddg'] >= 0],
            ['success', 'failure'],
            default='0',
        )
    )
    depleted = depleted.assign(
        success_tag=np.select(
            [depleted['ddg'] <= 0, depleted['ddg'] > 0],
            ['failure', 'success'],
            default='0',
        )
    )
    return pd.concat([enriched, depleted])


## Building the master dataset and exporting it as a CSV file

In [None]:
# Experimental binding data: both tables are reduced to the same two columns
# ('#case_id', 'exp_binding') so they can be stacked.
ACE2 = (
    pd.read_csv(datadir / 'ACE2_Experimental_dataset.csv', delimiter=',')
    .set_axis(['#case_id', 'exp_binding'], axis=1)
)
RBD = (
    pd.read_csv(datadir / 'RBD_Experimental_dataset.csv', delimiter=',')
    .loc[:, ['#case_id', 'RBD_bind_avg']]
    .set_axis(['#case_id', 'exp_binding'], axis=1)
)

# Stack ACE2 on top of RBD and index the result by case id.
exp_dataset = pd.concat([ACE2, RBD]).set_index("#case_id")


In [None]:
# Bring every predictor into the same shape: tag the predictions, keep the columns needed
# for the master table, prefix ddg / success_tag with the predictor name and index by case id.
# Only the HADDOCK frame carries 'protein' and 'mutation_type' into the merge.

haddock = (
    success_tag(HADDOCK_score)
    [['#case_id', 'protein', 'mutation_type', 'ddg', 'success_tag']]
    .rename(columns={'ddg': 'haddock-ddg', 'success_tag': 'haddock-success-tag'})
    .set_index("#case_id")
)

foldx = (
    success_tag(FoldX_score)
    [['#case_id', 'ddg', 'success_tag']]
    .rename(columns={'ddg': 'foldx-ddg', 'success_tag': 'foldx-success-tag'})
    .set_index("#case_id")
)

foldxwater = (
    success_tag(FoldXwater_score)
    [['#case_id', 'ddg', 'success_tag']]
    .rename(columns={'ddg': 'foldxwater-ddg', 'success_tag': 'foldxwater-success-tag'})
    .set_index("#case_id")
)

evoef1 = (
    success_tag(EvoEF1_score)
    [['#case_id', 'ddg', 'success_tag']]
    .rename(columns={'ddg': 'evoef1-ddg', 'success_tag': 'evoef1-success-tag'})
    .set_index("#case_id")
)

mutabind2 = (
    success_tag(MutaBind2_score)
    [['#case_id', 'ddg', 'success_tag']]
    .rename(columns={'ddg': 'mutabind2-ddg', 'success_tag': 'mutabind2-success-tag'})
    .set_index("#case_id")
)

ssipe = (
    success_tag(SSIPe_score)
    [['#case_id', 'ddg', 'success_tag']]
    .rename(columns={'ddg': 'ssipe-ddg', 'success_tag': 'ssipe-success-tag'})
    .set_index("#case_id")
)



In [None]:
# Join the experimental data with every predictor table on the case id, one table at a time.
df = exp_dataset
for predictor_frame in (haddock, foldx, foldxwater, evoef1, mutabind2, ssipe):
    df = pd.merge(df, predictor_frame, on="#case_id")
df = df.reset_index()

# Append the metric columns in the same order as before:
# volume -> hydrophobicity -> flexibility -> physicochemical property.
for add_metric in (volume_change, hydrophobicity_change, flexibility_change, physicochemical_property_change):
    df = add_metric(df)

# Export the master dataset as CSV (floats written with two decimals, no index column).
df.to_csv('ACE2_RBD_benchmarking_dataset.csv', index=False, float_format='%.2f')


## Preparing Datasets of Predictors

Using the functions defined above, we built one dataset per predictor. These datasets contain ..... columns.

## for Volume

In [None]:
def label(df):
    """
    Add 'mutation_tag' and 'label' columns to `df` in place and return the same frame.

    'mutation_tag' is "Enriched" where 'mutation_type' equals "E" and "Depleted" for
    every other value; 'label' is '<mutation_tag>_<success_tag>'.
    Requires the columns 'mutation_type' and 'success_tag'.
    """
    df['mutation_tag'] = [
        "Enriched" if mutation_type == "E" else "Depleted"
        for mutation_type in df['mutation_type']
    ]
    df["label"] = df["mutation_tag"] + "_" + df["success_tag"]
    return df

# Run every predictor's score table through the same pipeline:
# volume -> hydrophobicity -> flexibility -> physicochemical property -> success tag -> label.
HADDOCK, FoldX, FoldXwater, EvoEF1, MutaBind2, SSIPe = (
    label(success_tag(physicochemical_property_change(flexibility_change(hydrophobicity_change(volume_change(score_table))))))
    for score_table in (HADDOCK_score, FoldX_score, FoldXwater_score, EvoEF1_score, MutaBind2_score, SSIPe_score)
)


In [None]:

# Reference rows for the gray curves of figure 1: the enriched / depleted HADDOCK rows,
# relabelled so that their predictor reads 'Experimental'.
Enriched_Experimental, Depleted_Experimental = (
    HADDOCK[HADDOCK['mutation_tag'].str.contains(tag)].copy()
    for tag in ('Enriched', 'Depleted')
)
for experimental_rows in (Enriched_Experimental, Depleted_Experimental):
    experimental_rows['predictor'] = experimental_rows['predictor'].str.replace('HADDOCK', 'Experimental', regex=True)

# All predictors stacked into one long table.
frames=[HADDOCK, FoldX, FoldXwater, EvoEF1, MutaBind2, SSIPe]
df = pd.concat(frames, axis=0)

# One dataset per (mutation tag, outcome) pair: the matching predictor rows plus the
# experimental reference rows of the same mutation tag, sorted by predictor name.
(Enriched_Success_dataset,
 Enriched_Failure_dataset,
 Depleted_Success_dataset,
 Depleted_Failure_dataset) = (
    pd.concat([df[df['label'].str.contains(label_name)], experimental_rows]).sort_values("predictor")
    for label_name, experimental_rows in (
        ('Enriched_success', Enriched_Experimental),
        ('Enriched_failure', Enriched_Experimental),
        ('Depleted_success', Depleted_Experimental),
        ('Depleted_failure', Depleted_Experimental),
    )
)



## Figure 1

In [None]:
# Figure 1: a 3 x 4 grid of KDE panels.
# Rows: volume, hydrophobicity and flexibility change.
# Columns: Enriched-Success, Enriched-Failure, Depleted-Success, Depleted-Failure.

sns.set_style('white')

fig = plt.figure(figsize=(27,19))

color=["green", "gray", "yellow","orange", "blue", "red", "purple"]

# One entry per grid column:
# (dataset, title shown on the top row, panel letter, legend entries, legend title).
# The N sizes in the legend entries are hard-coded text, not computed from the data.
column_specs = [
    (Enriched_Success_dataset, 'Enriched-Success', 'a',
     ['SSIPe 28', 'MutaBind2 20', 'HADDOCK 68', 'FoldXwater 67', 'FoldX 67', 'Exp. dataset 131', 'EvoEF1 64'],
     'Enriched-Success \n         N size'),
    (Enriched_Failure_dataset, 'Enriched Failure', 'b',
     ['SSIPe 103', 'MutaBind2 111', 'HADDOCK 63', 'FoldXwater 64', 'FoldX 64', 'Exp. dataset 131', 'EvoEF1 67'],
     'Enriched-Failure \n         N size'),
    (Depleted_Success_dataset, 'Depleted Success', 'c',
     ['SSIPe 118', 'MutaBind2 126', 'HADDOCK 74', 'FoldXwater 98', 'FoldX 101', 'Exp. dataset 132', 'EvoEF1 87'],
     'Depleted-Success \n         N size'),
    (Depleted_Failure_dataset, 'Depleted Failure', 'd',
     ['SSIPe 14', 'MutaBind2 6', 'HADDOCK 58', 'FoldXwater 34', 'FoldX 31', 'Exp. dataset 132','EvoEF1 45'],
     'Depleted-Failure \n         N size'),
]

# One entry per grid row:
# (plotted column, y-axis label of the first panel, y limits, x limits, x tick font size).
row_specs = [
    ('volume_change', 'Volume change', (0,0.55), (-1,1), 20),
    ('hydrophobicity_change', 'Hydrophobicity change', (0,0.12), (-6,6), 25),
    ('flexibility_change', 'Flexibility change', (0,0.01), (-140,140), 25),
]

n_rows = len(row_specs)
n_cols = len(column_specs)

for row_no, (quantity, row_label, y_range, x_range, xtick_size) in enumerate(row_specs):
    top_row = row_no == 0
    bottom_row = row_no == n_rows - 1

    for col_no, (dataset, panel_title, panel_letter, legend_entries, legend_title) in enumerate(column_specs):
        plt.subplot(n_rows, n_cols, row_no * n_cols + col_no + 1)
        g = sns.kdeplot(data=dataset, x=quantity, hue="predictor", palette=color, legend=False)

        # Shade the area under the second-to-last curve drawn by kdeplot
        # (presumably the 'Experimental' reference curve - confirm against the hue order).
        reference_curve = g.lines[-2]
        g.fill_between(reference_curve.get_xdata(), 0, reference_curve.get_ydata(), color='gray', alpha=0.07)
        plt.axvline(0, color = 'gray', linestyle = 'dashed', linewidth = 1)

        # Only the first panel of each row carries the row's y-axis label.
        if col_no == 0:
            plt.ylabel(row_label, size =30, fontweight='bold')
        else:
            plt.ylabel('')
        plt.xlabel('')
        plt.ylim(*y_range)
        plt.xlim(*x_range)

        if top_row:
            plt.title(panel_title, size =30, fontweight='bold')

        # tight_layout runs exactly once, inside the last panel, before that panel's
        # tick fonts and legend are set - the same point as in the unrolled version.
        if bottom_row and col_no == n_cols - 1:
            plt.tight_layout()

        plt.xticks(fontsize=xtick_size)
        plt.yticks(fontsize=25)

        if top_row:
            plt.text(-1, 0.6, panel_letter, style='normal', bbox={'facecolor': 'lightgray', 'alpha': 0.5, 'pad': 10},fontsize=30)
        if bottom_row:
            plt.legend(labels=legend_entries, title=legend_title, title_fontsize=25, fontsize=25, loc=8, bbox_to_anchor=(0.5, -1.3))

plt.savefig('figure1.pdf', bbox_inches='tight')




## ∆Success Rate Table

In [None]:
# Volume
Enriched_Success_dataset_bigger_than_0 = Enriched_Success_dataset[Enriched_Success_dataset['volume_change'] >= 0]
Depleted_Success_dataset_bigger_than_0 = Depleted_Success_dataset[Depleted_Success_dataset['volume_change'] >= 0]

enr_haddock=round(len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="HADDOCK"])/len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
dep_haddock=round(len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="HADDOCK"])/len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
enr_foldx=round(len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="FoldX"])/len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
dep_foldx=round(len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="FoldX"])/len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
enr_foldxwater=round(len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="FoldXwater"])/len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
dep_foldxwater=round(len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="FoldXwater"])/len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
enr_evoef1=round(len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="EvoEF1"])/len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
dep_evoef1=round(len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="EvoEF1"])/len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
enr_mutabind2=round(len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="MutaBind2"])/len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
dep_mutabind2=round(len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="MutaBind2"])/len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
enr_ssipe=round(len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="SSIPe"])/len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
dep_ssipe=round(len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="SSIPe"])/len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)

#create a dataframe
Volume = pd.DataFrame([[enr_haddock,dep_haddock,enr_haddock-dep_haddock],
              [enr_foldx,dep_foldx,enr_foldx-dep_foldx],
              [enr_foldxwater,dep_foldxwater,enr_foldxwater-dep_foldxwater],
              [enr_evoef1,dep_evoef1,enr_evoef1-dep_evoef1],
              [enr_mutabind2,dep_mutabind2,enr_mutabind2-dep_mutabind2],
              [enr_ssipe,dep_ssipe,enr_ssipe-dep_ssipe]], columns=['Enriched success', "Depleted success","∆Success rate"])
Volume = Volume.set_index([pd.Index(['HADDOCK', 'FoldX', 'FoldXwater', 'EvoEF1','MutaBind2', 'SSIPe'])])
Volume = pd.concat([Volume],keys=['Volume'])


# Hydrophobicity
Enriched_Success_dataset_bigger_than_0 = Enriched_Success_dataset[Enriched_Success_dataset['hydrophobicity_change'] >= 0]
Depleted_Success_dataset_bigger_than_0 = Depleted_Success_dataset[Depleted_Success_dataset['hydrophobicity_change'] >= 0]

enr_haddock=round(len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="HADDOCK"])/len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
dep_haddock=round(len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="HADDOCK"])/len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
enr_foldx=round(len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="FoldX"])/len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
dep_foldx=round(len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="FoldX"])/len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
enr_foldxwater=round(len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="FoldXwater"])/len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
dep_foldxwater=round(len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="FoldXwater"])/len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
enr_evoef1=round(len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="EvoEF1"])/len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
dep_evoef1=round(len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="EvoEF1"])/len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
enr_mutabind2=round(len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="MutaBind2"])/len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
dep_mutabind2=round(len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="MutaBind2"])/len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
enr_ssipe=round(len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="SSIPe"])/len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
dep_ssipe=round(len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="SSIPe"])/len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)

#create a dataframe
Hydrophobicity = pd.DataFrame([[enr_haddock,dep_haddock,enr_haddock-dep_haddock],
              [enr_foldx,dep_foldx,enr_foldx-dep_foldx],
              [enr_foldxwater,dep_foldxwater,enr_foldxwater-dep_foldxwater],
              [enr_evoef1,dep_evoef1,enr_evoef1-dep_evoef1],
              [enr_mutabind2,dep_mutabind2,enr_mutabind2-dep_mutabind2],
              [enr_ssipe,dep_ssipe,enr_ssipe-dep_ssipe]], columns=['Enriched success', "Depleted success","∆Success rate"])
Hydrophobicity = Hydrophobicity.set_index([pd.Index(['HADDOCK', 'FoldX', 'FoldXwater', 'EvoEF1','MutaBind2', 'SSIPe'])])
Hydrophobicity = pd.concat([Hydrophobicity],keys=['Hydrophobicity'])


# Flexibility
Enriched_Success_dataset_bigger_than_0 = Enriched_Success_dataset[Enriched_Success_dataset['flexibility_change'] >= 0]
Depleted_Success_dataset_bigger_than_0 = Depleted_Success_dataset[Depleted_Success_dataset['flexibility_change'] >= 0]

enr_haddock=round(len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="HADDOCK"])/len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
dep_haddock=round(len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="HADDOCK"])/len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
enr_foldx=round(len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="FoldX"])/len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
dep_foldx=round(len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="FoldX"])/len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
enr_foldxwater=round(len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="FoldXwater"])/len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
dep_foldxwater=round(len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="FoldXwater"])/len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
enr_evoef1=round(len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="EvoEF1"])/len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
dep_evoef1=round(len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="EvoEF1"])/len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
enr_mutabind2=round(len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="MutaBind2"])/len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
dep_mutabind2=round(len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="MutaBind2"])/len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
enr_ssipe=round(len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="SSIPe"])/len(Enriched_Success_dataset_bigger_than_0[Enriched_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)
dep_ssipe=round(len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="SSIPe"])/len(Depleted_Success_dataset_bigger_than_0[Depleted_Success_dataset_bigger_than_0['predictor']=="Experimental"])*100, 0)

# Build the flexibility table: enriched %, depleted % and their difference, one row per predictor
flexibility_pairs = [
    (enr_haddock, dep_haddock),
    (enr_foldx, dep_foldx),
    (enr_foldxwater, dep_foldxwater),
    (enr_evoef1, dep_evoef1),
    (enr_mutabind2, dep_mutabind2),
    (enr_ssipe, dep_ssipe),
]
Flexibility = pd.DataFrame(
    [[enr_rate, dep_rate, enr_rate - dep_rate] for enr_rate, dep_rate in flexibility_pairs],
    columns=['Enriched success', "Depleted success", "∆Success rate"],
)
predictor_index = pd.Index(['HADDOCK', 'FoldX', 'FoldXwater', 'EvoEF1', 'MutaBind2', 'SSIPe'])
Flexibility = Flexibility.set_index([predictor_index])
# Add an outer 'Flexibility' index level on top of the predictor names
Flexibility = pd.concat([Flexibility], keys=['Flexibility'])



# Stack the three metric tables into one frame and style it as Table 5

df = pd.concat([Volume,Hydrophobicity,Flexibility])
cm = sns.diverging_palette(220, 20, as_cmap=True)
# show every value without decimals
x = df.style.format('{:.0f}')
x.set_table_styles([{'selector' : '','props' : [('border','1px solid black')]}])
# colour only the "∆Success rate" column, on a fixed -100..100 scale
table5 = x.background_gradient(cmap=cm, subset=["∆Success rate"], axis=None, vmin=-100, vmax=100 )
# font name corrected: the previous value 'Helvatica' is a misspelling of Helvetica
table_5 = table5.set_properties(**{'font-size': '13pt', 'font-family': 'Helvetica'})

# save the table as png
dfi.export(table_5, 'table_5_new.png')
# display Table 5
table_5

## Physicochemical property change

In [None]:
def plot_phys_propery_change (df):
    """Add the labelling columns used for the physicochemical-property plots.

    The frame is modified in place and also returned. Three columns are written:

    * ``mutation_tag``   - "Enriched" when ``mutation_type`` equals "E", otherwise "Depleted"
    * ``mutation_label`` - ``<mutation_tag>_<success_tag>``
    * ``subdataset_tag`` - ``<predictor>_<mutation_tag>_<success_tag>``

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``mutation_type``, ``success_tag`` and ``predictor``.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    df['mutation_tag'] = [
        "Enriched" if kind == "E" else "Depleted"
        for kind in df['mutation_type']
    ]

    # both derived labels share the "<mutation_tag>_<success_tag>" suffix
    tag_and_outcome = df["mutation_tag"] + "_" + df["success_tag"]
    df["mutation_label"] = tag_and_outcome
    df["subdataset_tag"] = df["predictor"] + "_" + tag_and_outcome
    return df

# Label every predictor table; each call annotates the frame in place and hands it back
haddock, foldx, foldxwater, evoef1, mutabind2, ssipe = map(
    plot_phys_propery_change,
    (HADDOCK, FoldX, FoldXwater, EvoEF1, MutaBind2, SSIPe),
)

frames = [haddock, foldx, foldxwater, evoef1, mutabind2, ssipe]
plot_physicohemical_prop_change_dataset = pd.concat(frames)

# Experimental distribution of property changes, taken from the HADDOCK table
experimental_enr = haddock[haddock['mutation_tag'].str.contains('Enriched')]
experimental_dep = haddock[haddock['mutation_tag'].str.contains('Depleted')]
experimental_enr_counts = experimental_enr["physicochem_property_change"].value_counts()
experimental_dep_counts = experimental_dep["physicochem_property_change"].value_counts()
experimental = pd.concat(
    [experimental_enr_counts, experimental_dep_counts],
    axis=1,
    keys=['Experimental Enriched', 'Experimental Depleted'],
)

# Both rows are expressed as a percentage of 263 cases (the notebook's own notes give
# 131 enriched + 132 depleted cases), rounded to whole numbers and transposed so that
# each property class becomes a column.
experimental_enriched = (experimental[['Experimental Enriched']] / 263 * 100).round(0).T
experimental_depleted = (experimental[['Experimental Depleted']] / 263 * 100).round(0).T


In [None]:
# Enriched

def counts_per_enriched_physicochemical_property(predictor):
    """Count correctly predicted enriched cases per physicochemical property change.

    The predictor label is read from the frame's own ``predictor`` column, the same
    column from which ``subdataset_tag`` is composed. The previous implementation
    recovered the label by scanning ``globals()`` for the second name bound to the
    frame, which silently depends on how many aliases of the frame exist in the
    kernel at the time of the call.

    Parameters
    ----------
    predictor : pandas.DataFrame
        A predictor table with a ``predictor`` column.

    Returns
    -------
    pandas.Series
        ``value_counts()`` of ``physicochem_property_change`` over the rows of
        ``plot_physicohemical_prop_change_dataset`` tagged ``<label>_Enriched_success``.
    """
    wanted_tags = [f'{label}_Enriched_success' for label in predictor['predictor'].unique()]
    combined = plot_physicohemical_prop_change_dataset
    enr = combined[combined['subdataset_tag'].isin(wanted_tags)]
    return enr["physicochem_property_change"].value_counts()


# Per-predictor counts of correctly predicted enriched cases, one Series per predictor
(haddock_enr, foldx_enr, foldxwater_enr,
 evoef1_enr, mutabind2_enr, ssipe_enr) = map(
    counts_per_enriched_physicochemical_property,
    (HADDOCK, FoldX, FoldXwater, EvoEF1, MutaBind2, SSIPe),
)

frames = [haddock_enr, foldx_enr, foldxwater_enr, evoef1_enr, mutabind2_enr, ssipe_enr]
# Property classes missing for a predictor come out of the concat as NaN; show them as 0
df = pd.concat(
    frames,
    axis=1,
    keys=['HADDOCK', 'FoldX', 'FoldXwater', 'EvoEF1', 'MutaBind2', 'SSIPe'],
).fillna(0)

# One row per predictor, expressed as a percentage of 263 cases (two decimals)
enriched = (df.T / 263 * 100).round(2)
enriched

In [None]:
# Depleted

def counts_per_depleted_physicochemical_property(predictor):
    """Count correctly predicted depleted cases per physicochemical property change.

    The predictor label is read from the frame's own ``predictor`` column, the same
    column from which ``subdataset_tag`` is composed. The previous implementation
    recovered the label by scanning ``globals()`` for the second name bound to the
    frame, which silently depends on how many aliases of the frame exist in the
    kernel at the time of the call.

    Parameters
    ----------
    predictor : pandas.DataFrame
        A predictor table with a ``predictor`` column.

    Returns
    -------
    pandas.Series
        ``value_counts()`` of ``physicochem_property_change`` over the rows of
        ``plot_physicohemical_prop_change_dataset`` tagged ``<label>_Depleted_success``.
    """
    wanted_tags = [f'{label}_Depleted_success' for label in predictor['predictor'].unique()]
    combined = plot_physicohemical_prop_change_dataset
    dep = combined[combined['subdataset_tag'].isin(wanted_tags)]
    return dep["physicochem_property_change"].value_counts()


# Per-predictor counts of correctly predicted depleted cases, one Series per predictor
(haddock_dep, foldx_dep, foldxwater_dep,
 evoef1_dep, mutabind2_dep, ssipe_dep) = map(
    counts_per_depleted_physicochemical_property,
    (HADDOCK, FoldX, FoldXwater, EvoEF1, MutaBind2, SSIPe),
)

frames = [haddock_dep, foldx_dep, foldxwater_dep, evoef1_dep, mutabind2_dep, ssipe_dep]
# Property classes missing for a predictor come out of the concat as NaN; show them as 0
df = pd.concat(
    frames,
    axis=1,
    keys=['HADDOCK', 'FoldX', 'FoldXwater', 'EvoEF1', 'MutaBind2', 'SSIPe'],
).fillna(0)

# One row per predictor, expressed as a percentage of 263 cases (two decimals)
depleted = (df.T / 263 * 100).round(2)
depleted

## Figure 2

In [None]:
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and 3.8 removed the
# old names, so use whichever spelling this installation provides.
white_style = 'seaborn-v0_8-white' if 'seaborn-v0_8-white' in mpl.style.available else 'seaborn-white'
mpl.style.use(white_style)

# predictors
colors_depleted = ['blue', "yellow", "orange", "green", "red", "purple" ]
colors_enriched = ['skyblue', "lemonchiffon" ,"moccasin", "lightgreen", "lightcoral", "plum" ]
labels = ['HADDOCK', 'FoldX', 'FoldXwater','EvoEF1', 'MutaBind2', 'SSIPe']
labels_exp='Experimental'
mpl.rcParams.update({'font.size': 20})

# (column of the percentage tables, panel title) in left-to-right panel order
property_panels = [
    ("no_change", 'No change'),
    ("polarity_loss", 'Polarity loss'),
    ("charge_loss", 'Charge loss'),
    ("charge_gain", 'Charge gain '),
    ("polarity_gain", 'Polarity gain'),
]
x_max = 40  # common upper x-limit / tick range of every panel

fig, axs = plt.subplots(2, 5, figsize = (20,8), gridspec_kw={'height_ratios': [2, 0.3]})

plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.2, hspace=0.3)

for col, (prop, title) in enumerate(property_panels):
    top_ax, bottom_ax = axs[0, col], axs[1, col]

    # Upper panel: per-predictor bars, enriched stacked to the right of depleted
    top_ax.set_title(title, size=22)
    top_ax.barh(labels, depleted[prop], label='Depleted', color=colors_depleted)
    top_ax.barh(labels, enriched[prop], label='Enriched', left=depleted[prop], color=colors_enriched)
    top_ax.set_xlim([0, x_max])
    top_ax.set(xticklabels=[])

    # Lower panel: the experimental dataset, stacked the same way
    bottom_ax.barh(labels_exp, experimental_depleted[prop], label='Depleted', color="gray")
    bottom_ax.barh(labels_exp, experimental_enriched[prop], label='Enriched', left=experimental_depleted[prop], color="lightgray")
    bottom_ax.xaxis.set_ticks(np.arange(0, x_max, 5))

    # Only the left-most column keeps its y tick labels
    if col == 0:
        bottom_ax.set(yticklabels=["Exp. dataset"])
    else:
        top_ax.set(yticklabels=[])
        bottom_ax.set(yticklabels=[])

# legend
axs[1, 0].legend(['Dark colors represents depleted cases', 'Light colors represents enriched cases'], loc = 'lower center', bbox_to_anchor = (0, -1.8), fontsize=20)

fig.tight_layout()
#save
fig.savefig('Figure2_charge_histidine.pdf', bbox_inches='tight')


In [None]:
#  PLOT 2
# Same layout as Figure 2, but every column's x-range is capped at the experimental
# enriched + depleted percentage of that property class.

# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and 3.8 removed the
# old names, so use whichever spelling this installation provides.
white_style = 'seaborn-v0_8-white' if 'seaborn-v0_8-white' in mpl.style.available else 'seaborn-white'
mpl.style.use(white_style)

# predictors
colors_depleted = ['blue', "yellow", "orange", "green", "red", "purple" ]
colors_enriched = ['skyblue', "lemonchiffon" ,"moccasin", "lightgreen", "lightcoral", "plum" ]
labels = ['HADDOCK', 'FoldX', 'FoldXwater','EvoEF1', 'MutaBind2', 'SSIPe']
labels_exp='Experimental'
mpl.rcParams.update({'font.size': 20})

# (column of the percentage tables, panel title) in left-to-right panel order
property_panels = [
    ("no_change", 'No change'),
    ("polarity_loss", 'Polarity loss'),
    ("charge_loss", 'Charge loss'),
    ("charge_gain", 'Charge gain '),
    ("polarity_gain", 'Polarity gain'),
]

# Upper x-limit per column. The experimental tables hold a single row, so .iloc[0] picks
# its value explicitly by position (plain [0] on a label-indexed Series is deprecated).
panel_xlims = {
    prop: int(experimental_enriched[prop].iloc[0] + experimental_depleted[prop].iloc[0])
    for prop, _ in property_panels
}
# individual names kept so that code relying on them keeps working
nochange_xlim = panel_xlims["no_change"]
polarityloss_xlim = panel_xlims["polarity_loss"]
chargeloss_xlim = panel_xlims["charge_loss"]
chargegain_xlim = panel_xlims["charge_gain"]
polaritygain_xlim = panel_xlims["polarity_gain"]

fig, axs = plt.subplots(2, 5, figsize = (20,8), gridspec_kw={'height_ratios': [2, 0.3]})

plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.2, hspace=0.3)

for col, (prop, title) in enumerate(property_panels):
    top_ax, bottom_ax = axs[0, col], axs[1, col]
    x_max = panel_xlims[prop]

    # Lower panel: the experimental dataset, enriched stacked to the right of depleted
    bottom_ax.barh(labels_exp, experimental_depleted[prop], label='Depleted', color="gray")
    bottom_ax.barh(labels_exp, experimental_enriched[prop], label='Enriched', left=experimental_depleted[prop], color="lightgray")
    bottom_ax.set_xlim([0, x_max])
    bottom_ax.xaxis.set_ticks(np.arange(0, x_max, 5))

    # Upper panel: per-predictor bars, stacked the same way
    top_ax.set_title(title, size=22)
    top_ax.barh(labels, depleted[prop], label='Depleted', color=colors_depleted)
    top_ax.barh(labels, enriched[prop], label='Enriched', left=depleted[prop], color=colors_enriched)
    top_ax.set_xlim([0, x_max])
    top_ax.set(xticklabels=[])

    # Only the left-most column keeps its y tick labels
    if col == 0:
        bottom_ax.set(yticklabels=["Exp. dataset"])
    else:
        top_ax.set(yticklabels=[])
        bottom_ax.set(yticklabels=[])

# legend
axs[1, 0].legend(['Dark colors represents depleted cases', 'Light colors represents enriched cases'], loc = 'lower center', bbox_to_anchor = (0, -1.8), fontsize=20)

fig.tight_layout()
#save
fig.savefig('Figure2_ver2.pdf', bbox_inches='tight')


## Performance table of predictors per physicochemical class change, Supp. Table 2
Unfortunately, we could not find a better way to calculate the performance.

In [None]:
#HADDOCK
# Success rate (%) per physicochemical class: correctly predicted enriched / depleted cases
# divided by all experimental cases (enriched + depleted) of that class.
property_classes = ["no_change", "polarity_loss", "charge_loss", "charge_gain", "polarity_gain"]

# fill_value=0 keeps a class that occurs in only one experimental subset from becoming NaN
denominator = experimental_enr_counts.add(experimental_dep_counts, fill_value=0)

performance_table = []
for property_class in property_classes:
    for success_counts in (haddock_enr, haddock_dep):
        # .get(..., 0): a class without any success is absent from the counts and
        # would otherwise raise KeyError
        rate = round(success_counts.get(property_class, 0) / denominator[property_class] * 100, 0)
        performance_table.append(rate)

# expose the individual values under the names this cell has always defined
(enr_no_change, dep_no_change,
 enr_polarity_loss, dep_polarity_loss,
 enr_charge_loss, dep_charge_loss,
 enr_charge_gain, dep_charge_gain,
 enr_polarity_gain, dep_polarity_gain) = performance_table
performance_table

In [None]:
#FoldX
# Success rate (%) per physicochemical class: correctly predicted enriched / depleted cases
# divided by all experimental cases (enriched + depleted) of that class.
property_classes = ["no_change", "polarity_loss", "charge_loss", "charge_gain", "polarity_gain"]

# fill_value=0 keeps a class that occurs in only one experimental subset from becoming NaN
denominator = experimental_enr_counts.add(experimental_dep_counts, fill_value=0)

performance_table = []
for property_class in property_classes:
    for success_counts in (foldx_enr, foldx_dep):
        # .get(..., 0): a class without any success is absent from the counts and
        # would otherwise raise KeyError
        rate = round(success_counts.get(property_class, 0) / denominator[property_class] * 100, 0)
        performance_table.append(rate)

# expose the individual values under the names this cell has always defined
(enr_no_change, dep_no_change,
 enr_polarity_loss, dep_polarity_loss,
 enr_charge_loss, dep_charge_loss,
 enr_charge_gain, dep_charge_gain,
 enr_polarity_gain, dep_polarity_gain) = performance_table
performance_table

In [None]:
#FoldXwater
# Success rate (%) per physicochemical class: correctly predicted enriched / depleted cases
# divided by all experimental cases (enriched + depleted) of that class.
property_classes = ["no_change", "polarity_loss", "charge_loss", "charge_gain", "polarity_gain"]

# fill_value=0 keeps a class that occurs in only one experimental subset from becoming NaN
denominator = experimental_enr_counts.add(experimental_dep_counts, fill_value=0)

performance_table = []
for property_class in property_classes:
    for success_counts in (foldxwater_enr, foldxwater_dep):
        # .get(..., 0): a class without any success is absent from the counts and
        # would otherwise raise KeyError
        rate = round(success_counts.get(property_class, 0) / denominator[property_class] * 100, 0)
        performance_table.append(rate)

# expose the individual values under the names this cell has always defined
(enr_no_change, dep_no_change,
 enr_polarity_loss, dep_polarity_loss,
 enr_charge_loss, dep_charge_loss,
 enr_charge_gain, dep_charge_gain,
 enr_polarity_gain, dep_polarity_gain) = performance_table
performance_table

In [None]:
#EvoEF1
# Success rate (%) per physicochemical class: correctly predicted enriched / depleted cases
# divided by all experimental cases (enriched + depleted) of that class.
property_classes = ["no_change", "polarity_loss", "charge_loss", "charge_gain", "polarity_gain"]

# fill_value=0 keeps a class that occurs in only one experimental subset from becoming NaN
denominator = experimental_enr_counts.add(experimental_dep_counts, fill_value=0)

performance_table = []
for property_class in property_classes:
    for success_counts in (evoef1_enr, evoef1_dep):
        # .get(..., 0): a class without any success is absent from the counts and
        # would otherwise raise KeyError
        rate = round(success_counts.get(property_class, 0) / denominator[property_class] * 100, 0)
        performance_table.append(rate)

# expose the individual values under the names this cell has always defined
(enr_no_change, dep_no_change,
 enr_polarity_loss, dep_polarity_loss,
 enr_charge_loss, dep_charge_loss,
 enr_charge_gain, dep_charge_gain,
 enr_polarity_gain, dep_polarity_gain) = performance_table
performance_table

In [None]:
#MutaBind2
# Success rate (%) per physicochemical class: correctly predicted enriched / depleted cases
# divided by all experimental cases (enriched + depleted) of that class.
property_classes = ["no_change", "polarity_loss", "charge_loss", "charge_gain", "polarity_gain"]

# fill_value=0 keeps a class that occurs in only one experimental subset from becoming NaN
denominator = experimental_enr_counts.add(experimental_dep_counts, fill_value=0)

performance_table = []
for property_class in property_classes:
    for success_counts in (mutabind2_enr, mutabind2_dep):
        # .get(..., 0) treats a class that is absent from the counts as zero successes.
        # This replaces the earlier `mutabind2_enr["charge_loss"]=0`, which overwrote the
        # shared Series in place and would have zeroed a real count had one been present.
        rate = round(success_counts.get(property_class, 0) / denominator[property_class] * 100, 0)
        performance_table.append(rate)

# expose the individual values under the names this cell has always defined
(enr_no_change, dep_no_change,
 enr_polarity_loss, dep_polarity_loss,
 enr_charge_loss, dep_charge_loss,
 enr_charge_gain, dep_charge_gain,
 enr_polarity_gain, dep_polarity_gain) = performance_table
performance_table

In [None]:
# SSIPe
# Success rate (%) per physicochemical class: correctly predicted enriched / depleted cases
# divided by all experimental cases (enriched + depleted) of that class.
property_classes = ["no_change", "polarity_loss", "charge_loss", "charge_gain", "polarity_gain"]

# fill_value=0 keeps a class that occurs in only one experimental subset from becoming NaN
denominator = experimental_enr_counts.add(experimental_dep_counts, fill_value=0)

performance_table = []
for property_class in property_classes:
    for success_counts in (ssipe_enr, ssipe_dep):
        # .get(..., 0) treats a class that is absent from the counts as zero successes.
        # This replaces the earlier `ssipe_enr["polarity_gain"]=0`, which overwrote the
        # shared Series in place and would have zeroed a real count had one been present.
        rate = round(success_counts.get(property_class, 0) / denominator[property_class] * 100, 0)
        performance_table.append(rate)

# expose the individual values under the names this cell has always defined
(enr_no_change, dep_no_change,
 enr_polarity_loss, dep_polarity_loss,
 enr_charge_loss, dep_charge_loss,
 enr_charge_gain, dep_charge_gain,
 enr_polarity_gain, dep_polarity_gain) = performance_table
performance_table

## UEP performance analysis from Shell output

In [None]:
# Load the UEP output, keep the four columns of interest and rename them to the
# column names used by the other predictor tables ('#case_id', ..., 'ddg').
uep_columns = {
    'case_id': '#case_id',
    'protein': 'protein',
    'mutation_type': 'mutation_type',
    'binding_value': 'ddg',
}
UEP = (
    pd.read_csv(datadir / 'UEP_ACE2-RBD_common_dataset.csv', delimiter=',')[list(uep_columns)]
    .rename(columns=uep_columns)
)
# one-column frame of the case ids covered by UEP
common_cases = UEP[['#case_id']]


In [None]:
# UEP gets its own performance function: according to the original note, taking index [1]
# of the globals() name lookup raised "list index out of range" for these tables, so this
# variant takes index [0] instead.

def _uep_success_percentage(subset):
    """Return the rounded percentage of rows whose ``success_rate`` is 1 (NaN if ``subset`` is empty)."""
    if len(subset) == 0:
        # an empty subset has no defined rate; avoid ZeroDivisionError
        return float("nan")
    return round(sum(subset['success_rate'])/ len(subset) * 100, 0)


def _uep_split_by_mutation_type(subset):
    """Split ``subset`` into (enriched, depleted) rows based on its ``mutation_type`` column."""
    kinds = subset['mutation_type']
    enriched_rows = subset[kinds.str.contains("E")]
    depleted_rows = subset[kinds.str.contains("MD") | kinds.str.contains("RD")]
    return enriched_rows, depleted_rows


def performance_calculation_uep(df, name=None):
    """Summarise the success rates of one predictor table as a single-row DataFrame.

    A ``success_rate`` column (1 for ``success_tag == "success"``, else 0) is added to
    ``df`` in place. Rates are then computed for the whole table, for the ACE2 and RBD
    rows (``protein`` column), and for the enriched / depleted rows of each of them.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``success_tag``, ``mutation_type`` and ``protein``.
    name : str, optional
        Row label of the result. When omitted, the first global variable name bound to
        ``df`` is used, which is the historical behaviour.

    Returns
    -------
    pandas.DataFrame
        One row indexed by ``Predictors`` with the columns Total, Enriched, Depleted,
        ACE2, ACE2-Enriched, ACE2-Depleted, RBD, RBD-Enriched, RBD-Depleted
        (percentages rounded to whole numbers; NaN for an empty subset).

    Raises
    ------
    IndexError
        If ``name`` is omitted and no global variable refers to ``df``.
    """
    df['success_rate'] = [1 if tag == "success" else 0 for tag in df['success_tag']]

    performances = {}
    for prefix, subset in (
        (None, df),
        ('ACE2', df[df['protein'].str.contains("ACE2")]),
        ('RBD', df[df['protein'].str.contains("RBD")]),
    ):
        enriched_rows, depleted_rows = _uep_split_by_mutation_type(subset)
        if prefix is None:
            keys = ('Total', 'Enriched', 'Depleted')
        else:
            keys = (prefix, f'{prefix}-Enriched', f'{prefix}-Depleted')
        for key, rows in zip(keys, (subset, enriched_rows, depleted_rows)):
            performances[key] = [_uep_success_percentage(rows)]

    if name is None:
        # Fall back to the variable name under which the caller holds the frame
        aliases = [key for key in globals() if globals()[key] is df]
        if not aliases:
            raise IndexError("no global variable refers to df; pass name= explicitly")
        name = aliases[0]
    performances['Predictors'] = [name]

    return pd.DataFrame(data=performances).set_index('Predictors')
    



In [None]:
# Predictor performances on the cases shared between the UEP output and the main dataset (263).
# NOTE: every predictor table is overwritten by its inner merge with `common_cases`, so from
# this cell on HADDOCK, FoldX, ... only hold rows whose '#case_id' also occurs in the UEP table.

HADDOCK = pd.merge(common_cases, HADDOCK, how='inner', on=['#case_id'])
FoldX = pd.merge(common_cases, FoldX, how='inner', on=['#case_id'])
FoldXwater = pd.merge(common_cases, FoldXwater, how='inner', on=['#case_id'])
EvoEF1 = pd.merge(common_cases, EvoEF1, how='inner', on=['#case_id'])
MutaBind2 = pd.merge(common_cases, MutaBind2, how='inner', on=['#case_id'])
SSIPe = pd.merge(common_cases, SSIPe, how='inner', on=['#case_id'])


# performance_calculation_uep labels each result row with the first global variable name
# bound to the frame it receives, so the tables must be passed under these exact names.
HADDOCK_UEP_table = performance_calculation_uep(HADDOCK)
FoldX_UEP_table = performance_calculation_uep(FoldX)
FoldXwater_UEP_table = performance_calculation_uep(FoldXwater)
EvoEF1_UEP_table = performance_calculation_uep(EvoEF1)
MutaBind2_UEP_table = performance_calculation_uep(MutaBind2)
SSIPe_UEP_table = performance_calculation_uep(SSIPe)

# success_tag is defined earlier in the notebook; presumably it adds the 'success_tag'
# column that performance_calculation_uep reads -- TODO confirm
UEP = success_tag(UEP)
UEP_table = performance_calculation_uep(UEP)

# one row per predictor, UEP last; values are shown without decimals
performance_table = pd.concat([HADDOCK_UEP_table, FoldX_UEP_table, FoldXwater_UEP_table, EvoEF1_UEP_table, MutaBind2_UEP_table, SSIPe_UEP_table, UEP_table])
perf_table_uep = performance_table.style.format('{:.0f}').set_properties(**{'font-size': '12pt', 'font-family': 'Times'})

# save the table as png
dfi.export(perf_table_uep, 'performance_table_uep.png')
# display the UEP performance table
perf_table_uep

## MM-PBSA method performance comparison
Table 7 performances

In [None]:


# Table 7: success rates of all predictors on the cases that also have an MM-PBSA score
ACE2_RBD_Benchmarking_dataset = pd.read_csv(datadir / 'ACE2_RBD_Benchmarking_dataset.csv', delimiter=',')

colnames=['#case_id', 'ddg']
MM_PBSA_scores = pd.read_csv(datadir / 'ACS_scores.csv', delimiter=',',names=colnames, header=None)

common_cases = MM_PBSA_scores['#case_id'].values.tolist()

# Collect the benchmark rows of every MM-PBSA case and concatenate them in one go.
# (DataFrame.append, used here before, was removed in pandas 2.0.)
matched_rows = [
    ACE2_RBD_Benchmarking_dataset[ACE2_RBD_Benchmarking_dataset['#case_id'].str.contains(case)]
    for case in common_cases
]
df = pd.concat(matched_rows) if matched_rows else pd.DataFrame()

#dataset with MM-PBSA ddg values
df=pd.merge(MM_PBSA_scores,df)

#success rate calculation of MM-PBSA
df=success_tag(df)

denominator=len(df)
enriched_cases=df[df["mutation_type"]=='E']
depleted_cases=df[df["mutation_type"]!='E']
denominator_enr=len(enriched_cases)
denominator_dep=len(depleted_cases)

# (key suffix, rows, number of rows) of the three subsets every predictor is scored on
mmpbsa_groups = (
    ("", df, denominator),
    ("-enriched", enriched_cases, denominator_enr),
    ("-depleted", depleted_cases, denominator_dep),
)


def _mmpbsa_success_rates(label, tag_column, counted_tag="success", subset_label=None, groups=mmpbsa_groups):
    """Return three one-entry dicts with the overall / enriched / depleted rate (%) of a predictor.

    ``label`` keys the overall rate and ``subset_label`` (default: ``label``) prefixes the
    '-enriched' / '-depleted' keys. A row counts when ``tag_column`` equals ``counted_tag``.
    """
    if subset_label is None:
        subset_label = label
    rates = []
    for suffix, rows, total in groups:
        key = label if suffix == "" else f"{subset_label}{suffix}"
        rates.append({key: round(len(rows[rows[tag_column]==counted_tag])/total*100,0)})
    return rates


#Note: The MM-PBSA authors state that negative/positive ΔΔG values indicate unfavorable/favorable
#substitutions, respectively, i.e. the opposite sign convention. The mutation type is therefore
#effectively reversed, so the share of "failure" tags is the MM-PBSA success rate.
MM_PBSA = _mmpbsa_success_rates("MM-PBSA", 'success_tag', counted_tag="failure", subset_label="MM_PBSA")

haddock_mmpbsa = _mmpbsa_success_rates("HADDOCK", "haddock-success-tag")
foldx_mmpbsa = _mmpbsa_success_rates("FoldX", "foldx-success-tag")
foldxwater_mmpbsa = _mmpbsa_success_rates("FoldXwater", "foldxwater-success-tag")
evoef1_mmpbsa = _mmpbsa_success_rates("EvoEF1", "evoef1-success-tag")
mutabind2_mmpbsa = _mmpbsa_success_rates("MutaBind2", "mutabind2-success-tag")
ssipe_mmpbsa = _mmpbsa_success_rates("SSIPe", "ssipe-success-tag")


haddock_mmpbsa + foldx_mmpbsa + foldxwater_mmpbsa + evoef1_mmpbsa + mutabind2_mmpbsa + ssipe_mmpbsa + MM_PBSA


## Antibody Binding Sites

Table 8 performances

In [None]:
# Table 8: success rates of the predictors on mutations at antibody binding sites
common_cases = ["V445K", "V445P", "V445Q", "Q498F", "Q498H", "Q498P", "Q498W", "Q498Y", "F490A", "F490K", "F490M", "F490N", "F490R",
                "F490Y", "Y489A", "Y489E", "Y489F", "Y489S", "S477D", "S477K", "S477N", "S477P","S477W","Q493A","Q493F","Q493G",
                "Q493K", "Q493L", "Q493M", "Q493V", "Q493Y", "T500L", "T500S", "V503I","V503K","V503L","V503M","V503R",
                "E484K","E484Q","E484R","E484S","E484T"]

ACE2_RBD_Benchmarking_dataset = pd.read_csv(datadir / 'ACE2_RBD_Benchmarking_dataset.csv', delimiter=',')

# Collect the benchmark rows of every listed case and concatenate them in one go.
# (DataFrame.append, used here before, was removed in pandas 2.0.)
matched_rows = [
    ACE2_RBD_Benchmarking_dataset[ACE2_RBD_Benchmarking_dataset['#case_id'].str.contains(case)]
    for case in common_cases
]
df = pd.concat(matched_rows)

enriched_cases=df[df["mutation_type"]=='E']
depleted_cases=df[df["mutation_type"]!='E']

denominator=len(df)
denominator_enr=len(enriched_cases)
denominator_dep=len(depleted_cases)

# (key suffix, rows, number of rows) of the three subsets every predictor is scored on
antibody_groups = (
    ("", df, denominator),
    ("-enriched", enriched_cases, denominator_enr),
    ("-depleted", depleted_cases, denominator_dep),
)


def _antibody_success_rates(label, tag_column, groups=antibody_groups):
    """Return three one-entry dicts with the overall / enriched / depleted success rate (%).

    A row counts as a success when ``tag_column`` equals "success"; the dict keys are
    ``label``, ``label + '-enriched'`` and ``label + '-depleted'``.
    """
    return [
        {f"{label}{suffix}": round(len(rows[rows[tag_column]=="success"])/total*100,0)}
        for suffix, rows, total in groups
    ]


haddock_antibody = _antibody_success_rates("HADDOCK", "haddock-success-tag")
foldx_antibody = _antibody_success_rates("FoldX", "foldx-success-tag")
foldxwater_antibody = _antibody_success_rates("FoldXwater", "foldxwater-success-tag")
evoef1_antibody = _antibody_success_rates("EvoEF1", "evoef1-success-tag")
mutabind2_antibody = _antibody_success_rates("MutaBind2", "mutabind2-success-tag")
ssipe_antibody = _antibody_success_rates("SSIPe", "ssipe-success-tag")


haddock_antibody + foldx_antibody + foldxwater_antibody + evoef1_antibody + mutabind2_antibody + ssipe_antibody


## FoldX and FoldXwater, water mediated hydrogen bond partner and performances of predictors on these mutations


In the first analysis we found 60 cases related to the water-mediated residues, but this time we found 71. I will find the reason for this.

In [None]:
# NOTE(review): "Q493" is listed twice; the second entry may have been meant to be a
# different residue -- please confirm against the structure.
water_mediated_hydrogen_bond_partners =  ["F28","E37","D38","Q42","Q76","Y83","T324","Y449", "N487", "Y489", "Q493", "Q493"]

# wild-type residue + position of every mutation, i.e. the case id without its last character
case_id = ACE2_RBD_Benchmarking_dataset['#case_id']
ACE2_RBD_Benchmarking_dataset['wt_position'] = case_id.str[:-1]

# Select every row whose wt_position contains one of the partners. A single combined mask
# keeps each row once; the earlier per-partner append loop added the rows of a repeated
# partner twice (and relied on DataFrame.append, which was removed in pandas 2.0).
unique_partners = list(dict.fromkeys(water_mediated_hydrogen_bond_partners))
partner_pattern = "|".join(unique_partners)
is_partner_row = ACE2_RBD_Benchmarking_dataset['wt_position'].str.contains(partner_pattern)
df = ACE2_RBD_Benchmarking_dataset[is_partner_row]

enriched_cases=df[df["mutation_type"]=='E']
depleted_cases=df[df["mutation_type"]!='E']

denominator=len(df)
denominator_enr=len(enriched_cases)
denominator_dep=len(depleted_cases)

# (key suffix, rows, number of rows) of the three subsets both FoldX variants are scored on
water_hb_groups = (
    ("", df, denominator),
    ("-enriched", enriched_cases, denominator_enr),
    ("-depleted", depleted_cases, denominator_dep),
)


def _water_hb_success_rates(label, tag_column, groups=water_hb_groups):
    """Return three one-entry dicts with the overall / enriched / depleted success rate (%).

    A row counts as a success when ``tag_column`` equals "success"; the dict keys are
    ``label``, ``label + '-enriched'`` and ``label + '-depleted'``.
    """
    return [
        {f"{label}{suffix}": round(len(rows[rows[tag_column]=="success"])/total*100,0)}
        for suffix, rows, total in groups
    ]


foldx_water_mediated_hb = _water_hb_success_rates("FoldX", "foldx-success-tag")
foldxwater_water_mediated_hb = _water_hb_success_rates("FoldXwater", "foldxwater-success-tag")

foldx_water_mediated_hb + foldxwater_water_mediated_hb

## Code added later

## Performance of Predictors on Antibody Binding Sites


In [None]:

# Case ids (point mutations) evaluated in this section.
common_cases = ["V445K", "V445P", "V445Q", "Q498F", "Q498H", "Q498P", "Q498W", "Q498Y", "F490A", "F490K", "F490M", "F490N", "F490R",
                "F490Y", "Y489A", "Y489E", "Y489F", "Y489S", "S477D", "S477K", "S477N", "S477P","S477W","Q493A","Q493F","Q493G",
                "Q493K", "Q493L", "Q493M", "Q493V", "Q493Y", "T500L", "T500S", "V503I","V503K","V503L","V503M","V503R",
                "E484K","E484Q","E484R","E484S","E484T"]

# Collect the rows of every listed case and concatenate them once.
# DataFrame.append was removed in pandas 2.0, so pd.concat is used instead.
df = pd.concat(
    [
        SARS_CoV_2_RBD_ACE2_benchmarking_dataset[
            SARS_CoV_2_RBD_ACE2_benchmarking_dataset['#case_id'].str.contains(case)
        ]
        for case in common_cases
    ]
)

# performance_calculation is defined elsewhere in this notebook.
# NOTE(review): presumably it returns a two-column frame (predictor, success
# rate) with integer column labels; only column 1 of the subset results is kept -- confirm.
total = performance_calculation(df)
enriched_cases = performance_calculation(df[df['exp_binding']>0])   # positive exp_binding
depleted_cases = performance_calculation(df[df['exp_binding']<0])   # negative exp_binding

performance_table = pd.concat([total, enriched_cases[1], depleted_cases[1]], axis=1)
performance_table.columns = ['Predictors', 'Total', 'Enriched', 'Depleted']
performance_table['Predictors'] = ['HADDOCK', 'FoldX', 'FoldXwater', 'EvoEF1', 'MutaBind2', 'SSIPe']

# Cell output only: the indexed view is displayed, performance_table itself is not re-assigned.
performance_table.set_index('Predictors')