# Analysis of correlation between Delta G, Kd and Temperature

In [1]:
import pandas as pd
import yaml
import os
from tqdm import tqdm_notebook
import numpy as np


In [2]:
import warnings
# Suppress every warning raised through the `warnings` module from here on.
# NOTE(review): this is a blanket filter, so numerical RuntimeWarnings and
# deprecation notices are hidden as well — consider narrowing it to a category.
warnings.filterwarnings("ignore")

from abag_affinity.utils.config import read_config, get_data_paths

In [3]:
# Read the project configuration once, then resolve the summary/PDB path pair
# of each dataset from it.
config_file = "../abag_affinity/config.yaml"
config = read_config(config_file)

sabdab_summary_path, sabdab_pdb_path = get_data_paths(config, "SAbDab")
abdb_summary_path, abdb_pdb_path = get_data_paths(config, "AbDb")

In [4]:
# PDB ids available in AbDb: the part of each directory entry before the first
# "_" , lower-cased.
abdb_pdb_ids = {
    entry_name.split("_")[0].lower() for entry_name in os.listdir(abdb_pdb_path)
}

# PDB ids listed in the tab-separated SAbDab summary table.
sabdab_df = pd.read_csv(sabdab_summary_path, sep="\t")
sabdab_pdb_ids = set(sabdab_df["pdb"].unique())

# Ids that occur in both datasets.
overlapping_ids = abdb_pdb_ids & sabdab_pdb_ids

In [5]:
# Keep only the SAbDab rows whose PDB id is also present in AbDb; copy so that
# later column assignments do not touch `sabdab_df`.
in_both_datasets = sabdab_df["pdb"].isin(overlapping_ids)
overlapping_data = sabdab_df.loc[in_both_datasets].copy()

In [6]:
# Molar gas constant R in J / (mol * K); energies in this notebook are
# converted between kcal and joule with the factor 4184.
gas_constant = 8.31446261815324

def calc_temp(row):
    """Back-calculate the temperature implied by a row's delta G and Kd.

    Rearranges ``delta_G = R * T * ln(Kd)`` into ``T = delta_G / (R * ln(Kd))``.

    Args:
        row: Record indexable by column name (e.g. a DataFrame row) providing
            "affinity" (used as Kd) and "delta_g" (treated as kcal and
            converted to joule).

    Returns:
        The temperature converted from kelvin to degrees Celsius.
    """
    log_kd = np.log(row["affinity"])
    delta_g_joule = row["delta_g"] * 4184  # kcal -> joule
    temperature_kelvin = delta_g_joule / (gas_constant * log_kd)
    return temperature_kelvin - 273.15  # kelvin -> celsius

def calc_delta_g(row):
    """Compute delta G from a row's temperature and Kd via ``R * T * ln(Kd)``.

    Args:
        row: Record indexable by column name (e.g. a DataFrame row) providing
            "temperature_kelvin" and "affinity" (used as Kd).

    Returns:
        Delta G converted from joule to kcal.
    """
    temperature_kelvin = row["temperature_kelvin"]
    log_kd = np.log(row["affinity"])
    delta_g_joule = gas_constant * temperature_kelvin * log_kd
    return delta_g_joule / 4184  # joule -> kcal

In [7]:
# Parse temperature and affinity as numbers. Anything that is not numeric
# (missing values, or labels such as "Unknown", "SPR", "BLI") becomes NaN, so
# no hard-coded list of placeholder strings is needed and an unexpected label
# or a fractional temperature no longer breaks an integer cast.
temperature_celsius = pd.to_numeric(overlapping_data["temperature"], errors="coerce")
affinity = pd.to_numeric(overlapping_data["affinity"], errors="coerce")
has_measurements = temperature_celsius.notna() & affinity.notna()

# Keep only rows with both a usable temperature and a usable affinity.
temperature_data = overlapping_data[has_measurements].copy()
temperature_data["temperature_kelvin"] = temperature_celsius[has_measurements] + 273.15  # celsius -> kelvin
temperature_data["affinity"] = affinity[has_measurements]

# Recompute delta G row by row from temperature and Kd.
temperature_data["calc_dg"] = temperature_data.apply(calc_delta_g, axis=1)

# Show the recomputed values next to the delta_g column from the summary table.
temperature_data[["temperature", "affinity", "calc_dg", "delta_g"]]

Unnamed: 0,temperature,affinity,calc_dg,delta_g
13,25,1.000000e-10,-13.642470,-13.63
14,25,2.000000e-04,-5.046309,-5.04
15,25,2.000000e-04,-5.046309,-5.04
16,25,2.000000e-04,-5.046309,-5.04
17,25,2.000000e-04,-5.046309,-5.04
...,...,...,...,...
1239,37,1.100000e-10,-14.132812,-14.12
1302,25,1.300000e-09,-12.122776,-12.12
1307,25,4.100000e-08,-10.077988,-10.07
1316,22,8.900000e-09,-10.872509,-10.87


In [8]:
# Correlation (pandas default: Pearson) between the recomputed delta G and the
# delta_g column of the summary table.
calculated_dg = temperature_data["calc_dg"]
tabulated_dg = temperature_data["delta_g"]
calculated_dg.corr(tabulated_dg)

0.9996927244626528