# Set Up the EphB4 Calculations

This involves splitting the multi-ligand SDF file into one SDF file per ligand and creating the required input directory for each ligand.

In [1]:
from pathlib import Path
import os

# Location of the shared input files, as stored in the symlinks created by
# make_inp_dir. Those links live in <ligand>/input, and a relative symlink
# target is resolved from the directory containing the link, hence the
# leading "../../".
INP = "../../master_input"

In [25]:
def make_inp_dir(name: str, sdf: list[str]) -> None:
    """Create ``<name>/input`` for one ligand and populate it.

    The new directory receives symlinks to the shared protein structure, run
    script and config template found under ``INP``, plus a ``ligand.sdf``
    file holding the ligand's own record.

    Args:
        name: Ligand name, used as the name of the top-level directory.
        sdf: Lines of the ligand's SDF record, written out unchanged.

    Raises:
        FileExistsError: If ``<name>`` or ``<name>/input`` already exists.
    """
    ligand_root = f"{name}"
    target_dir = f"{name}/input"

    # The ligand directory has to exist before its input subdirectory
    for new_dir in (ligand_root, target_dir):
        os.mkdir(new_dir)

    # Link (rather than copy) the files shared by every ligand
    for shared in ("protein.pdb", "run_somd.sh", "template_config.cfg"):
        os.symlink(f"{INP}/{shared}", f"{target_dir}/{shared}")

    # Store the ligand's own structure alongside the links
    with open(f"{target_dir}/ligand.sdf", "wt") as handle:
        handle.writelines(sdf)

In [26]:
def split_sdf(multi_sdf: str) -> None:
    """Split a multi-ligand SDF file into one input directory per ligand.

    Records are delimited by lines containing ``$$$$``. The ligand name is
    taken from the last line within the record that contains ``CHEMBL``, and
    each complete record is handed to ``make_inp_dir``. Any lines after the
    final ``$$$$`` are ignored.

    Args:
        multi_sdf: Path to the SDF file containing all of the ligands.

    Raises:
        ValueError: If a record contains no line with ``CHEMBL`` in it, so
            that no ligand name can be assigned to it.
    """
    with open(multi_sdf, "rt") as f:
        lines = f.readlines()

    lig_name = ""
    current_lines = []
    n_records = 0

    for line in lines:

        if "CHEMBL" in line:
            lig_name = line.strip()

        # Every line, including the delimiter, belongs to the current record
        current_lines.append(line)

        if "$$$$" in line:
            n_records += 1
            if not lig_name:
                raise ValueError(
                    f"Record {n_records} of {multi_sdf} has no line containing "
                    "'CHEMBL', so no ligand name could be determined"
                )
            make_inp_dir(name=lig_name, sdf=current_lines)
            # Start afresh so that a later record without a name cannot
            # silently reuse this record's name
            lig_name = ""
            current_lines = []



In [27]:
# Create one <ligand>/input directory for each record in the multi-ligand SDF
split_sdf("master_input/ephb4_chembl_3D.sdf")

# Get the Experimental Free Energies of Binding

In [6]:
import numpy as np

In [7]:
def ic50_to_dg(ic50: float) -> float:
    """Convert IC50 to dG bind (in kcal / mol) at 298 K.

    The IC50 is first converted to an inhibition constant assuming that the
    substrate concentration equals Km (Ki = IC50 / 2), and then to a binding
    free energy with dG = RT ln(Ki) using a standard concentration of 1 M.

    Args:
        ic50: IC50 in mol / L. A NumPy array is also accepted and is
            converted element-wise without being modified.

    Returns:
        The binding free energy in kcal / mol.
    """
    # RT in kcal / mol at 298 K
    rt = 0.5922
    # Assume substrate concentration = Km. A new object is created here
    # (rather than using /=) so that array arguments are left untouched.
    ki = ic50 / 2
    # Convert to dG (standard conc 1 M)
    return rt * np.log(ki)

In [18]:
def write_exp_dgs(sdf_file: str, output_file: str) -> None:
    """Write out the experimental free energies based on the SDF pIC50s.

    For every line containing ``paper_pIC50``, the following line is read as
    the pIC50 value, converted to a binding free energy with ``ic50_to_dg``
    and written as one CSV row. The row is labelled with the most recent line
    containing ``CHEMBL`` in the same record (records end at ``$$$$``).

    Args:
        sdf_file: Path to the SDF file containing all of the ligands.
        output_file: Path of the CSV file to write. Its directory must
            already exist.

    Raises:
        ValueError: If a pIC50 entry appears before any ``CHEMBL`` line in
            its record, if the entry has no value line after it, or if the
            value cannot be parsed as a number.
    """
    with open(sdf_file, "rt") as sdf:
        lines = sdf.readlines()

    lig_name = ""
    lines_to_write = []

    for i, line in enumerate(lines):

        if "CHEMBL" in line:
            lig_name = line.strip()

        if "paper_pIC50" in line:
            if not lig_name:
                raise ValueError(
                    f"Line {i + 1} of {sdf_file}: pIC50 entry found before "
                    "any line containing 'CHEMBL' in this record"
                )
            if i + 1 >= len(lines):
                raise ValueError(
                    f"Line {i + 1} of {sdf_file}: pIC50 entry has no value "
                    "on the following line"
                )
            # The value sits on the line after the tag; IC50 is in mol / L
            pic50 = float(lines[i + 1].strip())
            ic50 = 10 ** (-pic50)
            dg = ic50_to_dg(ic50)
            # Use 0.5 kcal/mol as a reasonable experimental error
            lines_to_write.append(f"{lig_name},{lig_name},{dg},{0.5},0\n")

        if "$$$$" in line:
            # End of record: never carry a name over to the next ligand
            lig_name = ""

    # Write output only once the whole input has been parsed successfully
    with open(output_file, "wt") as out:
        out.write("calc_base_dir,name,exp_dg,exp_er,calc_cor\n")
        out.writelines(lines_to_write)

In [19]:
# Write the experimental dG table for every paper_pIC50 entry in the SDF
write_exp_dgs(sdf_file="master_input/ephb4_chembl_3D.sdf", output_file="input/exp_dgs.csv")