In [None]:
#This script contains functions to conduct gnina docking for our evaluation purposes
import os
import subprocess

import pandas as pd
from openmm.app import PDBFile
from pdbfixer import PDBFixer
from pymol import cmd

In [None]:
def convert_cif_to_pdb(input_cif, output_pdb):
    """
    Convert a CIF file to a PDB file using PyMOL.

    The whole loaded object is written out; this function applies no
    selection filtering of its own.

    Parameters:
        input_cif (str): Path to the input CIF file.
        output_pdb (str): Path to save the output PDB file.
    """
    # Start from an empty session so objects left over from earlier calls
    # are not mixed into this conversion.
    cmd.delete("all")
    try:
        # Load the CIF file
        cmd.load(input_cif, "structure")
        # Save the structure as PDB
        cmd.save(output_pdb, "structure")
        print(f"Converted {input_cif} to {output_pdb}")
    finally:
        # Clean up the PyMOL session even if loading or saving failed
        cmd.delete("all")


def split_non_standard_residues(input_pdb, receptor_pdb, ligand_pdb, ligand_sdf):
    """
    Split a protein-ligand complex into receptor and ligand files.

    Atoms whose residue name is one of the 20 standard amino acids are written
    to the receptor file; all remaining atoms are written to the ligand file.
    NOTE(review): the ligand selection is simply "not a standard residue name",
    so any other residue name present in the input is treated as ligand as
    well - confirm the inputs contain nothing else.

    Parameters:
    - input_pdb (str): Path to the input PDB file containing the protein-ligand complex.
    - receptor_pdb (str): Path to save the receptor PDB file.
    - ligand_pdb (str): Path to save the ligand PDB file.
    - ligand_sdf (str): Path to save the ligand in SDF format. Only written
      when ligand atoms were found.
    """
    # List of standard amino acid residue names
    standard_residues = [
        "ALA", "ARG", "ASN", "ASP", "CYS", "GLN", "GLU", "GLY",
        "HIS", "ILE", "LEU", "LYS", "MET", "PHE", "PRO", "SER",
        "THR", "TRP", "TYR", "VAL"
    ]

    # Convert the list into a selection-friendly string
    standard_residue_selection = "+".join(standard_residues)

    # Start from an empty session, then load the complex
    cmd.delete("all")
    cmd.load(input_pdb, "complex")

    # Select and save the ligand (non-standard residues)
    cmd.select("ligand", f"not resn {standard_residue_selection}")
    ligand_found = cmd.count_atoms("ligand") > 0
    if ligand_found:
        cmd.save(ligand_pdb, "ligand")
        print(f"Ligand saved to {ligand_pdb}")
    else:
        print("No non-standard residues found. Check the input PDB.")

    # Select and save the receptor (standard residues)
    cmd.select("receptor", f"resn {standard_residue_selection}")
    if cmd.count_atoms("receptor") > 0:
        cmd.save(receptor_pdb, "receptor")
        print(f"Receptor saved to {receptor_pdb}")
    else:
        print("No standard residues found. Check the input PDB.")

    # Clean up PyMOL objects
    cmd.delete("all")

    # Convert the ligand to SDF from a session that holds only the ligand.
    # Skipped when no ligand was written above: loading ligand_pdb would then
    # either fail or pick up a stale file left by an earlier run.
    if ligand_found:
        cmd.load(ligand_pdb)
        cmd.save(ligand_sdf)
        # The PDB is rewritten from the reloaded object, as before.
        cmd.save(ligand_pdb)
        cmd.delete("all")

def combine_complex(ligand_pdb, receptor_pdb,output_pdb):
    """
    Load a ligand file and a receptor file into one PyMOL session and save
    everything that was loaded to a single output file.

    Parameters:
        ligand_pdb (str): Path to the ligand structure file (loaded first).
        receptor_pdb (str): Path to the receptor structure file (loaded second).
        output_pdb (str): Path of the combined file to write.
    """
    # Empty the session before and after so nothing else ends up in the output
    cmd.delete("all")
    for structure_path in (ligand_pdb, receptor_pdb):
        cmd.load(structure_path)
    cmd.save(output_pdb)
    cmd.delete("all")

def prep_gnina_fix(work_id,minimized_pdb,work_dir):
    """
    Repair a complex with PDBFixer and split it into gnina docking inputs.

    Steps: run PDBFixer on the input (missing atoms and hydrogens at pH 7.4,
    with missing residues at chain termini excluded from rebuilding), write the
    fixed complex, split it into receptor and ligand files, and write a
    recombined ligand + receptor complex.

    Files written into work_dir (all suffixed with work_id):
        fixed_complex_<id>.pdb, receptor_<id>.pdb, ligand_<id>.pdb,
        ligand_<id>.sdf, complex_docking<id>.pdb

    Parameters:
        work_id (str): Identifier used in the output file names.
        minimized_pdb (str): Path to the input complex PDB file.
        work_dir (str): Existing directory that receives the output files.

    Returns:
        tuple: (path to the ligand SDF file, path to the receptor PDB file).
    """
    cmd.delete("all")
    fixed_complex_pdb = os.path.join(work_dir, "fixed_complex_%s.pdb" % work_id)
    fixer = PDBFixer(filename=minimized_pdb)

    # Step 2: Identify and fix missing residues
    fixer.findMissingResidues()
    # By default, PDBFixer may add missing terminal residues. Remove unwanted ones:
    chains = list(fixer.topology.chains())
    keys = list(fixer.missingResidues.keys())
    for key in keys:
        chain_id, res_id = key
        # An entry at position 0 or at the chain length is a gap at a terminus
        if res_id == 0 or res_id == len(list(chains[chain_id].residues())):
            del fixer.missingResidues[key]

    # Step 3: Identify and add missing atoms
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()

    # Step 4: Add missing hydrogens (adjust pH if needed)
    fixer.addMissingHydrogens(pH=7.4)

    # Step 5: Write the fixed structure to a new PDB file
    with open(fixed_complex_pdb, 'w') as output_file:
        PDBFile.writeFile(fixer.topology, fixer.positions, output_file)

    # Report the path that was actually written
    print(f"Fixed PDB file saved as '{fixed_complex_pdb}'")

    min_ligand = os.path.join(work_dir, "ligand_%s.pdb" % work_id)
    min_receptor = os.path.join(work_dir, "receptor_%s.pdb" % work_id)
    min_ligand_sdf = os.path.join(work_dir, "ligand_%s.sdf" % work_id)
    min_complex_docking = os.path.join(work_dir, "complex_docking%s.pdb" % work_id)
    split_non_standard_residues(fixed_complex_pdb, min_receptor, min_ligand, min_ligand_sdf)

    # Re-save the ligand PDB and SDF from a ligand-only session, as before
    cmd.delete("all")
    cmd.load(min_ligand)
    cmd.save(min_ligand)
    cmd.save(min_ligand_sdf)
    cmd.delete("all")

    combine_complex(min_ligand_sdf, min_receptor, min_complex_docking)
    return min_ligand_sdf, min_receptor



In [None]:
#extract results from GNINA docking
def extract_affinity_and_cnn_affinity(gnina_output_path):
    """
    Read the score table from a gnina log file.

    The table starts after the line beginning with "mode |" and ends at the
    first blank line. For each row, the second whitespace-separated field is
    taken as the affinity and the last field as the CNN affinity. Rows whose
    fields are missing or non-numeric (such as the remaining header lines) are
    skipped.

    Parameters:
        gnina_output_path (str): Path to the gnina log file.

    Returns:
        tuple: (list of affinities, list of CNN affinities), in table order.
        Both lists are empty when no table rows are found.
    """
    with open(gnina_output_path, 'r') as handle:
        log_lines = handle.readlines()

    affinities, cnn_affinities = [], []
    in_table = False  # becomes True once the table header has been seen

    for raw_line in log_lines:
        stripped = raw_line.strip()

        # The header line switches parsing on and carries no scores itself
        if stripped.startswith("mode |"):
            in_table = True
            continue

        # Everything before the header is ignored
        if not in_table:
            continue

        # A blank line marks the end of the table
        if not stripped:
            break

        fields = raw_line.split()
        try:
            row_scores = (float(fields[1]), float(fields[-1]))
        except (IndexError, ValueError):
            # Not a data row (e.g. units line or separator); skip it
            continue

        affinities.append(row_scores[0])
        cnn_affinities.append(row_scores[1])

    return affinities, cnn_affinities

In [None]:
directory = "/path/to/your/cif/files"  # Replace with your directory containing CIF files
work_dir = "/path/to/your/work_dir"  # Replace with your desired working directory
os.makedirs(work_dir, exist_ok=True)
# Docker bind mounts need an absolute host path
mount_dir = os.path.abspath(work_dir)
# Convert, prepare and dock every CIF file in the directory
for file_name in sorted(os.listdir(directory)):
    # Only files with a `.cif` extension are processed; the stem is the job ID.
    # splitext touches the extension only, unlike str.replace, which would
    # also rewrite a ".cif" occurring elsewhere in the name or path.
    job_id, extension = os.path.splitext(file_name)
    if extension != ".cif":
        continue
    file_path = os.path.join(directory, file_name)
    pdb_path = os.path.join(directory, job_id + ".pdb")
    convert_cif_to_pdb(file_path, pdb_path)
    ligand_sdf, receptor = prep_gnina_fix(job_id, pdb_path, work_dir)
    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed through unchanged.
    gnina_command = [
        "docker", "run", "--gpus", "device=0",
        "-v", f"{mount_dir}:/scr",
        "gnina/gnina", "gnina",
        "-r", f"/scr/receptor_{job_id}.pdb",
        "-l", f"/scr/ligand_{job_id}.sdf",
        "--autobox_ligand", f"/scr/ligand_{job_id}.sdf",
        "--cnn_scoring", "refinement",
        "-o", f"/scr/ligand_docked{job_id}.sdf",
        "--log", f"/scr/result_{job_id}.log",
    ]
    try:
        return_code = subprocess.run(gnina_command).returncode
    except OSError as err:
        # e.g. the docker executable is not installed; keep going as before
        print(f"Could not launch docker for {job_id}: {err}")
        continue
    if return_code != 0:
        # Report the failure but continue with the remaining files
        print(f"gnina docking failed for {job_id} (exit code {return_code})")

In [None]:
directory = "/path/to/your/work_dir"  # Replace with the working directory that holds the gnina result_*.log files
aff_list = []
# Collect the first-row scores from every gnina log in the directory
for file_name in sorted(os.listdir(directory)):
    # Only `.log` files are considered
    if not file_name.endswith(".log"):
        continue
    file_path = os.path.join(directory, file_name)

    # Job ID = file name without the ".log" extension, the "result_" prefix
    # and an optional "_model" suffix, upper-cased.
    job_id = file_name[:-len(".log")]
    if job_id.startswith("result_"):
        job_id = job_id[len("result_"):]
    if job_id.endswith("_model"):
        job_id = job_id[:-len("_model")]
    job_id = job_id.upper()

    affinities, cnn_affinities = extract_affinity_and_cnn_affinity(file_path)
    if not affinities:
        # No score table in this log (e.g. docking failed): keep the job in
        # the summary with missing values instead of raising IndexError.
        print(f"No docking scores found in {file_path}")
        aff_list.append([job_id, float("nan"), float("nan")])
        continue
    # The first table row is used for the summary
    aff_list.append([job_id, affinities[0], cnn_affinities[0]])
aff_df = pd.DataFrame(aff_list,columns = ["Job ID", "predicted affinity", "predicted CNN affinity"])
aff_df