This notebook prepares metal complexes for molecular docking: xTB is used to calculate the atomic charges, which are then used to generate the PDBQT files from the XYZ files.

# Prepare metallic complexes for docking

## Installation

In [None]:
# Colab-only bootstrap: install and set up condacolab. When `google.colab`
# cannot be imported (i.e. not running on Colab) the cell does nothing.
try:
    import google.colab

    !pip install condacolab
    import condacolab

    condacolab.install()
except ModuleNotFoundError:
    pass

In [None]:
# Install the conda-forge dependencies (xtb, rdkit, py3Dmol, openbabel) when
# condacolab is available, and record the outcome in `on_colab`.
try:
    import condacolab
    from google.colab import files
    from IPython.display import clear_output

    condacolab.check()
    !conda install -q -y -c conda-forge xtb rdkit py3Dmol openbabel
except ModuleNotFoundError:
    # condacolab / google.colab / IPython could not be imported
    on_colab = False
else:
    # check if installation was successful
    try:
        import rdkit

        on_colab = True
        clear_output()  # clear the excessive installation outputs
        print("Dependencies successfully installed!")
    except ModuleNotFoundError:
        # NOTE(review): `on_colab` is left unassigned on this path — confirm
        # whether any later cell reads it.
        print("Error while installing dependencies!")

## Import libraries

In [None]:
import sys
sys.path.append('/usr/local/lib/python3.7/site-packages/')

from rdkit import Chem
from rdkit.Chem import AllChem
from collections import defaultdict
from rdkit.Chem.Draw import IPythonConsole
IPythonConsole.ipython_3d = True

import py3Dmol
import matplotlib.pyplot as plt

from google.colab import files

## Load functions

In [None]:
def write_xyz(mol, file_name='temp.xyz'):
    """Write the first conformer of ``mol`` to ``file_name`` in XYZ format.

    The file contains the atom count, a fixed ``title`` comment line and then
    one ``symbol x y z`` line per atom, in the molecule's atom order.
    """
    atom_count = mol.GetNumAtoms()
    elements = [atom.GetSymbol() for atom in mol.GetAtoms()]
    with open(file_name, "w") as xyz:
        xyz.write(f"{atom_count}\n")
        xyz.write("title\n")
        conformer = mol.GetConformers()[0]
        for idx, element in enumerate(elements):
            pos = conformer.GetAtomPosition(idx)
            # The newline is joined as the last field, so each line ends
            # with a space followed by the newline.
            fields = [element, str(pos.x), str(pos.y), str(pos.z), "\n"]
            xyz.write(" ".join(fields))

def show_mol(file_name, animate=False):
    """Display the contents of an XYZ file in a 400x400 py3Dmol viewer.

    Args:
        file_name: Path of the XYZ file to display.
        animate: If True, the file is loaded with ``addModelsAsFrames`` and
            played as an animation; otherwise it is loaded as a single model.
    """
    # Read through a context manager so the file handle is closed immediately
    # instead of being left for the garbage collector.
    with open(file_name, 'r') as xyz_file:
        xyz = xyz_file.read()

    view = py3Dmol.view(width=400, height=400)
    if animate:
        view.addModelsAsFrames(xyz, 'xyz')
        # Alternative playback mode: {'loop': 'backAndForth'}
        view.animate({'loop': "forward", 'reps': 5})
    else:
        view.addModel(xyz, 'xyz')
    view.setStyle({'stick': {}})
    view.setBackgroundColor('0xeeeeee')
    view.zoomTo()
    view.show()


# See https://github.com/rdkit/rdkit/discussions/5280
def is_transition_metal(at):
    """Return True if the atomic number of ``at`` is in 22-29, 40-47 or 72-79."""
    atomic_num = at.GetAtomicNum()
    metal_ranges = ((22, 29), (40, 47), (72, 79))
    return any(low <= atomic_num <= high for low, high in metal_ranges)
def set_dative_bonds(mol, fromAtoms=(7,8)):
    """ convert some bonds to dative

    Replaces some single bonds between metals and atoms with atomic numbers in fromAtoms
    with dative bonds. The replacement is only done if the atom has "too many" bonds.

    Args:
        mol: molecule to process; the edits are made on a Chem.RWMol built from it.
        fromAtoms: atomic numbers of the neighbor atoms whose bonds to a metal
            may be converted (defaults to 7 and 8).

    Returns the modified molecule.

    """
    pt = Chem.GetPeriodicTable()
    rwmol = Chem.RWMol(mol)
    # NOTE(review): strict=False presumably keeps the update from raising on
    # over-valent atoms — confirm against the RDKit documentation.
    rwmol.UpdatePropertyCache(strict=False)
    metals = [at for at in rwmol.GetAtoms() if is_transition_metal(at)]
    for metal in metals:
        for nbr in metal.GetNeighbors():
            # Convert only when the neighbor is one of the requested elements,
            # its explicit valence exceeds the element's default valence, and
            # its bond to the metal is currently a single bond.
            if nbr.GetAtomicNum() in fromAtoms and \
               nbr.GetExplicitValence()>pt.GetDefaultValence(nbr.GetAtomicNum()) and \
               rwmol.GetBondBetweenAtoms(nbr.GetIdx(),metal.GetIdx()).GetBondType() == Chem.BondType.SINGLE:
                # Re-create the bond as dative, added in the order neighbor -> metal
                rwmol.RemoveBond(nbr.GetIdx(),metal.GetIdx())
                rwmol.AddBond(nbr.GetIdx(),metal.GetIdx(),Chem.BondType.DATIVE)
    return rwmol

def add_charges_to_mol2(mol2_file, charges_file, output_mol2_file):
    """
    Replace the partial charges in the @<TRIPOS>ATOM section of a MOL2 file.

    Charges are read from ``charges_file`` (one value per line, blank lines
    ignored) and assigned, in order, to the atom records that have at least
    9 whitespace-separated fields. Those records are rewritten with fixed
    column widths; any fields following the charge are kept. Every other
    line is copied unchanged.

    The input is read completely and the output is built in memory before the
    output file is opened, so ``output_mol2_file`` may be the same path as
    ``mol2_file`` and a failure while formatting leaves the files untouched.

    If there are fewer charges than atom records, the remaining atom records
    are written unchanged (they are not dropped) and a warning is issued. A
    warning is also issued when some charges are left unused.

    Args:
        mol2_file: Path of the MOL2 file to read.
        charges_file: Path of the text file holding one charge per line.
        output_mol2_file: Path of the MOL2 file to write.

    Raises:
        ValueError: If a charge that gets assigned, or the atom ID or a
            coordinate of a rewritten record, is not numeric.
    """
    import warnings

    # Read the charges, skipping blank lines (e.g. a trailing empty line)
    with open(charges_file, 'r') as f:
        charges = [entry for entry in (line.strip() for line in f) if entry]

    # Read the original MOL2 file
    with open(mol2_file, 'r') as f:
        mol2_lines = f.readlines()

    output_lines = []
    atom_section = False  # Track if we are in the @<TRIPOS>ATOM section
    charge_idx = 0  # Index of the next charge to assign
    atoms_without_charge = 0  # Atom records left as-is for lack of a charge

    for line in mol2_lines:
        # Any record type indicator starts a new section; only ATOM is edited
        if line.startswith("@<TRIPOS>"):
            atom_section = line.startswith("@<TRIPOS>ATOM")
            output_lines.append(line)
            continue

        parts = line.split()
        if not atom_section or len(parts) < 9:
            output_lines.append(line)  # Non-atom lines are written as they are
            continue

        if charge_idx >= len(charges):
            # Out of charges: keep the record rather than silently losing it
            atoms_without_charge += 1
            output_lines.append(line)
            continue

        charge = float(charges[charge_idx])
        charge_idx += 1
        # Fields after the charge column (e.g. a status bit) are preserved
        trailing = "".join(f" {field}" for field in parts[9:])
        # Enforce fixed width for each column: ID, atom name, x, y, z, atom type, residue, etc.
        output_lines.append(
            f"{int(parts[0]):>7} {parts[1]:<7} {float(parts[2]):>10.4f} "
            f"{float(parts[3]):>10.4f} {float(parts[4]):>10.4f} {parts[5]:<7} "
            f"{parts[6]:>2} {parts[7]:<6} {charge:>10.4f}{trailing}\n"
        )

    with open(output_mol2_file, 'w') as f:
        f.writelines(output_lines)

    if atoms_without_charge:
        warnings.warn(
            f"{charges_file}: only {len(charges)} charges for "
            f"{charge_idx + atoms_without_charge} atom records; "
            f"{atoms_without_charge} atom records were left unchanged"
        )
    elif charge_idx < len(charges):
        warnings.warn(
            f"{charges_file}: {len(charges) - charge_idx} charges were not used"
        )



## Get charges with xTB

In [None]:
# --opt: optimize geometry
# --ohess: compute frequencies
# --uhf: number of unpaired electrons (sets the multiplicity)
# use 0 for singlet
# use 1 for doublet
# use 2 for triplet

# Example: !xtb file.xyz --chrg 0 --uhf 1

In [None]:
# Display the structure stored in 'file.xyz' (replace with the file to inspect)
show_mol('file.xyz')

## Get mol2 and pdbqt files for docking

In [None]:
# Replace 'file' with the corresponding file name.

# Get mol2 file
!obabel file.xyz -O file.mol2
# include charges into mol2 file
# ('charges' is presumably the file written by the xTB run above — confirm;
# file.mol2 is both the input and the output, i.e. it is rewritten in place)
add_charges_to_mol2('file.mol2', 'charges', 'file.mol2')
# Get pdbqt file
!obabel file.mol2 -O file.pdbqt
# Download files
files.download('file.pdbqt')

# Prepare fibrils for docking

- Compute the Euler angles of the single rotation that is equivalent to a sequence of successive rotations, given the matrix of that sequence.
- Renumber the residues and change all chains to 'A'.

In [None]:
import numpy as np
def R_x(theta):
    """Return the 3x3 matrix of a rotation by ``theta`` degrees about the x axis."""
    angle = np.radians(theta)
    cos_t, sin_t = np.cos(angle), np.sin(angle)
    return np.array([
        [1, 0, 0],
        [0, cos_t, -sin_t],
        [0, sin_t, cos_t],
    ])

def R_y(theta):
    """Return the 3x3 matrix of a rotation by ``theta`` degrees about the y axis."""
    angle = np.radians(theta)
    cos_t, sin_t = np.cos(angle), np.sin(angle)
    return np.array([
        [cos_t, 0, sin_t],
        [0, 1, 0],
        [-sin_t, 0, cos_t],
    ])

def R_z(theta):
    """Return the 3x3 matrix of a rotation by ``theta`` degrees about the z axis."""
    angle = np.radians(theta)
    cos_t, sin_t = np.cos(angle), np.sin(angle)
    return np.array([
        [cos_t, -sin_t, 0],
        [sin_t, cos_t, 0],
        [0, 0, 1],
    ])

In [None]:
# Matrix of successive rotations
# Uncomment one of the definitions of R below (or write your own) before
# running this cell: with all of them commented out, this cell does not define R.

#R = R_y(-3)  @ R_x(3) @  R_z(15) @ R_y(70) @ R_z(20)                        # 2MXU Fibril
#R = R_x(-5) @ R_y(-7) @ R_y(10) @ R_x(180) @ R_z(115) @ R_x(11) @ R_y(-30)  # 2MVX Fibril
#R = R_x(3) @ R_z(-95) @ R_y(180)                                            # 5OQV Fibril
print(R)

In [None]:
# Euler angles (in degrees) of the decomposition R = R_z(gamma) @ R_y(beta) @ R_x(alpha)
# The arcsin argument is clipped to [-1, 1]: rounding in the matrix products
# can push it marginally outside that interval, which would give nan.
# NOTE: when beta is +/-90 degrees, R[2,1], R[2,2], R[1,0] and R[0,0] all
# vanish, so alpha and gamma are not uniquely determined by these formulas.
beta = np.degrees(np.arcsin(np.clip(-R[2,0], -1.0, 1.0)))
alpha = np.degrees(np.arctan2(R[2,1], R[2,2]))
gamma = np.degrees(np.arctan2(R[1,0], R[0,0]))
print('α:', alpha, 'β:', beta, 'γ:', gamma)

# Rebuild the matrix from the extracted angles as a consistency check
R1 = R_z(gamma) @ R_y(beta) @ R_x(alpha)
print(R1) # R1 should be the same as R

In [None]:
def renumerar_pdb_cambiar_cadena(input_pdb, output_pdb):
    """Renumber the residues consecutively from 1 and set every chain ID to 'A'.

    Only ATOM and HETATM records are modified; every other line is copied
    unchanged. A new residue number is assigned whenever the residue identity
    (original chain ID, residue number and insertion code, columns 22-27)
    differs from that of the previous atom record. Comparing the chain ID as
    well keeps adjacent residues of different chains apart even when they
    carry the same original residue number.

    The counter is not reset between records, so numbering simply continues
    through the whole file. Numbers above 9999 no longer fit the 4-column
    residue field.

    Args:
        input_pdb: Path of the PDB file to read.
        output_pdb: Path of the PDB file to write. Must differ from
            ``input_pdb``, because both files are open at the same time and
            the output is truncated on opening.
    """
    with open(input_pdb, 'r') as f_in, open(output_pdb, 'w') as f_out:
        previous_residue = None
        new_number = 0
        for line in f_in:
            if not line.startswith(('ATOM', 'HETATM')):
                f_out.write(line)
                continue

            # Chain ID (col 22) + residue number (cols 23-26) + insertion code (col 27)
            residue_id = line[21:27]
            if residue_id != previous_residue:
                previous_residue = residue_id
                new_number += 1

            # Force chain 'A' (col 22) and write the new residue number (cols 23-26)
            f_out.write(f"{line[:21]}A{new_number:>4}{line[26:]}")

In [None]:
# Renumber each fibril structure and write the result to a separate '_1' file
for source_pdb, renumbered_pdb in (
    ('2MXU.pdb', '2MXU_1.pdb'),
    ('2MVX.pdb', '2MVX_1.pdb'),
    ('5OQV.pdb', '5OQV_1.pdb'),
):
    renumerar_pdb_cambiar_cadena(source_pdb, renumbered_pdb)