# FEgrow: An Open-Source Molecular Builder and Free Energy Preparation Workflow

**Authors: Mateusz K Bieniek, Ben Cree, Rachael Pirie, Joshua T. Horton, Natalie J. Tatum, Daniel J. Cole**

## Overview

Building and scoring molecules can be further streamlined by employing our established protocol. Here we show how to quickly build a library of molecules and score every member of it.

In [None]:
import pandas as pd
import prody
from rdkit import Chem

import fegrow
from fegrow import ChemSpace

from fegrow.testing import core_5R83_path, rec_5R83_path, data_5R83_path

# Prepare the ligand template

In [None]:
# Load the first molecule stored in the SDF file; it is used as the scaffold
# (ligand template) throughout the rest of the notebook.
scaffold = Chem.SDMolSupplier('sarscov2/mini.sdf')[0]


In [None]:
# Wrap the scaffold in an FEgrow RMol and render it in 2D
# (idx=True presumably labels the atoms with their indices - confirm in the FEgrow docs).
toview = fegrow.RMol(scaffold)
toview.rep2D(idx=True, size=(500, 500))

In [None]:
# Read the SMILES library: every line of the text file becomes one entry.
# Note that `mols` holds the raw SMILES strings, not RDKit molecules.
with open('sarscov2/SARS-smiles.txt') as f:
    mols = f.read().splitlines()

In [None]:
# Parse the first SMILES; as the last expression of the cell the molecule is displayed.
Chem.MolFromSmiles(mols[0])

In [None]:
# Sanity check: print the index and SMILES of every library entry that cannot
# be superimposed on the scaffold, either because RDKit fails to parse it or
# because it does not contain the scaffold as a substructure.
pattern = scaffold

for i, smi in enumerate(mols):
    mol = Chem.MolFromSmiles(smi)

    # MolFromSmiles returns None instead of raising for an unparsable SMILES;
    # report it here rather than crashing on the substructure test below
    if mol is None:
        print(i, smi, "(could not be parsed)")
        continue

    if not mol.HasSubstructMatch(pattern):
        print(i, smi)

As we are using already prepared SMILES that contain the scaffold as a substructure, there is no need to set a growing vector.

In [None]:
# Start a local dask cluster; it is handed to ChemSpace below via dask_cluster=lc.
from dask.distributed import LocalCluster
# Process-based workers with one thread each.
# NOTE(review): n_workers=None presumably leaves the number of workers to dask - confirm.
lc = LocalCluster(processes=True, n_workers=None, threads_per_worker=1)

In [None]:
# create the chemical space, using the local dask cluster created above
cs = ChemSpace(dask_cluster=lc)
# display the ChemSpace in the notebook
cs

In [None]:
#cs._dask_cluster

In [None]:
# we're not growing the scaffold, we're superimposing bigger molecules on it,
# so the scaffold is added as it is, without choosing a growing vector
cs.add_scaffold(scaffold)

In [None]:
# get the protein-ligand complex structure
!wget -nc https://files.rcsb.org/download/7L10.pdb

# load the complex with the ligand
sys = prody.parsePDB('sarscov2/7l10.pdb')

# remove any unwanted molecules
rec = sys.select('not (nucleic or hetatm or water)')

# save the processed protein
prody.writePDB('rec.pdb', rec)

# fix the receptor file (missing residues, protonation, etc)
fegrow.fix_receptor("rec.pdb", "rec_final.pdb")

# load back into prody
#rec_final = prody.parsePDB("rec_final.pdb")
#rec_final = prody.parsePDB("out.pdb")

# fix the receptor file (missing residues, protonation, etc)
##fegrow.fix_receptor("7t79-H-prep.pdb", "rec_final.pdb")

# load back into prody
##rec_final = prody.parsePDB("rec_final.pdb")

#!grep "ATOM" ../structures/7t79-H.pdb > rec_final.pdb
#cs.add_protein(rec_5R83_path)

cs.add_protein('rec_final.pdb')

In [None]:
# Use the whole SMILES library (mols[0:] is a shallow copy of the list);
# narrow the slice to work on a subset instead.
smiles = mols[0:]

In [None]:
# show the SMILES that are about to be added to the chemical space
print(smiles)

In [None]:
# use the full SMILES library read from the text file
# (assigned again here so that this cell can be re-run on its own);
# alternatively the SMILES could be loaded from a CSV file, e.g.
#   pd.read_csv('smiles.csv').Smiles.to_list()
smiles = mols[0:]

# here we add SMILES which should already have been matched
# to the scaffold (rdkit Mol.HasSubstructMatch)
cs.add_smiles(smiles, protonate=True)
# display the updated ChemSpace
cs

In [None]:
# Build and score the molecules in the chemical space.
# NOTE(review): num_conf=500 is presumably the number of conformers generated per
# molecule and gnina_gpu=False presumably keeps the gnina scoring on the CPU;
# confirm these and the penalty arguments against the FEgrow documentation.
cs.evaluate(num_conf=500, gnina_gpu=False, penalty=0.0, al_ignore_penalty=False)

In [None]:
# display the results table of the chemical space (used below as a pandas DataFrame)
cs.df

In [None]:
# write the chemical space to a single SDF file
cs.to_sdf("cs_optimised_molecules.sdf")

In [None]:
# Write every entry of the chemical space to its own PDB file; entries that
# raise AttributeError on export are reported instead of stopping the loop.
for mol_index in range(len(cs)):
    pdb_name = f"best_conformers_{mol_index}.pdb"
    try:
        cs[mol_index].to_file(pdb_name)
    except AttributeError:
        print("No conformer for molecule", mol_index)

In [None]:
# export the results table, including its index, to a CSV file
cs.df.to_csv('SARS-out.csv', index=True)

In [None]:
# Spot check: print whether the first SMILES contains the scaffold as a
# substructure, then display the parsed molecule.
pattern = scaffold
mol = Chem.MolFromSmiles(smiles[0])
print(mol.HasSubstructMatch(pattern))
# last expression of the cell: the molecule is displayed
mol

In [None]:
# show only the rows whose Success value is True
# (the explicit `== True` builds the element-wise boolean mask that .loc needs)
cs.df.loc[cs.df['Success'] == True]

In [None]:
# save the chemical space of built molecules:
# each exported molecule carries the remaining dataframe columns as SD properties

failed = False   # set to True to also export molecules whose build failed
unbuilt = False  # set to True to also export molecules that were not built yet

with Chem.SDWriter('notebook_chemspace.sdf') as SD:
    # every column except the molecule itself is written as a property
    columns = [column for column in cs.df.columns if column != "Mol"]

    for i, row in cs.df.iterrows():
        success = row.Success

        if pd.isna(success):
            # ignore this molecule because it was not built yet
            # (assumes unbuilt rows carry a missing Success value - confirm against ChemSpace);
            # testing for NA first also avoids the ambiguous truth value of a
            # comparison against a missing value
            if not unbuilt:
                continue
        elif not success:
            # ignore this molecule because it failed during the build
            if not failed:
                continue

        mol = row.Mol
        mol.SetIntProp("index", i)
        for column in columns:
            # look the value up by label: attribute access would return a
            # Series attribute for columns named e.g. "name" or "index"
            mol.SetProp(column, str(row[column]))

        mol.ClearProp("attachement_point")
        SD.write(mol)

In [None]:
# Read back the SDF written above, keep the molecules flagged as successful
# and save the `best_n` highest-scoring ones, in ranked order, to a new SDF file.
input_sdf = 'notebook_chemspace.sdf'
best_n = 100

with Chem.SDMolSupplier(input_sdf) as SDF:
    # entries the supplier yields as None are dropped; 'Success' was stored as
    # text, hence the comparison with the string 'True'
    molecules = [
        mol
        for mol in SDF
        if mol is not None and mol.GetPropsAsDict()['Success'] == 'True'
    ]

# rank by score, highest first
sorted_molecules = sorted(
    molecules,
    key=lambda candidate: candidate.GetPropsAsDict()['score'],
    reverse=True,
)

with Chem.SDWriter(f"top_{best_n:d}_{input_sdf}") as SDF_OUT:
    # the slice stops after best_n molecules (or earlier if fewer are available)
    for mol in sorted_molecules[:best_n]:
        SDF_OUT.write(mol)

print('Done')