In [3]:
from rdkit import Chem

def IonizedMols(smi):
    '''Enumerate protonated/deprotonated variants of a molecule given as SMILES.

    Every N, O and S atom is classified by its total hydrogen count:

    * N with 0 or 2 H, O with 0 H, S with 0 H: may gain a proton
      (formal charge +1, one more explicit H);
    * N with 1 or 3 H, O with 1 H, S with 1 H: may lose a proton
      (formal charge -1, one fewer explicit H, never below zero);
    * any other N/O/S atom is left untouched.

    Each non-empty subset of these titratable atoms is applied to a freshly
    parsed copy of the molecule, so a molecule with ``n`` titratable atoms is
    tried in ``2**n - 1`` charge states.

    Parameters
    ----------
    smi : str
        SMILES string of the parent molecule.

    Returns
    -------
    list of str
        ``Chem.MolToSmiles`` output for every variant that sanitizes and whose
        SMILES differs from the parent's, in binary-counting order over the
        titratable atoms (first titratable atom toggles fastest). The list is
        empty when ``smi`` cannot be parsed or has no titratable atom.
    '''
    mol = Chem.MolFromSmiles(smi)
    if mol is None:
        # MolFromSmiles returns None for SMILES it cannot parse; there is
        # nothing to enumerate in that case.
        return []

    # (atomic number, total H count) pairs that gain / lose a proton.
    protonate = {(7, 0), (7, 2), (8, 0), (16, 0)}
    deprotonate = {(7, 1), (7, 3), (8, 1), (16, 1)}

    atoms_idx = []  # Indexes of the titratable atoms
    nfc = []        # New formal charge for each titratable atom
    nehs = []       # New number of explicit hydrogens for each titratable atom
    for atom in mol.GetAtoms():
        key = (atom.GetAtomicNum(), atom.GetTotalNumHs())
        if key in protonate:
            nfc.append(atom.GetFormalCharge() + 1)
            nehs.append(key[1] + 1)
        elif key in deprotonate:
            nfc.append(atom.GetFormalCharge() - 1)
            nehs.append(max(0, atom.GetNumExplicitHs() - 1))
        else:
            continue
        atoms_idx.append(atom.GetIdx())

    ntats = len(atoms_idx)  # Number of titratable atoms
    ref_smi = Chem.MolToSmiles(mol)  # Parent SMILES, computed once
    nmols = []  # SMILES of the new molecules

    # Bit i of `mask` says whether titratable atom i is modified. Counting
    # from 1 to 2**ntats - 1 visits every non-empty subset, with atom 0 as the
    # least-significant bit.
    for mask in range(1, 1 << ntats):
        nmol = Chem.MolFromSmiles(smi)
        for i in range(ntats):
            if (mask >> i) & 1:
                target = nmol.GetAtomWithIdx(atoms_idx[i])
                target.SetFormalCharge(nfc[i])
                target.SetNumExplicitHs(nehs[i])
        try:
            Chem.SanitizeMol(nmol)
        except ValueError:
            # RDKit reports sanitization failures (e.g. impossible valences)
            # as ValueError subclasses; such a charge state is not a valid
            # molecule, so skip it instead of aborting the enumeration.
            continue
        new_smi = Chem.MolToSmiles(nmol)
        if new_smi != ref_smi:
            nmols.append(new_smi)
    return nmols

In [None]:
# Input SMILES files, one pair per molecule size (1..11).
files1 = ["gdb11_size%02d.smi"%(i) for i in range(1,12)]
files2 = ["gdb11SFCl_size%02d.smi"%(i) for i in range(1,12)]

# For every size, ionize each molecule of both input files and write the
# resulting SMILES to a single "gdb11charged_sizeNN.smi" file, one per line as
# "<smiles>\t<running index>\t1".
for i, (fl1, fl2) in enumerate(zip(files1, files2)):
    print('Working on: '+fl1+' and '+fl2)
    cnt = 1  # Running index, shared by both input files of this size
    # Context managers close the files even if IonizedMols or a write raises.
    with open("gdb11charged_size%02d.smi"%(i+1), 'w') as nf:
        for fl in (fl1, fl2):
            with open(fl, "r") as f:
                for line in f:
                    # NOTE(review): the first blank line ends processing of
                    # this input file; use `continue` instead if blank lines
                    # can occur before the end of the data.
                    if not line.strip():
                        break
                    smi = line.split()[0]  # SMILES is the first column
                    for d in IonizedMols(smi):
                        nf.write(d+'\t'+str(cnt)+'\t1\n')
                        cnt += 1

Working on: gdb11_size01.smi and gdb11SFCl_size01.smi
Working on: gdb11_size02.smi and gdb11SFCl_size02.smi
Working on: gdb11_size03.smi and gdb11SFCl_size03.smi
Working on: gdb11_size04.smi and gdb11SFCl_size04.smi
Working on: gdb11_size05.smi and gdb11SFCl_size05.smi
Working on: gdb11_size06.smi and gdb11SFCl_size06.smi
Working on: gdb11_size07.smi and gdb11SFCl_size07.smi
Working on: gdb11_size08.smi and gdb11SFCl_size08.smi
Working on: gdb11_size09.smi and gdb11SFCl_size09.smi
Working on: gdb11_size10.smi and gdb11SFCl_size10.smi
Working on: gdb11_size11.smi and gdb11SFCl_size11.smi
