In [1]:
import os, glob, sys, math
import concurrent.futures
import argparse

import shutil

#from meetdock import *
# from scipy.spatial.transform import Rotation as R
from sklearn.neighbors import NearestNeighbors
import numpy as np
import pandas as pd
from Bio.PDB import *
from Bio.PDB.ResidueDepth import get_surface
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.ResidueDepth import min_dist
from pyquaternion import Quaternion

from utils import pdbtools
from utils import pdb_resdepth
from utils import matrice_distances
from utils import Lennard_Jones
from utils import electrostatic
from utils import combine_methods as cm
from utils import tm_score as tm

# from surface import *
p = PDBParser()

recognized_residues = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET',
                           'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL', 'NH', 'OC']
atom_types = [['N'], ['CA'], ['C'], ['O'], ['GLYCA'],
                  ['ALACB', 'ARGCB', 'ASNCB', 'ASPCB', 'CYSCB', 'GLNCB', 'GLUCB', 'HISCB', 'ILECB', 'LEUCB', 'LYSCB',
                   'METCB', 'PHECB', 'PROCB', 'PROCG', 'PROCD', 'THRCB', 'TRPCB', 'TYRCB', 'VALCB'],
                  ['LYSCE', 'LYSNZ'], ['LYSCD'], ['ASPCG', 'ASPOD1', 'ASPOD2', 'GLUCD', 'GLUOE1', 'GLUOE2'],
                  ['ARGCZ', 'ARGNH1', 'ARGNH2'],
                  ['ASNCG', 'ASNOD1', 'ASNND2', 'GLNCD', 'GLNOE1', 'GLNNE2'], ['ARGCD', 'ARGNE'],
                  ['SERCB', 'SEROG', 'THROG1', 'TYROH'],
                  ['HISCG', 'HISND1', 'HISCD2', 'HISCE1', 'HISNE2', 'TRPNE1'], ['TYRCE1', 'TYRCE2', 'TYRCZ'],
                  ['ARGCG', 'GLNCG', 'GLUCG', 'ILECG1', 'LEUCG', 'LYSCG', 'METCG', 'METSD', 'PHECG', 'PHECD1', 'PHECD2',
                   'PHECE1', 'PHECE2', 'PHECZ', 'THRCG2', 'TRPCG', 'TRPCD1', 'TRPCD2', 'TRPCE2', 'TRPCE3', 'TRPCZ2',
                   'TRPCZ3', 'TRPCH2', 'TYRCG', 'TYRCD1', 'TYRCD2'],
                  ['ILECG2', 'ILECD1', 'ILECD', 'LEUCD1', 'LEUCD2', 'METCE', 'VALCG1', 'VALCG2'], ['CYSSG']]

rng = np.random.default_rng(0)

In [134]:
def generate_one_frog(init):
    Quater = [0, 0, 0, 0]
    recRandIdx = rng.integers(0, rec_coord.shape[0] - 1)
    ligRandIdx = rng.integers(0, lig_coord.shape[0] - 1)
    axis = rec_coord[recRandIdx]
    a = rec_normal[recRandIdx]
    b = lig_normal[ligRandIdx]
    
    dotProduct = np.dot(a, b)
    theta = np.arccos(dotProduct) * 2 - np.pi
    
    Quater = Quaternion(axis=a, angle=theta)
    
    final = np.array([Quater.rotate(i) for i in lig_atom])
    args = [[final, init, Quater, -1]]
    return args

In [135]:
def generate_init_population():
    global init
    with concurrent.futures.ProcessPoolExecutor() as executor:
        Doargs = []
        for _ in range(frogs):
            Doargs += generate_one_frog(init)
            init += 1
        results = executor.map(find_score, Doargs)
        for r in results:
            if r:
                StructInfo[r[1]] = [r[0], r[2]]    

In [None]:
class SFLA:
    
    def __init__(self, frogs, mplx_no, n_iter, N, q, ):
        self.frogs = frogs
        self.mplx_no = mplx_no
        self.structinfo = {}
        self.init = 0
        self.mypath ='poses/'
        self.n_iter = n_iter
        self.N = N
        self.q = q
    
    def chaindef(self, file, rec_chain):
        structure=p.get_structure('1bth',file)
        coordinatesr = np.empty((0,3))
        tobi_residuesr = []
        residue_id=[]
        boundary_residue_coord=np.empty((0,3))
        atom_coord=np.empty((0,3))
        boundary_residue_id=[]
        boundary_residue_name=[]
        for model in structure:
            surface = get_surface(model)
            for chain in model:
                if chain.id in rec_chain:
                    for residue in chain:
                        cx = 0.0
                        cy = 0.0
                        cz = 0.0
                        count = 0
                        residue_index=recognized_residues.index(residue.get_resname())
                        atom_set=np.empty((0,3))
                        for atom in residue:
                            if  not atom.name=='H':
                                ax=atom.get_coord()[0]
                                ay=atom.get_coord()[1]
                                az=atom.get_coord()[2]
                                atom_set=np.append(atom_set,[atom.get_coord()], axis=0)
                                atom_coord=np.append(atom_coord,[atom.get_coord()], axis=0)
                                cur_atom=residue.get_resname()+atom.name
                                for typ in atom_types:
                                    if  cur_atom in typ or atom.name in ['N','CA','C','O']:	#typ:#atom.name now added
                                        cx += ax
                                        cy += ay
                                        cz += az
                                        count += 1
                                    else:
                                        pass
                        cx/= float(count)
                        cy/= float(count)
                        cz/= float(count)
                        coordinatesr=np.append(coordinatesr,[[cx, cy, cz]], axis=0)
                        #rcc+=1
                        tobi_residuesr.append(residue_index)
                        residue_id.append(str(residue.get_id()[1])+residue.get_id()[2])
                        fji=0     #check whether any of of the atoms in the resdue are at a distance 3 A from surface
                        for ji in range(len(atom_set)):
                            if min_dist(atom_set[ji], surface) < 2:
                                fji=1
                                break
                        if fji==1:
                            boundary_residue_coord=np.append(boundary_residue_coord,[[cx, cy, cz]],axis=0)
                            #boundary_atom_name.append(atom.name)
                            boundary_residue_id.append(str(residue.get_id()[1])+residue.get_id()[2])
                            boundary_residue_name.append(residue.get_resname())
        
        return boundary_residue_coord,boundary_residue_name, boundary_residue_id, atom_coord
    
    def findPointNormals(self, points, numNeighbours, viewPoint, residue_id, residue_name, f):
        nbrs = NearestNeighbors(n_neighbors=numNeighbours+1, algorithm='kd_tree').fit(points)
        distances, indices = nbrs.kneighbors(points)
        n = []
        [n.append(indices[i][1:].tolist()) for i in range(0,len(indices))]


        # find difference in position from neighbouring points
        n=np.asarray(n).flatten('F')    
        p = np.tile(points,(numNeighbours,1)) - points[n]
        x=np.zeros((3,len(points),numNeighbours))
        for i in range(0,3):
            for j in range(0,len(points)):
                for k in range(0,numNeighbours):
                    x[i,j,k]=p[k*len(points)+j,i]
        p = x
        C = np.zeros((len(points),6))
        C[:,0]= np.sum(np.multiply(p[0],p[0]),axis=1)
        C[:,1]= np.sum(np.multiply(p[0],p[1]),axis=1)
        C[:,2]= np.sum(np.multiply(p[0],p[2]),axis=1)
        C[:,3]= np.sum(np.multiply(p[1],p[1]),axis=1)
        C[:,4]= np.sum(np.multiply(p[1],p[2]),axis=1)
        C[:,5]= np.sum(np.multiply(p[2],p[2]),axis=1)
        C = np.divide(C, numNeighbours)
        normals = np.zeros((len(points),3))
        curvature = np.zeros((len(points),1))
        for i in range(0,len(points)):
            Cmat = [[C[i,0], C[i,1] ,C[i,2]], [C[i,1], C[i,3], C[i,4]], [C[i,2], C[i,4], C[i,5]]]
            [value,vector] = np.linalg.eigh(Cmat)
            [lam,k] = min(value), value.tolist().index(min(value))
            normals[i,:] = vector[:,k] #np.transpose(vector[:,k])
            curvature[i]= lam / sum(value)

        return normals, curvature

    def find_score(self, args):
        output_file='out' + str(args[1]) + '.pdb'
        shape, electro, jones, proba = True, True, True, False
        pH = 7
        dist = 8.6
        with open(os.path.join(self.mypath, output_file),'w') as out:
            in1 = open(inp2, "r")
            in2 = open(inp1, "r")
            for line in in1:
                if "ATOM" in line:
                    out.write(line)
            indexing = 0
            new_co = args[0]
            for line in in2:
                if "ATOM" in line:
                    # print(line)
                    l = line.split()
                    l[0] = l[0].ljust(5)
                    l[1] = l[1].rjust(5)
                    l[2] = l[2].ljust(3)
                    l[3] = l[3].ljust(3)
                    l[4] = line[21]
                    l[5] = ("%4d" % (int(line[22:26]))).rjust(4)
                    l[6] = ("%8.3f" % (float(new_co[indexing][0]))).rjust(8)
                    l[7] = ("%8.3f" % (float(new_co[indexing][1]))).rjust(8)
                    l[8] = ("%8.3f" % (float(new_co[indexing][2]))).rjust(8)
                    out.write(
                        "{0} {1}  {2} {3} {4}{5}    {6}{7}{8}".format(
                            l[0], l[1], l[2], l[3], l[4], l[5], l[6], l[7], l[8]
                        )
                    )
                    out.write("\n")
                    indexing += 1

        pdbfile = os.path.join(self.mypath, output_file)
        res = cm.combine_score(pdbfile, recepChain=self.rec_chain, ligChain=self.lig_chain, statpotrun=proba, vdwrun=jones, electrorun=electro, shaperun=shape, pH=pH, depth=depth, dist=dist)
        mydf = pd.DataFrame(res,  index=[0])
        mydf = mydf.set_index('pdb')
        score = tm.tm_score(mydf, execdir='.')
        return float(score['tm_score_prediction']), args[1], args[2], args[3]

    def pdbpre(self, file1):
        with open(os.path.join(args.pdb, file1), "r") as pdb_in: 
            with open(file1 + "1.pdb", "w") as out: 
                atmno = 1
                resno = 0
                res = ""
                fr = ""
                l = [""] * 11
                for line in pdb_in:
                    if "ATOM" in line[0:4]:
                        li = line.split()
                        l[0] = li[0].ljust(6)
                        l[1] = str(atmno).rjust(4)
                        l[2] = li[2].ljust(3)
                        l[3] = li[3].ljust(3)
                        l[4] = line[21]
                        if fr != line[21]:
                            atmno = 1
                            resno = 0
                            res = ""
                            fr = line[21]
                        if line[22:26] == res:
                            l[5] = ("%4d" % (int(resno))).rjust(4)
                        else:
                            resno += 1
                            res = line[22:26]
                            l[5] = ("%4d" % (int(resno))).rjust(4)
                        # if len(l[6])>10:
                        l[6] = ("%8.3f" % (float(line[29:38]))).rjust(8)
                        l[7] = ("%8.3f" % (float(line[38:46]))).rjust(8)
                        l[8] = ("%8.3f" % (float(line[46:54]))).rjust(8)
                        l[9] = ("%6.2f" % (float(line[55:60]))).rjust(6)
                        l[10] = ("%6.2f" % (float(line[60:66]))).ljust(6)
                        out.write(
                            "{0} {1}  {2} {3} {4}{5}    {6}{7}{8}{9}{10}".format(
                                l[0], l[1], l[2], l[3], l[4], l[5], l[6], l[7], l[8], l[9], l[10]
                            )
                        )
                        out.write("\n")
                        atmno += 1
        return file1 + "1.pdb"

    def generate_one_frog(self, uid):
        Quater = [0, 0, 0, 0]
        recRandIdx = rng.integers(0, self.rec_coord.shape[0] - 1)
        ligRandIdx = rng.integers(0, self.lig_coord.shape[0] - 1)
        axis = self.rec_coord[recRandIdx]
        a = self.rec_normal[recRandIdx]
        b = self.lig_normal[ligRandIdx]

        dotProduct = np.dot(a, b)
        theta = np.arccos(dotProduct) * 2 - np.pi

        Quater = Quaternion(axis=a, angle=theta)

        final = np.array([Quater.rotate(i) for i in self.lig_atom])
        args = [[final, uid, Quater, -1]]
        return args

    def generate_init_population(self):
        with concurrent.futures.ProcessPoolExecutor() as executor:
            Doargs = []
            for _ in range(self.frogs):
                Doargs += self.generate_one_frog(self.init)
                self.init += 1
            
            results = executor.map(self.find_score, Doargs)
            for r in results:
                if r:
                    self.structinfo[r[1]] = [r[0], r[2]]   

    def sort_frog(self):
        sorted_fitness = np.array(sorted(self.structinfo, key = lambda x: self.structinfo[x][0], reverse=True))

        memeplexes = np.zeros((self.mplx_no, int(self.frogs/self.mplx_no)))

        for j in range(memeplexes.shape[1]):
            for i in range(mplx_no):
                memeplexes[i, j] = sorted_fitness[i + (mplx_no*j)] 
        return memeplexes
    
    def shuffle_memeplexes(self):
        """Shuffles the memeplexes and sorting them.
        """
        temp = self.memeplexes.flatten()
        temp = np.array(sorted(temp, key = lambda x: self.structinfo[x][0], reverse=True))
        for j in range(self.memeplexes.shape[1]):
            for i in range(self.mplx_no):
                self.memeplexes[i, j] = temp[i + (self.mplx_no * j)]
                
    def local_search_one_memeplex(self, im):
        """
            q: The number of frogs in submemeplex
            N: No of mutations
        """

        for iN in range(self.N):
            uId = self.init + im + 1
            rValue = rng.random(self.FrogsEach) * self.weights # random value with probability weights
            subindex = np.sort(np.argsort(rValue)[::-1][0:self.q]) # index of selected frogs in memeplex
            submemeplex = self.memeplexes[im][subindex] 

            #--- Improve the worst frog's position ---#
            # Learn from local best Pb #
            Pb = self.structinfo[int(submemeplex[0])] # mark the best frog in submemeplex
            Pw = self.structinfo[int(submemeplex[self.q - 1])] # mark the worst frog in memeplex

            S = rng.random() * (Pb[1] - Pw[1]) 
            Uq = Pw[1] + S

            globStep = False
            censorship = False
            # Check feasible space and the performance #
            if self.omega[0] <= min(Uq) and max(Uq) <= self.omega[1]: # check feasible space
                final = np.array([Uq.rotate(i) for i in self.lig_atom])  
                results = self.find_score([final, uId, Uq, im])

                if results[0] < Pw[0]:
                    globStep = True

            if globStep:
                S = rng.random() * (self.Frog_gb[1] - Pw[1])
                for i in range(4):
                    if S[i] > 0:
                        S[i] = min(S[i], max_step)
                    else:
                        S[i] = max(S[i], -max_step)
                Uq = Pw[1] + S

                if self.omega[0] <= min(Uq) and max(Uq) <= self.omega[1]: # check feasible space
                    final = np.array([Uq.rotate(i) for i in self.lig_atom])  
                    results = self.find_score([final, uId, Uq, im])
                    if results[0] < Pw[0]:
                        censorship = True
                else:
                    censorship = True

            if censorship:
                params = self.generate_one_frog(uId)
                results = self.find_score(params)            


            #StructInfo[im] = [results[0], results[2]]
            shutil.move(os.path.join('poses/', 'out'+str(uId)+'.pdb'), os.path.join('poses/', 'out'+ str(submemeplex[q-1]) + '.pdb'))
            self.structinfo[int(submemeplex[q-1])] = [results[0], results[2]]
            self.memeplexes[im] = self.memeplexes[im][np.argsort(memeplexes[im])]
            
    def local_search(self):
        self.Frog_gb = self.structinfo[int(self.memeplexes[0][0])]
        self.FrogsEach = int(self.frogs/len(self.memeplexes))
        self.weights = [2*(self.FrogsEach+1-j)/(self.FrogsEach*(self.FrogsEach+1)) for j in range(1, self.FrogsEach+1)] 
        self.omega = [np.amin(rec_normal), np.amax(rec_normal)]
        self.max_step = (self.omega[1] - self.omega[0])/2 # maximum step sizess

        with concurrent.futures.ProcessPoolExecutor() as executor:
            doargs = [[im] for im in range(len(self.memeplexes))]
            results = executor.map(self.local_search_one_memeplex, doargs)
            
            
    def run_sfla(self):
        rpdb= pdb1[0] + '_model_st.pdb'
        lpdb=pdb1[1] + '_model_st.pdb'
        
        self.lig_chain = [i for i in pdb1[0]]
        self.rec_chain = [i for i in pdb1[1]]
         
        inp1 = self.pdbpre(lpdb)
        inp2 = self.pdbpre(rpdb)
        
        self.lig_coord, lig_res, lig_res_id, self.lig_atom = self.chaindef(inp1, self.lig_chain)
        self.rec_coord, rec_res, rec_res_id, self.rec_atom = self.chaindef(inp2, self.rec_chain)   
        
        self.rec_normal, rec_curve = self.findPointNormals(self.rec_coord, 20,[0,0,0], rec_res_id, rec_res, 'r')
        self.lig_normal, lig_curve = self.findPointNormals(self.lig_coord, 20,[0,0,0], lig_res_id, lig_res, 'r')
            
        self.generate_init_population()
        self.memeplexes = self.sort_frog(self.mplx_no)
        
        for _ in range(self.n_iter):
            self.local_search()
            self.shuffle_memeplexes()

In [240]:
shuffle_memeplexes(mem)

In [138]:
memeplexes = sort_frog(20)

In [2]:
def chaindef(file, rec_chain):
    
    structure=p.get_structure('1bth',file)
    coordinatesr = np.empty((0,3))
    tobi_residuesr = []
    residue_id=[]
    boundary_residue_coord=np.empty((0,3))
    atom_coord=np.empty((0,3))
    boundary_residue_id=[]
    boundary_residue_name=[]
    #rcc=0
    for model in structure:
        surface = get_surface(model)
        for chain in model:
            if chain.id in rec_chain:
                for residue in chain:
                    #print('hi')
                    cx = 0.0
                    cy = 0.0
                    cz = 0.0
                    count = 0
                    residue_index=recognized_residues.index(residue.get_resname())
                    atom_set=np.empty((0,3))
                    for atom in residue:
                        if  not atom.name=='H':
                            ax=atom.get_coord()[0]
                            ay=atom.get_coord()[1]
                            az=atom.get_coord()[2]
                            atom_set=np.append(atom_set,[atom.get_coord()], axis=0)
                            atom_coord=np.append(atom_coord,[atom.get_coord()], axis=0)
                            cur_atom=residue.get_resname()+atom.name
                            for typ in atom_types:
                                if  cur_atom in typ or atom.name in ['N','CA','C','O']:	#typ:#atom.name now added
                                    cx += ax
                                    cy += ay
                                    cz += az
                                    count += 1
                                else:
                                    pass
                    cx/= float(count)
                    cy/= float(count)
                    cz/= float(count)
                    coordinatesr=np.append(coordinatesr,[[cx, cy, cz]], axis=0)
                    #rcc+=1
                    tobi_residuesr.append(residue_index)
                    residue_id.append(str(residue.get_id()[1])+residue.get_id()[2])
                    fji=0     #check whether any of of the atoms in the resdue are at a distance 3 A from surface
                    for ji in range(len(atom_set)):
                        if min_dist(atom_set[ji], surface) < 2:
                            fji=1
                            break
                    if fji==1:
                        boundary_residue_coord=np.append(boundary_residue_coord,[[cx, cy, cz]],axis=0)
                        #boundary_atom_name.append(atom.name)
                        boundary_residue_id.append(str(residue.get_id()[1])+residue.get_id()[2])
                        boundary_residue_name.append(residue.get_resname())
    #print(rcc)
    return boundary_residue_coord,boundary_residue_name, boundary_residue_id, atom_coord

In [3]:
def findPointNormals(points, numNeighbours, viewPoint, residue_id, residue_name,f):
    """
    xu=[]
    for i in points:
        k=[]
        for j in i:
             k.append(float(j))
        xu.append(k)
    viewPoint =[float(x) for x in viewPoint]
    X=xu
    """
    nbrs = NearestNeighbors(n_neighbors=numNeighbours+1, algorithm='kd_tree').fit(points)
    distances, indices = nbrs.kneighbors(points)
    n = [] #indices[:,2:]
    [n.append(indices[i][1:].tolist()) for i in range(0,len(indices))]

    
      #%find difference in position from neighbouring points
    n=np.asarray(n).flatten('F')    
    p = np.tile(points,(numNeighbours,1)) - points[n]
    x=np.zeros((3,len(points),numNeighbours))
    for i in range(0,3):
        for j in range(0,len(points)):
            for k in range(0,numNeighbours):
                x[i,j,k]=p[k*len(points)+j,i]
    p=x
    C = np.zeros((len(points),6))
    C[:,0]= np.sum(np.multiply(p[0],p[0]),axis=1)
    C[:,1]= np.sum(np.multiply(p[0],p[1]),axis=1)
    C[:,2]= np.sum(np.multiply(p[0],p[2]),axis=1)
    C[:,3]= np.sum(np.multiply(p[1],p[1]),axis=1)
    C[:,4]= np.sum(np.multiply(p[1],p[2]),axis=1)
    C[:,5]= np.sum(np.multiply(p[2],p[2]),axis=1)
    C = np.divide(C, numNeighbours)
    normals = np.zeros((len(points),3))
    curvature = np.zeros((len(points),1))
    for i in range(0,len(points)):
        Cmat = [[C[i,0], C[i,1] ,C[i,2]], [C[i,1], C[i,3], C[i,4]], [C[i,2], C[i,4], C[i,5]]]
        [value,vector] = np.linalg.eigh(Cmat)
        [lam,k] = min(value), value.tolist().index(min(value))
        normals[i,:] = vector[:,k] #np.transpose(vector[:,k])
        curvature[i]= lam / sum(value)
        
    return normals, curvature

In [4]:
def do_something(args):
    output_file='out'+str(args[1])+'.pdb'
    out=open(os.path.join(mypath, output_file),'w')
    sc=open('score.txt','a')
    in1=open(inp2,'r')
    in2=open(inp1,'r')
    for line in in1:
        if 'ATOM' in line:
              out.write(line) 
    indexing=0
    new_co=args[0]
    for line in in2:
        if 'ATOM' in line:
            #print(line)
            l=line.split()
            l[0]=l[0].ljust(5)
            l[1]=l[1].rjust(5)
            l[2]=l[2].ljust(3)
            l[3]=l[3].ljust(3)
            l[4]=line[21]
            l[5]=('%4d' % (int(line[22:26]))).rjust(4)
            l[6]=('%8.3f' % (float(new_co[indexing][0]))).rjust(8)
            l[7]=('%8.3f' % (float(new_co[indexing][1]))).rjust(8)
            l[8]=('%8.3f' % (float(new_co[indexing][2]))).rjust(8)
            out.write('{0} {1}  {2} {3} {4}{5}    {6}{7}{8}' .format(l[0],l[1],l[2],l[3],l[4],l[5],l[6],l[7],l[8]))
            out.write('\n')
            indexing+=1
    out.close()
    #print("depth ok")
    pdbfile=os.path.join(mypath, output_file)
    my_struct = pdbtools.read_pdb(pdbfile)
    try:
        depth_dict = pdb_resdepth.calculate_resdepth(structure=my_struct, pdb_filename=pdbfile, method="msms")
    except:
        os.remove(os.path.join(mypath, output_file))
        return
    
    distmat = matrice_distances.calc_distance_matrix(structure=my_struct, depth= depth_dict, chain_R=rec_chain, chain_L=lig_chain, dist_max=dist, method = depth)

    vdw = Lennard_Jones.lennard_jones(dist_matrix=distmat)
    electro = electrostatic.electrostatic(inter_resid_dict=distmat, pH =pH)
    score=vdw+electro
    #if score>=0:
    #      os.remove(os.path.join(mypath, output_file))#eliminate the bad solutions

    #      return
    #else:

    #score=np.random.randint(-30,20)
    #sc.write(str(args[1])+'   '+ str(score)+'\n')
    #sc.close()
    return score, args[1], args[2], args[3]

In [5]:
def find_score(args):
    output_file='out'+str(args[1])+'.pdb'
    shape, electro, jones, proba = True, True, True, False
    pH = 7
    dist = 8.6
    with open(os.path.join(mypath, output_file),'w') as out:
        in1 = open(inp2, "r")
        in2 = open(inp1, "r")
        for line in in1:
            if "ATOM" in line:
                out.write(line)
        indexing = 0
        new_co = args[0]
        for line in in2:
            if "ATOM" in line:
                # print(line)
                l = line.split()
                l[0] = l[0].ljust(5)
                l[1] = l[1].rjust(5)
                l[2] = l[2].ljust(3)
                l[3] = l[3].ljust(3)
                l[4] = line[21]
                l[5] = ("%4d" % (int(line[22:26]))).rjust(4)
                l[6] = ("%8.3f" % (float(new_co[indexing][0]))).rjust(8)
                l[7] = ("%8.3f" % (float(new_co[indexing][1]))).rjust(8)
                l[8] = ("%8.3f" % (float(new_co[indexing][2]))).rjust(8)
                out.write(
                    "{0} {1}  {2} {3} {4}{5}    {6}{7}{8}".format(
                        l[0], l[1], l[2], l[3], l[4], l[5], l[6], l[7], l[8]
                    )
                )
                out.write("\n")
                indexing += 1

    pdbfile = os.path.join(mypath, output_file)
    res = cm.combine_score(pdbfile, recepChain=rec_chain, ligChain=lig_chain, statpotrun=proba, vdwrun=jones, electrorun=electro, shaperun=shape, pH=pH, depth=depth, dist=dist)
    mydf = pd.DataFrame(res,  index=[0])
    mydf = mydf.set_index('pdb')
    score = tm.tm_score(mydf, execdir='.')
    return float(score['tm_score_prediction']), args[1], args[2], args[3]

### sklearn == 0.24.1

In [29]:
res1 = do_something1(Doargs[16])

MSMS running for poses/out16.pdb
MSMS finished with poses/out16.pdb
Alright!
You will find your results in the . directory.


In [58]:
res

(0.5557653944444447,
 3,
 Quaternion(0.6744755650928211, -0.5721136235901982, -0.43578649009673776, -0.16690970266697627),
 -1)

In [30]:
res = do_something(Doargs[10])

MSMS running for poses/out10.pdb
MSMS finished with poses/out10.pdb


In [28]:
res

(0,
 10,
 Quaternion(0.6152593455419292, -0.31361652351971564, -0.5196077021493078, -0.5030988469136463),
 -1)

In [None]:
0.555765     

In [None]:
0.555765

In [6]:
def pdbpre(file1):
    with open(os.path.join(args.pdb, file1), "r") as pdb_in: 
        with open(file1 + "1.pdb", "w") as out: 
            atmno = 1
            resno = 0
            res = ""
            fr = ""
            l = [""] * 11
            for line in pdb_in:
                if "ATOM" in line[0:4]:
                    li = line.split()
                    l[0] = li[0].ljust(6)
                    l[1] = str(atmno).rjust(4)
                    l[2] = li[2].ljust(3)
                    l[3] = li[3].ljust(3)
                    l[4] = line[21]
                    if fr != line[21]:
                        atmno = 1
                        resno = 0
                        res = ""
                        fr = line[21]
                    if line[22:26] == res:
                        l[5] = ("%4d" % (int(resno))).rjust(4)
                    else:
                        resno += 1
                        res = line[22:26]
                        l[5] = ("%4d" % (int(resno))).rjust(4)
                    # if len(l[6])>10:
                    l[6] = ("%8.3f" % (float(line[29:38]))).rjust(8)
                    l[7] = ("%8.3f" % (float(line[38:46]))).rjust(8)
                    l[8] = ("%8.3f" % (float(line[46:54]))).rjust(8)
                    l[9] = ("%6.2f" % (float(line[55:60]))).rjust(6)
                    l[10] = ("%6.2f" % (float(line[60:66]))).ljust(6)
                    out.write(
                        "{0} {1}  {2} {3} {4}{5}    {6}{7}{8}{9}{10}".format(
                            l[0], l[1], l[2], l[3], l[4], l[5], l[6], l[7], l[8], l[9], l[10]
                        )
                    )
                    out.write("\n")
                    atmno += 1
    return file1 + "1.pdb"

In [28]:
from utils.TMscore import TMscore

In [29]:
tms = TMscore('utils/TMscore.f')

In [None]:
import tmscoring

In [53]:
def do_something1(args):
    output_file='out_'+str(args[1])+'.pdb'
    with open(os.path.join(mypath, output_file),'w') as out:
        in2=open(inp1,'r')
        indexing=0
        new_co=args[0]
        for line in in2:
            if 'ATOM' in line:
                #print(line)
                l=line.split()
                l[0]=l[0].ljust(5)
                l[1]=l[1].rjust(5)
                l[2]=l[2].ljust(3)
                l[3]=l[3].ljust(3)
                l[4]=line[21]
                l[5]=('%4d' % (int(line[22:26]))).rjust(4)
                l[6]=('%8.3f' % (float(new_co[indexing][0]))).rjust(8)
                l[7]=('%8.3f' % (float(new_co[indexing][1]))).rjust(8)
                l[8]=('%8.3f' % (float(new_co[indexing][2]))).rjust(8)
                out.write('{0} {1}  {2} {3} {4}{5}    {6}{7}{8}' .format(l[0],l[1],l[2],l[3],l[4],l[5],l[6],l[7],l[8]))
                out.write('\n')
                indexing+=1
    alignment = tmscoring.TMscoring(inp2, os.path.join('poses/', output_file))
    score = alignment.tmscore(**alignment.get_current_values())
    score1 = alignment.rmsd(**alignment.get_current_values())
    
    #tms()
    #score = tms.get_tm_score()
    print(score, score1)
    return score, args[1], args[2], args[3]

In [56]:
res = do_something1(Doargs[16])

0.0008423485683393902 52.30314890225165


In [11]:
pdb0 = '4dn4' 
pdb1 = ['LH', 'M']

In [12]:
class ArgsVal:
    def __init__(self, pdb):
        self.pdb = pdb

In [13]:
args = ArgsVal("Data/4dn4_LH:M")

In [14]:
depth = "msms"
dist = 8.6
pH = 7

In [15]:
rpdb=pdb1[0]+'_model_st.pdb'
lpdb=pdb1[1]+'_model_st.pdb'
lig_chain=[]
rec_chain=[]
for i in pdb1[0]:
    rec_chain.append(i)
for i in pdb1[1]:
    lig_chain.append(i)

In [16]:
inp1 = pdbpre(lpdb)
inp2 = pdbpre(rpdb)

In [17]:
lig_coord, lig_res,lig_res_id, lig_atom=chaindef(inp1, lig_chain)
rec_coord, rec_res,rec_res_id, rec_atom=chaindef(inp2, rec_chain)
print(len(rec_chain))
print(rec_chain)
#print(rec_coord)

2
['L', 'H']


In [18]:
rec_normal, rec_curve = findPointNormals(rec_coord, 20,[0,0,0], rec_res_id, rec_res, 'r')
lig_normal, lig_curve = findPointNormals(lig_coord, 20,[0,0,0], lig_res_id, lig_res, 'r')

In [46]:
frogs = 50 ## No of frogs (population)

StructInfo = {}
init = 0
mypath='poses/'

In [47]:
#generation of initial population
with concurrent.futures.ProcessPoolExecutor() as executor:
    Doargs = []
    for i in range(frogs):
        Quater = [0, 0, 0, 0]
        recRandIdx = rng.integers(0, rec_coord.shape[0] - 1)
        ligRandIdx = rng.integers(0, lig_coord.shape[0] - 1)

        axis = rec_coord[recRandIdx]
        a = rec_normal[recRandIdx]
        b = lig_normal[ligRandIdx]

        dotProduct = np.dot(a, b)
        theta = np.arccos(dotProduct) * 2 - np.pi

        Quater = Quaternion(axis=a, angle=theta)

        final = np.array([Quater.rotate(i) for i in lig_atom])

        Doargs += [[final, init, Quater, -1]]
        init += 1
"""
    results = executor.map(do_something1, Doargs)
    for r in results:
        if r:
            StructInfo[r[1]] = [r[0], r[2]] 
"""

'\n    results = executor.map(do_something1, Doargs)\n    for r in results:\n        if r:\n            StructInfo[r[1]] = [r[0], r[2]] \n'

In [218]:
from utils.TMscore import TMscore

In [221]:
tms = TMscore('./utils/TMscore.f')

## Multi - with Function

In [133]:
frogs = 200 ## No of frogs (population)

StructInfo = {}
init = 0
mypath='poses/'

In [134]:
def generate_one_frog(init):
    Quater = [0, 0, 0, 0]
    recRandIdx = rng.integers(0, rec_coord.shape[0] - 1)
    ligRandIdx = rng.integers(0, lig_coord.shape[0] - 1)
    axis = rec_coord[recRandIdx]
    a = rec_normal[recRandIdx]
    b = lig_normal[ligRandIdx]
    
    dotProduct = np.dot(a, b)
    theta = np.arccos(dotProduct) * 2 - np.pi
    
    Quater = Quaternion(axis=a, angle=theta)
    
    final = np.array([Quater.rotate(i) for i in lig_atom])
    args = [[final, init, Quater, -1]]
    return args

In [135]:
def generate_init_population():
    global init
    with concurrent.futures.ProcessPoolExecutor() as executor:
        Doargs = []
        for _ in range(frogs):
            Doargs += generate_one_frog(init)
            init += 1
        results = executor.map(find_score, Doargs)
        for r in results:
            if r:
                StructInfo[r[1]] = [r[0], r[2]]    

In [136]:
generate_init_population()

MSMS running for
 poses/out0.pdbMSMS running forMSMS running for  poses/out3.pdbposes/out4.pdb

MSMS running for poses/out5.pdb
MSMS running forMSMS running for  poses/out1.pdbposes/out2.pdb

MSMS running forMSMS running forMSMS running for   poses/out7.pdbposes/out8.pdbposes/out6.pdb


MSMS running forMSMS running for  poses/out10.pdbposes/out9.pdb

MSMS running for poses/out13.pdb
MSMS running for poses/out12.pdb
MSMS running forMSMS running for poses/out11.pdb 
poses/out15.pdb
MSMS running for poses/out17.pdb
MSMS running for poses/out14.pdb
MSMS running forposes/out16.pdb 
MSMS running forMSMS running for  poses/out18.pdbposes/out19.pdb

MSMS running for MSMS running forMSMS running forposes/out20.pdb 
poses/out23.pdb 
MSMS running for poses/out22.pdb
MSMS running forposes/out21.pdb
 poses/out24.pdb
MSMS running forMSMS running for MSMS running forMSMS running for  poses/out27.pdbposes/out26.pdb MSMS running forposes/out29.pdb
 poses/out25.pdb

poses/out30.pdbMSMS running forMSMS r

In [223]:
len(Doargs)

50

In [None]:
with concurrent.futures.ProcessPoolExecutor() as executor:
    Doargs = []
    for _ in range(frogs):
        Doargs += generate_one_frog(init)
        init += 1
    results = executor.map(find_score, Doargs)
    for r in results:
        if r:
            StructInfo[r[1]] = [r[0], r[2]]

In [None]:
StructInfo

In [149]:
rng.integers(0, 12)

7

In [112]:
StructInfo[int(mem[0])]

[-3.121259559452694,
 Quaternion(0.9500294077141206, -0.20983904643434914, -0.21258687585807717, 0.0906560493446668)]

In [33]:
mem = sort_frog(10)

In [115]:
mem

array([[ 8., 30.,  6., 19., 41.],
       [45., 48.,  9., 20., 42.],
       [28., 23., 10., 24., 44.],
       [43., 15., 11., 26., 46.],
       [35., 22., 12., 27., 47.],
       [36.,  4., 13., 32., 49.],
       [25.,  1., 14., 33.,  0.],
       [38.,  2., 16., 34., 29.],
       [ 7.,  3., 17., 39., 37.],
       [21.,  5., 18., 40., 31.]])

In [29]:
np.array(sorted(StructInfo, key = lambda x: StructInfo[x][0], reverse=True))

array([ 2,  4,  8, 11, 16, 18, 20, 23, 38, 40, 41,  0,  3,  5,  7, 10, 13,
       19, 21, 24, 25, 27, 28, 29, 32, 34, 39, 42, 43, 48,  1, 12, 15, 22,
       46, 49,  6,  9, 14, 17, 26, 30, 31, 33, 35, 36, 37, 44, 45, 47])

In [None]:
StructInfo[mem.flatten()]

In [116]:
mem[1][1]

48.0

In [119]:
StructInfo[mem[0][0]]

[-27.42530894702434,
 Quaternion(0.9631228922249896, 0.10238838890014193, -0.20305440473343023, 0.14380480175903962)]

In [32]:
def sort_frog(mplx_no):
    sorted_fitness = np.array(sorted(StructInfo, key = lambda x: StructInfo[x][0], reverse=True))

    memeplexes = np.zeros((mplx_no, int(frogs/mplx_no)))

    for j in range(memeplexes.shape[1]):
        for i in range(mplx_no):
            memeplexes[i, j] = sorted_fitness[i + (mplx_no*j)] 
    return memeplexes

In [54]:
def shuffle_memeplexes(memeplexes):
    """Shuffles the memeplexes and sorting them.
    
    Arguments:
        frogs {numpy.ndarray} -- All the frogs
        memeplexes {numpy.ndarray} -- The memeplexes
    
    Returns:
        None
    """
    mplx_no = memeplexes.shape[0]
    temp = memeplexes.flatten()
    temp = np.array(sorted(temp, key = lambda x: StructInfo[x][0], reverse=True))
    for j in range(memeplexes.shape[1]):
        for i in range(mplx_no):
            memeplexes[i, j] = temp[i + (mplx_no*j)]

In [240]:
shuffle_memeplexes(mem)

In [20]:
StructInfo

{}

## Local Search

In [87]:
len(mem)

10

In [80]:
#len(mem)
mem.shape[0]

10

In [95]:
rValue = np.random.random(FrogsEach) * weights # random value with probability weights
subindex = np.sort(np.argsort(rValue)[::-1][0:q]) # index of selected frogs in memeplex 

In [107]:
np.sort(np.argsort(rValue)[::-1][0:2])

array([0, 1])

In [120]:
submem = mem[0][subindex]

In [123]:
int(submem[0])

8

In [97]:
s[k]=frog_w[1][k]+L[k]*(frog_b[1][k] - frog_w[1][k])

array([ 0.19770418,  0.33067041,  0.        , -0.03530205, -0.63396841])

In [150]:
mem

array([0.26978671, 0.04097352, 0.01652764, 0.81327024, 0.91275558])

#### Local Search Main Code -- Multi

In [138]:
memeplexes = sort_frog(20)

In [191]:
def local_search_one_memeplex(inp):
    """
        q: The number of frogs in submemeplex
        N: No of mutations
    """
    im, N, q, Frog_gb = inp
    FrogsEach = int(frogs/len(memeplexes))
    weights = [2*(FrogsEach+1-j)/(FrogsEach*(FrogsEach+1)) for j in range(1, FrogsEach+1)] 
    
    Omega = [np.amin(rec_normal), np.amax(rec_normal)]
    max_step = (Omega[1]-Omega[0])/2 # maximum step size
    
    for iN in range(N):
        uId = init + im + 1
        rValue = rng.random(FrogsEach) * weights # random value with probability weights
        subindex = np.sort(np.argsort(rValue)[::-1][0:q]) # index of selected frogs in memeplex
        submemeplex = memeplexes[im][subindex] 

        #--- Improve the worst frog's position ---#
        # Learn from local best Pb #
        Pb = StructInfo[int(submemeplex[0])] # mark the best frog in submemeplex
        Pw = StructInfo[int(submemeplex[q-1])] # mark the worst frog in memeplex

        S = rng.random() * (Pb[1] - Pw[1]) 
        Uq = Pw[1] + S

        globStep = False
        censorship = False
        # Check feasible space and the performance #
        if Omega[0] <= min(Uq) and max(Uq) <= Omega[1]: # check feasible space
            final = np.array([Uq.rotate(i) for i in lig_atom])  
            results = find_score([final, uId, Uq, im])

            if results[0] < Pw[0]:
                globStep = True

        if globStep:
            S = rng.random() * (Frog_gb[1] - Pw[1])
            for i in range(4):
                if S[i] > 0:
                    S[i] = min(S[i],max_step)
                else:
                    S[i] = max(S[i],-max_step)
            Uq = Pw[1] + S

            if Omega[0] <= min(Uq) and max(Uq) <= Omega[1]: # check feasible space
                final = np.array([Uq.rotate(i) for i in lig_atom])  
                results = find_score([final, uId, Uq, im])
                if results[0] < Pw[0]:
                    censorship = True
            else:
                censorship = True

        if censorship:
            params = generate_one_frog(uId)
            results = find_score(params)            


        #StructInfo[im] = [results[0], results[2]]
        shutil.move(os.path.join('poses/', 'out'+str(uId)+'.pdb'), os.path.join('poses/', 'out'+ str(submemeplex[q-1]) + '.pdb'))
        StructInfo[int(submemeplex[q-1])] = [results[0], results[2]]
        memeplexes[im] = memeplexes[im][np.argsort(memeplexes[im])]

In [198]:
def local_search():
    Frog_gb = StructInfo[int(memeplexes[0][0])]
    N = 10
    q = 6
    with concurrent.futures.ProcessPoolExecutor() as executor:
        doargs = [[im, N, q, Frog_gb] for im in range(len(memeplexes))]
        results = executor.map(local_search_one_memeplex, doargs)

In [None]:
%%time
local_search()

In [194]:
memeplexes

array([[  1., 192., 133.,   8.,  11.,  25.,  36., 160.,   9.,  48.],
       [ 83.,  43.,  69.,  59.,  68.,  76.,  86.,  15., 157., 174.],
       [161., 103., 145.,  53., 119., 134., 142., 183.,   6.,  51.],
       [ 18.,  44.,  71.,  93., 171., 180., 191.,  16., 116., 175.],
       [ 88., 105., 149., 155.,  13.,  29.,  38.,  19., 118.,  82.],
       [162.,  45.,  75.,  54.,  70.,  78.,  87.,  32., 123., 178.],
       [ 27., 109., 159.,  97., 120., 135., 144.,  37.,   2.,  84.],
       [ 89.,  46.,  56., 156., 172., 184., 193.,  47., 126., 182.],
       [163., 112.,   4.,   0.,  14.,  30.,  39.,  50.,  10.,  94.],
       [ 28.,  52.,   7.,  55.,  72.,  80.,  90.,  60., 127., 189.],
       [ 96., 113.,  42., 104., 121., 138., 147.,  61.,  20.,  95.],
       [167.,  57.,  77., 164., 173., 187., 196.,  65., 143., 199.],
       [ 31., 115., 110.,   3.,  17.,  33.,  41.,  99.,  22., 102.],
       [ 98.,  63., 130.,  58.,  73.,  81.,  91., 107., 154., 108.],
       [181., 124., 136., 106., 12

In [222]:
doargs

[[0, 2, 3],
 [1, 2, 3],
 [2, 2, 3],
 [3, 2, 3],
 [4, 2, 3],
 [5, 2, 3],
 [6, 2, 3],
 [7, 2, 3],
 [8, 2, 3],
 [9, 2, 3]]

In [None]:
with concurrent.futures.ProcessPoolExecutor() as executor:
    N = 2
    q = 3
    doargs = [[im, N, q] for im in range(len(memeplexes))]
    _ = executor.map(local_search, doargs)

In [199]:
shuffle_memeplexes(memeplexes)

In [None]:
for _ in range(5):
    local_search()
    shuffle_memeplexes(memeplexes)

## MAIN RUN CODE

In [None]:
def run_sfla(mplx_no, n_iter):
    generate_init_population()
    memeplexes = sort_frog(mplx_no=mplx_no)
    for _ in range(n_iter):
        local_search()
        shuffle_memeplexes(memeplexes)

In [201]:
memeplexes

array([[  1.,  83., 113.,   8.,  41.,  87., 140.,   9.,  12.,  24.],
       [ 18., 145.,  71.,  59.,  29.,  74., 121.,  15., 107.,  23.],
       [112.,  34., 101., 187.,   5.,  55.,  53., 129.,   6., 174.],
       [ 28., 185.,  63., 171., 193.,  25.,  81., 132., 102., 127.],
       [163.,  89., 181., 148., 179.,  17., 153., 137.,  94.,  51.],
       [167., 100.,  27., 117., 164., 155.,  49., 141., 118., 158.],
       [105., 133., 192.,  90., 142., 188.,  30., 146., 178., 154.],
       [ 66., 162.,  56.,  76., 122., 172., 166., 152.,  26., 199.],
       [ 75., 103.,   4., 135.,  97.,  33., 196., 157., 165., 189.],
       [ 45., 125.,   7.,  38.,  85.,  70., 180., 183., 123., 116.],
       [ 69.,  46.,  42.,  21.,  72., 119., 104.,  16.,  10., 126.],
       [124.,  96.,  77.,   0.,  36.,  91., 144.,  19.,  95.,  82.],
       [ 44., 115., 110.,  58., 120.,  78., 131.,  32., 143., 182.],
       [ 43.,  88., 130., 173.,  13.,  62.,  54.,  37.,  84., 114.],
       [ 64.,  31., 136.,  93., 13

In [195]:
shuffle_memeplexes(memeplexes)

In [196]:
StructInfo[memeplexes[-1][-1]]

[0.5204008611111113,
 Quaternion(0.8253056370515306, -0.5565910091238468, 0.05185394732923476, -0.0799263546012341)]

In [197]:
StructInfo[memeplexes[0][0]]

[0.657472333333334,
 Quaternion(0.8990546147122486, -0.4105507045149465, 0.05919979793793172, -0.14015813465867544)]

In [207]:
StructInfo[memeplexes[2][0]]

[0.657472333333334,
 Quaternion(0.9941252011379279, 0.002317500344719261, -0.088033121051463, -0.06292760326410822)]

In [210]:
scores = []
for i in memeplexes:
    for j in i:
        scores.append(StructInfo[j][0])

In [213]:
npScores = np.array(scores)

In [214]:
npScores.sort()

In [217]:
npScores

array([0.52040086, 0.52538213, 0.52560013, 0.53092888, 0.541294  ,
       0.541294  , 0.541294  , 0.541294  , 0.541294  , 0.541294  ,
       0.541294  , 0.541294  , 0.541294  , 0.541294  , 0.541294  ,
       0.541294  , 0.541294  , 0.541294  , 0.541294  , 0.541294  ,
       0.541294  , 0.541294  , 0.541294  , 0.541294  , 0.541294  ,
       0.541294  , 0.541294  , 0.541294  , 0.541294  , 0.541294  ,
       0.541294  , 0.541294  , 0.541294  , 0.541294  , 0.541294  ,
       0.541294  , 0.541294  , 0.55506038, 0.55523138, 0.55523138,
       0.55523138, 0.55523138, 0.55523138, 0.55523138, 0.55523138,
       0.55523138, 0.55523138, 0.55523138, 0.55523138, 0.55523138,
       0.55523138, 0.55523138, 0.55523138, 0.55523138, 0.55523138,
       0.55523138, 0.55523138, 0.55523138, 0.55523138, 0.55523138,
       0.55523138, 0.55576539, 0.55576539, 0.55576539, 0.55576539,
       0.55576539, 0.55576539, 0.55576539, 0.55576539, 0.55576539,
       0.55576539, 0.55576539, 0.55576539, 0.55576539, 0.55576

In [234]:
memeplexes

array([ 8., 21., 11., 27.,  3.])

In [204]:
StructInfo[memeplexes[-1][-1]]

[0.5204008611111113,
 Quaternion(0.8253056370515306, -0.5565910091238468, 0.05185394732923476, -0.0799263546012341)]

In [205]:
StructInfo[memeplexes[0][0]]

[0.657472333333334,
 Quaternion(0.8990546147122486, -0.4105507045149465, 0.05919979793793172, -0.14015813465867544)]

In [76]:
memeplexes = sort_frog(10)

In [86]:
%%time
# im to count the number of memeplexes
# iN to count the number of evolution

Frog_gb = StructInfo[int(memeplexes[0][0])]
FrogsEach = int(frogs/len(memeplexes)) #the number of frogs in each memeplex
weights = [2*(FrogsEach+1-j)/(FrogsEach*(FrogsEach+1)) for j in range(1, FrogsEach+1)] 

Omega = [np.amin(rec_normal), np.amax(rec_normal)]
max_step = (Omega[1]-Omega[0])/2 # maximum step size
q = 3 # int, the number of frogs in submemeplex -- CHANGE
N = 2
for im in range(len(memeplexes)):
    for iN in range(N):
        rValue = rng.random(FrogsEach) * weights # random value with probability weights
        subindex = np.sort(np.argsort(rValue)[::-1][0:q]) # index of selected frogs in memeplex
        submemeplex = memeplexes[im][subindex] 
        
        #--- Improve the worst frog's position ---#
        # Learn from local best Pb #
        Pb = StructInfo[int(submemeplex[0])] # mark the best frog in submemeplex
        Pw = StructInfo[int(submemeplex[q-1])] # mark the worst frog in memeplex
        
        S = rng.random() * (Pb[1] - Pw[1]) 
        Uq = Pw[1] + S
        
        globStep = False
        censorship = False
        
        # Check feasible space and the performance #
        if Omega[0] <= min(Uq) and max(Uq) <= Omega[1]: # check feasible space
            final = np.array([Uq.rotate(i) for i in lig_atom])  
            results = find_score([final, init+1, Uq, im])
            
            if results[0] < Pw[0]:
                globStep = True
        
        if globStep:
            S = rng.random() * (Frog_gb[1] - Pw[1])
            for i in range(4):
                if S[i] > 0:
                    S[i] = min(S[i],max_step)
                else:
                    S[i] = max(S[i],-max_step)
            Uq = Pw[1] + S
            
            if Omega[0] <= min(Uq) and max(Uq) <= Omega[1]: # check feasible space
                final = np.array([Uq.rotate(i) for i in lig_atom])  
                results = find_score([final, init+1, Uq, im])
                if results[0] < Pw[0]:
                    censorship = True
            else:
                censorship = True
        
        if censorship:
            recRandIdx = rng.integers(0, rec_coord.shape[0] - 1)
            ligRandIdx = rng.integers(0, lig_coord.shape[0] - 1)

            axis = rec_coord[recRandIdx]
            a = rec_normal[recRandIdx]
            b = lig_normal[ligRandIdx]
            
            dotProduct = np.dot(a, b)
            theta = np.arccos(dotProduct) * 2 - np.pi
            Quater = Quaternion(axis=a, angle=theta)
            final = np.array([Quater.rotate(i) for i in lig_atom])
            results = find_score([final, init+1, Quater, im])            
        
        
        #StructInfo[im] = [results[0], results[2]]
        shutil.move(os.path.join('poses/', 'out'+str(init+1)+'.pdb'), os.path.join('poses/', 'out'+ str(submemeplex[q-1]) + '.pdb'))
        StructInfo[int(submemeplex[q-1])] = [results[0], results[2]]
        memeplexes[im] = memeplexes[im][np.argsort(memeplexes[im])]

MSMS running for poses/out51.pdb
MSMS finished with poses/out51.pdb
MSMS running for poses/out51.pdb
MSMS finished with poses/out51.pdb
MSMS running for poses/out51.pdb
MSMS finished with poses/out51.pdb
MSMS running for poses/out51.pdb
MSMS finished with poses/out51.pdb
MSMS running for poses/out51.pdb
MSMS finished with poses/out51.pdb
MSMS running for poses/out51.pdb
MSMS finished with poses/out51.pdb
MSMS running for poses/out51.pdb
MSMS finished with poses/out51.pdb
MSMS running for poses/out51.pdb
MSMS finished with poses/out51.pdb
MSMS running for poses/out51.pdb
MSMS finished with poses/out51.pdb
MSMS running for poses/out51.pdb
MSMS finished with poses/out51.pdb
MSMS running for poses/out51.pdb
MSMS finished with poses/out51.pdb
MSMS running for poses/out51.pdb
MSMS finished with poses/out51.pdb
MSMS running for poses/out51.pdb
MSMS finished with poses/out51.pdb
MSMS running for poses/out51.pdb
MSMS finished with poses/out51.pdb
MSMS running for poses/out51.pdb
MSMS finished w

* 5.35
* 3.25

In [81]:
memeplexes

array([[ 0.,  7., 12., 30., 49.],
       [ 1.,  6., 13., 16., 32.],
       [ 2., 11., 17., 20., 34.],
       [ 4., 21., 26., 33., 35.],
       [14., 24., 27., 36., 37.],
       [18., 28., 38., 40., 42.],
       [ 3., 19., 22., 39., 43.],
       [ 5., 10., 23., 41., 44.],
       [ 8., 15., 25., 45., 47.],
       [ 9., 29., 31., 46., 48.]])

In [82]:
shuffle_memeplexes(memeplexes)

In [83]:
memeplexes

array([[ 7., 28.,  0., 43., 12.],
       [30., 40.,  1., 23., 16.],
       [49., 42.,  2., 44., 21.],
       [13., 39., 34., 25., 24.],
       [32., 46.,  4., 45., 38.],
       [20.,  3., 35., 29.,  5.],
       [26., 10., 14.,  6., 41.],
       [33., 15., 36., 11.,  8.],
       [27., 31., 18., 17., 47.],
       [37., 48., 22., 19.,  9.]])

In [84]:
StructInfo[int(memeplexes[0][0])]

[0.657472333333334,
 Quaternion(0.9864083668562512, -0.06616377910983777, -0.1497948026904637, 0.01350574754735854)]

In [85]:
StructInfo[int(memeplexes[-1][-1])]

[0.5412940000000002,
 Quaternion(0.4286944547193265, -0.6876356833810233, -0.5857372781965245, 0.017031510941170013)]

In [106]:
shuffle_memeplexes(memeplexes)

In [100]:
StructInfo[int(memeplexes[1][1])]

[0.657472333333334,
 Quaternion(0.9960274953034043, 0.06560602892584959, -0.02614032887928352, -0.054238001201098784)]

In [90]:
StructInfo[int(memeplexes[-1][-1])]

[0.5412940000000002,
 Quaternion(0.4286944547193265, -0.6876356833810233, -0.5857372781965245, 0.017031510941170013)]

In [103]:
memeplexes

array([[ 7., 39., 29.,  0., 19.],
       [13., 27., 44., 12., 16.],
       [26., 30.,  6., 43., 21.],
       [28., 32., 49.,  4., 24.],
       [25., 37., 14., 45., 38.],
       [33., 40.,  3., 35.,  5.],
       [ 1., 46., 10., 36., 41.],
       [34.,  2., 15., 22.,  8.],
       [18., 20., 31., 11., 47.],
       [23., 42., 48., 17.,  9.]])

In [116]:
memeplexes

array([[ 7., 37.,  1.,  0., 19.],
       [32., 23.,  6., 12., 16.],
       [ 2., 27., 46., 43., 21.],
       [39., 14., 28.,  4., 24.],
       [49., 42., 34., 45., 38.],
       [18., 44.,  3., 35.,  5.],
       [29., 33., 10., 36., 41.],
       [25., 26., 15., 22.,  8.],
       [20., 40., 31., 11., 47.],
       [13., 30., 48., 17.,  9.]])

In [120]:
%%time
local_search()

MSMS running for poses/out51.pdb
MSMS running for poses/out52.pdbMSMS running for
 poses/out54.pdb
MSMS running for poses/out53.pdbMSMS running for
 poses/out55.pdb
MSMS running for poses/out56.pdb
MSMS running for poses/out57.pdb
MSMS running for poses/out59.pdbMSMS running for
 poses/out58.pdb
MSMS running for poses/out60.pdb
MSMS finished with poses/out53.pdb
MSMS finished with poses/out52.pdb
MSMS finished with poses/out55.pdb
MSMS finished with poses/out51.pdb
MSMS finished with poses/out57.pdb
MSMS finished with poses/out60.pdb
MSMS finished with poses/out56.pdb
MSMS finished with poses/out54.pdb
MSMS finished with poses/out58.pdb
MSMS finished with poses/out59.pdb
MSMS running for poses/out53.pdb
MSMS running for poses/out52.pdb
MSMS running for poses/out55.pdb
MSMS running for poses/out51.pdb
MSMS running for poses/out60.pdb
MSMS running for poses/out58.pdb
MSMS running for poses/out57.pdb
MSMS running for poses/out59.pdb
MSMS running for poses/out54.pdb
MSMS running for poses/

In [121]:
memeplexes

array([[ 7., 37.,  1.,  0., 19.],
       [32., 23.,  6., 12., 16.],
       [ 2., 27., 46., 43., 21.],
       [39., 14., 28.,  4., 24.],
       [49., 42., 34., 45., 38.],
       [18., 44.,  3., 35.,  5.],
       [29., 33., 10., 36., 41.],
       [25., 26., 15., 22.,  8.],
       [20., 40., 31., 11., 47.],
       [13., 30., 48., 17.,  9.]])

In [122]:
shuffle_memeplexes(memeplexes)

In [123]:
memeplexes

array([[ 7., 14., 26.,  0., 19.],
       [37., 28., 20., 12., 16.],
       [ 1., 49., 40., 43., 21.],
       [32., 42., 13.,  4., 24.],
       [23., 34., 30., 45., 38.],
       [ 6., 18.,  3., 35.,  5.],
       [ 2., 44., 10., 36., 41.],
       [27., 29., 15., 22.,  8.],
       [46., 33., 31., 11., 47.],
       [39., 25., 48., 17.,  9.]])

In [124]:
StructInfo[int(memeplexes[1][1])]

[0.657472333333334,
 Quaternion(0.9245783376322388, -0.3433804438208805, -0.06898496566990937, -0.14995280222157398)]

In [125]:
StructInfo[int(memeplexes[-1][-1])]

[0.5412940000000002,
 Quaternion(0.4286944547193265, -0.6876356833810233, -0.5857372781965245, 0.017031510941170013)]

In [191]:
weights = [2*(FrogsEach+1-j)/(FrogsEach*(FrogsEach+1)) for j in range(1, FrogsEach+1)] 

In [177]:
Omega = [np.amin(rec_normal), np.amax(rec_normal)]

In [178]:
max_step =  [(Omega[1]-Omega[0])/2]# maximum step size

In [None]:
S = [min(c[1][i], max_step) if c[1][i] > 0 else max(c[1][i], -max_step) for i in range(4)]

In [None]:
for i in range(4):
    if c[1][i] > 0:
        s[i]

In [179]:
max_step

[0.9955515696063104]

In [143]:
c =StructInfo[int(submem[0])]
d = StructInfo[int(submem[1])]

In [171]:
rng.random() * (c[1]-d[1])

Quaternion(0.20052135606848476, 0.30373043005892125, -0.5430434081434552, 0.3010171058166151)

array([ 6.,  8., 19., 30., 41.])

In [187]:
mem[0]

array([ 8., 30.,  6., 19., 41.])

In [None]:
rng.random

In [142]:
for i in S:
    print(i)

0.29250047496417275
0.44305153722860646
-0.7921373459786221
0.43909361152349957


In [138]:
StructInfo[int(submem[0])][1]

Quaternion(0.9631228922249896, 0.10238838890014193, -0.20305440473343023, 0.14380480175903962)

In [132]:
StructInfo[int(submem[1])][1]

Quaternion(0.6706224172608168, -0.34066314832846456, 0.5890829412451918, -0.29528880976446)

In [None]:
def SFLA(num_parameter,num_global,num_local,m,n,q,n1,rangeAlpha,x_train,y_train):
    '''
    num_parameter: int, number of parameter to optimize
    
    num_global: int, the maximum number of global iterations
    
    num_local: int, the maximum number of local iterations
    
    m : int, the number of memeplexes
    
    n : int, the number of frogs in each memeplex
    
    q : int, the number of frogs in submemeplex
    
    n1:  number of splits for cross validation for inner loop
    
    rangeAlpha: list, float, range of parameter alpha,eg.[10**-2, 10]

    x_train: feature

    y_train: lable

    '''

    #--- Step 0--Initialize parameters ---#
    sizeAlpha = 2
    max_step =  [(rangeAlpha[1]-rangeAlpha[0])/sizeAlpha] # maximum step size
    
    #--- Step 1--Generate initial population ---#
    frogAlpha = random.uniform(rangeAlpha[0],rangeAlpha[1],m*n)
    

    # Compute the performance value for each frog on validation data #
    KF = KFold(n_splits=n1,shuffle=True, random_state=920)
    f = zeros((m*n,n1))
    j = 0
    for train_indexcv,test_indexcv in KF.split(x_train):
        x_traincv, x_testcv = x_train.iloc[train_indexcv][:], x_train.iloc[test_indexcv][:]
        y_traincv, y_testcv = y_train.iloc[train_indexcv][:], y_train.iloc[test_indexcv][:]
        for i in range(m*n):
            f[i,j] = SFLA_L1(x_traincv, y_traincv,x_testcv, y_testcv,frogAlpha[i])
        j+=1
    f = f.mean(axis=1)
    f_parameter = c_[f,frogAlpha]

    #--- Step 2--Rank frogs ---#
    f_parameter = f_parameter[argsort(f_parameter[:,0])[::-1]]

    #######--- Global search start---######
    i_global = 0
    flag = 0
    fBest_iteration = f_parameter[0,0]
    weights = [2*(n+1-j)/(n*(n+1)) for j in range(1,n+1)] # weights of ranked frogs in each memeplex
    while i_global < num_global:
        frog_gb = f_parameter[0,0] # mark the global best frog      
        #--- Step 3--Partition frogs into memeplexes ---#
        memeplexes = zeros((m,n,num_parameter+1)) # [memeplexes, frog in memeplex,[f,C,Gamma] ]
        for i in range(m):
            memeplexes[i] = f_parameter[linspace(i,m*n+i,num=n,endpoint=False,dtype=int)]

        #######--- Local search start---######
        #--- Step 4--Memetic evolution within each memeplex ---#
        im = 0 # the number of memeplexes that have been optimized
        while im < m:
            i_local = 0 # counts the number of local evolutionary steps in each memeplex
            while i_local < num_local:

                #--- Construct a submemeplex ---#
                rValue = random.random(n)*weights # random value with probability weights
                subindex = sort(argsort(rValue)[::-1][0:q]) # index of selected frogs in memeplex 
                submemeplex = memeplexes[im][subindex] # form submemeplex

                #--- Improve the worst frog's position ---#
                # Learn from local best Pb #
                Pb = submemeplex[0] # mark the best frog in submemeplex
                Pw = submemeplex[q-1] # mark the worst frog in memeplex
                S = (Pb-Pw)[1:]*(Pb-Pw)[0] 
                Uq = Pw[1:]+S
                # Check feasible space and the performance #
                if rangeAlpha[0] <= Uq[0] <=rangeAlpha[1]: # check feasible space
                    fq = SFLA_L1_CV(x_train, y_train,n1,Uq[0])
                    if fq < Pw[0]: # if no improvement of performance,learn from global best randomly #
                        S = random.random(num_parameter)*(frog_gb-Pw)[1:]
                        for i in range(num_parameter):
                            if S[i] > 0:
                                S[i] = min(S[i],max_step[i])
                            else:
                                S[i] = min(S[i],-max_step[i])
                        Uq = Pw[1:]+S
                        if rangeAlpha[0] <= Uq[0] <=rangeAlpha[1]: # check feasible space
                            fq = SFLA_L1_CV(x_train, y_train,n1,Uq[0])
                            if fq < Pw[0]: # if no improvement of performance, randomly generate a new frog
                                Uq = random.uniform(rangeAlpha[0],rangeAlpha[1],1)
                                fq = SFLA_L1_CV(x_train, y_train,n1,Uq[0])
                        else: # if not in the feasible space, randomly generate a new frog
                            Uq = random.uniform(rangeAlpha[0],rangeAlpha[1],1)
                            fq = SFLA_L1_CV(x_train, y_train,n1,Uq[0])         
                else: # if not in the feasible space, learn from global best randomly 
                    S = random.random(num_parameter)*(frog_gb-Pw)[1:]
                    for i in range(num_parameter):
                        if S[i] > 0:
                            S[i] = min(S[i],max_step[i])
                        else:
                            S[i] = min(S[i],-max_step[i])
                    Uq = Pw[1:]+S
                    if rangeAlpha[0] <= Uq[0] <=rangeAlpha[1]: # check feasible space
                        fq = SFLA_L1_CV(x_train, y_train,n1,Uq[0])
                        if fq < Pw[0]: # if no improvement of performance, randomly generate a new frog
                            Uq = random.uniform(rangeAlpha[0],rangeAlpha[1],1)
                            fq = SFLA_L1_CV(x_train, y_train,n1,Uq[0]) 
                    else: # if not in the feasible space, randomly generate a new frog
                        Uq = random.uniform(rangeAlpha[0],rangeAlpha[1],1)
                        fq = SFLA_L1_CV(x_train, y_train,n1,Uq[0]) 

                #--- Upgrade the memeplex ---# 
                memeplexes[im][subindex[q-1]] = r_[fq,Uq]
                memeplexes[im] =  memeplexes[im][argsort( memeplexes[im][:,0])[::-1]]            

                i_local += 1

            im += 1
        #######--- Local search end---######    

        #--- Step 5--Shuffle memeplexes ---#
        f_parameter =  memeplexes.reshape(m*n,num_parameter+1)
        f_parameter = f_parameter[argsort(f_parameter[:,0])[::-1]]


        i_global += 1

        #--- Step 6--Check convergence ---#
        if f_parameter[0,0] > 0.9999:
            print('The program was terminated because it reached the optimization goal with f = %.3f' %f_parameter[0,0])
            break
            
        fBest_iteration = r_[fBest_iteration,f_parameter[0,0]] 

    #######--- Global search end---######
        
    return (f_parameter[0],fBest_iteration)

In [51]:
k = np.array([[12, , Quater], [13, 2304.201, Quater]])

In [52]:
np.array(k[:, 1], dtype=np.float)

array([2305.201, 2304.201])

In [54]:
np.argsort(k[:, 1])

array([1, 0])

In [22]:
m = {}

In [23]:
m[12] = [2305.201, Quater]
m[13] = [2304.201, Quater]

In [72]:
m.keys()

dict_keys([12, 13])

In [68]:
sorted_fitness = np.array(sorted(m, key = lambda ele: m[ele][0], reverse=True))

In [69]:
sorted_fitness

array([12, 13])

In [None]:
#mplx_no -- number of memeplex
frogs = 50
mplx_no = 10

memeplexes = np.zeros((mplx_no, int(frogs/mplx_no)))

for j in range(memeplexes.shape[1]):
    for i in range(mplx_no):
        memeplexes[i, j] = sorted_fitness[i + (mplx_no*j)]

In [None]:
findPointNormals(rec_coord, 20,[0,0,0], rec_res_id, rec_res, 'r')

In [62]:
findPointNormals(rec_coord, 20,[0,0,0], rec_res_id, rec_res, 'r')

[[  0   1   2 ... 220  21  85]
 [  1   0   2 ...  61  80  29]
 [  2   3   1 ...  81  60  62]
 ...
 [360 361 359 ... 308 288 293]
 [361 362 360 ... 342 103 104]
 [362 361 292 ... 343 339 296]]


In [52]:
rec_coord

array([[-34.67484472,  41.98275331,  -2.3416753 ],
       [-36.10776304,  40.86972307,  -5.02669737],
       [-35.58716059,  37.64397324,  -6.47031983],
       ...,
       [ -8.58011997,  46.7406809 , -50.57077398],
       [ -6.08653257,  45.27366286, -52.64355895],
       [ -4.59167991,  42.83612025, -54.58126628]])

In [16]:
inp1

'M_model_st.pdb1.pdb'

In [28]:
77*363

27951

In [20]:
lig_coord.shape

(69, 3)

In [21]:
rec_coord.shape

(363, 3)

In [24]:
len(rec_res)

363

In [90]:
rec_normal[300]

array([ 0.94733624, -0.31751113, -0.04172218])

In [91]:
rec_curve[300]

array([0.16748599])

In [93]:
rec_normal[150]

array([ 0.88772236,  0.23687235, -0.39476639])

In [92]:
rec_curve[150]

array([0.23911071])

In [89]:
rec_curve.shape

(363, 1)