In [2]:
#util
import os
import time

#Math/arrays
import numpy as np
import pandas as pd

import optimizationFuncs as optim
import cpso
from importlib import reload

# Plotting
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d 

#File io
#from Bio.PDB import *
from loadFile import *
from makePDB import *

#PSO
from particleSwarmOpt import *

from threeDMaxGrad import *
from myUtils import *

ModuleNotFoundError: No module named 'particleSwarmOpt'

In [2]:
#==========================================================================
# Make directory if it doesn't exist
#--------------------------------------------------------------------------
if not os.path.exists('my_folder'):
    os.makedirs('my_folder')

In [3]:
## constants
inFile = "../../examples/input/chr19.txt"
## load file 
lstCons=loadFile.loadFileFunc(inFile)


this is a sparse matrix
Number of points :  948
numBins  59.0


In [4]:
lstCons

array([[ 0.       ,  1.       ,  7.321659 ],
       [ 0.       , 51.       ,  1.8995112],
       [ 0.       , 53.       ,  2.5754905],
       ...,
       [57.       , 59.       ,  4.9063144],
       [57.       , 58.       , 23.9648   ],
       [58.       , 59.       , 13.488014 ]])

In [1]:
class outputObj:
    def __init__(self, xyzData, outputFile):
        self.xyzData = xyzData
        self.outputFile = outputFile
        
        #defaults
        self.recordName = []
        for i in range(len(xyzData)):
            self.recordName.append('ATOM')
            

In [6]:
from __future__ import division
import random
import math

#--- COST FUNCTION 
# function we are attempting to optimize (minimize)
def func1(x):
    total=0
    for i in range(len(x)):
        total+=x[i]**2
    return total

#--- MAIN 
class Particle:
    def __init__(self,x0):
        self.position_i=[]          # particle position
        self.velocity_i=[]          # particle velocity
        self.pos_best_i=[]          # best position individual
        self.err_best_i=-1          # best error individual
        self.err_i=-1               # error individual

        for i in range(0,num_dimensions):
            self.velocity_i.append(random.uniform(-1,1))
            self.position_i.append(x0[i])

    # evaluate current fitness
    def evaluate(self,costFunc):
        self.err_i=costFunc(self.position_i)

        # check to see if the current position is an individual best
        if self.err_i < self.err_best_i or self.err_best_i==-1:
            self.pos_best_i=self.position_i
            self.err_best_i=self.err_i

    # update new particle velocity
    def update_velocity(self,pos_best_g):
        w=0.5       # constant inertia weight (how much to weigh the previous velocity)
        c1=1        # cognative constant
        c2=2        # social constant

        for i in range(0,num_dimensions):
            r1=random.random()
            r2=random.random()

            vel_cognitive=c1*r1*(self.pos_best_i[i]-self.position_i[i])
            vel_social=c2*r2*(pos_best_g[i]-self.position_i[i])
            self.velocity_i[i]=w*self.velocity_i[i]+vel_cognitive+vel_social

    # update the particle position based off new velocity updates
    def update_position(self,bounds):
        for i in range(0,num_dimensions):
            self.position_i[i]=self.position_i[i]+self.velocity_i[i]

            # adjust maximum position if necessary
            if self.position_i[i]>bounds[i][1]:
                self.position_i[i]=bounds[i][1]

            # adjust minimum position if neseccary
            if self.position_i[i] < bounds[i][0]:
                self.position_i[i]=bounds[i][0]
                
class PSO():
    def __init__(self,costFunc,x0,bounds,num_particles,maxiter):
        global num_dimensions

        num_dimensions=len(x0)
        err_best_g=-1                   # best error for group
        pos_best_g=[]                   # best position for group

        # establish the swarm
        swarm=[]
        for i in range(0,num_particles):
            swarm.append(Particle(x0))

        # begin optimization loop
        i=0
        while i < maxiter:
            #print i,err_best_g
            # cycle through particles in swarm and evaluate fitness
            for j in range(0,num_particles):
                swarm[j].evaluate(costFunc)

                # determine if current particle is the best (globally)
                if swarm[j].err_i < err_best_g or err_best_g == -1:
                    pos_best_g=list(swarm[j].position_i)
                    err_best_g=float(swarm[j].err_i)

            # cycle through swarm and update velocities and position
            for j in range(0,num_particles):
                swarm[j].update_velocity(pos_best_g)
                swarm[j].update_position(bounds)
            i+=1

        # print final results
        print ('FINAL:')
        print (pos_best_g)
        print (err_best_g)

if __name__ == "__PSO__":
    main()

#--- EXECUTE

initial=[5,5]               # initial starting location [x1,x2...]
bounds=[(-10,10),(-10,10)]  # input bounds [(x1_min,x1_max),(x2_min,x2_max)...]
PSO(func1,initial,bounds,num_particles=15,maxiter=30)

FINAL:
[0.00017338026690873372, 0.0005070560141337797]
2.8716651842257957e-07


<__main__.PSO at 0x7f50d61be580>

In [7]:
## this is convert to distance
#CONVERT_FACTOR_R = np.arange(-0.5, .5, .2) # this is alpha and can be looped
CONVERT_FACTOR_R=[0.5]
MAX_ITERATION = 500; # maximum number of iterations

AVG_DIST = 10.0  # an arbitrary distance

n=int(max(max(lstCons[:,0]),max(lstCons[:,1])))+1
print("n= ",n)

bestAlpha= CONVERT_FACTOR_R[0]
lstConsReset = lstCons
for CONVERT_FACTOR in CONVERT_FACTOR_R : 
    lstCons = lstConsReset
    ## Find the average IF
    avgIF = 0.0
    for i in range(len(lstCons)):
        avgIF = avgIF + float(lstCons[i][2])
    avgIF = avgIF/len(lstCons)
    
    maxIF = 0.0
    ## scale average distance to AVG_DIST
    avgDist = 10.0;
    avgAdjIF = 0.0;
    avgAdjCount = 0;
    totalIF = 0;

    for i in range(len(lstCons)):
        x = lstCons[i][0]
        y = lstCons[i][1]
        IF = lstCons[i][2]
        lstCons[i][2] = IF / avgIF  # normallize IF by avgIF
        IF = lstCons[i][2]
        dist = 1/(IF**CONVERT_FACTOR)
        avgDist = avgDist + dist

        totalIF = totalIF +  IF

        if ( IF > maxIF):
            maxIF = lstCons[i][2]
        # Find the adjacent position IF
        if ( abs(x-y)==1):
            avgAdjCount= avgAdjCount+1
            avgAdjIF =  avgAdjIF + IF

    avgDist = avgDist/len(lstCons)
    avgAdjIF = avgAdjIF/avgAdjCount

    maxIF = min(avgAdjIF, maxIF)

    ## TODO Add adjacent if none exist
    i = 0
    
    print('TODO Added missing adjacent constraint...')

    print('Number of constraints: = ', n)
    maxD = 0
    distResultsList= []
    for i in range(len(lstCons)):
        IF = lstCons[i,2];
        dist = AVG_DIST/ ((IF**CONVERT_FACTOR)* avgDist)
        distResultsList.append(dist)
        if (dist > maxD):
            maxD = dist;

    result = np.hstack((lstCons, np.atleast_2d(distResultsList).T))
    lstCons = result
    print('Max distance is: = \n', maxD); 
    
    ## Optimization
    ## Initialize Structure
    #=========================================================================
    smooth_factor = 1e-6 ; # for numerical stability
    LEARNING_RATE = 0.1; # Specify the learning rate
    NEAR_ZERO =0.00001; # used to signify a boundary of convergence
    NUM_FIGURES_OUTPUT=5

    thisStr =  [];
    R = [-0.5,0.5];
    for i in range(n):
        xyz = np.array([random.random(),random.random(),random.random()]) * (R[1]-R[0]) + min(R) 
        thisStr.append(xyz)
    print(xyz)

    ## Variables declaration
    #=========================================================================
    thislen = len(lstCons)

    Sum_Grad = np.zeros(n);
    variables = thisStr;
    oldobj = 0;

    ## Calculate Objective function [ requires variables and derivatives]
    #=========================================================================
    
    tmper2 = threeDMaxGrad.gradientCalculator(threeDMaxGrad(), thislen,variables, lstCons, n, maxIF, dist) # returns the cost and derivative(change)
    change = tmper2[0]
    cost = tmper2[1]

    print(tmper2)

    ## updateVariables [ use current coordinate and derivatives] 
    #=========================================================================
    for i in range(n) :
        Sum_Grad[i] =  Sum_Grad[i] + (sum(change[i,:])**2);
        denum = smooth_factor + Sum_Grad[i];
        adagrad = (LEARNING_RATE * change[i]/np.sqrt(denum));
        variables[i] = variables[i] +  adagrad;

    ## loop until convergence
    #=========================================================================
    count = 0;
    oldobj = cost;

    converge = myUtils.isconvergence(myUtils(), change, cost, NEAR_ZERO);
    while(count < MAX_ITERATION and ~converge ) :

        # Objective function % returns cost and derivative
        tmper2 = threeDMaxGrad.gradientCalculator(threeDMaxGrad(), thislen,variables, lstCons, n, maxIF, dist) # returns the cost and derivative(change)
        change = tmper2[0]
        cost = tmper2[1]
        newobj = cost;
        # update variables with gradient
        for i in range(n):
            Sum_Grad[i-1] =  Sum_Grad[i-1] + (sum(change[i,:]**2))
            denum = smooth_factor + Sum_Grad[i-1];
            adagrad = (LEARNING_RATE * change[i]/np.sqrt(denum));
            variables[i] = variables[i] +  adagrad

        converge = myUtils.isconvergence(myUtils(), change, cost, NEAR_ZERO); # Alternative  converge = abs(newobj - oldobj); 

        oldobj = newobj;
        # visualize structure MAX 20
        #if count in range(0, MAX_ITERATION, int(MAX_ITERATION/NUM_FIGURES_OUTPUT)) :
            #titleStr = 'Iteration ' + str(count) + ' objective function:'+ str(newobj) + '  alpha : ' +CONVERT_FACTOR+'\n';
            #xyzNew = convert2xyz(n,variables);
            #fig = plt.figure()
            #fig.clear(True) 

            #ax = plt.axes(projection ='3d') 
            #ax.plot(xyzNew[:,0],xyzNew[:,1],xyzNew[:,2], 'green');
            #plt.title(titleStr)
            #plt.show()
        count = count + 1; 
    #========================================================================
    # scoring using spearman correlation, pearson correlation and  RMSD     
    #------------------------------------------------------------------------
    # calculate rmse    
    SUM = 0.0;

    Len = int(n * (n - 1) / 2) 
    Dist = np.zeros(Len);
    WishDist = np.zeros(Len);
    count = 1;
    structure = variables;
    for k in range(len(lstCons)):
        i = int(lstCons[k,0]);    j = int(lstCons[k,1]);    IF = lstCons[k,2];  dist = lstCons[k,3];
        # structure distance   
        x1=structure[i][0];  x2=structure[j][0];
        y1=structure[i][1];  y2=structure[j][1];
        z1=structure[i][2];  z2=structure[j][2];

        str_dist = threeDMaxGrad.calEuclidianDist(threeDMaxGrad(), x1,y1,z1,x2,y2,z2 );
        SUM = SUM + ((str_dist - dist)**2);

        # calculate spearman_correlation and Pearson correlation
        if (i != j and IF > 0 and count<=Len  ):   
            Dist[count] = str_dist;
            WishDist[count]= dist;
            count = count + 1

    SUM = SUM / len(lstCons);    
    rmse = np.sqrt(SUM);

    print("RMSE  : ", rmse)

    # let's convert to a dataframe
    df = pd.DataFrame({'Dist': Dist, 'wishDist': WishDist})
    pearsonCoeff = df.corr(method = 'pearson')
    spearmanCoeff = df.corr(method = 'spearman')

    print("pearson coeff : \n", pearsonCoeff)
    print("spearman Coeff : \n", spearmanCoeff)
    
    if CONVERT_FACTOR == CONVERT_FACTOR_R[0]:#first run
        bestSpearmanRHO = pearsonCoeff.iloc[1]['wishDist']
        bestMat =  variables
    elif bestSpearmanRHO < pearsonCoeff.iloc[1]['wishDist']:
        bestSpearmanRHO = pearsonCoeff.iloc[1]['wishDist']
        bestMat =  variables
        bestAlpha = CONVERT_FACTOR
print("\n\n",bestAlpha)

n=  60
TODO Added missing adjacent constraint...
Number of constraints: =  60
Max distance is: = 
 19.398909922884858
[-0.47918745 -0.00667439 -0.42193481]
(array([[ 0.07212223, -0.03105362,  0.00804577],
       [-0.04264614,  0.03842459,  0.00739319],
       [ 0.04743272,  0.0570234 , -0.02129045],
       ...,
       [ 0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ]]), -248.06345725114713)
RMSE  :  4.589452487709661
pearson coeff : 
               Dist  wishDist
Dist      1.000000  0.844531
wishDist  0.844531  1.000000
spearman Coeff : 
               Dist  wishDist
Dist      1.000000  0.870188
wishDist  0.870188  1.000000


 0.5


In [8]:
#========================================================================
# create pdb
#------------------------------------------------------------------------
#Increase structure Size

xyz4pdb = myUtils.convert2xyz(myUtils(), n, bestMat) 
scale=100/np.amax(xyz4pdb)
xyz4pdb = xyz4pdb* scale
#output pdb file
outputData= np.round_(xyz4pdb ,3 )

outputFile = 'yolo.pdb' #output directory.


if os.path.exists(outputFile):
    os.remove(outputFile)

output = outputObj(outputData,outputFile)

pdbMaker = makePDB()
makePDB.mat2pdb(pdbMaker, output) # Converts the mat XYZ coordinates to PDB format.

outputting PDB in file :  yolo.pdb
done! closing file...



In [9]:
print("\n\n",bestAlpha)



 0.5
