In [1]:
from Swarm import Swarm
import Helper

import sys
import copy
import time
import argparse
import os

import numpy as np
from scipy import stats

from multiprocessing import Pool
from multiprocessing import cpu_count
from itertools import repeat
from tqdm import tqdm

from Helper import Write_Log

In [2]:
def lossFunction(tar,b):
    """Return the sum of squared errors (SSE) between ``b`` and ``tar``.

    Args:
        tar: target values (array-like supporting ``b - tar``).
        b: candidate values, broadcast-compatible with ``tar``.

    Returns:
        ``np.sum((b - tar) ** 2)`` taken over every element.
    """
    # Alternative objectives that were tried and left disabled:
    #   np.sqrt( (1/b) * np.sum( (tar)**2, axis=1 ) )
    #   np.sqrt(np.sum( (self.dist-tar)**2, axis=1 ))            # RMSE
    #   Huber (delta = 1.0, y = tar, yHat = b):
    #   np.sum(np.where(np.abs(y-yHat) < delta,.5*(y-yHat)**2 , delta*(np.abs(y-yHat)-0.5*delta)))
    residual = b - tar
    return np.sum(np.square(residual))

# Prints statistics of the current swarm
def Print_Stats(swarm, contact, pointCount, i, outFilePtr, convFact):
    """Print a one-line progress summary for the swarm's current global best.

    Args:
        swarm: object exposing ``id`` and ``gBest``. ``gBest[1]`` is printed
            as the cost and ``gBest[2]`` is the vector compared against the
            contact columns.
        contact: 2-D array. Column 3 is the target for the Pearson, Spearman
            and error figures; column 2 is the reference for "IFSpear".
        pointCount: divisor used for the root-mean-square error; must be
            non-zero.
        i: iteration index, printed as "itt".
        outFilePtr: accepted for call compatibility; not used.
        convFact: accepted for call compatibility; not used.

    Returns:
        None. The summary is written to stdout.
    """
    fitted = swarm.gBest[2]
    target = contact[:,3]

    pers = stats.pearsonr(fitted, target)
    spear = stats.spearmanr(fitted, target)
    spearIF = stats.spearmanr(fitted, contact[:,2])

    error = np.sqrt( (1/pointCount) * np.sum( (fitted-target)**2 ) )

    # Labels are part of the log format and are reproduced verbatim.
    fields = (
        ('id: ', swarm.id),
        (' itt: ', i),
        (' Cost: ', swarm.gBest[1]),
        (' Pearson: ', pers[0]),
        (' Spearmen: ', spear[0]),
        (' IFSpear: ', spearIF[0]),
        (' error: ', error),
    )
    print(''.join(label + str(value) for label, value in fields))
    

def Write_Stats(swarm, contact, outFilePtr):
    """Write the swarm's global-best entry ``gBest[0]`` via ``Helper.Write_Output``.

    Args:
        swarm: object exposing ``gBest``; element 0 is what gets written
            (presumably the best coordinates found -- TODO confirm).
        contact: accepted for call compatibility; not used.
        outFilePtr: destination passed straight to ``Helper.Write_Output``.
    """
    bestEntry = swarm.gBest[0]
    Helper.Write_Output(outFilePtr, bestEntry)

# Runs the PSO iteration loop and prints statistics of the current swarm
def One_Move(ittCount, swarm, contact, pointCount, threshold,  outFilePtr, convFact):
    """Iterate the swarm until the cost stops improving or ``ittCount`` is reached.

    Every 1000 iterations (once ``swarm.gBest`` is set) the SSE between
    ``contact[:,3]`` and ``swarm.gBest[2]`` is computed and a status line is
    printed. If that SSE moved by less than ``threshold`` since the previous
    check, the loop stops early.

    Args:
        ittCount: maximum number of iterations.
        swarm: swarm object advanced by ``operation``.
        contact: 2-D array whose column 3 is the optimisation target.
        pointCount: forwarded to ``Print_Stats``.
        threshold: minimum change in SSE between checks to keep iterating.
        outFilePtr: forwarded to ``Print_Stats``.
        convFact: forwarded to ``Print_Stats``.

    Returns:
        On early stop: the tuple ``(i, totTime)`` where ``i`` is the iteration
        index at which the stop happened and ``totTime`` is always 0.
        Otherwise: the int index of the last iteration executed, or 0 when
        ``ittCount <= 0`` and nothing ran.
        NOTE(review): the two return shapes differ; they are kept as-is so
        existing callers see the same values.
    """
    saveGBestCost = float('inf')
    totTime = 0  # never accumulated; only returned on the early-stop path

    # Bind the index up front so the final return is defined even when the
    # loop body never runs (ittCount <= 0).
    i = 0

    for i in range(ittCount):
        if (i%1000 == 0) and (swarm.gBest is not None):
            error = lossFunction(contact[:,3],swarm.gBest[2])
            Print_Stats(swarm, contact, pointCount, i, outFilePtr, convFact)

            # Keep the original comparison direction: a NaN error compares
            # False here and therefore takes the early-stop branch.
            if (np.abs(saveGBestCost - error)) >= threshold:
                saveGBestCost = error
            else:
                return i, totTime

        operation(i, swarm)

    return i

# Performs a single PSO pass: Velocity calculation, update position, get new cost
def operation(i, swarm, maxItt=None):
    """Advance ``swarm`` by one PSO step.

    Calls, in order, ``swarm.Calc_Vel(maxItt, i)``, ``swarm.Update_Pos(i)``
    and ``swarm.Cost()``.

    Args:
        i: current iteration index.
        swarm: swarm object providing ``Calc_Vel``, ``Update_Pos`` and ``Cost``.
        maxItt: iteration budget passed to ``Calc_Vel``. When omitted, the
            module-level ``ittCount`` is used, which is what two-argument
            callers have always received.
    """
    if maxItt is None:
        # Fall back to the module-level setting for existing callers.
        maxItt = ittCount
    swarm.Calc_Vel(maxItt, i)
    swarm.Update_Pos(i)
    swarm.Cost()

# Optimizes single swarm
def Optimize(inFilePtr, outFilePtr, convFact,constraint,points,zeroInd):
    """Run one swarm optimisation for a single conversion factor.

    A distance column ``1.0 / constraint[:,2] ** convFact`` is inserted at
    column index 3 of a copy of ``constraint``, a ``Swarm`` is built on it and
    iterated with ``One_Move``. Swarm size, start range, iteration budget and
    threshold come from the module-level ``swarmCount``, ``randRange``,
    ``ittCount`` and ``threshold``.

    Args:
        inFilePtr: accepted for call compatibility; not used.
        outFilePtr: forwarded to ``One_Move`` (used only for status output).
        convFact: exponent applied to column 2 when deriving distances.
        constraint: 2-D array; column 2 is the value converted to a distance
            (presumably the interaction frequency -- TODO confirm).
        points: sequence whose length is passed to ``Swarm`` and ``One_Move``.
        zeroInd: forwarded to ``Swarm``.

    Returns:
        Tuple ``(pearson, spearman, sse, ittFin, swarm.id, swarm)`` where the
        correlations and SSE compare ``swarm.gBest[2]`` with the derived
        distance column and ``ittFin`` is whatever ``One_Move`` returned.
    """
    dist = 1.0 / (constraint[:,2]**convFact)
    # np.insert returns a new array, so the caller's array is left untouched.
    constraint = np.insert(constraint,3, dist ,axis=1)

    swarm = Swarm(constraint, len(points), randVal=randRange, swarmCount=swarmCount, zeroInd=zeroInd)

    ittFin = One_Move(ittCount, swarm, constraint, len(points), threshold,  outFilePtr, convFact)

    # A progress bar is only advanced if one is published as a module-level
    # `pbar`; the bar created in Par_Choice is local to that function, so an
    # unconditional `pbar.update(1)` raises NameError on a fresh kernel.
    progressBar = globals().get('pbar')
    if progressBar is not None:
        progressBar.update(1)

    target = constraint[:,3]
    fitted = swarm.gBest[2]
    return (stats.pearsonr(fitted, target)[0],
            stats.spearmanr(fitted, target)[0],
            lossFunction(target, fitted),
            ittFin,
            swarm.id, swarm)

# Runs one optimisation per conversion factor in parallel and keeps the best
def Par_Choice(inFilePtr, outFilePtr, alpha):
    """Optimise a swarm for every conversion factor in a grid and pick the best.

    The grid is ``range(int(alpha[0]), int(alpha[1]), int(alpha[2])) / 100``.
    Each factor is handed to ``Optimize`` in a worker process. Every result is
    written to ``outputFolder/<outFilePtr>_alpha_<factor>`` and the result with
    the highest Spearman value (tuple index 1) is additionally written to
    ``outputFolder/<outFilePtr>_best``.

    Args:
        inFilePtr: input path passed to ``Helper.Read_Data``.
        outFilePtr: base name for the output files.
        alpha: indexable ``(start, stop, step)`` expressed in hundredths.

    Returns:
        The ``Optimize`` result tuple with the highest Spearman value; ties
        keep the earliest factor.

    Raises:
        ValueError: if the grid derived from ``alpha`` is empty.
    """
    contact, points, zeroInd = Helper.Read_Data(inFilePtr, alpha)

    alphas = np.array(range(int(alpha[0]),int(alpha[1]),int(alpha[2])))/100
    if len(alphas) == 0:
        # Previously this surfaced later as an unbound-local error.
        raise ValueError("alpha range " + str(list(alpha)) +
                         " produced no conversion factors to evaluate")

    # Context managers make sure the pool and the bar are released even when a
    # worker fails. The callback runs in the parent process, so the bar shown
    # here actually advances as results arrive.
    with Pool(processes=PROC_COUNT) as pool, tqdm(total=len(alphas)) as pbar:
        pending = [
            pool.apply_async(
                Optimize,
                (inFilePtr, outFilePtr, thisAlpha, contact, points, zeroInd),
                callback=lambda _result: pbar.update(1))
            for thisAlpha in alphas
        ]
        pool.close()
        pool.join()
        # .get() re-raises any exception from the worker, in grid order.
        swarms = [job.get() for job in pending]

    bestSwarm = None
    bestAlpha = None
    for thisAlpha, swarm in zip(alphas, swarms):
        print(str(swarm[-1]) + ' ' + str(swarm[1]))
        # Build the per-factor array from the untouched input each time, so
        # columns do not pile up from one iteration to the next.
        contactForAlpha = np.insert(contact,3, 1.0 / (contact[:,2]**thisAlpha) ,axis=1)
        thisOutFile = 'outputFolder/'+outFilePtr+"_alpha_"+str(thisAlpha)

        Write_Stats(swarm[-1], contactForAlpha, thisOutFile)
        if (bestSwarm is None) or (swarm[1] > bestSwarm[1]):
            bestSwarm = swarm
            bestAlpha = thisAlpha

    contactForBest = np.insert(contact,3, 1.0 / (contact[:,2]**bestAlpha) ,axis=1)
    outFilePtr = 'outputFolder/'+outFilePtr+"_best"
    print(bestSwarm)
    Write_Stats(bestSwarm[-1], contactForBest, outFilePtr)

    return bestSwarm

def Full_List( inputFilePtr, outFilePtr , alpha):
    """Run ``Par_Choice`` once, print its headline metrics, return it in a list.

    Args:
        inputFilePtr: input path forwarded to ``Par_Choice``.
        outFilePtr: output base name forwarded to ``Par_Choice``.
        alpha: conversion-factor range forwarded to ``Par_Choice``.

    Returns:
        A single-element list holding the tuple returned by ``Par_Choice``.
        Elements 0, 1 and 2 of that tuple are printed under the labels
        "pearson", "spearman" and "rmse".
    """
    best = Par_Choice(inputFilePtr, outFilePtr, alpha)

    summary = "pearson:" + str(best[0]) + " spearman:" + str(best[1]) + " rmse:" + str(best[2])
    print(summary)

    #Helper.Write_List(convStore, outFilePtr)
    return [best]

In [4]:
# ---- Run configuration ----------------------------------------------------
sys.setrecursionlimit(10000)
PROC_COUNT = cpu_count()  # worker processes used by Par_Choice


# Max scaling factor. Needs to be optimized for each specific dataset. Use two
# values [one, two] to multithread through a range of those two values at an
# interval of 5000.
# NOTE(review): nothing in the functions above reads rangeSpace -- confirm it
# is still needed.
rangeSpace = []


# Arguments for running program
# python3 ParticleChromo3D.py <input_data> <other_parameter>
# The command-line parser below is disabled (kept as a bare string); the
# notebook uses the hard-coded values that follow it instead.
'''parser = argparse.ArgumentParser("ParticleChromo3D")
parser.add_argument("infile", help="Matrix of contacts", type=str)
parser.add_argument("-o","--outfile", help="File to output pdb model [Default ./]", type=str, default="./chr.pdb")

parser.add_argument("-sc","--swarmCount", help="Number of swarms in system [Default 20]", type=int, default=10)
parser.add_argument("-itt","--ittCount", help="Maximum itterations before stop [Default 20000]", type=int, default=30000)
parser.add_argument("-t","--threshold", help="Error threshold before stoping [Default 0.1]", type=float, default=0.000001)
parser.add_argument("-rr","--randRange", help="Range of x,y,z starting coords. Random value bewtween -randRange,randRange [Default 1]", type=float, default=1.0)
#parser.add_argument("-as","--aStep", help="Convert factor step [Default .2]", type=float, default=.2)
#parser.add_argument("-az","--aZero", help="Convert factor step [Default .2]", type=float, default=.2)
#parser.add_argument("-an", help="Convert factor step [Default .2]", type=float, default=.2)

args = parser.parse_args()

if args.infile:
    inFilePtr = args.infile
if args.outfile:
    outFilePtr = args.outfile
if args.swarmCount:
    swarmCount = args.swarmCount
if args.ittCount:
    ittCount = args.ittCount
if args.threshold:
    threshold = args.threshold
if args.randRange:
    randRange = args.randRange
'''
randRange = 1.0       # passed to Swarm as randVal
swarmCount = 5        # passed to Swarm as swarmCount
ittCount = 30000      # iteration budget given to One_Move
threshold = 0.000001  # minimum SSE change between checks in One_Move

# Wall-clock start of the run; `time` is imported in the first cell.
start = time.time()
print("Seconds since epoch =", start)

if len(rangeSpace) == 0:
    rangeSpace.append(20000)

# NOTE(review): the comment on rangeSpace talks about two values, but this
# only fires for more than two -- confirm the intended comparison.
if len(rangeSpace) > 2 and (rangeSpace[0] == rangeSpace[1]):
    rangeSpace.pop()

# exist_ok avoids the check-then-create race and is a no-op on re-runs.
os.makedirs('outputFolder', exist_ok=True)

inFilePtr = '../input-and-models/Input/Synthetic/chainDres5_Matrix_noise000.txt'
#inFilePtr = '../input-and-models/Input/HiC/chr22_1mb_matrix.txt'
outFilePtr = './chr'

print(inFilePtr)

# Write a whitespace-normalised copy of the input next to it: every run of
# whitespace inside a line collapses to a single space and leading/trailing
# whitespace is dropped. Lines are joined with '\n' and no trailing newline
# is written.
fout = inFilePtr + ".stripped"
with open(inFilePtr, "r") as f:
    # The context manager closes the file even if reading fails part-way.
    clean_lines = [" ".join(rawLine.split()) for rawLine in f]

with open(fout, "w") as f:
    f.write('\n'.join(clean_lines))

# Conversion-factor grid as (start, stop, step), scaled by 100 because
# Par_Choice converts the bounds with int() and divides by 100 again.
theseAlphas = np.array([0.1, 2.0, 0.1])*100
# Same grid Par_Choice builds internally, recomputed here for reporting.
theAlphas = np.array(range(int(theseAlphas[0]),int(theseAlphas[1]),int(theseAlphas[2])))/100

# Full_List returns a one-element list; [0] is the best Optimize result tuple:
# (pearson, spearman, SSE, ittFin, swarm.id, swarm).
outputOfSwarm = Full_List( inFilePtr+".stripped", outFilePtr, theseAlphas)[0]
print(outputOfSwarm)

bestSpearm = outputOfSwarm[1]
bestCost = outputOfSwarm[2]
# NOTE(review): index 4 of the result tuple is swarm.id, not the position of
# the winning factor in theAlphas. Par_Choice tracks the winning alpha
# internally but does not return it -- verify that this lookup reports the
# factor that was actually selected.
bestAlpha = theAlphas[outputOfSwarm[4]]
bestPearsonRHO = outputOfSwarm[0]

    
print("Input file: ", inFilePtr)
print("Convert factor:: ",bestAlpha)
print("SSE at best spearman : ", bestCost)    
print("Best Spearman correlation Dist vs. Reconstructed Dist  : ", bestSpearm) 
print("Best Pearson correlation Dist vs. Reconstructed Dist: ", bestPearsonRHO) 
Write_Log("outputFolder/bestAlpha.log", inFilePtr, bestAlpha, bestCost, bestSpearm, bestPearsonRHO)
# Elapsed wall-clock seconds since `start` was taken in the configuration step.
net = time.time() - start
print("time : ", net)

Seconds since epoch = 1610407323.1461143
../input-and-models/Input/Synthetic/chainDres5_Matrix_noise000.txt


  0%|          | 0/19 [00:00<?, ?it/s]

id: 1 itt: 0 Cost: 69992.20165789896 Pearson: -0.00041553914466656815 Spearmen: 0.00019303946346307116 IFSpear: -0.00019303946346307116 error: 10.711737216403181
id: 1 itt: 0 Cost: 78790.97626959394 Pearson: 0.00024121494186626034 Spearmen: 0.00019303946346307116 IFSpear: -0.00019303946346307116 error: 11.365101621551247
id: 1 itt: 0 Cost: 66395.29397070876 Pearson: -0.0007357827968960638 Spearmen: 0.00019303946346307116 IFSpear: -0.00019303946346307116 error: 10.432868455716129
id: 1 itt: 0 Cost: 83890.06279572596 Pearson: 0.0005706740715463437 Spearmen: 0.00019303946346307116 IFSpear: -0.00019303946346307116 error: 11.7270922699426
id: 1 itt: 0 Cost: 74158.24332192555 Pearson: -8.865364536879417e-05 Spearmen: 0.00019303946346307116 IFSpear: -0.00019303946346307116 error: 11.025919040682334
id: 1 itt: 0 Cost: 89527.02776227913 Pearson: 0.0008966380658539646 Spearmen: 0.00019303946346307116 IFSpear: -0.00019303946346307116 error: 12.114686099211355
id: 1 itt: 0 Cost: 95829.56918770785 

id: 2 itt: 1000 Cost: 238161.3348804312 Pearson: 0.0768114402310158 Spearmen: 0.05945841219356334 IFSpear: -0.05945841219356334 error: 19.759261571141074
id: 2 itt: 1000 Cost: 328730.47244433267 Pearson: 0.0727267391262397 Spearmen: 0.047629888526951325 IFSpear: -0.047629888526951325 error: 23.214271770683833
id: 2 itt: 2000 Cost: 283342.82522411336 Pearson: 0.042179066176231475 Spearmen: 0.02005011492007709 IFSpear: -0.02005011492007709 error: 21.55217935137021
id: 2 itt: 2000 Cost: 237483.22236662608 Pearson: 0.08069792612768581 Spearmen: 0.05982101414381304 IFSpear: -0.05982101414381304 error: 19.731111422777875
id: 2 itt: 2000 Cost: 327541.79433615925 Pearson: 0.07760101709922253 Spearmen: 0.04914180085235919 IFSpear: -0.04914180085235919 error: 23.172262755432808
id: 2 itt: 3000 Cost: 237483.22236662608 Pearson: 0.08069792612768581 Spearmen: 0.05982101414381304 IFSpear: -0.05982101414381304 error: 19.731111422777875
id: 2 itt: 3000 Cost: 326478.7705336866 Pearson: 0.08299807084351

Process ForkPoolWorker-13:
Process ForkPoolWorker-12:
Process ForkPoolWorker-3:
Process ForkPoolWorker-9:
Process ForkPoolWorker-14:
Process ForkPoolWorker-10:
Process ForkPoolWorker-7:
Process ForkPoolWorker-11:
  File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
Process ForkPoolWorker-6:
Process ForkPoolWorker-4:
Process ForkPoolWorker-2:
Process ForkPoolWorker-15:
Process ForkPoolWorker-5:
Process ForkPoolWorker-16:
Process ForkPoolWorker-1:
Process ForkPoolWorker-8:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):


In [22]:
# Display the conversion-factor grid evaluated by the run above.
theAlphas

array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. , 1.1, 1.2, 1.3,
       1.4, 1.5, 1.6, 1.7, 1.8, 1.9])