Skip to content

Commit

Permalink
Merge pull request #95 from oaxiom/master
Browse files Browse the repository at this point in the history
Port to Python 3 and pure python
  • Loading branch information
AliciaSchep committed Dec 13, 2020
2 parents bbb2402 + e1ec144 commit d12d58a
Show file tree
Hide file tree
Showing 31 changed files with 327 additions and 178 deletions.
8 changes: 4 additions & 4 deletions bin/nucleoatac
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,14 @@ from nucleoatac.cli import nucleoatac_parser, nucleoatac_main


if __name__ == '__main__':
print "Command run: "+ ' '.join(map(str,sys.argv))
print "nucleoatac version " + __version__
print "start run at: " + time.strftime("%Y-%m-%d %H:%M")
print("Command run: "+ ' '.join(map(str,sys.argv)))
print("nucleoatac version " + __version__)
print("start run at: " + time.strftime("%Y-%m-%d %H:%M"))
try:
parser = nucleoatac_parser()
args = parser.parse_args()
nucleoatac_main(args)
print "end run at: " + time.strftime("%Y-%m-%d %H:%M")
print("end run at: " + time.strftime("%Y-%m-%d %H:%M"))
except KeyboardInterrupt:
sys.stderr.write("User interrupted nucleoatac.")
sys.exit(0)
Expand Down
8 changes: 4 additions & 4 deletions bin/pyatac
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@ from pyatac.cli import pyatac_parser, pyatac_main


if __name__ == '__main__':
print "Command run: "+ ' '.join(map(str,sys.argv))
print "pyatac version "+__version__
print "start run at: " + time.strftime("%Y-%m-%d %H:%M")
print("Command run: "+ ' '.join(map(str,sys.argv)))
print("pyatac version "+__version__)
print("start run at: " + time.strftime("%Y-%m-%d %H:%M"))
try:
parser = pyatac_parser()
args = parser.parse_args()
pyatac_main(args)
print "end run at: " + time.strftime("%Y-%m-%d %H:%M")
print("end run at: " + time.strftime("%Y-%m-%d %H:%M"))
except KeyboardInterrupt:
sys.stderr.write("User interrupted pyatac.")
sys.exit(0)
Expand Down
4 changes: 2 additions & 2 deletions nucleoatac/NFRCalling.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def findNFRs(self):
if self.chrom in tbx.contigs:
for row in tbx.fetch(self.chrom, self.start, self.end, parser = pysam.asTuple()):
nucs.append(int(row[1]))
for j in xrange(1,len(nucs)):
for j in range(1,len(nucs)):
left = nucs[j-1] + 73
right = nucs[j] - 72
if right <= left:
Expand All @@ -106,7 +106,7 @@ def process(self, params):
self.findNFRs()
def removeData(self):
"""remove data from chunk-- deletes all attributes"""
names = self.__dict__.keys()
names = list(self.__dict__.keys())
for name in names:
delattr(self,name)

35 changes: 17 additions & 18 deletions nucleoatac/NucleosomeCalling.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def calculateBackgroundSignal(self, mat, vmat, nuc_cov):
self.bias_mat.start + offset,
self.bias_mat.end - offset),
vmat.mat,mode = 'valid')[0]
self.vals = self.vals * self.nuc_cov/ self.cov.vals
self.vals = self.vals * self.nuc_cov // self.cov.vals



Expand All @@ -79,7 +79,7 @@ def simulateReads(self):
sim_mat = np.reshape(sim_vect, self.vmat.mat.shape)
return sim_mat
def simulateDist(self, numiters = 1000):
self.scores = map(lambda x: np.sum(self.simulateReads() * self.vmat.mat),range(numiters))
self.scores = [np.sum(self.simulateReads() * self.vmat.mat) for x in range(numiters)]
def analStd(self):
flatv = np.ravel(self.vmat.mat)
var = calculateCov(self.probs, flatv, self.reads)
Expand All @@ -91,8 +91,7 @@ def analMean(self):

def norm(x, v, w, mean):
"""compute values of normal pdf with given mean and sd at values in x"""
norm = (1.0/(np.sqrt(2*np.pi*v)) *
np.exp(-(x - mean)**2/(2*v)))
norm = (1.0/(np.sqrt(2*np.pi*v)) * np.exp(-(x - mean)**2/(2*v)))
norm = norm * (w/max(norm))
return norm

Expand Down Expand Up @@ -139,7 +138,7 @@ def addNorms(x,params):
"""Add several normal distributions together"""
l = len(x)
fit = np.zeros(l)
i = len(params)/3
i = len(params)//3
for j in range(i):
fit += norm(x,params[j*3],params[3*j+1],params[3*j+2])
return fit
Expand All @@ -156,21 +155,21 @@ def fitNorm(guess, bound, sig):
allnucs = nuctrack.sorted_nuc_keys
x = bisect_left(allnucs,index)
if x == 0:
left = index - nuctrack.params.nonredundant_sep/3
means = (nuctrack.params.nonredundant_sep/3,)
left = index - nuctrack.params.nonredundant_sep//3
means = (nuctrack.params.nonredundant_sep//3,)
elif index - allnucs[x-1] < nuctrack.params.nonredundant_sep:
left = allnucs[x-1]
means = (index - allnucs[x-1],0)
else:
left = index - nuctrack.params.nonredundant_sep/3
means = (nuctrack.params.nonredundant_sep/3,)
left = index - nuctrack.params.nonredundant_sep//3
means = (nuctrack.params.nonredundant_sep//3,)
if x == len(allnucs)-1:
right = index + nuctrack.params.nonredundant_sep/3 + 1
right = index + nuctrack.params.nonredundant_sep//3 + 1
elif allnucs[x+1] - index < nuctrack.params.nonredundant_sep:
right = allnucs[x+1]
means += (allnucs[x+1] - left,)
else:
right = index + nuctrack.params.nonredundant_sep/3 +1
right = index + nuctrack.params.nonredundant_sep//3 +1
sig = nuctrack.smoothed.vals[left:right]
sig[sig<0] = 0
if len(means)==1:
Expand Down Expand Up @@ -237,14 +236,14 @@ def __init__(self, chunk):
def initialize(self, parameters):
self.params = parameters
def getFragmentMat(self):
self.mat = FragmentMat2D(self.chrom, self.start - max(self.params.window,self.params.upper/2+1),
self.end + max(self.params.window,self.params.upper/2+1), 0, self.params.upper, atac = self.params.atac)
self.mat = FragmentMat2D(self.chrom, self.start - max(self.params.window,self.params.upper//2+1),
self.end + max(self.params.window,self.params.upper//2+1), 0, self.params.upper, atac = self.params.atac)
self.mat.makeFragmentMat(self.params.bam)
def makeBiasMat(self):
self.bias_mat = BiasMat2D(self.chrom, self.start - self.params.window,
self.end + self.params.window, 0, self.params.upper)
bias_track = InsertionBiasTrack(self.chrom, self.start - self.params.window - self.params.upper/2,
self.end + self.params.window + self.params.upper/2 + 1, log = True)
bias_track = InsertionBiasTrack(self.chrom, self.start - self.params.window - self.params.upper//2,
self.end + self.params.window + self.params.upper//2 + 1, log = True)
if self.params.fasta is not None:
bias_track.computeBias(self.params.fasta, self.params.chrs, self.params.pwm)
self.bias_mat.makeBiasMat(bias_track)
Expand Down Expand Up @@ -298,7 +297,7 @@ def findAllNucs(self):
#find peaks in normalized sigal
cands1 = call_peaks(combined, min_signal = 0,
sep = self.params.redundant_sep,
boundary = self.params.nonredundant_sep/2, order = self.params.redundant_sep/2)
boundary = self.params.nonredundant_sep//2, order = self.params.redundant_sep//2)
for i in cands1:
nuc = Nucleosome(i + self.start, self)
if nuc.nuc_cov > self.params.min_reads:
Expand All @@ -310,7 +309,7 @@ def findAllNucs(self):
self.nuc_collection[i] = nuc
self.sorted_nuc_keys = np.array(sorted(self.nuc_collection.keys()))
self.nonredundant = reduce_peaks( self.sorted_nuc_keys,
map(lambda x: self.nuc_collection[x].z, self.sorted_nuc_keys),
[self.nuc_collection[x].z for x in self.sorted_nuc_keys],
self.params.nonredundant_sep)
self.redundant = np.setdiff1d(self.sorted_nuc_keys, self.nonredundant)
def fit(self):
Expand Down Expand Up @@ -340,7 +339,7 @@ def process(self, params):
self.makeInsertionTrack()
def removeData(self):
"""remove data from chunk-- deletes all attributes"""
names = self.__dict__.keys()
names = list(self.__dict__.keys())
for name in names:
delattr(self,name)

38 changes: 24 additions & 14 deletions nucleoatac/Occupancy.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from scipy import signal, optimize, stats
import numpy as np
import matplotlib.pyplot as plt
import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()})
from pyatac.fragmentsizes import FragmentSizes
from pyatac.tracks import Track, CoverageTrack
from pyatac.chunk import Chunk
Expand All @@ -23,9 +22,11 @@ class FragmentMixDistribution:
def __init__(self, lower = 0, upper =2000):
self.lower = lower
self.upper = upper

def getFragmentSizes(self, bamfile, chunklist = None):
self.fragmentsizes = FragmentSizes(self.lower, self.upper)
self.fragmentsizes.calculateSizes(bamfile, chunks = chunklist)

def modelNFR(self, boundaries = (35,115)):
"""Model NFR distribution with gamma distribution"""
b = np.where(self.fragmentsizes.get(self.lower,boundaries[1]) == max(self.fragmentsizes.get(self.lower,boundaries[1])))[0][0] + self.lower
Expand Down Expand Up @@ -64,14 +65,15 @@ def gamma_fit(X,o,p):
self.nfr_fit.get(boundaries[1],self.upper)))
nuc[nuc<=0]=min(min(nfr)*0.1,min(nuc[nuc>0])*0.001)
self.nuc_fit = FragmentSizes(self.lower, self.upper, vals = nuc)

def plotFits(self,filename=None):
"""plot the Fits"""
fig = plt.figure()
plt.plot(range(self.lower,self.upper),self.fragmentsizes.get(),
plt.plot(list(range(self.lower,self.upper)),self.fragmentsizes.get(),
label = "Observed")
plt.plot(range(self.lower,self.upper),self.nfr_fit0.get(), label = "NFR Fit")
plt.plot(range(self.lower,self.upper),self.nuc_fit.get(), label = "Nucleosome Model")
plt.plot(range(self.lower,self.upper),self.nfr_fit.get(), label = "NFR Model")
plt.plot(list(range(self.lower,self.upper)),self.nfr_fit0.get(), label = "NFR Fit")
plt.plot(list(range(self.lower,self.upper)),self.nuc_fit.get(), label = "Nucleosome Model")
plt.plot(list(range(self.lower,self.upper)),self.nfr_fit.get(), label = "NFR Model")
plt.legend()
plt.xlabel("Fragment size")
plt.ylabel("Relative Frequency")
Expand Down Expand Up @@ -109,8 +111,8 @@ def calculateOccupancy(inserts, bias, params):
nuc_probs = nuc_probs / np.sum(nuc_probs)
nfr_probs = params.nfr_probs * bias
nfr_probs = nfr_probs / np.sum(nfr_probs)
x = map(lambda alpha: np.log(alpha * nuc_probs + (1 - alpha) * nfr_probs), params.alphas)
logliks = np.array(map(lambda j: np.sum(x[j]*inserts),range(params.l)))
x = [np.log(alpha * nuc_probs + (1 - alpha) * nfr_probs) for alpha in params.alphas]
logliks = np.array([np.sum(x[j]*inserts) for j in range(params.l)])
logliks[np.isnan(logliks)] = -float('inf')
occ = params.alphas[np.argmax(logliks)]
#Compute upper and lower bounds for 95% confidence interval
Expand All @@ -129,11 +131,11 @@ def calculateOccupancyMLE(self, mat, bias_mat, params):
"""Calculate Occupancy track"""
offset=self.start - mat.start
if offset<params.flank:
raise Exception("For calculateOccupancyMLE, mat does not have sufficient flanking regions"),offset
raise Exception("For calculateOccupancyMLE, mat does not have sufficient flanking regions")(offset)
self.vals=np.ones(self.end - self.start)*float('nan')
self.lower_bound = np.ones(self.end - self.start)*float('nan')
self.upper_bound =np.ones(self.end - self.start)*float('nan')
for i in xrange(params.halfstep,len(self.vals),params.step):
for i in range(params.halfstep,len(self.vals),params.step):
new_inserts = np.sum(mat.get(lower = 0, upper = params.upper,
start = self.start+i-params.flank, end = self.start+i+params.flank+1),
axis = 1)
Expand Down Expand Up @@ -190,7 +192,7 @@ def __init__(self, insert_dist, upper, fasta, pwm, sep = 120, min_occ = 0.1, fla
if step%2 == 0:
step = step - 1
self.step = step
self.halfstep = (self.step-1) / 2
self.halfstep = (self.step-1) // 2

class OccChunk(Chunk):
"""Class for calculating occupancy and occupancy peaks
Expand All @@ -201,43 +203,50 @@ def __init__(self, chunk):
self.chrom = chunk.chrom
self.peaks = {}
self.nfrs = []

def getFragmentMat(self):
self.mat = FragmentMat2D(self.chrom, self.start - self.params.flank,
self.end + self.params.flank, 0, self.params.upper)
self.mat.makeFragmentMat(self.params.bam)

def makeBiasMat(self):
self.bias_mat = BiasMat2D(self.chrom, self.start - self.params.flank,
self.end + self.params.flank, 0, self.params.upper)
if self.params.fasta is not None:
bias_track = InsertionBiasTrack(self.chrom, self.start - self.params.window - self.params.upper/2,
self.end + self.params.window + self.params.upper/2 + 1, log = True)
bias_track = InsertionBiasTrack(self.chrom, self.start - self.params.window - self.params.upper//2,
self.end + self.params.window + self.params.upper//2 + 1, log = True)
bias_track.computeBias(self.params.fasta, self.params.chrs, self.params.pwm)
self.bias_mat.makeBiasMat(bias_track)

def calculateOcc(self):
"""calculate occupancy for chunk"""
self.occ = OccupancyTrack(self.chrom,self.start,self.end)
self.occ.calculateOccupancyMLE(self.mat, self.bias_mat, self.params)
self.occ.makeSmoothed(window_len = self.params.window, sd = self.params.flank/3.0)

def getCov(self):
"""Get read coverage for regions"""
self.cov = CoverageTrack(self.chrom, self.start, self.end)
self.cov.calculateCoverage(self.mat, 0, self.params.upper, self.params.window)

def callPeaks(self):
"""Call peaks of occupancy profile"""
peaks = call_peaks(self.occ.smoothed_vals, sep = self.params.sep, min_signal = self.params.min_occ)
for peak in peaks:
tmp = OccPeak(peak + self.start, self)
if tmp.occ_lower > self.params.min_occ and tmp.reads > 0:
self.peaks[peak] = tmp

def getNucDist(self):
"""Get nucleosomal insert distribution"""
nuc_dist = np.zeros(self.params.upper)
for peak in self.peaks.keys():
for peak in list(self.peaks.keys()):
sub = self.mat.get(start = self.peaks[peak].start-self.params.flank, end = self.peaks[peak].start+1+self.params.flank)
sub_sum = np.sum(sub,axis=1)
sub_sum = sub_sum / float(sum(sub_sum))
nuc_dist += sub_sum
return(nuc_dist)

def process(self, params):
"""proces chunk -- calculat occupancy, get coverage, call peaks"""
self.params = params
Expand All @@ -246,9 +255,10 @@ def process(self, params):
self.calculateOcc()
self.getCov()
self.callPeaks()

def removeData(self):
"""remove data from chunk-- deletes all attributes"""
names = self.__dict__.keys()
names = list(self.__dict__.keys())
for name in names:
delattr(self, name)

Expand Down
4 changes: 1 addition & 3 deletions nucleoatac/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1 @@
#Define version based on setup script
import pkg_resources
__version__ = pkg_resources.require("NucleoATAC")[0].version
__version__ = '0.3.4.py3'
4 changes: 2 additions & 2 deletions nucleoatac/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ def nucleoatac_main(args):
print('---------Calling NFR positions--------------------------------------------------')
run_nfr(args)
elif call == "run":
occ_args = parser.parse_args(map(str,['occ','--bed',args.bed,'--bam',args.bam,
occ_args = parser.parse_args(list(map(str,['occ','--bed',args.bed,'--bam',args.bam,
'--fasta', args.fasta, '--pwm', args.pwm,
'--out',args.out,'--cores',args.cores]))
'--out',args.out,'--cores',args.cores])))
vprocess_args = parser.parse_args(['vprocess','--sizes',args.out+'.nuc_dist.txt','--out',args.out])
nuc_args_list = ['nuc','--bed',args.bed,'--bam',args.bam,'--out',args.out,'--cores', str(args.cores),
'--occ_track', args.out + '.occ.bedgraph.gz','--vmat', args.out + '.VMat',
Expand Down
8 changes: 4 additions & 4 deletions nucleoatac/diff_occ.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def _diffHelper(arg):
occ.occ, [occ.peaks[i] for i in sorted(occ.peaks.keys())])
occ.removeData()
except Exception as e:
print('Caught exception when processing:\n'+ chunk.asBed()+"\n")
print(('Caught exception when processing:\n'+ chunk.asBed()+"\n"))
traceback.print_exc()
print()
raise e
Expand All @@ -43,7 +43,7 @@ def _writeDiff(pos_queue, out):
for pos in poslist:
pos.write(out_handle)
pos_queue.task_done()
except Exception, e:
except Exception as e:
print('Caught exception when writing occupancy track\n')
traceback.print_exc()
print()
Expand All @@ -57,7 +57,7 @@ def run_diff(args, bases = 500000):
"""
chrs = read_chrom_sizes_from_bam(args.bam)
pwm = PWM.open(args.pwm)
chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = args.flank + args.upper/2 + max(pwm.up,pwm.down))
chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = args.flank + args.upper//2 + max(pwm.up,pwm.down))
chunks.merge()
maxQueueSize = max(2,int(100 * bases / np.mean([chunk.length() for chunk in chunks])))
#get fragmentsizes
Expand All @@ -78,7 +78,7 @@ def run_diff(args, bases = 500000):
diff_process.start()
nuc_dist = np.zeros(args.upper)
for j in sets:
tmp = pool1.map(_occHelper, zip(j,itertools.repeat(params)))
tmp = pool1.map(_occHelper, list(zip(j,itertools.repeat(params))))
for result in tmp:
diff_queue.put(result[1])
pool1.close()
Expand Down
8 changes: 4 additions & 4 deletions nucleoatac/run_nfr.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def _nfrHelper(arg):
out = nfr.nfrs
nfr.removeData()
except Exception as e:
print('Caught exception when processing:\n'+ chunk.asBed()+"\n")
print(('Caught exception when processing:\n'+ chunk.asBed()+"\n"))
traceback.print_exc()
print()
raise e
Expand All @@ -45,7 +45,7 @@ def _writeNFR(pos_queue, out):
for pos in poslist:
pos.write(out_handle)
pos_queue.task_done()
except Exception, e:
except Exception as e:
print('Caught exception when writing occupancy track\n')
traceback.print_exc()
print()
Expand All @@ -59,7 +59,7 @@ def _writeIns(track_queue, out):
for track in iter(track_queue.get, 'STOP'):
track.write_track(out_handle)
track_queue.task_done()
except Exception, e:
except Exception as e:
print('Caught exception when writing insertion track\n')
traceback.print_exc()
print()
Expand Down Expand Up @@ -104,7 +104,7 @@ def run_nfr(args):
ins_process = mp.Process(target = _writeIns, args=(ins_queue, args.out))
ins_process.start()
for j in sets:
tmp = pool1.map(_nfrHelper, zip(j,itertools.repeat(params)))
tmp = pool1.map(_nfrHelper, list(zip(j,itertools.repeat(params))))
for result in tmp:
if params.ins_track is None:
nfr_queue.put(result[0])
Expand Down
Loading

0 comments on commit d12d58a

Please sign in to comment.