Skip to content
Permalink
Browse files

Merge branch 'develop'

  • Loading branch information...
bchen4
bchen4 committed Mar 24, 2015
2 parents f53c34b + d8521c5 commit 31610932f9d2d2b2ac149c9a9ebc74b362977ee7
Showing with 124 additions and 17 deletions.
  1. +1 −1 lib/CLIP.py
  2. +60 −13 lib/Enrich.py
  3. +1 −0 lib/ZTNB_tryCatch.R
  4. +5 −1 lib/pipeclip.py
  5. +55 −0 lib/runR1.sh
  6. +2 −2 pipeclip.py
@@ -104,7 +104,7 @@ def readfile(self):
self.refInfo = zip(self.originalBAM.references,self.originalBAM.lengths)
return True
except IOError,message:
logging.error("Cannot open input file"+message)
logging.error("Cannot open input file")
return False

# def printFilteredReads(self):
@@ -130,25 +130,22 @@ def KMvalue_test(clip,mutations,chr,chrlen):
try:
K = poswig[item.start]
except:
#log.warning("Did not find mutation in poswig")
continue
elif strand == "-":
try:
K = negwig[item.start]
except:
continue
if K>=M:
item.updateK(K)

pair_name = str(K)+"_"+str(M)
if clip.kmpair.has_key(pair_name):
clip.kmpair[pair_name] += 1
else:
clip.kmpair[pair_name] = 1
if K>=M:
item.updateK(K)
pair_name = str(K)+"_"+str(M)
if clip.kmpair.has_key(pair_name):
clip.kmpair[pair_name] += 1
else:
clip.kmpair[pair_name] = 1
posBAM.close()
negBAM.close()
del posBAM
del negBAM
gc.collect()

def uniq(b): #b is a list
uniqElements = []
@@ -164,6 +161,7 @@ def mutationEnrich(clip,threshold=0.01):
mutations = []
total_test = 0
for chr,chrlen in clip.refInfo:
#logging.debug(chr)
try:
mufile = open(clip.outprefix+"."+chr+".mutations.bed")
except:
@@ -175,10 +173,12 @@ def mutationEnrich(clip,threshold=0.01):
new_mu = Alignment.MutationBed(info[0],int(info[1]),int(info[2]),info[3],int(info[4]),info[5],info[6])
mutations.append(new_mu)
try:
os.remove(clip.outprefix+"."+chr+".mutations.bed")
pass
#os.remove(clip.outprefix+"."+chr+".mutations.bed")
except:
pass
KM_test = KMvalue_test(clip,mutations,chr,chrlen)#check after doing KM, if clip.mutations changed
logging.debug(len(mutations))
KMvalue_test(clip,mutations,chr,chrlen)#check after doing KM, if clip.mutations changed
try:
os.remove(clip.posfilteredBAM)
os.remove(clip.negfilteredBAM)
@@ -366,6 +366,53 @@ def clusterEnrich(clip,threshold=0.01):
else:
return True

def clusterEnrich_outsource(clip, threshold=0.01):
cluster_filename = clip.outprefix+".clusters.bed"#clip.filepath.split("/")[-1].split(".")[0]
#Call R code and get result
#epsilon = [0.01,0.15,0.1]
#step = [0.1,0.08,0.05]
sh_args = ['sh','lib/runR1.sh',cluster_filename,str(threshold)]
p = subprocess.Popen(sh_args)
stdout_value = p.communicate()[0]

#check ztnb file
try:
enrich_parameter = open(cluster_filename+".pipeclip.ztnb","r")
except IOError,message:
logging.error("Cannot open ztnb result file")
return False
nbDic = {}
for item in enrich_parameter:
buf = item.rstrip().split("\t")
if buf[0]!="#":
nb_key = "_".join(buf[0:2]) #reads_length as key
#logging.debug("NB_key %s" % nb_key)
if not nbDic.has_key(nb_key):
nbDic[nb_key]=(buf[2],buf[3])#pvalue and qvalue
#logging.info("There are %d read-length pairs" % (len(nbDic.keys())))
if len(nbDic.keys())==0:
logging.error("There are no read-length pairs found by ZTNB. Exit.")
return False
else:
for i in range(len(clip.clusters)):
r_key = str(clip.clusters[i].score)+"_"+str(clip.clusters[i].stop-clip.clusters[i].start)
#logging.debug("Keys from clip.clusters,%s" % r_key)
if nbDic.has_key(r_key):
clip.clusters[i].pvalue = nbDic[r_key][0]
clip.clusters[i].qvalue = nbDic[r_key][1]
clip.clusters[i].sig = True
clip.sigClusterCount += 1
nbDic = None
try:
os.remove(cluster_filename)
except:
pass
if clip.sigClusterCount == 0:
return False
else:
return True


def fisherTest(clusterp,mutationp):
R = robject.r
min_mp = min(mutationp)
@@ -156,6 +156,7 @@ if((biggest_likelihood==-99999999) && (khat==0) && (muhat ==0))
} else {
# model parameters
errmsg = paste("Y:Coverged for this run.",errmsg,sep="\n");
write(errmsg,paste(args[1],".Converge.txt",sep=""))
nbsize = khat * mu;
nbmu = muhat * mu;
nb.pvalue[cdx] = pnbinom(tread_fit - 1, size = nbsize, mu = nbmu, lower.tail = FALSE) / (1-dnbinom(0, size = nbsize, mu = nbmu))
@@ -12,12 +12,15 @@
import logging
from subprocess import call
import os
import gc
import CLIP
import Alignment
import Utils
import Enrich
import OptValidator

gc.enable()

def prepare_argparser():
description = "Find mutations"
epilog = "For command line options of each command, type %(prog)s COMMAND -h"
@@ -34,7 +37,7 @@ def prepare_argparser():
return(argparser)

def runPipeClip(infile,outputPrefix,matchLength,mismatch,rmdup,fdrEnrichedCluster,clipType,fdrReliableMutation,species):
myClip = CLIP.CLIP(infile)
myClip = CLIP.CLIP(infile,outputPrefix)
logging.info("Start to run")
if myClip.testInput():#check input
logging.info("Input file OK,start to run PIPE-CLIP")
@@ -44,6 +47,7 @@ def runPipeClip(infile,outputPrefix,matchLength,mismatch,rmdup,fdrEnrichedCluste
myClip.printMutations()
if len(myClip.clusters)>0:
logging.info("Get enriched clusters")
gc.collect()
status = Enrich.clusterEnrich(myClip,fdrEnrichedCluster)
if status:
logging.info("Found %d enriched clusters" % myClip.sigClusterCount)
@@ -0,0 +1,55 @@
#!/bin/sh
filename=$1
pvalue=$2
#declare -a epsilon
#declare -a steps
epsilon=(0.01 0.15 0.1)
steps=(0.1 0.08 0.05)
#sleep 500
#status=0

#check if file is generated
#while ["$status" == "0"]
#do
# if [-p "$filename"]
# then
# status = 1
# filesize = $(stat -c%s "$filename")
# else
# sleep 60
# fi
#done
#
##make sure file does not change
#filestat = 0
#while ["$filestat"=="0"]
#do
# currentsize = $(stat -c%s "$filename")
# if ["$filesize" == "$currentsize"]
# filestat = 1
# else
# sleep 60
# fi
#done

#Call R function
r_status="1"
count=1
for e in "${epsilon[@]}"
do
for s in "${steps[@]}"
do
echo "$e,$s"
if [ -s "$filename.Converge.txt" ]
then
echo
else
#echo "$e,$s"
Rscript lib/ZTNB_tryCatch.R $filename $pvalue $e $s
fi
#echo "$filename.$count"
count=$((count+1))
done
done


@@ -30,7 +30,7 @@ def prepare_argparser():

def runPipeClip(infile,control,outputPrefix,matchLength,mismatch,rmdup,fdrEnrichedCluster,clipType,fdrReliableMutation,species):
myClip = CLIP.CLIP(infile,outputPrefix)
contrlFlag = False
controlFlag = False
if control != None:
controlClip = CLIP.CLIP(control,outputPrefix+"Control")
logging.info("Start to run")
@@ -53,7 +53,7 @@ def runPipeClip(infile,control,outputPrefix,matchLength,mismatch,rmdup,fdrEnrich
#myClip.printMutations()
if myClip.clusterCount>0:
logging.info("Get enriched clusters")
status = Enrich.clusterEnrich(myClip,fdrEnrichedCluster)
status = Enrich.clusterEnrich_outsource(myClip,fdrEnrichedCluster)
if status:
logging.info("Found %d enriched clusters" % myClip.sigClusterCount)
myClip.printEnrichedClusters()

0 comments on commit 3161093

Please sign in to comment.
You can’t perform that action at this time.