Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/McGill-CSB/RNApyro
Browse files Browse the repository at this point in the history
  • Loading branch information
Vladimir REINHARZ committed May 27, 2013
2 parents 578a798 + 3e52221 commit 6a5c9bc
Show file tree
Hide file tree
Showing 42 changed files with 329 additions and 67 deletions.
Binary file added scripts/Figs_Bench/10fold-a.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/10fold-b.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/118id_5fold_a.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/118id_5fold_b.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/121id_10fold_b.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/130id_5fold_a.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/130id_5fold_b.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/148id_15fold_b.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/15fold-a.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/15fold-b.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/170id_5fold_a.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/170id_5fold_b.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/177id_10fold_a.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/198id_5fold_a.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/198id_5fold_b.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/200id_15fold_b.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/209id_10fold_a.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/209id_10fold_b.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/20id_15fold_b.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/26id_5fold_a.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/26id_5fold_b.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/270id_5fold_a.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/270id_5fold_b.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/298id_5fold_a.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/298id_5fold_b.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/316id_15fold_b.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/332id_15fold_b.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/360id_15fold_a.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/360id_15fold_b.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/361id_10fold_b.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/368id_15fold_a.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/368id_15fold_b.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/5fold-a.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/5fold-b.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/76id_15fold_b.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/77id_10fold_b.pdf
Binary file not shown.
Binary file added scripts/Figs_Bench/9id_10fold_b.pdf
Binary file not shown.
100 changes: 66 additions & 34 deletions scripts/createROC.py
Expand Up @@ -21,27 +21,30 @@ def loadMutationalProfiles(path):
fileHandler.close()
return seq,struct,nbMut,profiles

BASES = ["A","C","G","U"]
BASES = ["a","c","g","u"]

def classifyProfile(profile,seq,struct,mode):
def decide(classes,b,mutchar,seqchar,prob):
if b!=mutchar:
if b!=seqchar:
classes.append((0,prob))
else:
classes.append((1,prob))

def classifyProfile(profile,seq,struct,mode,mutseq):
result = []
mutseq = mutseq.lower()
seq = seq.lower()
for i in range(len(profile)):
if mode == "a":
for j in range(len(profile[i])):
prob = profile[i][j]
b = BASES[j]
if b.lower() == seq[i].lower():
result.append((1,prob))
else:
result.append((0,prob))
b = BASES[j].lower()
decide(result,b,mutseq[i],seq[i],prob)
elif mode == "b" and struct[i]!=".":
for j in range(len(profile[i])):
prob = profile[i][j]
b = BASES[j]
if b.lower() == seq[i].lower():
result.append((1,prob))
else:
result.append((0,prob))
decide(result,b,mutseq[i],seq[i],prob)
elif mode == "c" and struct[i]!=".":
bestB,probB = "",0.
for j in range(len(profile[i])):
Expand All @@ -50,21 +53,17 @@ def classifyProfile(profile,seq,struct,mode):
if prob > probB:
bestB,probB = b,prob
for j in range(len(profile[i])):
prob = profile[i][j]
b = BASES[j]
if (b.lower() == seq[i].lower()):
cl=1
else:
cl=0
if b.lower() == bestB.lower():
result.append((cl,prob))
decide(result,b,mutseq[i],seq[i],prob)
else:
result.append((cl,0.0))
decide(result,b,mutseq[i],seq[i],0.)
#print result
return result


def evaluatePrediction(profile,seq,nbMut):
ok = 0.
seq = seq.lower()
for i in range(len(profile)):
bestB,probB = "",0.
for j in range(len(profile[i])):
Expand All @@ -74,50 +73,83 @@ def evaluatePrediction(profile,seq,nbMut):
bestB,probB = b,prob
if bestB==seq[i]:
ok += 1
return ok-(len(seq)-nbMut)
return nbMut-(len(seq)-ok)

def getMutatedPositions(seq,mutseq):
result = set()
if len(seq)!= len(mutseq):
raise Exception("Mutated and original sequence lengths do not match!")
seq = seq.lower()
mutseq = mutseq.lower()
for i in range(len(seq)):
if (seq[i]!=mutseq[i]):
result.add(i)
return result

def computePlotROCs(path,mode):
seq,struc,nbMut,profiles = loadMutationalProfiles(path)
def computePlotROCs(paths,mode,mutpaths):
allalphas = set()
exps = []
pylab.rc('text', usetex=True)
for i in range(len(paths)):
path = paths[i]
mutpath = mutpaths[i]
mutseq = open(mutpath,"r").readlines()[0]
seq,struc,nbMut,profiles = loadMutationalProfiles(path)
mutpos = getMutatedPositions(seq,mutseq)
exps.append((path,mutseq,seq,struc,nbMut,profiles,mutpos))
allalphas.update(profiles.keys())
rocs = []
lbls = []
pylab.rc('text', usetex=True)
alphas = profiles.keys()
alphas = list(allalphas)
alphas.sort()
for alpha in alphas:
c = classifyProfile(profiles[alpha],seq,struc,mode)
delta = evaluatePrediction(profiles[alpha],seq,nbMut)
c = []
delta = 0
for (path,mutseq,seq,struc,nbMut,profiles,mutpos) in exps:
c += classifyProfile(profiles[alpha],seq,struc,mode,mutseq)
delta += evaluatePrediction(profiles[alpha],seq,len(mutpos))
roc = ROCData(c)
rocs.append(roc)
lbls.append("$\\alpha=%.1f,$ {\\sf AUC} $=%.1f\\%%$"%(alpha,(100.*roc.auc(0))))
print "File:%s, Mode:%s, Alpha:%s, ROC AUC:%.2f, Delta:%s" % (path.split(os.sep)[-1],mode,alpha,(100.*roc.auc(0)),delta)
plt = plot_multiple_roc(rocs,"", lbls)
if len(paths)==1:
cap = paths[0].split(os.sep)[-1]
else:
cap = "Multiple"
print "%s\t%s\t%s\t%.2f\t%s\t%s\t%s" % (cap,mode,alpha,(100.*roc.auc(0)),delta,nbMut,len(mutpos))
plt = plot_multiple_roc(rocs,"", lbls)
return plt


def listConversionCallback(option, opt, value, parser):
setattr(parser.values, option.dest, value.split(','))

if __name__ == '__main__':
print "createROC - ROC Curve Generator from RNAPyro profiles"
#print "createROC - ROC Curve Generator from RNAPyro profiles"


from optparse import OptionParser
parser = OptionParser()
parser.add_option('-f', '--file', dest='origFile', help="Path to a file with the mutational profile.")
parser.add_option('-d', '--dir', dest='origDir', help="Path to a directory with mutational profiles (full generation mode).")
parser.add_option("-o",'--output', dest= 'outpath' , default='out.pdf' , help = 'Destination (must be PDF).')
parser.add_option("-m",'--mode', dest= 'mode' , default='a' , help = 'Evaluation mode: "a" (default) - Conservative prediction, predicts multiple mutations per position, anywhere in the sequence; "b" - Multiple mutations per positions, only in helices; "c" - Single mutations, only in helices')

parser.add_option("-f", "--file", dest="origFile",type='string',action='callback',callback=listConversionCallback,help="Path to a (set of) file(s) with the mutational profile.",default=[])
parser.add_option("-k",'--muts', dest= 'mutpath' ,type='string',action='callback',callback=listConversionCallback, help = 'Path to a (set of) mutated sequence(s).')

(options,args) = parser.parse_args()
if (not options.origFile) and (not options.origDir) :
if ((not options.origFile) and (not options.origDir)) or (not options.mutpath) :
parser.print_help()
exit()

#print options.origFile, options.mutpath
if options.origFile:
plt = computePlotROCs(options.origFile,options.mode)
plt = computePlotROCs(options.origFile,options.mode,options.mutpath)
plt.savefig(options.outpath, format='pdf',bbox_inches='tight')
elif options.origDir:
for f in os.listdir(options.origDir):
if (f.endswith(".txt")):
basename = f[:-4]
for m in ["a","b","c"]:
fname = options.origDir+os.sep+basename+"-"+m+".pdf"
plt = computePlotROCs(options.origDir+os.sep+f,m)
plt = computePlotROCs(options.origDir+os.sep+f,m,options.mutpath)
plt.savefig(fname, format='pdf',bbox_inches='tight')

76 changes: 46 additions & 30 deletions scripts/jcb_graphs.py
Expand Up @@ -3,40 +3,49 @@
import cPickle
from subprocess import call

def ind(m='a'):
def ind(modes):
print "File\tMode\tAlpha\tAUC\tDelta\t#MutEst\t#MutReal"
with open(os.path.join('..','16S','key_to_Bench.txt')) as f:
key = eval(f.readline())
d_key = {x[0]:x[1:] for x in key}
with open(os.path.join('..','16S','d_clusters_trim.cPickle')) as f:
with open(os.path.join('..','16S','d_clusters_trim.cPickle'),'rb') as f:
data = cPickle.load(f)


mutsfiles = {}
for f_name in os.listdir(os.path.join('..','16S','Mut')):
pos = f_name.find("id")
idnum = int(f_name[:pos])
mutsfiles[idnum] = os.path.join('..','16S','Mut',f_name)
for f_name in os.listdir(os.path.join('..','16S','Bench_iso')):
sys.stderr.write("%s\n"%f_name)
if f_name.endswith('mut'):
continue
id = int(f_name.split('.')[0])
seq = d_key[id][0]
fold = d_key[id][-1]
mut = data[seq]['mut'][fold]
mut_file = mutsfiles[id]
try:
os.mkdir('Figs_Bench')
except OSError:
pass
cmd = ['python',
'createROC.py',
'-f', '%s' % os.path.join('..','16S','Bench_iso',f_name),
'-o', '%s' % os.path.join('Figs_Bench','%sid_%sfold_%s.pdf' % (id, fold,m)),
'-m', '%s' % m]
print cmd
call(cmd)

def batch(m='a'):
for m in modes:
cmd = ['python',
'createROC.py',
'-f', '%s' % os.path.join('..','16S','Bench_iso',f_name),
'-o', '%s' % os.path.join('Figs_Bench','%sid_%sfold_%s.pdf' % (id, fold,m)),
'-k', '%s' % mut_file,
'-m', '%s' % m]
#print cmd
call(cmd)

def batch(modes):
print "File\tMode\tAlpha\tAUC\tDelta\t#MutEst\t#MutReal"
with open(os.path.join('..','16S','key_to_Bench.txt')) as f:
key = eval(f.readline())
d_key = {x[0]:x[1:] for x in key}
with open(os.path.join('..','16S','d_clusters_trim.cPickle')) as f:
with open(os.path.join('..','16S','d_clusters_trim.cPickle'),'rb') as f:
data = cPickle.load(f)

l_f_name = os.listdir(os.path.join('..','16S','Bench_iso'))
l_id = set([x for x in l_f_name if not x.endswith('mut')])
d_fold = {}
Expand All @@ -48,32 +57,39 @@ def batch(m='a'):
d_fold[fold].append(id)

for fold in d_fold:
to_do = d_fold[fold]
for m in modes:
to_do = d_fold[fold]

cmd = ['python',
'createROC.py',
'-f']
for x in to_do:
cmd.append(os.path.join('..','16S','Bench_iso',x))
cmd = ['python',
'createROC.py',
'-f']
flist = []
for x in to_do:
flist.append(os.path.join('..','16S','Bench_iso',x))
cmd.extend([",".join(flist)])

cmd.extend(['-o', '%s' % os.path.join('Figs_Bench','%sfold.pdf' % (fold)),
'-m', '%s' % m])
print cmd
continue
call(cmd)
cmd.extend(["-k"])
flist = []
for x in to_do:
id = int(x[:x.find(".txt")])
flist.append(os.path.join('..','16S','Mut',"%sid_%sfold.mut"%(id,fold)))
cmd.extend([",".join(flist)])
cmd.extend(['-o', '%s' % os.path.join('Figs_Bench','%sfold-%s.pdf' % (fold,m)),
'-m', '%s' % m])
call(cmd)


if __name__ == '__main__':
m = 'a'
modes = ['a','b']
batch_flag = False
for i, opt in enumerate(sys.argv):
if opt == '-m':
m = sys.argv[i + 1]
modes = [sys.argv[i + 1]]
elif opt == '-b':
batch_flag = True


if batch_flag:
batch(m)
batch(modes)
else:
ind(m)
ind(modes)
6 changes: 3 additions & 3 deletions scripts/pyroc.py
Expand Up @@ -98,14 +98,14 @@ def plot_multiple_roc(rocList,title='',labels=None, include_baseline=False, equa
if equal_aspect:
cax = pylab.gca()
cax.set_aspect('equal')
pylab.xlabel("1 - Specificity")
pylab.ylabel("Sensitivity")
pylab.xlabel("1 - Specificity", fontsize=16)
pylab.ylabel("Sensitivity", fontsize=16)
pylab.title(title)
if not labels:
labels = [ '' for x in rocList]
_remove_duplicate_styles(rocList)
for ix, r in enumerate(rocList):
pylab.plot([x[0] for x in r.derived_points], [y[1] for y in r.derived_points], r.linestyle, linewidth=1.5, label=labels[ix])
pylab.plot([x[0] for x in r.derived_points], [y[1] for y in r.derived_points], r.linestyle, marker=None,linewidth=1.5, label=labels[ix])
if include_baseline:
pylab.plot([0.0,1.0], [0.0, 1.0], 'k-', label= 'random')
if labels:
Expand Down
25 changes: 25 additions & 0 deletions scripts/summary16sBatch.dat
@@ -0,0 +1,25 @@
File Mode Alpha AUC Delta #MutEst #MutReal Fold
Multiple a 0.0 78.54 -3.0 27 5 5
Multiple a 0.5 68.43 -204.0 27 5
Multiple a 0.8 67.64 -195.0 27 5
Multiple a 1.0 67.32 -206.0 27 5
Multiple b 0.0 99.76 -3.0 27 5
Multiple b 0.5 93.06 -204.0 27 5
Multiple b 0.8 92.79 -195.0 27 5
Multiple b 1.0 92.51 -206.0 27 5
Multiple a 0.0 83.51 23.0 73 67 15
Multiple a 0.5 81.15 -127.0 73 67
Multiple a 0.8 79.53 -137.0 73 67
Multiple a 1.0 77.55 -172.0 73 67
Multiple b 0.0 97.78 23.0 73 67
Multiple b 0.5 95.22 -127.0 73 67
Multiple b 0.8 93.15 -137.0 73 67
Multiple b 1.0 91.14 -172.0 73 67
Multiple a 0.0 77.40 7.0 18 7 10
Multiple a 0.5 78.33 -103.0 18 7
Multiple a 0.8 76.70 -115.0 18 7
Multiple a 1.0 75.46 -137.0 18 7
Multiple b 0.0 93.72 7.0 18 7
Multiple b 0.5 93.26 -103.0 18 7
Multiple b 0.8 92.55 -115.0 18 7
Multiple b 1.0 93.04 -137.0 18 7

0 comments on commit 6a5c9bc

Please sign in to comment.