Skip to content

Commit

Permalink
Modified hisat2 perl script to handle path names including space, and…
Browse files Browse the repository at this point in the history
… further elaborated HISAT2's alignment summary
  • Loading branch information
infphilo committed Jun 1, 2017
1 parent ee8f209 commit da647a7
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 86 deletions.
8 changes: 4 additions & 4 deletions aln_sink.h
Expand Up @@ -1503,15 +1503,15 @@ void AlnSink<index_t>::printAlSumm(
uint64_t tot_al = (met.nconcord_uni + met.nconcord_rep) * 2 + (met.ndiscord) * 2 + met.nunp_0_uni + met.nunp_0_rep + met.nunp_uni + met.nunp_rep;
assert_leq(tot_al, tot_al_cand);
if(newSummary) {
out << "Summary stats:" << endl;
out << "HISAT2 summary stats:" << endl;
if(totpair > 0) {
uint64_t ncondiscord_0 = met.nconcord_0 - met.ndiscord;
out << "\tTotal pairs: " << totpair << endl;
out << "\t\tAligned concordantly 0 time: " << met.nconcord_0 << " ("; printPct(out, met.nconcord_0, met.npaired); out << ")" << endl;
out << "\t\tAligned concordantly or discordantly 0 time: " << ncondiscord_0 << " ("; printPct(out, ncondiscord_0, met.npaired); out << ")" << endl;
out << "\t\tAligned concordantly 1 time: " << met.nconcord_uni1 << " ("; printPct(out, met.nconcord_uni1, met.npaired); out << ")" << endl;
out << "\t\tAligned concordantly >1 times: " << met.nconcord_uni2 << " ("; printPct(out, met.nconcord_uni2, met.npaired); out << ")" << endl;
out << "\t\tAligned discordantly 1 time: " << met.ndiscord << " ("; printPct(out, met.ndiscord, met.nconcord_0); out << ")" << endl;
out << "\t\tAligned discordantly 1 time: " << met.ndiscord << " ("; printPct(out, met.ndiscord, met.npaired); out << ")" << endl;

uint64_t ncondiscord_0 = met.nconcord_0 - met.ndiscord;
out << "\tTotal unpaired reads: " << ncondiscord_0 * 2 << endl;
out << "\t\tAligned 0 time: " << met.nunp_0_0 << " ("; printPct(out, met.nunp_0_0, ncondiscord_0 * 2); out << ")" << endl;
out << "\t\tAligned 1 time: " << met.nunp_0_uni1 << " ("; printPct(out, met.nunp_0_uni1, ncondiscord_0 * 2); out << ")" << endl;
Expand Down
4 changes: 2 additions & 2 deletions diff_sample.h
Expand Up @@ -172,8 +172,8 @@ void calcExhaustiveDC(T i, bool verbose = false, bool sanityCheck = false) {
assert_lt(d2, v);
assert_gt(d1, 0);
assert_gt(d2, 0);
if(!diffs[d1]) diffCnt++; diffs[d1] = true;
if(!diffs[d2]) diffCnt++; diffs[d2] = true;
if(!diffs[d1]) { diffCnt++; diffs[d1] = true; }
if(!diffs[d2]) { diffCnt++; diffs[d2] = true; }
}
}
// Do we observe all possible differences (except 0)
Expand Down
3 changes: 2 additions & 1 deletion doc/sidebar.inc.shtml
Expand Up @@ -371,7 +371,8 @@

<h2>Related Tools</h2>
<div class="box">
<ul>
<ul>
<li><a href="http://www.ccb.jhu.edu/hisat-genotype">HISAT-genotype</a>: Next-generation analysis of human genomes</li>
<li><a href="http://www.ccb.jhu.edu/software/hisat">HISAT</a>: Fast and sensitive spliced alignment</li>
<li><a href="http://bowtie-bio.sourceforge.net/bowtie2">Bowtie2</a>: Ultrafast read alignment</li>
<li><a href="http://www.ccb.jhu.edu/software/tophat">TopHat2</a>: Spliced read mapper for RNA-Seq</li>
Expand Down
38 changes: 21 additions & 17 deletions evaluation/simulation/calculate_read_cost.py
Expand Up @@ -312,7 +312,7 @@ def is_small_exon_junction_read(cigars, min_exon_len = 23):

"""
"""
def extract_single(infilename, outfilename, chr_dic, aligner, debug_dic):
def extract_single(infilename, outfilename, chr_dic, aligner, version, debug_dic):
infile = open(infilename)
outfile = open(outfilename, "w")
prev_read_id = ""
Expand All @@ -338,8 +338,7 @@ def extract_single(infilename, outfilename, chr_dic, aligner, debug_dic):
if read_id != prev_read_id:
num_reads += 1

flag = int(flag)
pos = int(pos)
flag, pos, mapQ = int(flag), int(pos), int(mapQ)
if flag & 0x4 != 0:
prev_read_id = read_id
continue
Expand All @@ -360,7 +359,9 @@ def extract_single(infilename, outfilename, chr_dic, aligner, debug_dic):
if aligner == "hisat2":
if prev_read_id == read_id:
assert prev_NH == NH

if NH == 1 or mapQ == 60:
assert NH == 1 and mapQ == 60

if read_id != prev_read_id:
num_aligned_reads += 1
if aligner == "hisat2" and \
Expand Down Expand Up @@ -435,7 +436,7 @@ def extract_single(infilename, outfilename, chr_dic, aligner, debug_dic):
hisat2_reads, hisat2_0aligned_reads, hisat2_ualigned_reads, hisat2_maligned_reads = 0, 0, 0, 0
for line in open(infilename + ".summary"):
line = line.strip()
if line.startswith("Summary") or \
if line.startswith("HISAT2 summary") or \
line.startswith("Overall"):
continue
category, num = line.split(':')
Expand All @@ -459,7 +460,7 @@ def extract_single(infilename, outfilename, chr_dic, aligner, debug_dic):

"""
"""
def extract_pair(infilename, outfilename, chr_dic, aligner, debug_dic):
def extract_pair(infilename, outfilename, chr_dic, aligner, version, debug_dic):
read_dic = {}
pair_reported = set()

Expand Down Expand Up @@ -497,6 +498,7 @@ def extract_pair(infilename, outfilename, chr_dic, aligner, debug_dic):
canonical_pos1, canonical_pos2 = int(pos1), int(pos2)
left_read = (flag & 0x40 != 0)
pos1 = canonical_pos1
mapQ = int(mapQ)
if flag & 0x4 != 0:
prev_read_id, is_prev_read_left = read_id, left_read
continue
Expand All @@ -522,20 +524,23 @@ def extract_pair(infilename, outfilename, chr_dic, aligner, debug_dic):
assert prev_NH1 == 0 or prev_NH1 == NH
else:
assert prev_NH2 == 0 or prev_NH2 == NH
if NH == 1 or mapQ == 60:
assert NH == 1 and mapQ == 60

unpaired = (flag & 0x8 != 0) or (YT in ["UU", "UP"])
if unpaired:
if left_read not in pair_list:
pair_list.add(left_read)
num_aligned_reads += 1
if aligner == "hisat2" and NH == 1:
num_ualigned_reads += 1
num_ualigned_reads += 1
assert mapQ == 60
else:
if read_id != prev_read_id:
if concordant:
num_conc_aligned_pairs += 1
if aligner == "hisat2" and NH == 1:
num_conc_ualigned_pairs += 1
num_conc_ualigned_pairs += 1
else:
if aligner == "hisat2":
assert YT == "DP"
Expand Down Expand Up @@ -646,27 +651,26 @@ def extract_pair(infilename, outfilename, chr_dic, aligner, debug_dic):

for line in open(infilename + ".summary"):
line = line.strip()
if line.startswith("Summary") or \
if line.startswith("HISAT2 summary") or \
line.startswith("Overall"):
continue
category, num = line.split(':')
num = num.strip()
num = int(num.split(' ')[0])
if category.startswith("Total pairs"):
hisat2_pairs = num
elif category.startswith("Aligned concordantly 0 time"):
hisat2_conc_0aligned_pairs = num
elif category.startswith("Aligned concordantly or discordantly 0 time"):
hisat2_0aligned_pairs = num
elif category.startswith("Aligned concordantly 1 time"):
hisat2_conc_ualigned_pairs = num
elif category.startswith("Aligned concordantly >1 time"):
hisat2_conc_maligned_pairs = num
assert hisat2_pairs == hisat2_conc_0aligned_pairs + hisat2_conc_ualigned_pairs + hisat2_conc_maligned_pairs
elif category.startswith("Aligned discordantly"):
hisat2_disc_aligned_pairs = num
assert hisat2_disc_aligned_pairs <= hisat2_conc_0aligned_pairs
assert hisat2_pairs == hisat2_0aligned_pairs + hisat2_conc_ualigned_pairs + hisat2_conc_maligned_pairs + hisat2_disc_aligned_pairs
elif category.startswith("Total unpaired reads"):
hisat2_reads = num
assert hisat2_reads == (hisat2_conc_0aligned_pairs - hisat2_disc_aligned_pairs) * 2
assert hisat2_reads == hisat2_0aligned_pairs * 2
elif category.startswith("Aligned 0 time"):
hisat2_0aligned_reads = num
elif category.startswith("Aligned 1 time"):
Expand Down Expand Up @@ -1334,7 +1338,7 @@ def calculate_read_cost(verbose):

aligners = [
# ["hisat", "", "", ""],
# ["hisat2", "", "", "205"],
["hisat2", "", "", "205"],
# ["hisat2", "", "snp", "203b"],
# ["hisat2", "", "snp", "205"],
["hisat2", "", "", ""],
Expand Down Expand Up @@ -1811,9 +1815,9 @@ def get_aligner_cmd(RNA, aligner, type, index_type, version, read1_fname, read2_
if not os.path.exists(out_fname2):
debug_dic = {}
if paired:
extract_pair(out_fname, out_fname2, chr_dic, aligner, debug_dic)
extract_pair(out_fname, out_fname2, chr_dic, aligner, version, debug_dic)
else:
extract_single(out_fname, out_fname2, chr_dic, aligner, debug_dic)
extract_single(out_fname, out_fname2, chr_dic, aligner, version, debug_dic)

for readtype2 in readtypes:
if not two_step and readtype != readtype2:
Expand Down

0 comments on commit da647a7

Please sign in to comment.