Skip to content

Commit

Permalink
Merge pull request #47 from HumanCellAtlas/jx-ss2-group-qc
Browse files Browse the repository at this point in the history
Jx ss2 group qc
  • Loading branch information
samanehsan committed Oct 9, 2018
2 parents 63a7932 + 1d0d928 commit 13b7ebd
Show file tree
Hide file tree
Showing 21 changed files with 556 additions and 6 deletions.
4 changes: 0 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,6 @@ coverage.xml
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ pandas>=0.22.0
pytest>=3.4.2
pytest-cov>=2.5.1
scipy>=1.0.1
crimson>=0.3.0
5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
'sphinx_rtd_theme',
'setuptools_scm',
'scipy>=1.0.0',
'crimson>=0.3.0',

],
entry_points={
'console_scripts': [
Expand All @@ -45,7 +47,8 @@
'CreateCountMatrix = sctools.platform:GenericPlatform.bam_to_count_matrix',
'MergeCountMatrices = sctools.platform:GenericPlatform.merge_count_matrices',
'TagSortBam = sctools.platform:GenericPlatform.tag_sort_bam',
'VerifyBamSort = sctools.platform:GenericPlatform.verify_bam_sort'
'VerifyBamSort = sctools.platform:GenericPlatform.verify_bam_sort',
'GroupQCs = sctools.platform:GenericPlatform.group_qc_outputs'
]
},
classifiers=CLASSIFIERS,
Expand Down
1 change: 1 addition & 0 deletions src/sctools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from . import metrics
from . import platform
from . import consts
from . import groups
from pkg_resources import get_distribution, DistributionNotFound


Expand Down
184 changes: 184 additions & 0 deletions src/sctools/groups.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
"""
Group QC outputs
"""

from crimson import picard
import os
import pandas as pd


def write_aggregated_picard_metrics_by_row(file_names, output_name):
    """Parse, aggregate and write Picard row metrics to a single CSV file.

    Parameters
    ----------
    file_names : iterable of str
        Paths of Picard metrics files. The basename of each input should be
        formatted as 'samplename_qc', such as
        "samplename_qc.alignment_summary_metrics.txt" and
        "samplename_qc.insert_size_metrics.txt"; the text before '_qc' is
        used as the cell id.
    output_name : str
        Prefix of the output file name, without extension ('.csv' is appended).

    Returns
    -------
    None
        The aggregated table is written to ``output_name + '.csv'`` with one
        'Class' row naming the Picard metrics class of every column.
    """
    # identifier fields that are dropped from every metrics row
    excluded = ('SAMPLE', 'LIBRARY', 'READ_GROUP', 'CATEGORY')
    metrics = {}
    frames = []
    for file_name in file_names:
        cell_id = os.path.basename(file_name).split('_qc')[0]
        parsed = picard.parse(file_name)
        class_name = parsed['metrics']['class'].split('.')[2]
        contents = parsed['metrics']['contents']
        if class_name == "AlignmentSummaryMetrics":
            # Alignment metrics hold one row per category (PE, R1 and R2);
            # flatten them by suffixing every metric name with its category.
            rows = {}
            for m in contents:
                cat = m['CATEGORY']
                rows.update({
                    k + '.' + cat: v for k, v in m.items()
                    if k not in excluded
                })
        elif class_name == "InsertSizeMetrics" and not isinstance(contents, dict):
            # Sometimes (very rarely) insert size metrics hold several rows,
            # e.g. to include TANDEM repeats; only the first row is reported.
            rows = contents[0]
        else:
            # other metrics (so far) only hold a single row.
            rows = contents
        metrics[cell_id] = {k: rows[k] for k in rows if k not in excluded}
        df = pd.DataFrame.from_dict(metrics, orient='columns')
        df.insert(0, 'Class', class_name)
        frames.append(df)
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    d = pd.concat(frames) if frames else pd.DataFrame()
    d.T.to_csv(output_name + '.csv')


def write_aggregated_picard_metrics_by_table(file_names, output_name):
    """Parse Picard table metrics and write one CSV file per input file.

    Parameters
    ----------
    file_names : iterable of str
        Paths of Picard metrics files. The basename of each input should be
        formatted as 'samplename_qc.<metrics class>...'; the text before
        '_qc' is used as the sample name and the second dot-separated field
        of the basename as the metrics class name.
    output_name : str
        Prefix of the output file names. Each output is written to
        ``output_name + '_' + <metrics class> + '.csv'``.

    Returns
    -------
    None
    """
    for path in file_names:
        base_name = os.path.basename(path)
        sample = base_name.split('_qc')[0]
        metrics_class = base_name.split('.')[1]
        contents = picard.parse(path)['metrics']['contents']
        table = pd.DataFrame.from_dict(contents)
        # the sample name becomes the first column of the table
        table.insert(0, 'Sample', sample)
        destination = output_name + "_" + metrics_class + '.csv'
        table.to_csv(destination, index=False)


def write_aggregated_qc_metrics(file_names, output_name):
    """Merge Picard, RSEM and HISAT2 metrics tables column-wise into one CSV.

    Parameters
    ----------
    file_names : iterable of str
        Paths of CSV files whose first column is the row index, such as the
        Picard row metrics and the HISAT2 metrics tables.
    output_name : str
        Prefix of the output file name ('.csv' is appended).

    Returns
    -------
    None
        The merged table is written to ``output_name + '.csv'``. After each
        file is merged the rows are aligned to that file's index, so the
        rows of the output follow the last input file.
    """
    merged = None
    for file_name in file_names:
        dat = pd.read_csv(file_name, index_col=0)
        if merged is None:
            merged = dat
        else:
            # ``join_axes`` was removed from pd.concat in pandas 1.0;
            # reindexing the outer join reproduces join_axes=[dat.index].
            merged = pd.concat([merged, dat], axis=1).reindex(dat.index)
    if merged is None:
        # no input files: keep writing an empty table
        merged = pd.DataFrame()
    merged.to_csv(output_name + '.csv', index=True)


def parse_hisat2_log(file_names, output_name):
    """Parse, aggregate and write HISAT2 summary logs to a single CSV file.

    Parameters
    ----------
    file_names : iterable of str
        Paths of HISAT2 log files. The basename indicates the alignment
        reference: 'samplename_qc.log' is the genome reference alignment
        (class "HISAT2G") and 'samplename_rsem.log' is the transcriptome
        reference alignment (class "HISAT2T").
    output_name : str
        Prefix of the output file name ('.csv' is appended).

    Returns
    -------
    None
        The table is written to ``output_name + '.csv'`` with one 'Class'
        row followed by one row per sample.
    """
    metrics = {}
    tag = "NONE"
    for file_name in file_names:
        # Inspect the basename only, so that a directory whose name contains
        # '_qc' or '_rsem' cannot misclassify the file.
        base_name = os.path.basename(file_name)
        if '_qc' in base_name:
            cell_id = base_name.split('_qc')[0]
            tag = "HISAT2G"
        elif '_rsem' in base_name:
            cell_id = base_name.split('_rsem')[0]
            tag = "HISAT2T"
        else:
            # Unrecognised name: fall back to the basename without extension
            # instead of failing on (or silently reusing) an unset cell id.
            # The class tag is left unchanged.
            cell_id = os.path.splitext(base_name)[0]
        with open(file_name) as f:
            lines = f.readlines()
        # The first line is the section header; blank lines and lines
        # without a 'name: value' pair are ignored.
        fields = (line.strip().split(':') for line in lines[1:])
        metrics[cell_id] = {
            parts[0]: parts[1].strip().split(' ')[0]
            for parts in fields if len(parts) > 1
        }
    df = pd.DataFrame.from_dict(metrics, orient='columns')
    # a single class tag (from the last recognised file) labels the table
    df.insert(0, "Class", tag)
    df.T.to_csv(output_name + '.csv')


def parse_rsem_cnt(file_names, output_name):
    """Parse, aggregate and write RSEM cnt files to a single CSV file.

    Only the first three lines of each file are read. The four fields of the
    first line are reported as "unalignable reads", "alignable reads",
    "filtered reads" and "total reads"; the three fields of the second line
    as "unique aligned", "multiple mapped" and "uncertain reads"; the two
    fields of the third line as "total alignments" and "strand".

    Parameters
    ----------
    file_names : iterable of str
        Paths of RSEM cnt files. The basename of inputs should be
        'samplename_rsem.cnt'; the text before '_rsem' is used as the cell id.
    output_name : str
        Prefix of the output file name ('.csv' is appended).

    Returns
    -------
    None
        The table is written to ``output_name + '.csv'`` with one 'Class'
        row (always "RSEM") followed by one row per sample.

    Raises
    ------
    ValueError
        If one of the first three lines does not hold the expected number
        of fields.
    """
    metrics = {}
    for file_name in file_names:
        cell_id = os.path.basename(file_name).split('_rsem')[0]
        with open(file_name) as f:
            # split() without argument tolerates repeated spaces and tabs
            N0, N1, N2, N_tot = f.readline().split()
            n_unique, n_multi, n_uncertain = f.readline().split()
            n_hits, read_type = f.readline().split()
        metrics[cell_id] = {
            "unalignable reads": N0,
            "alignable reads": N1,
            "filtered reads": N2,
            "total reads": N_tot,
            "unique aligned": n_unique,
            "multiple mapped": n_multi,
            "total alignments": n_hits,
            "strand": read_type,
            "uncertain reads": n_uncertain
        }
    df = pd.DataFrame.from_dict(metrics, orient='columns')
    df.insert(0, "Class", "RSEM")
    df.T.to_csv(output_name + '.csv')
57 changes: 56 additions & 1 deletion src/sctools/platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from itertools import chain

import pysam
from sctools import fastq, bam, metrics, count, consts, gtf
from sctools import fastq, bam, metrics, count, consts, gtf, groups


class GenericPlatform:
Expand All @@ -48,6 +48,8 @@ class GenericPlatform:
construct a compressed sparse row count file from a tagged, aligned bam file
merge_count_matrices()
merge multiple csr-format count matrices into a single csr matrix
group_qc_outputs()
aggregate Picard, HISAT2 and RSEM QC statistics
"""

@classmethod
Expand Down Expand Up @@ -419,6 +421,59 @@ def merge_count_matrices(cls, args: Iterable[str]=None) -> int:

return 0

@classmethod
def group_qc_outputs(cls, args: Iterable[str]=None) -> int:
    """Command line entrypoint for aggregating Picard, HISAT2 and RSEM QC outputs.

    Parameters
    ----------
    args : Iterable[str], optional
        Command line arguments; when None they are read from sys.argv by
        argparse. Recognised options:

        -f / --file_names : one or more files to parse (required)
        -o / --output_name : prefix of the output file name (required)
        -t / --metrics_type : one of Picard, PicardTable, Core, HISAT2 or
            RSEM (required); selects the ``groups`` function that is run

    Returns
    -------
    int
        0 if the program completes successfully.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-f", "--file_names",
        dest="file_names",
        nargs='+',
        required=True,
        help="a list of files to be parsed out.")
    parser.add_argument(
        "-o", "--output_name",
        dest="output_name",
        required=True,
        help="The output file name")
    parser.add_argument(
        "-t", "--metrics_type",
        dest="metrics_type",
        choices=['Picard', 'PicardTable', 'Core', 'HISAT2', 'RSEM'],
        required=True,
        help="a list of string to represent metrics types,such Picard, PicardTable, HISAT2,RSEM, Core")

    # parse_args(None) makes argparse fall back to sys.argv
    options = parser.parse_args(args)

    # one aggregation routine per accepted metrics type
    handlers = {
        "Picard": groups.write_aggregated_picard_metrics_by_row,
        "PicardTable": groups.write_aggregated_picard_metrics_by_table,
        "Core": groups.write_aggregated_qc_metrics,
        "HISAT2": groups.parse_hisat2_log,
        "RSEM": groups.parse_rsem_cnt,
    }
    handler = handlers.get(options.metrics_type)
    if handler is not None:
        handler(options.file_names, options.output_name)
    return 0


class TenXV2(GenericPlatform):
"""Command Line Interface for 10x Genomics v2 RNA-sequencing programs
Expand Down
3 changes: 3 additions & 0 deletions src/sctools/test/data/group_metrics/expected_picard_group.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
,BAD_CYCLES.FIRST_OF_PAIR,BAD_CYCLES.PAIR,BAD_CYCLES.SECOND_OF_PAIR,MEAN_READ_LENGTH.FIRST_OF_PAIR,MEAN_READ_LENGTH.PAIR,MEAN_READ_LENGTH.SECOND_OF_PAIR,PCT_ADAPTER.FIRST_OF_PAIR,PCT_ADAPTER.PAIR,PCT_ADAPTER.SECOND_OF_PAIR,PCT_CHIMERAS.FIRST_OF_PAIR,PCT_CHIMERAS.PAIR,PCT_CHIMERAS.SECOND_OF_PAIR,PCT_PF_READS.FIRST_OF_PAIR,PCT_PF_READS.PAIR,PCT_PF_READS.SECOND_OF_PAIR,PCT_PF_READS_ALIGNED.FIRST_OF_PAIR,PCT_PF_READS_ALIGNED.PAIR,PCT_PF_READS_ALIGNED.SECOND_OF_PAIR,PCT_PF_READS_IMPROPER_PAIRS.FIRST_OF_PAIR,PCT_PF_READS_IMPROPER_PAIRS.PAIR,PCT_PF_READS_IMPROPER_PAIRS.SECOND_OF_PAIR,PCT_READS_ALIGNED_IN_PAIRS.FIRST_OF_PAIR,PCT_READS_ALIGNED_IN_PAIRS.PAIR,PCT_READS_ALIGNED_IN_PAIRS.SECOND_OF_PAIR,PF_ALIGNED_BASES.FIRST_OF_PAIR,PF_ALIGNED_BASES.PAIR,PF_ALIGNED_BASES.SECOND_OF_PAIR,PF_HQ_ALIGNED_BASES.FIRST_OF_PAIR,PF_HQ_ALIGNED_BASES.PAIR,PF_HQ_ALIGNED_BASES.SECOND_OF_PAIR,PF_HQ_ALIGNED_Q20_BASES.FIRST_OF_PAIR,PF_HQ_ALIGNED_Q20_BASES.PAIR,PF_HQ_ALIGNED_Q20_BASES.SECOND_OF_PAIR,PF_HQ_ALIGNED_READS.FIRST_OF_PAIR,PF_HQ_ALIGNED_READS.PAIR,PF_HQ_ALIGNED_READS.SECOND_OF_PAIR,PF_HQ_ERROR_RATE.FIRST_OF_PAIR,PF_HQ_ERROR_RATE.PAIR,PF_HQ_ERROR_RATE.SECOND_OF_PAIR,PF_HQ_MEDIAN_MISMATCHES.FIRST_OF_PAIR,PF_HQ_MEDIAN_MISMATCHES.PAIR,PF_HQ_MEDIAN_MISMATCHES.SECOND_OF_PAIR,PF_INDEL_RATE.FIRST_OF_PAIR,PF_INDEL_RATE.PAIR,PF_INDEL_RATE.SECOND_OF_PAIR,PF_MISMATCH_RATE.FIRST_OF_PAIR,PF_MISMATCH_RATE.PAIR,PF_MISMATCH_RATE.SECOND_OF_PAIR,PF_NOISE_READS.FIRST_OF_PAIR,PF_NOISE_READS.PAIR,PF_NOISE_READS.SECOND_OF_PAIR,PF_READS.FIRST_OF_PAIR,PF_READS.PAIR,PF_READS.SECOND_OF_PAIR,PF_READS_ALIGNED.FIRST_OF_PAIR,PF_READS_ALIGNED.PAIR,PF_READS_ALIGNED.SECOND_OF_PAIR,PF_READS_IMPROPER_PAIRS.FIRST_OF_PAIR,PF_READS_IMPROPER_PAIRS.PAIR,PF_READS_IMPROPER_PAIRS.SECOND_OF_PAIR,READS_ALIGNED_IN_PAIRS.FIRST_OF_PAIR,READS_ALIGNED_IN_PAIRS.PAIR,READS_ALIGNED_IN_PAIRS.SECOND_OF_PAIR,STRAND_BALANCE.FIRST_OF_PAIR,STRAND_BALANCE.PAIR,STRAND_BALANCE.SECOND_OF_PAIR,TOTAL_READS.FIRST_OF_PAIR,TOTAL_READS.PAIR,TOTAL_READS.SE
COND_OF_PAIR,MAX_INSERT_SIZE,MEAN_INSERT_SIZE,MEDIAN_ABSOLUTE_DEVIATION,MEDIAN_INSERT_SIZE,MIN_INSERT_SIZE,PAIR_ORIENTATION,READ_PAIRS,STANDARD_DEVIATION,WIDTH_OF_10_PERCENT,WIDTH_OF_20_PERCENT,WIDTH_OF_30_PERCENT,WIDTH_OF_40_PERCENT,WIDTH_OF_50_PERCENT,WIDTH_OF_60_PERCENT,WIDTH_OF_70_PERCENT,WIDTH_OF_80_PERCENT,WIDTH_OF_90_PERCENT,WIDTH_OF_99_PERCENT,ESTIMATED_LIBRARY_SIZE,PERCENT_DUPLICATION,READ_PAIRS_EXAMINED,READ_PAIR_DUPLICATES,READ_PAIR_OPTICAL_DUPLICATES,SECONDARY_OR_SUPPLEMENTARY_RDS,UNMAPPED_READS,UNPAIRED_READS_EXAMINED,UNPAIRED_READ_DUPLICATES,CODING_BASES,CORRECT_STRAND_READS,IGNORED_READS,INCORRECT_STRAND_READS,INTERGENIC_BASES,INTRONIC_BASES,MEDIAN_3PRIME_BIAS,MEDIAN_5PRIME_BIAS,MEDIAN_5PRIME_TO_3PRIME_BIAS,MEDIAN_CV_COVERAGE,NUM_R1_TRANSCRIPT_STRAND_READS,NUM_R2_TRANSCRIPT_STRAND_READS,NUM_UNEXPLAINED_READS,PCT_CODING_BASES,PCT_CORRECT_STRAND_READS,PCT_INTERGENIC_BASES,PCT_INTRONIC_BASES,PCT_MRNA_BASES,PCT_R1_TRANSCRIPT_STRAND_READS,PCT_R2_TRANSCRIPT_STRAND_READS,PCT_RIBOSOMAL_BASES,PCT_USABLE_BASES,PCT_UTR_BASES,PF_ALIGNED_BASES,PF_BASES,RIBOSOMAL_BASES,UTR_BASES,ACCUMULATION_LEVEL,ALIGNED_READS,AT_DROPOUT,GC_DROPOUT,GC_NC_0_19,GC_NC_20_39,GC_NC_40_59,GC_NC_60_79,GC_NC_80_100,READS_USED,TOTAL_CLUSTERS,WINDOW_SIZE
Class,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,AlignmentSummaryMetrics,InsertSizeMetrics,InsertSizeMetrics,InsertSizeMetrics,InsertSizeMetrics,InsertSizeMetrics,InsertSizeMetrics,InsertSizeMetrics,InsertSizeMetrics,InsertSizeMetrics,InsertSizeMetrics,InsertSizeMetrics,InsertSizeMetrics,InsertSizeMetrics,InsertSizeMetrics,InsertSizeMetrics,InsertSizeMetrics,InsertSizeMetrics,InsertSizeMetrics,DuplicationMet
rics,DuplicationMetrics,DuplicationMetrics,DuplicationMetrics,DuplicationMetrics,DuplicationMetrics,DuplicationMetrics,DuplicationMetrics,DuplicationMetrics,RnaSeqMetrics,RnaSeqMetrics,RnaSeqMetrics,RnaSeqMetrics,RnaSeqMetrics,RnaSeqMetrics,RnaSeqMetrics,RnaSeqMetrics,RnaSeqMetrics,RnaSeqMetrics,RnaSeqMetrics,RnaSeqMetrics,RnaSeqMetrics,RnaSeqMetrics,RnaSeqMetrics,RnaSeqMetrics,RnaSeqMetrics,RnaSeqMetrics,RnaSeqMetrics,RnaSeqMetrics,RnaSeqMetrics,RnaSeqMetrics,RnaSeqMetrics,RnaSeqMetrics,RnaSeqMetrics,RnaSeqMetrics,RnaSeqMetrics,GcBiasSummaryMetrics,GcBiasSummaryMetrics,GcBiasSummaryMetrics,GcBiasSummaryMetrics,GcBiasSummaryMetrics,GcBiasSummaryMetrics,GcBiasSummaryMetrics,GcBiasSummaryMetrics,GcBiasSummaryMetrics,GcBiasSummaryMetrics,GcBiasSummaryMetrics,GcBiasSummaryMetrics
test,0.0,0.0,0.0,25.0,25.0,25.0,0.0,0.0,0.0,0.006141,0.006153,0.006165,1.0,1.0,1.0,0.959299,0.956379,0.953459,0.036149,0.033206,0.030245,0.966514,0.969466,0.972435,131124.0,261405.0,130281.0,116063.0,231550.0,115487.0,115095.0,229110.0,114015.0,4650.0,9279.0,4629.0,0.000922,0.000885,0.000849,0.0,0.0,0.0,6.9e-05,5.4e-05,3.8e-05,0.0009,0.000876,0.000852,0.0,0.0,0.0,5479.0,10958.0,5479.0,5256.0,10480.0,5224.0,190.0,348.0,158.0,5080.0,10160.0,5080.0,0.494292,0.501527,0.508806,5479.0,10958.0,5479.0,2725787,207.219528,63,191,33,FR,5067,106.256303,25,49,73,99,127,157,195,267,641,87835,612743.0,0.007156,5080.0,21.0,0.0,4393.0,478.0,320.0,33.0,56934.0,0.0,0.0,0.0,65569.0,101238.0,0.705663,0.680576,0.496023,0.939679,719.0,795.0,60.0,0.2178,0.0,0.250833,0.387284,0.361883,0.474901,0.525099,0.0,0.345311,0.144083,261405.0,273950.0,0.0,37664.0,All Reads,14873,10.733266,1.82225,0.112713,0.817807,1.086361,2.181453,0.143318,ALL,7701,100
3 changes: 3 additions & 0 deletions src/sctools/test/data/group_metrics/test_hisat2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
,Aligned 0 time,Aligned 1 time,Aligned >1 times,Aligned concordantly 1 time,Aligned concordantly >1 times,Aligned concordantly or discordantly 0 time,Aligned discordantly 1 time,Overall alignment rate,Total pairs,Total unpaired reads
Class,HISAT2G,HISAT2G,HISAT2G,HISAT2G,HISAT2G,HISAT2G,HISAT2G,HISAT2G,HISAT2G,HISAT2G
test,478,240,106,4414,652,412,1,95.64%,5479,824
11 changes: 11 additions & 0 deletions src/sctools/test/data/group_metrics/test_hisat2_paired_end_qc.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
HISAT2 summary stats:
Total pairs: 5479
Aligned concordantly or discordantly 0 time: 412 (7.52%)
Aligned concordantly 1 time: 4414 (80.56%)
Aligned concordantly >1 times: 652 (11.90%)
Aligned discordantly 1 time: 1 (0.02%)
Total unpaired reads: 824
Aligned 0 time: 478 (58.01%)
Aligned 1 time: 240 (29.13%)
Aligned >1 times: 106 (12.86%)
Overall alignment rate: 95.64%
3 changes: 3 additions & 0 deletions src/sctools/test/data/group_metrics/test_hisat2_trans.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
,Aligned 0 time,Aligned 1 time,Aligned >1 times,Aligned concordantly 1 time,Aligned concordantly >1 times,Aligned concordantly or discordantly 0 time,Aligned discordantly 1 time,Overall alignment rate,Total pairs,Total unpaired reads
Class,HISAT2T,HISAT2T,HISAT2T,HISAT2T,HISAT2T,HISAT2T,HISAT2T,HISAT2T,HISAT2T,HISAT2T
test,7270,0,0,360,1484,3635,0,33.66%,5479,7270
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
HISAT2 summary stats:
Total pairs: 5479
Aligned concordantly or discordantly 0 time: 3635 (66.34%)
Aligned concordantly 1 time: 360 (6.57%)
Aligned concordantly >1 times: 1484 (27.09%)
Aligned discordantly 1 time: 0 (0.00%)
Total unpaired reads: 7270
Aligned 0 time: 7270 (100.00%)
Aligned 1 time: 0 (0.00%)
Aligned >1 times: 0 (0.00%)
Overall alignment rate: 33.66%

0 comments on commit 13b7ebd

Please sign in to comment.