Skip to content

Commit

Permalink
skip eventalign index
Browse files Browse the repository at this point in the history
  • Loading branch information
ploy-np committed Mar 1, 2021
1 parent 7321ee4 commit 740bb6d
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 14 deletions.
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
# The short X.Y version
version = ''
# The full version, including alpha/beta/rc tags
release = '0.5.3'
release = '1.0.0'


# -- General configuration ---------------------------------------------------
Expand Down
5 changes: 2 additions & 3 deletions docs/source/quickstart.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ Each dataset under the ``data`` directory contains the following directories:
* ``bamtx`` : Transcriptome-aligned sequence
* ``nanopolish``: Eventalign files obtained from `nanopolish eventalign <https://nanopolish.readthedocs.io/en/latest/quickstart_eventalign.html>`_

1. Preprocess the data for each data set using ``xpore-dataprep`` (This step will take a while..).::
1. Preprocess the data for each dataset using ``xpore-dataprep``. (This step will take approximately 5 hours for 1 million reads)::

# Within each dataset directory i.e. demo/data/HEK293T-METTL3-KO-rep1 and demo/data/HEK293T-WT-rep1, run
xpore-dataprep \
Expand All @@ -34,8 +34,7 @@ Each dataset under the ``data`` directory contains the following directories:

The output files are stored under ``dataprep`` in each dataset directory:

* ``eventalign.hdf5`` : Merged segments from ``nanopolish eventalign``, stored with the hierarchical keys ``<TRANSCRIPT_ID>/<READ_ID>/events``
* ``eventalign.log`` : Log file
* ``eventalign.index`` : Index file to access ``eventalign.txt``, the output from nanopolish eventalign
* ``data.json`` : Preprocessed data for ``xpore-diffmod``
* ``data.index`` : File index of ``data.json`` for random access per gene
* ``data.readcount`` : Summary of readcounts per gene
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
maintainer_email="naruemon.p@chula.ac.th",
name=__pkg_name__,
license="MIT",
description='xpore is a python package for Nanopore data analysis.',
version='v0.5.6',
description='xpore is a python package for Nanopore data analysis of differential RNA modifications.',
version='v1.0.0',
long_description=README,
long_description_content_type='text/markdown',
url='https://github.com/GoekeLab/xpore',
Expand Down
16 changes: 8 additions & 8 deletions xpore/scripts/dataprep.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ def get_args():
required.add_argument('--eventalign', dest='eventalign', help='eventalign filepath, the output from nanopolish.',required=True)
required.add_argument('--summary', dest='summary', help='eventalign summary filepath, the output from nanopolish.',required=True)
required.add_argument('--out_dir', dest='out_dir', help='output directory.',required=True)




# Optional
Expand All @@ -36,12 +34,13 @@ def get_args():

# Use customised db
# These arguments will be passed to Genome from pyensembl
optional.add_argument('--customised_genome', dest='customised_genome', help='customised_genome.',default=False,action='store_true')
optional.add_argument('--reference_name', dest='reference_name', help='reference_name.',type=str)
optional.add_argument('--annotation_name', dest='annotation_name', help='annotation_name.',type=str)
optional.add_argument('--gtf_path_or_url', dest='gtf_path_or_url', help='gtf_path_or_url.',type=str)
optional.add_argument('--transcript_fasta_paths_or_urls', dest='transcript_fasta_paths_or_urls', help='transcript_fasta_paths_or_urls.',type=str)
optional.add_argument('--customised_genome', dest='customised_genome', help='if customised genome provided.',default=False,action='store_true')
optional.add_argument('--reference_name', dest='reference_name', help='reference name.',type=str)
optional.add_argument('--annotation_name', dest='annotation_name', help='annotation name.',type=str)
optional.add_argument('--gtf_path_or_url', dest='gtf_path_or_url', help='gtf file path or url.',type=str)
optional.add_argument('--transcript_fasta_paths_or_urls', dest='transcript_fasta_paths_or_urls', help='transcript fasta paths or urls.',type=str)

optional.add_argument('--skip_eventalign_index', dest='skip_eventalign_index', help='skip indexing the eventalign nanopolish output.',default=False,action='store_true')

# parser.add_argument('--features', dest='features', help='Signal features to extract.',type=list,default=['norm_mean'])
optional.add_argument('--genome', dest='genome', help='to run on Genomic coordinates. Without this argument, the program will run on transcriptomic coordinates',default=False,action='store_true')
Expand Down Expand Up @@ -626,7 +625,8 @@ def main():
misc.makedirs(out_dir) #todo: check every level.

# (1) For each read, combine the multiple events that nanopolish eventalign aligned to the same position into a single event per position.
parallel_index(eventalign_filepath,summary_filepath,chunk_size,out_dir,n_processes,resume)
if not args.skip_eventalign_index:
parallel_index(eventalign_filepath,summary_filepath,chunk_size,out_dir,n_processes,resume)

# (2) Create a .json file, in which the information of all reads is stored per position, for modelling.
if genome:
Expand Down

0 comments on commit 740bb6d

Please sign in to comment.