-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #58 from yuukiiwa/master
replace if statement with KeyError in g2t mapping, add post-processing option, and add xpore wrapper
- Loading branch information
Showing
7 changed files
with
120 additions
and
76 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
__version__ = "1.0" | ||
__version__ = "1.1" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
import os | ||
|
||
def run_postprocessing(diffmod_table_path, out_dir):
    """Keep only the rows that agree with each k-mer's majority direction.

    Reads the comma-separated table at ``diffmod_table_path``, counts for every
    value of its ``kmer`` column how often each ``mod_assignment`` value
    occurs, and writes the header followed by every row whose
    ``mod_assignment`` is the most frequent one for its k-mer to
    ``<out_dir>/majority_direction_kmer_diffmod.table``.

    Ties are resolved in favour of ``'lower'`` when it is among the tied
    labels, otherwise in favour of the label seen first.

    Args:
        diffmod_table_path: path of the input table; its first line must be a
            header containing the columns ``kmer`` and ``mod_assignment``.
        out_dir: existing directory that receives the filtered table.

    Raises:
        ValueError: if the header lacks one of the two required columns. The
            output file is not created in that case.
        IndexError: if a non-blank row has fewer fields than the header
            requires.
        OSError: if the input cannot be read or the output cannot be written.
    """
    with open(diffmod_table_path, "r") as table:
        header = table.readline()
        # Whitespace-only lines carry no record; skipping them avoids an
        # IndexError on a stray blank line in the table.
        lines = [ln for ln in table if ln.strip()]

    columns = header.strip().split(',')
    try:
        kmer_ind = columns.index('kmer')
        dir_ind = columns.index('mod_assignment')
    except ValueError:
        raise ValueError(
            "%s: header must contain 'kmer' and 'mod_assignment' columns"
            % diffmod_table_path
        ) from None

    # Parse every row once and keep the raw line so it can be written back
    # unchanged.
    records = []
    for ln in lines:
        fields = ln.strip().split(",")
        records.append((fields[kmer_ind], fields[dir_ind], ln))

    # tallies[kmer][direction] -> number of rows with that combination.
    tallies = {}
    for kmer, direction, _ in records:
        tally = tallies.setdefault(kmer, {})
        tally[direction] = tally.get(direction, 0) + 1

    # Consider one modification direction per k-mer: the majority one.
    chosen = {}
    for kmer, tally in tallies.items():
        top = max(tally.values())
        tied = [direction for direction, n in tally.items() if n == top]
        chosen[kmer] = 'lower' if 'lower' in tied else tied[0]

    outfile_path = os.path.join(out_dir, "majority_direction_kmer_diffmod.table")
    with open(outfile_path, "w") as outfile:
        outfile.write(header)
        outfile.writelines(
            ln for kmer, direction, ln in records if direction == chosen[kmer]
        )
|
||
def postprocessing(args):
    """Entry point of the ``postprocessing`` run mode.

    Post-processes the ``diffmod.table`` file found in ``args.diffmod_dir``;
    the result is written into that same directory.

    Args:
        args: object exposing a ``diffmod_dir`` attribute (the parsed
            command-line options).
    """
    target_dir = args.diffmod_dir
    run_postprocessing(os.path.join(target_dir, "diffmod.table"), target_dir)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
import sys | ||
|
||
from .dataprep import dataprep | ||
from .diffmod import diffmod | ||
from .postprocessing import postprocessing | ||
|
||
def parse_options(argv):
    """Parse the xpore command line.

    Args:
        argv: full argument vector including the program name at index 0;
            only ``argv[1:]`` is parsed.

    Returns:
        The ``argparse.Namespace`` for the selected run mode. Its ``func``
        attribute is the callable registered for that mode (``dataprep``,
        ``diffmod`` or ``postprocessing``).

    Raises:
        SystemExit: raised by argparse for ``-h``/``-v``, for invalid
            arguments, and when no run mode is given.
    """
    from argparse import ArgumentParser
    from xpore import __version__

    parser = ArgumentParser(prog='xpore')
    subparsers = parser.add_subparsers(help='Running modes', metavar='{dataprep, diffmod, postprocessing}')
    # Without this, calling the program with no run mode yields a namespace
    # that has no ``func`` attribute and the caller crashes with an
    # AttributeError instead of a usage message.
    subparsers.required = True
    parser.add_argument('-v', '--version', action='version', version='%(prog)s {version}'.format(version=__version__))

    ### RUN MODE "DATAPREP"
    parser_dataprep = subparsers.add_parser('dataprep', help='run mode to preprocess nanopolish eventalign.txt before differential modification analysis')
    # Pop the default optional group and re-append it after the "required
    # arguments" group so the latter is listed first in the help output.
    optional_dataprep = parser_dataprep._action_groups.pop()
    required_dataprep = parser_dataprep.add_argument_group('required arguments')
    # Required arguments
    required_dataprep.add_argument('--eventalign', dest='eventalign', help='eventalign filepath, the output from nanopolish.',required=True)
    ##required.add_argument('--summary', dest='summary', help='eventalign summary filepath, the output from nanopolish.',required=True)
    required_dataprep.add_argument('--out_dir', dest='out_dir', help='output directory.',required=True)
    # Optional arguments
    optional_dataprep.add_argument('--gtf_path_or_url', dest='gtf_path_or_url', help='gtf file path or url.',type=str)
    optional_dataprep.add_argument('--transcript_fasta_paths_or_urls', dest='transcript_fasta_paths_or_urls', help='transcript fasta paths or urls.',type=str)
    optional_dataprep.add_argument('--skip_eventalign_indexing', dest='skip_eventalign_indexing', help='skip indexing the eventalign nanopolish output.',default=False,action='store_true')
    # parser.add_argument('--features', dest='features', help='Signal features to extract.',type=list,default=['norm_mean'])
    optional_dataprep.add_argument('--genome', dest='genome', help='to run on Genomic coordinates. Without this argument, the program will run on transcriptomic coordinates',default=False,action='store_true')
    optional_dataprep.add_argument('--n_processes', dest='n_processes', help='number of processes to run.',type=int, default=1)
    optional_dataprep.add_argument('--chunk_size', dest='chunk_size', help='number of lines from nanopolish eventalign.txt for processing.',type=int, default=1000000)
    optional_dataprep.add_argument('--readcount_min', dest='readcount_min', help='minimum read counts per gene.',type=int, default=1)
    optional_dataprep.add_argument('--readcount_max', dest='readcount_max', help='maximum read counts per gene.',type=int, default=1000)
    optional_dataprep.add_argument('--resume', dest='resume', help='with this argument, the program will resume from the previous run.',default=False,action='store_true') #todo
    parser_dataprep._action_groups.append(optional_dataprep)
    parser_dataprep.set_defaults(func=dataprep)

    ### RUN MODE "DIFFMOD"
    parser_diffmod = subparsers.add_parser('diffmod', help='run mode to perform differential modification analysis')
    # Same group reordering as for dataprep.
    optional_diffmod = parser_diffmod._action_groups.pop()
    required_diffmod = parser_diffmod.add_argument_group('required arguments')
    # Required arguments
    required_diffmod.add_argument('--config', dest='config', help='yaml configuraion filepath.',required=True)
    # Optional arguments
    optional_diffmod.add_argument('--n_processes', dest='n_processes', help='number of processes to run.',type=int,default=1)
    optional_diffmod.add_argument('--save_models', dest='save_models', help='with this argument, the program will save the model parameters for each id.',default=False,action='store_true') # todo
    optional_diffmod.add_argument('--resume', dest='resume', help='with this argument, the program will resume from the previous run.',default=False,action='store_true')
    optional_diffmod.add_argument('--ids', dest='ids', help='gene / transcript ids to model.',default=[],nargs='*')
    parser_diffmod._action_groups.append(optional_diffmod)
    parser_diffmod.set_defaults(func=diffmod)

    ### RUN MODE "POSTPROCESSING"
    parser_postprocessing = subparsers.add_parser('postprocessing', help='run mode to post process diffmod.table')
    required_postprocessing = parser_postprocessing.add_argument_group('required arguments')
    # Required arguments
    required_postprocessing.add_argument('--diffmod_dir', dest='diffmod_dir', help='diffmod directory path, the output from xpore-diffmod.',required=True)
    parser_postprocessing.set_defaults(func=postprocessing)

    return parser.parse_args(argv[1:])
|
||
def main(argv=sys.argv):
    """Parse ``argv`` and run the function registered for the chosen run mode.

    Args:
        argv: argument vector including the program name; defaults to
            ``sys.argv`` as bound when this module was imported.
    """
    # Get the command line options, then hand them to the run mode's function.
    parsed = parse_options(argv)
    run_mode = parsed.func
    run_mode(parsed)
|
||
# Run the command-line interface when this module is executed as a script.
if __name__ == "__main__":
    main(sys.argv)