-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #53 from McTavishLab/JanMerge
Jan merge - pulled commits until Jan 22 from dev.
- Loading branch information
Showing
77 changed files
with
26,345 additions
and
2,296 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
from physcraper import wrappers, OtuJsonDict, ConfigObj, IdDicts | ||
import os | ||
import json | ||
|
||
################################# | ||
# Example/test driver: runs wrappers.filter_data_run on a small alignment and
# tree, passing a directory of additional sequences and their name table.

# Alignment (fasta) and tree (newick) inputs.
seqaln = "tests/data/tiny_comb_its/tiny_comb_its.fasta"
mattype = "fasta"
trfn = "tests/data/tiny_comb_its/tiny_comb_its.tre"
schema_trf = "newick"
blacklist = None
workdir = "tests/output/addLocal"

# Name table for the input data, and the JSON caches written into workdir.
id_to_spn = r"tests/data/tiny_comb_its/nicespl.csv"
otu_jsonfi = "{}/otu_dict.json".format(workdir)
otu_jsonfi_local = "{}/otu_dict_local.json".format(workdir)

# Run settings handed to filter_data_run below.
configfi = "tests/data/localblast.config"
threshold = 10
selectby = "blast"
downto = None
ingroup_mrca = None
# Additional sequences to add and their name table
# (presumably unpublished/local sequences, judging by the names -- confirm).
add_unpubl_seq = "tests/data/local_seqs"
id_to_spn_addseq = "tests/data/tipnTOspn_localAdd.csv"

# The JSON caches are written into workdir, so it has to exist first.
if not os.path.exists(workdir):
    os.makedirs(workdir)

conf = ConfigObj(configfi)
ids = IdDicts(conf, workdir=workdir, mrca=ingroup_mrca)

# Reuse the cached OTU dictionary when it exists; otherwise build it and
# cache it. The file handles are closed explicitly so that the JSON is
# completely written to disk before its path is passed to filter_data_run.
if os.path.exists(otu_jsonfi):
    print("load json")
    with open(otu_jsonfi) as json_in:
        otu_json = json.load(json_in)
else:
    otu_json = OtuJsonDict(id_to_spn, ids)
    with open(otu_jsonfi, "w") as json_out:
        json.dump(otu_json, json_out)

# Same load-or-build caching for the name table of the additional sequences.
if os.path.exists(otu_jsonfi_local):
    print("load json local")
    with open(otu_jsonfi_local) as json_in:
        otu_json_local = json.load(json_in)
    print(otu_json_local)
else:
    otu_json_local = OtuJsonDict(id_to_spn_addseq, ids)
    with open(otu_jsonfi_local, "w") as json_out:
        json.dump(otu_json_local, json_out)
    print(otu_json_local)

# Note: the main OTU dictionary is passed as a file path (otu_jsonfi), while
# the one for the additional sequences is passed as the in-memory object.
wrappers.filter_data_run(seqaln,
                         mattype,
                         trfn,
                         schema_trf,
                         workdir,
                         threshold,
                         otu_jsonfi,
                         configfi,
                         selectby=selectby,
                         downtorank=downto,
                         ingroup_mrca=ingroup_mrca,
                         add_unpubl_seq=add_unpubl_seq,
                         id_to_spn_addseq_json=otu_json_local)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,54 +1,77 @@ | ||
### never have in-line comments in this file | ||
|
||
[blast] | ||
e_value_thresh = 0.001 | ||
#The statistical cutoff for matches | ||
unmapped = keep | ||
# unmapped remove: remove tips, keep = set to id of mrca | ||
|
||
Entrez.email = xz@xz.com | ||
####### | ||
## BLAST SETTINGS | ||
|
||
[blast] | ||
#Use your email address, please, this is just for NCBI records | ||
Entrez.email = ejmctavish@gmail.com | ||
|
||
#The statistical cutoff for matches | ||
e_value_thresh = 0.001 | ||
|
||
hitlist_size = 100 | ||
#hitlist_size =5000 | ||
#the max number of matches for each search | ||
# the max number of matches for each blast search | ||
hitlist_size = 25 | ||
|
||
# define location for blast database, should be local for the moment. | ||
location = local | ||
#Options [local, remote] | ||
#Unless you have set up a local blast database, leave as remote | ||
|
||
#url_base = | ||
#default url_base is ncbi, to run on AWS set url here | ||
# if location = local: | ||
localblastdb = /shared/localblastdb_meta/ | ||
#localblastdb = /home/mkandziora/blastdb_ncbi/ | ||
#localblastdb = /home/blubb/local_blast_db/ | ||
# localblastdb path must have '/' | ||
|
||
# if location = remote: | ||
#default url_base is ncbi, to run on AWS set url here | ||
#url_base = | ||
|
||
# number of cores to use | ||
num_threads = 8 | ||
|
||
#Only required if blast location is local | ||
num_threads = 2 | ||
# use the GenBank identifier as the blast output file name; if False, the otuID is used instead | ||
gb_id_filename = True | ||
|
||
# when to reblast the included sequences | ||
delay = 90 | ||
|
||
####### | ||
## PHYSCRAPER SETTINGS | ||
|
||
[physcraper] | ||
# what to do with tips from OToL that are not known: remove = remove the tips, keep = set to id of mrca | ||
unmapped = keep | ||
|
||
#This is how much shorter new sequences are allowed to be compared to your original sequence lengths when added to aln. It is used while adding new seqs as well as when removing seqs that are too short | ||
seq_len_perc = 0.8 | ||
#This is how much shorter new sequences are allowed to be compared to your original sequence lengths. | ||
|
||
[ncbi_parser] | ||
nodes_fn = ./tests/data/nodes.dmp | ||
names_fn = ./tests/data/names.dmp | ||
# value that determines how many seq need to be present before the beginning and end of alignment will be trimmed | ||
trim_perc = 0.75 | ||
|
||
# max length for values to add to aln | ||
max_len = 2.5 | ||
|
||
|
||
####### | ||
## INTERNAL PHYSCRAPER SETTINGS | ||
#--------------------------------------------------------------------------------- | ||
#Things below here you should not need to change! | ||
|
||
#Only required if blast location is local | ||
[ncbi_parser] | ||
nodes_fn = ./tests/data/nodes.dmp | ||
names_fn = ./tests/data/names.dmp | ||
|
||
[phylesystem] | ||
location = api | ||
#local or api, leave set to api unless you have installed phylesystem locally | ||
|
||
|
||
[taxonomy] | ||
#You should not need to change any of these! | ||
ott_ncbi = taxonomy/ott_ncbi | ||
get_ncbi_taxonomy = taxonomy/get_ncbi_taxonomy.sh | ||
ncbi_dmp = taxonomy/gi_taxid_nucl.dmp | ||
#acc2taxid = taxonomy/nucl_gb.accession2taxid.gz | ||
#rankedlineages = rankedlineage.dmp.gz | ||
id_pickle = taxonomy/id_dmp.p | ||
#You should not need to change any of these! | ||
|
Oops, something went wrong.