Skip to content

Commit

Permalink
Merge pull request #495 from Ecogenomics/staging
Browse files (browse the repository at this point in the history)
Staging
  • Loading branch information
pchaumeil committed Mar 23, 2023
2 parents 4a922a7 + a2d71f4 commit 21bd14d
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 16 deletions.
2 changes: 1 addition & 1 deletion gtdbtk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,4 @@
__status__ = 'Production'
__title__ = 'GTDB-Tk'
__url__ = 'https://github.com/Ecogenomics/GTDBTk'
__version__ = '2.2.5'
__version__ = '2.2.6'
30 changes: 18 additions & 12 deletions gtdbtk/classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -605,13 +605,15 @@ def run(self,
for disappearing_genome in disappearing_genomes:
disappearing_genomes_file.add_genome(disappearing_genome, tree_iter)

class_level_classification, classified_user_genomes,warning_counter = self._parse_tree(mrca_lowtree, genomes, msa_dict,
percent_multihit_dict,genes, tln_table_summary_file.genomes,
bac_ar_diff, submsa_file_path, red_dict_file.data,
summary_file, pplacer_taxonomy_dict,warning_counter,
high_classification, debug_file, debugopt,
tree_mapping_file, tree_iter,
tree_mapping_dict_reverse)
class_level_classification, classified_user_genomes,warning_counter = self._parse_tree(mrca_lowtree, genomes,
msa_dict,percent_multihit_dict,
genes, tln_table_summary_file.genomes,
bac_ar_diff, submsa_file_path,
red_dict_file.data,summary_file,
pplacer_taxonomy_dict,warning_counter,
high_classification, debug_file,
debugopt,tree_mapping_file,
tree_iter,tree_mapping_dict_reverse)

if debugopt:
with open(out_dir + '/' + prefix + '_class_level_classification.txt', 'a') as olf:
Expand Down Expand Up @@ -658,11 +660,15 @@ def run(self,
tree_to_process)

disappearing_genomes = [seq_id for seq_id in genomes_to_process if seq_id not in pplacer_taxonomy_dict]
class_level_classification, classified_user_genomes,warning_counter = self._parse_tree(tree_to_process, genomes, msa_dict, percent_multihit_dict,
tln_table_summary_file.genomes,
bac_ar_diff, user_msa_file, red_dict_file.data, summary_file,
pplacer_taxonomy_dict,warning_counter, None,
debug_file,skip_ani_screen, debugopt, None, None, None)
class_level_classification, classified_user_genomes,warning_counter = self._parse_tree(tree_to_process, genomes,
msa_dict, percent_multihit_dict,
genes,tln_table_summary_file.genomes,
bac_ar_diff, user_msa_file,
red_dict_file.data, summary_file,
pplacer_taxonomy_dict,warning_counter,
None,debug_file,
debugopt,None,
None, None)
# add filtered genomes to the summary file
warning_counter = self.add_filtered_genomes_to_summary(align_dir,warning_counter, summary_file, marker_set_id, prefix)

Expand Down
2 changes: 1 addition & 1 deletion gtdbtk/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
print('_' * 80 + '\n')
print("The 'GTDBTK_DATA_PATH' environment variable is not defined.".center(80) + '\n')
print('Please set this variable to your reference data package.'.center(80))
print('https://github.com/Ecogenomics/GTDBTk#installation'.center(80))
print('https://ecogenomics.github.io/GTDBTk/installing/index.html'.center(80))
print('=' * 80)
sys.exit(1)

Expand Down
9 changes: 7 additions & 2 deletions gtdbtk/external/mash.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,18 @@ def run(self, qry, ref, mash_d, mash_k, mash_v, mash_s, mash_max_dist, mash_db)
self.cpus, max_d=mash_d, mash_v=mash_v)
results = mash_dists.read(mash_max_dist)

# The mash_db can be moved from filesystem to filesystem, so the reference paths recorded in it may be stale.
# Map each reference file name to its current path so hits are matched by file name, not by the old location.
current_ref = {os.path.basename(v): v for v in ref.values()}

# Convert the results back to the accession
path_to_qry = {v: k for (k, v) in qry.items()}
path_to_ref = {v: k for (k, v) in ref.items()}
out = defaultdict(dict)
for qry_path, ref_hits in results.items():
for ref_path, hit in ref_hits.items():
out[path_to_qry[qry_path]][path_to_ref[ref_path]] = hit
current_ref_path = current_ref[ref_path]
out[path_to_qry[qry_path]][path_to_ref[current_ref_path]] = hit
return out


Expand Down Expand Up @@ -149,7 +154,7 @@ def read(self,max_mash_dist=100) -> Dict[str, Dict[str, Tuple[float, float, int,
dist, p_val = float(dist), float(p_val)
if dist <= max_mash_dist:
shared_num, shared_den = int(shared_n), int(shared_d)
out[qry_id][ref_id] = (dist, p_val, shared_num, shared_den)
out[qry_id][os.path.basename(ref_id)] = (dist, p_val, shared_num, shared_den)
return out


Expand Down

0 comments on commit 21bd14d

Please sign in to comment.