diff --git a/gtdbtk/__init__.py b/gtdbtk/__init__.py index 09f9364..a8a6d5e 100644 --- a/gtdbtk/__init__.py +++ b/gtdbtk/__init__.py @@ -29,4 +29,4 @@ __status__ = 'Production' __title__ = 'GTDB-Tk' __url__ = 'https://github.com/Ecogenomics/GTDBTk' -__version__ = '2.2.5' +__version__ = '2.2.6' diff --git a/gtdbtk/classify.py b/gtdbtk/classify.py index 0eaf682..ad6a915 100644 --- a/gtdbtk/classify.py +++ b/gtdbtk/classify.py @@ -605,13 +605,15 @@ def run(self, for disappearing_genome in disappearing_genomes: disappearing_genomes_file.add_genome(disappearing_genome, tree_iter) - class_level_classification, classified_user_genomes,warning_counter = self._parse_tree(mrca_lowtree, genomes, msa_dict, - percent_multihit_dict,genes, tln_table_summary_file.genomes, - bac_ar_diff, submsa_file_path, red_dict_file.data, - summary_file, pplacer_taxonomy_dict,warning_counter, - high_classification, debug_file, debugopt, - tree_mapping_file, tree_iter, - tree_mapping_dict_reverse) + class_level_classification, classified_user_genomes,warning_counter = self._parse_tree(mrca_lowtree, genomes, + msa_dict,percent_multihit_dict, + genes, tln_table_summary_file.genomes, + bac_ar_diff, submsa_file_path, + red_dict_file.data,summary_file, + pplacer_taxonomy_dict,warning_counter, + high_classification, debug_file, + debugopt,tree_mapping_file, + tree_iter,tree_mapping_dict_reverse) if debugopt: with open(out_dir + '/' + prefix + '_class_level_classification.txt', 'a') as olf: @@ -658,11 +660,15 @@ def run(self, tree_to_process) disappearing_genomes = [seq_id for seq_id in genomes_to_process if seq_id not in pplacer_taxonomy_dict] - class_level_classification, classified_user_genomes,warning_counter = self._parse_tree(tree_to_process, genomes, msa_dict, percent_multihit_dict, - tln_table_summary_file.genomes, - bac_ar_diff, user_msa_file, red_dict_file.data, summary_file, - pplacer_taxonomy_dict,warning_counter, None, - debug_file,skip_ani_screen, debugopt, None, None, None) + class_level_classification, classified_user_genomes,warning_counter = self._parse_tree(tree_to_process, genomes, + msa_dict, percent_multihit_dict, + genes,tln_table_summary_file.genomes, + bac_ar_diff, user_msa_file, + red_dict_file.data, summary_file, + pplacer_taxonomy_dict,warning_counter, + None,debug_file, + debugopt,None, + None, None) # add filtered genomes to the summary file warning_counter = self.add_filtered_genomes_to_summary(align_dir,warning_counter, summary_file, marker_set_id, prefix) diff --git a/gtdbtk/config/config.py b/gtdbtk/config/config.py index 4af87a0..ccccac9 100644 --- a/gtdbtk/config/config.py +++ b/gtdbtk/config/config.py @@ -15,7 +15,7 @@ print('_' * 80 + '\n') print("The 'GTDBTK_DATA_PATH' environment variable is not defined.".center(80) + '\n') print('Please set this variable to your reference data package.'.center(80)) - print('https://github.com/Ecogenomics/GTDBTk#installation'.center(80)) + print('https://ecogenomics.github.io/GTDBTk/installing/index.html'.center(80)) print('=' * 80) sys.exit(1) diff --git a/gtdbtk/external/mash.py b/gtdbtk/external/mash.py index 735e271..fa18bd7 100644 --- a/gtdbtk/external/mash.py +++ b/gtdbtk/external/mash.py @@ -79,13 +79,18 @@ def run(self, qry, ref, mash_d, mash_k, mash_v, mash_s, mash_max_dist, mash_db) self.cpus, max_d=mash_d, mash_v=mash_v) results = mash_dists.read(mash_max_dist) + # mash_db can be moved from filesystem to filesystem, so we need to update the path in the mash_db to + # reflect the new location. + current_ref = {os.path.basename(v): v for v in ref.values()} + # Convert the results back to the accession path_to_qry = {v: k for (k, v) in qry.items()} path_to_ref = {v: k for (k, v) in ref.items()} out = defaultdict(dict) for qry_path, ref_hits in results.items(): for ref_path, hit in ref_hits.items(): - out[path_to_qry[qry_path]][path_to_ref[ref_path]] = hit + current_ref_path = current_ref[ref_path] + out[path_to_qry[qry_path]][path_to_ref[current_ref_path]] = hit return out @@ -149,7 +154,7 @@ def read(self,max_mash_dist=100) -> Dict[str, Dict[str, Tuple[float, float, int, dist, p_val = float(dist), float(p_val) if dist <= max_mash_dist: shared_num, shared_den = int(shared_n), int(shared_d) - out[qry_id][ref_id] = (dist, p_val, shared_num, shared_den) + out[qry_id][os.path.basename(ref_id)] = (dist, p_val, shared_num, shared_den) return out