From 972a1911e513003289e88df7d6d6af2ac938ead1 Mon Sep 17 00:00:00 2001 From: singjc Date: Fri, 17 Feb 2023 23:20:58 -0500 Subject: [PATCH 1/3] [ADD] Make dynamic main score selection optional if ss_main_score set to auto --- pyprophet/main.py | 2 +- pyprophet/pyprophet.py | 6 +++--- pyprophet/runner.py | 19 ++++++++++++++----- pyprophet/semi_supervised.py | 7 ++++--- 4 files changed, 22 insertions(+), 12 deletions(-) diff --git a/pyprophet/main.py b/pyprophet/main.py index 8385e9d..47000b0 100644 --- a/pyprophet/main.py +++ b/pyprophet/main.py @@ -112,7 +112,7 @@ def score(infile, outfile, classifier, xgb_autotune, apply_weights, xeval_fracti PyProphetLearner(infile, outfile, classifier, xgb_hyperparams, xgb_params, xgb_params_space, xeval_fraction, xeval_num_iter, ss_initial_fdr, ss_iteration_fdr, ss_num_iter, ss_main_score, group_id, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, level, ipf_max_peakgroup_rank, ipf_max_peakgroup_pep, ipf_max_transition_isotope_overlap, ipf_min_transition_sn, tric_chromprob, threads, test, ss_score_filter, color_palette, main_score_selection_report).run() else: - PyProphetWeightApplier(infile, outfile, classifier, xgb_hyperparams, xgb_params, xgb_params_space, xeval_fraction, xeval_num_iter, ss_initial_fdr, ss_iteration_fdr, ss_num_iter, ss_main_score, group_id, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, level, ipf_max_peakgroup_rank, ipf_max_peakgroup_pep, ipf_max_transition_isotope_overlap, ipf_min_transition_sn, tric_chromprob, threads, test, apply_weights, ss_score_filter, color_palette).run() + PyProphetWeightApplier(infile, outfile, classifier, xgb_hyperparams, xgb_params, xgb_params_space, xeval_fraction, xeval_num_iter, ss_initial_fdr, ss_iteration_fdr, ss_num_iter, ss_main_score, group_id, parametric, pfdr, pi0_lambda, 
pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, level, ipf_max_peakgroup_rank, ipf_max_peakgroup_pep, ipf_max_transition_isotope_overlap, ipf_min_transition_sn, tric_chromprob, threads, test, apply_weights, ss_score_filter, color_palette, main_score_selection_report).run() # IPF diff --git a/pyprophet/pyprophet.py b/pyprophet/pyprophet.py index 3f35216..54fe39e 100644 --- a/pyprophet/pyprophet.py +++ b/pyprophet/pyprophet.py @@ -352,10 +352,10 @@ def _build_result(self, table, final_classifier, score_columns, experiment): @profile -def PyProphet(classifier, xgb_hyperparams, xgb_params, xgb_params_space, xeval_fraction, xeval_num_iter, ss_initial_fdr, ss_iteration_fdr, ss_num_iter, group_id, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, tric_chromprob, threads, test, ss_score_filter, color_palette, main_score_selection_report, outfile, level): +def PyProphet(classifier, xgb_hyperparams, xgb_params, xgb_params_space, xeval_fraction, xeval_num_iter, ss_initial_fdr, ss_iteration_fdr, ss_num_iter, group_id, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, tric_chromprob, threads, test, ss_score_filter, color_palette, main_score_selection_report, outfile, level, ss_use_dynamic_main_score): if classifier == "LDA": - return HolyGostQuery(StandardSemiSupervisedLearner(LDALearner(), xeval_fraction, xeval_num_iter, ss_initial_fdr, ss_iteration_fdr, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, test, main_score_selection_report, outfile, level), classifier, ss_num_iter, group_id, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, tric_chromprob, threads, test, ss_score_filter, 
color_palette) + return HolyGostQuery(StandardSemiSupervisedLearner(LDALearner(), xeval_fraction, xeval_num_iter, ss_initial_fdr, ss_iteration_fdr, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, test, main_score_selection_report, outfile, level, ss_use_dynamic_main_score), classifier, ss_num_iter, group_id, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, tric_chromprob, threads, test, ss_score_filter, color_palette) elif classifier == "XGBoost": - return HolyGostQuery(StandardSemiSupervisedLearner(XGBLearner(xgb_hyperparams, xgb_params, xgb_params_space, threads), xeval_fraction, xeval_num_iter, ss_initial_fdr, ss_iteration_fdr, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, test, main_score_selection_report, outfile, level), classifier, ss_num_iter, group_id, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, tric_chromprob, threads, test, ss_score_filter, color_palette) + return HolyGostQuery(StandardSemiSupervisedLearner(XGBLearner(xgb_hyperparams, xgb_params, xgb_params_space, threads), xeval_fraction, xeval_num_iter, ss_initial_fdr, ss_iteration_fdr, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, test, main_score_selection_report, outfile, level, ss_use_dynamic_main_score), classifier, ss_num_iter, group_id, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, tric_chromprob, threads, test, ss_score_filter, color_palette) else: raise click.ClickException("Classifier not supported.") diff --git a/pyprophet/runner.py b/pyprophet/runner.py index 27ff0a9..78003a8 100644 --- a/pyprophet/runner.py +++ b/pyprophet/runner.py @@ -194,13 +194,21 @@ def read_osw(infile, level, 
ipf_max_peakgroup_rank, ipf_max_peakgroup_pep, ipf_m con.close() return(table) + # Check for auto main score selection + if ss_main_score=="auto": + # Set starting default main score + ss_main_score = "var_xcorr_shape" + use_dynamic_main_score = True + else: + use_dynamic_main_score = False + # Main function if is_sqlite_file(infile): self.mode = 'osw' self.table = read_osw(infile, level, ipf_max_peakgroup_rank, ipf_max_peakgroup_pep, ipf_max_transition_isotope_overlap, ipf_min_transition_sn) else: self.mode = 'tsv' - self.table = read_tsv(infile) + self.table = read_tsv(infile) self.infile = infile self.outfile = outfile @@ -233,6 +241,7 @@ def read_osw(infile, level, ipf_max_peakgroup_rank, ipf_max_peakgroup_pep, ipf_m self.ss_score_filter = ss_score_filter self.color_palette = color_palette self.main_score_selection_report = main_score_selection_report + self.ss_use_dynamic_main_score = use_dynamic_main_score self.prefix = os.path.splitext(outfile)[0] @@ -415,7 +424,7 @@ def save_bin_weights(self, weights, extra_writes): class PyProphetLearner(PyProphetRunner): def run_algo(self): - (result, scorer, weights) = PyProphet(self.classifier, self.xgb_hyperparams, self.xgb_params, self.xgb_params_space, self.xeval_fraction, self.xeval_num_iter, self.ss_initial_fdr, self.ss_iteration_fdr, self.ss_num_iter, self.group_id, self.parametric, self.pfdr, self.pi0_lambda, self.pi0_method, self.pi0_smooth_df, self.pi0_smooth_log_pi0, self.lfdr_truncate, self.lfdr_monotone, self.lfdr_transformation, self.lfdr_adj, self.lfdr_eps, self.tric_chromprob, self.threads, self.test, self.ss_score_filter, self.color_palette, self.main_score_selection_report, self.outfile, self.level).learn_and_apply(self.table) + (result, scorer, weights) = PyProphet(self.classifier, self.xgb_hyperparams, self.xgb_params, self.xgb_params_space, self.xeval_fraction, self.xeval_num_iter, self.ss_initial_fdr, self.ss_iteration_fdr, self.ss_num_iter, self.group_id, self.parametric, self.pfdr, 
self.pi0_lambda, self.pi0_method, self.pi0_smooth_df, self.pi0_smooth_log_pi0, self.lfdr_truncate, self.lfdr_monotone, self.lfdr_transformation, self.lfdr_adj, self.lfdr_eps, self.tric_chromprob, self.threads, self.test, self.ss_score_filter, self.color_palette, self.main_score_selection_report, self.outfile, self.level, self.ss_use_dynamic_main_score).learn_and_apply(self.table) return (result, scorer, weights) def extra_writes(self): @@ -432,8 +441,8 @@ def extra_writes(self): class PyProphetWeightApplier(PyProphetRunner): - def __init__(self, infile, outfile, classifier, xgb_hyperparams, xgb_params, xgb_params_space, xeval_fraction, xeval_num_iter, ss_initial_fdr, ss_iteration_fdr, ss_num_iter, ss_main_score, group_id, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, level, ipf_max_peakgroup_rank, ipf_max_peakgroup_pep, ipf_max_transition_isotope_overlap, ipf_min_transition_sn, tric_chromprob, threads, test, apply_weights, ss_score_filter, color_palette): - super(PyProphetWeightApplier, self).__init__(infile, outfile, classifier, xgb_hyperparams, xgb_params, xgb_params_space, xeval_fraction, xeval_num_iter, ss_initial_fdr, ss_iteration_fdr, ss_num_iter, ss_main_score, group_id, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, level, ipf_max_peakgroup_rank, ipf_max_peakgroup_pep, ipf_max_transition_isotope_overlap, ipf_min_transition_sn, tric_chromprob, threads, test, ss_score_filter, color_palette) + def __init__(self, infile, outfile, classifier, xgb_hyperparams, xgb_params, xgb_params_space, xeval_fraction, xeval_num_iter, ss_initial_fdr, ss_iteration_fdr, ss_num_iter, ss_main_score, group_id, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, level, 
ipf_max_peakgroup_rank, ipf_max_peakgroup_pep, ipf_max_transition_isotope_overlap, ipf_min_transition_sn, tric_chromprob, threads, test, apply_weights, ss_score_filter, color_palette, main_score_selection_report): + super(PyProphetWeightApplier, self).__init__(infile, outfile, classifier, xgb_hyperparams, xgb_params, xgb_params_space, xeval_fraction, xeval_num_iter, ss_initial_fdr, ss_iteration_fdr, ss_num_iter, ss_main_score, group_id, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, level, ipf_max_peakgroup_rank, ipf_max_peakgroup_pep, ipf_max_transition_isotope_overlap, ipf_min_transition_sn, tric_chromprob, threads, test, ss_score_filter, color_palette, main_score_selection_report) if not os.path.exists(apply_weights): raise click.ClickException("Weights file %s does not exist." % apply_weights) if self.mode == "tsv": @@ -481,7 +490,7 @@ def __init__(self, infile, outfile, classifier, xgb_hyperparams, xgb_params, xgb raise def run_algo(self): - (result, scorer, weights) = PyProphet(self.classifier, self.xgb_hyperparams, self.xgb_params, self.xgb_params_space, self.xeval_fraction, self.xeval_num_iter, self.ss_initial_fdr, self.ss_iteration_fdr, self.ss_num_iter, self.group_id, self.parametric, self.pfdr, self.pi0_lambda, self.pi0_method, self.pi0_smooth_df, self.pi0_smooth_log_pi0, self.lfdr_truncate, self.lfdr_monotone, self.lfdr_transformation, self.lfdr_adj, self.lfdr_eps, self.tric_chromprob, self.threads, self.test, self.ss_score_filter, self.color_palette).apply_weights(self.table, self.persisted_weights) + (result, scorer, weights) = PyProphet(self.classifier, self.xgb_hyperparams, self.xgb_params, self.xgb_params_space, self.xeval_fraction, self.xeval_num_iter, self.ss_initial_fdr, self.ss_iteration_fdr, self.ss_num_iter, self.group_id, self.parametric, self.pfdr, self.pi0_lambda, self.pi0_method, self.pi0_smooth_df, self.pi0_smooth_log_pi0, 
self.lfdr_truncate, self.lfdr_monotone, self.lfdr_transformation, self.lfdr_adj, self.lfdr_eps, self.tric_chromprob, self.threads, self.test, self.ss_score_filter, self.color_palette, self.main_score_selection_report, self.outfile, self.level, self.ss_use_dynamic_main_score).apply_weights(self.table, self.persisted_weights) return (result, scorer, weights) def extra_writes(self): diff --git a/pyprophet/semi_supervised.py b/pyprophet/semi_supervised.py index 0b3b029..4b70c89 100644 --- a/pyprophet/semi_supervised.py +++ b/pyprophet/semi_supervised.py @@ -42,11 +42,11 @@ def learn_randomized(self, experiment, score_columns, working_thread_number): train.rank_by("main_score") params, clf_scores, use_as_main_score = self.start_semi_supervised_learning(train, score_columns, working_thread_number) - + click.echo(f"Info: Using dynamic score set to {self.ss_use_dynamic_main_score}") # Get current main score column name old_main_score_column = [col for col in score_columns if 'main' in col][0] # Only Update if chosen main score column has changed - if use_as_main_score != old_main_score_column: + if use_as_main_score != old_main_score_column and self.ss_use_dynamic_main_score: train, _ = update_chosen_main_score_in_table(train, score_columns, use_as_main_score) train.rank_by("main_score") experiment, score_columns = update_chosen_main_score_in_table(experiment, score_columns, use_as_main_score) @@ -106,7 +106,7 @@ def learn_final(self, experiment): class StandardSemiSupervisedLearner(AbstractSemiSupervisedLearner): - def __init__(self, inner_learner, xeval_fraction, xeval_num_iter, ss_initial_fdr, ss_iteration_fdr, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, test, main_score_selection_report, outfile, level): + def __init__(self, inner_learner, xeval_fraction, xeval_num_iter, ss_initial_fdr, ss_iteration_fdr, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, test, main_score_selection_report, outfile, level, 
ss_use_dynamic_main_score): assert isinstance(inner_learner, AbstractLearner) AbstractSemiSupervisedLearner.__init__(self, xeval_fraction, xeval_num_iter, test) self.inner_learner = inner_learner @@ -123,6 +123,7 @@ def __init__(self, inner_learner, xeval_fraction, xeval_num_iter, ss_initial_fdr self.main_score_selection_report = main_score_selection_report self.outfile = outfile self.level = level + self.ss_use_dynamic_main_score = ss_use_dynamic_main_score def select_train_peaks(self, train, sel_column, cutoff_fdr, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, mapper=None, main_score_selection_report=False, outfile=None, level=None, working_thread_number=None): assert isinstance(train, Experiment) From 1a495666523a7950133896b4b8686de4c493de7e Mon Sep 17 00:00:00 2001 From: singjc Date: Fri, 17 Feb 2023 23:24:19 -0500 Subject: [PATCH 2/3] [UPDATE] help message for ss_main_score --- pyprophet/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyprophet/main.py b/pyprophet/main.py index 47000b0..9cace09 100644 --- a/pyprophet/main.py +++ b/pyprophet/main.py @@ -61,7 +61,7 @@ def type_cast_value(self, ctx, value): @click.option('--ss_initial_fdr', default=0.15, show_default=True, type=float, help='Initial FDR cutoff for best scoring targets.') @click.option('--ss_iteration_fdr', default=0.05, show_default=True, type=float, help='Iteration FDR cutoff for best scoring targets.') @click.option('--ss_num_iter', default=10, show_default=True, type=int, help='Number of iterations for semi-supervised learning step.') -@click.option('--ss_main_score', default="var_xcorr_shape", show_default=True, type=str, help='Main score to start semi-supervised-learning.') +@click.option('--ss_main_score', default="auto", show_default=True, type=str, help='Main score to start semi-supervised-learning. Default is set to auto, meaning each iteration of learning a dynamic main score selection process will occur. 
If you want to have a set starting main score for each learning iteration, you can set a specific score, e.g. "var_xcorr_shape"') @click.option('--ss_score_filter', default='', help='Specify scores which should used for scoring. In addition specific predefined profiles can be used. For example for metabolomis data use "metabolomics". Please specify any additional input as follows: "var_ms1_xcorr_coelution,var_library_corr,var_xcorr_coelution,etc."') # Statistics @click.option('--group_id', default="group_id", show_default=True, type=str, help='Group identifier for calculation of statistics.') From 266bab8cb59bcbfdd858f07af2ac7a722c43b742 Mon Sep 17 00:00:00 2001 From: Justin Sing <32938975+singjc@users.noreply.github.com> Date: Fri, 17 Feb 2023 23:33:52 -0500 Subject: [PATCH 3/3] Remove debug echo statement --- pyprophet/semi_supervised.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyprophet/semi_supervised.py b/pyprophet/semi_supervised.py index 4b70c89..87b56cb 100644 --- a/pyprophet/semi_supervised.py +++ b/pyprophet/semi_supervised.py @@ -42,7 +42,7 @@ def learn_randomized(self, experiment, score_columns, working_thread_number): train.rank_by("main_score") params, clf_scores, use_as_main_score = self.start_semi_supervised_learning(train, score_columns, working_thread_number) - click.echo(f"Info: Using dynamic score set to {self.ss_use_dynamic_main_score}") + # Get current main score column name old_main_score_column = [col for col in score_columns if 'main' in col][0] # Only Update if chosen main score column has changed