diff --git a/pyprophet/_config.py b/pyprophet/_config.py index 318c2ed..81e1134 100644 --- a/pyprophet/_config.py +++ b/pyprophet/_config.py @@ -463,6 +463,8 @@ class IPFIOConfig(BaseIOConfig): propagate_signal_across_runs (bool): Propagate signal across runs (requires alignment step). ipf_max_alignment_pep (float): Maximum PEP to consider for good alignments. across_run_confidence_threshold (float): Maximum PEP threshold for propagating signal across runs for aligned features. + use_alignment_candidates (bool): Use FEATURE_MS2_ALIGNMENT_CANDIDATE instead of FEATURE_MS2_ALIGNMENT when available. + min_alignment_mapping_confidence (float): Minimum MAPPING_CONFIDENCE required when using FEATURE_MS2_ALIGNMENT_CANDIDATE. """ ipf_ms1_scoring: bool = True @@ -476,6 +478,8 @@ class IPFIOConfig(BaseIOConfig): propagate_signal_across_runs: bool = False ipf_max_alignment_pep: float = 0.7 across_run_confidence_threshold: float = 0.5 + use_alignment_candidates: bool = False + min_alignment_mapping_confidence: float = 0.5 @classmethod def from_cli_args( @@ -496,6 +500,8 @@ def from_cli_args( propagate_signal_across_runs, ipf_max_alignment_pep, across_run_confidence_threshold, + use_alignment_candidates=False, + min_alignment_mapping_confidence=0.5, ): """ Creates a configuration object from command-line arguments. @@ -517,6 +523,8 @@ def from_cli_args( propagate_signal_across_runs=propagate_signal_across_runs, ipf_max_alignment_pep=ipf_max_alignment_pep, across_run_confidence_threshold=across_run_confidence_threshold, + use_alignment_candidates=use_alignment_candidates, + min_alignment_mapping_confidence=min_alignment_mapping_confidence, ) @@ -719,4 +727,4 @@ class ExportIOConfig(BaseIOConfig): rt_unit: Literal["iRT", "RT"] = "iRT" # TSV/Matrix export options - exclude_decoys: bool = True # Whether to exclude decoy entries from TSV/matrix export (default: True, exclude decoys) \ No newline at end of file + exclude_decoys: bool = True # Whether to exclude decoy entries from TSV/matrix export (default: True, exclude decoys) diff --git a/pyprophet/cli/ipf.py b/pyprophet/cli/ipf.py index ee3a7a5..3a33651 100644 --- a/pyprophet/cli/ipf.py +++ b/pyprophet/cli/ipf.py @@ -96,6 +96,19 @@ type=float, help="Maximum PEP to consider for propagating signal across runs for aligned features.", ) +@click.option( + "--use_alignment_candidates/--no-use_alignment_candidates", + default=False, + show_default=True, + help="Use FEATURE_MS2_ALIGNMENT_CANDIDATE for across-run alignment groups when available.", +) +@click.option( + "--min_alignment_mapping_confidence", + default=0.5, + show_default=True, + type=float, + help="Minimum MAPPING_CONFIDENCE to keep selected candidate alignments when using FEATURE_MS2_ALIGNMENT_CANDIDATE.", +) @click.pass_context @measure_memory_usage_and_time @logger.catch(reraise=True) @@ -114,6 +127,8 @@ def ipf( propagate_signal_across_runs, ipf_max_alignment_pep, across_run_confidence_threshold, + use_alignment_candidates, + min_alignment_mapping_confidence, ): """ Infer peptidoforms after scoring of MS1, MS2 and transition-level data. @@ -147,6 +162,8 @@ def ipf( propagate_signal_across_runs, ipf_max_alignment_pep, across_run_confidence_threshold, + use_alignment_candidates, + min_alignment_mapping_confidence, ) write_logfile( ctx.obj["LOG_LEVEL"], f"{config.prefix}_pyp_ipf.log", ctx.obj["LOG_HEADER"] diff --git a/pyprophet/io/ipf/osw.py b/pyprophet/io/ipf/osw.py index cd8515c..98e570a 100644 --- a/pyprophet/io/ipf/osw.py +++ b/pyprophet/io/ipf/osw.py @@ -299,6 +299,62 @@ def _read_pyp_transition_duckdb(self, con): def _fetch_alignment_features_duckdb(self, con): pep_threshold = self.config.ipf_max_alignment_pep + use_alignment_candidates = self.config.use_alignment_candidates + min_confidence = self.config.min_alignment_mapping_confidence + + if use_alignment_candidates: + if check_duckdb_table(con, "main", "FEATURE_MS2_ALIGNMENT_CANDIDATE"): + logger.info( + "Using FEATURE_MS2_ALIGNMENT_CANDIDATE for across-run alignment groups " + f"with MAPPING_CONFIDENCE >= {min_confidence}." + ) + query = f""" + SELECT + DENSE_RANK() OVER (ORDER BY merged.PRECURSOR_ID, merged.ALIGNMENT_ID) AS ALIGNMENT_GROUP_ID, + merged.ALIGNMENT_ID, + merged.FEATURE_ID, + merged.PRECURSOR_ID, + merged.FEATURE_TYPE + FROM ( + SELECT DISTINCT + fmac.ALIGNMENT_ID, + fmac.REFERENCE_FEATURE_ID AS FEATURE_ID, + fmac.PRECURSOR_ID, + 'REFERENCE' AS FEATURE_TYPE + FROM osw.FEATURE_MS2_ALIGNMENT_CANDIDATE AS fmac + WHERE fmac.SELECTED = 1 + AND fmac.MAPPING_CONFIDENCE >= {min_confidence} + AND fmac.REFERENCE_FEATURE_ID != fmac.ALIGNED_FEATURE_ID + AND fmac.ALIGNED_FEATURE_ID != -1 + + UNION + + SELECT DISTINCT + fmac.ALIGNMENT_ID, + fmac.ALIGNED_FEATURE_ID AS FEATURE_ID, + fmac.PRECURSOR_ID, + 'QUERY' AS FEATURE_TYPE + FROM osw.FEATURE_MS2_ALIGNMENT_CANDIDATE AS fmac + WHERE fmac.SELECTED = 1 + AND fmac.MAPPING_CONFIDENCE >= {min_confidence} + AND fmac.REFERENCE_FEATURE_ID != fmac.ALIGNED_FEATURE_ID + AND fmac.ALIGNED_FEATURE_ID != -1 + ) AS merged + ORDER BY + ALIGNMENT_GROUP_ID, + CASE merged.FEATURE_TYPE + WHEN 'REFERENCE' THEN 0 + WHEN 'QUERY' THEN 1 + END; + """ + + df = con.execute(query).fetchdf() + return df.rename(columns=str.lower) + + logger.warning( + "Requested FEATURE_MS2_ALIGNMENT_CANDIDATE for IPF propagation, " + "but the table was not found. Falling back to FEATURE_MS2_ALIGNMENT." + ) if not check_duckdb_table( con, "main", "FEATURE_MS2_ALIGNMENT" @@ -534,6 +590,64 @@ def _read_pyp_transition_sqlite(self, con): def _fetch_alignment_features_sqlite(self, con): pep_threshold = self.config.ipf_max_alignment_pep + use_alignment_candidates = self.config.use_alignment_candidates + min_confidence = self.config.min_alignment_mapping_confidence + + if use_alignment_candidates: + if check_sqlite_table(con, "FEATURE_MS2_ALIGNMENT_CANDIDATE"): + logger.info( + "Using FEATURE_MS2_ALIGNMENT_CANDIDATE for across-run alignment groups " + f"with MAPPING_CONFIDENCE >= {min_confidence}." + ) + query = """ + SELECT + DENSE_RANK() OVER (ORDER BY PRECURSOR_ID, ALIGNMENT_ID) AS ALIGNMENT_GROUP_ID, + ALIGNMENT_ID, + FEATURE_ID, + PRECURSOR_ID, + FEATURE_TYPE + FROM ( + SELECT DISTINCT + ALIGNMENT_ID, + PRECURSOR_ID, + REFERENCE_FEATURE_ID AS FEATURE_ID, + 'REFERENCE' AS FEATURE_TYPE + FROM FEATURE_MS2_ALIGNMENT_CANDIDATE + WHERE SELECTED = 1 + AND MAPPING_CONFIDENCE >= ? + AND REFERENCE_FEATURE_ID != ALIGNED_FEATURE_ID + AND ALIGNED_FEATURE_ID != -1 + + UNION + + SELECT DISTINCT + ALIGNMENT_ID, + PRECURSOR_ID, + ALIGNED_FEATURE_ID AS FEATURE_ID, + 'QUERY' AS FEATURE_TYPE + FROM FEATURE_MS2_ALIGNMENT_CANDIDATE + WHERE SELECTED = 1 + AND MAPPING_CONFIDENCE >= ? + AND REFERENCE_FEATURE_ID != ALIGNED_FEATURE_ID + AND ALIGNED_FEATURE_ID != -1 + ) AS feature_list + ORDER BY + ALIGNMENT_GROUP_ID, + CASE FEATURE_TYPE + WHEN 'REFERENCE' THEN 0 + WHEN 'QUERY' THEN 1 + END + """ + + df = pd.read_sql_query( + query, con, params=[min_confidence, min_confidence] + ) + return df.rename(columns=str.lower) + + logger.warning( + "Requested FEATURE_MS2_ALIGNMENT_CANDIDATE for IPF propagation, " + "but the table was not found. Falling back to FEATURE_MS2_ALIGNMENT." + ) if not check_sqlite_table( con, "FEATURE_MS2_ALIGNMENT"