# Debugging notebook for alphaDIA
This notebook allows you to run a search in a Jupyter notebook and debug the results.
It allows you to debug a search on the workflow level as well as on the peak group level.

In [None]:
%reload_ext autoreload
%autoreload 2

import os

# Switch on Numba's debugging aids: bounds checking, developer mode and full
# tracebacks.
# The variables are set *before* alphadia is imported so that they are already
# present in the environment when Numba is first loaded; setting them after
# the import risks them being ignored.
# NOTE(review): assumes alphadia pulls in numba at import time and that the
# kernel has not imported numba earlier - restart the kernel if in doubt.
os.environ["NUMBA_BOUNDSCHECK"] = "1"
os.environ["NUMBA_DEVELOPER_MODE"] = "1"
os.environ["NUMBA_FULL_TRACEBACKS"] = "1"

from alphadia import search_step  # noqa: E402
from alphadia.workflow.peptidecentric import peptidecentric  # noqa: E402

In [None]:
import tempfile

from alphabase.tools.data_downloader import DataShareDownloader

# Scratch directory that receives the downloaded files.
test_folder = tempfile.mkdtemp()

# To work from a fixed folder with local files instead, override the values
# by hand, for example:
# test_folder = "/Users/georgwallmann/Documents/data/alphadia_performance_tests"
#
# library_path = "<path to a local library file>"
#
# raw_data_path_list = [
#   "/Users/georgwallmann/Documents/data/alphadia_performance_tests/20231017_OA2_TiHe_ADIAMA_HeLa_200ng_Evo011_21min_F-40_05.raw",
#   "/Users/georgwallmann/Documents/data/alphadia_performance_tests/20231017_OA2_TiHe_ADIAMA_HeLa_200ng_Evo011_21min_F-40_06.raw",
#   "/Users/georgwallmann/Documents/data/alphadia_performance_tests/20231017_OA2_TiHe_ADIAMA_HeLa_200ng_Evo011_21min_F-40_07.raw",
# ]

# HeLa library as used in the getting started guide
library_url = "https://datashare.biochem.mpg.de/s/Uw2yfNSbApfPpTk"

# Bulk injections of HeLa cell lysate acquired on the Orbitrap Astral
raw_data_url_list = [
    "https://datashare.biochem.mpg.de/s/339jg5HtGrwLwDN/download?files=20231017_OA2_TiHe_ADIAMA_HeLa_200ng_Evo011_21min_F-40_05.raw",
    "https://datashare.biochem.mpg.de/s/339jg5HtGrwLwDN/download?files=20231017_OA2_TiHe_ADIAMA_HeLa_200ng_Evo011_21min_F-40_06.raw",
    "https://datashare.biochem.mpg.de/s/339jg5HtGrwLwDN/download?files=20231017_OA2_TiHe_ADIAMA_HeLa_200ng_Evo011_21min_F-40_07.raw",
]

# Fetch the library first, then every raw file, all into `test_folder`;
# `download()` supplies the value stored as the local path.
library_path = DataShareDownloader(library_url, test_folder).download()
raw_data_path_list = [
    DataShareDownloader(raw_data_url, test_folder).download()
    for raw_data_url in raw_data_url_list
]

In [None]:
# Overrides handed to the search step. The meaning of the individual keys is
# defined by alphaDIA's configuration, not in this notebook.
general_settings = {
    "reuse_calibration": True,
    "reuse_quant": False,
    "thread_count": 10,
}
search_settings = {
    "target_num_candidates": 2,
    "target_ms1_tolerance": 4,
    "target_ms2_tolerance": 7,
    "target_rt_tolerance": 200,
}
config = {
    "general": general_settings,
    "search": search_settings,
    "raw_paths": raw_data_path_list,
    "library_path": library_path,
}

# Results are written below the test folder.
output_folder = f"{test_folder}/output"
step = search_step.SearchStep(output_folder, config=config)

In [None]:
# Walk through everything `get_run_data()` yields; once the loop has finished,
# `raw_name`, `dia_path` and `speclib` hold the values of the last entry.
for run_entry in step.get_run_data():
    raw_name, dia_path, speclib = run_entry

In [None]:
# Create the peptide-centric workflow for the selected run using the step's
# configuration, load the run together with the library, then run the search
# parameter optimization.
workflow = peptidecentric.PeptideCentricWorkflow(raw_name, step.config)
workflow.load(dia_path, speclib)
workflow.search_parameter_optimization()

In [None]:
# Call the workflow's extraction step; any return value is discarded here.
workflow.extraction()

In [None]:
# Draw a random subset of at most 1000 precursors to debug with.
# The sample size is capped at the table length: sampling without replacement
# raises when asked for more rows than exist, which would otherwise break
# this cell for libraries with fewer than 1000 precursors.
# NOTE(review): assumes `precursor_df` is a pandas DataFrame - confirm.
precursor_df = workflow.spectral_library.precursor_df
test_df = precursor_df.sample(n=min(1000, len(precursor_df)))

In [None]:
from alphadia.search.selection.config_df import CandidateSelectionConfig
from alphadia.search.selection.selection import CandidateSelection

optimization = workflow.optimization_manager
column_type = optimization.column_type

# Take a freshly constructed selection config, apply the workflow's
# "selection_config" section, then overwrite the tolerances and candidate
# count with the values currently held by the optimization manager.
config = CandidateSelectionConfig()
config.update(workflow.config["selection_config"])
config.update(
    {
        "rt_tolerance": optimization.rt_error,
        "mobility_tolerance": optimization.mobility_error,
        "candidate_count": optimization.num_candidates,
        "precursor_mz_tolerance": optimization.ms1_error,
        "fragment_mz_tolerance": optimization.ms2_error,
        "exclude_shared_ions": workflow.config["search"]["exclude_shared_ions"],
    }
)

# Candidate selection runs on the sampled precursors only (`test_df`).
# The rt and m/z column names follow the optimization manager's column type;
# the mobility column is fixed to "mobility_library".
extraction = CandidateSelection(
    workflow.dia_data,
    test_df,
    workflow.spectral_library.fragment_df,
    config,
    rt_column=f"rt_{column_type}",
    precursor_mz_column=f"mz_{column_type}",
    fragment_mz_column=f"mz_{column_type}",
    mobility_column="mobility_library",
    fwhm_rt=optimization.fwhm_rt,
    fwhm_mobility=optimization.fwhm_mobility,
)

thread_count = workflow.config["general"]["thread_count"]
candidates_df = extraction(thread_count=thread_count)

In [None]:
from alphadia.search.scoring.config import CandidateConfig
from alphadia.search.scoring.scoring import CandidateScoring

optimization = workflow.optimization_manager
column_type = optimization.column_type

# Take a freshly constructed scoring config, apply the workflow's
# "scoring_config" section, then overwrite the m/z tolerances with the values
# currently held by the optimization manager.
config = CandidateConfig()
config.update(workflow.config["scoring_config"])
config.update(
    {
        "precursor_mz_tolerance": optimization.ms1_error,
        "fragment_mz_tolerance": optimization.ms2_error,
        "exclude_shared_ions": workflow.config["search"]["exclude_shared_ions"],
    }
)

# Scoring is given the library's `_precursor_df` / `_fragment_df` attributes;
# all four column names follow the optimization manager's column type.
candidate_scoring = CandidateScoring(
    workflow.dia_data,
    workflow.spectral_library._precursor_df,
    workflow.spectral_library._fragment_df,
    config=config,
    rt_column=f"rt_{column_type}",
    mobility_column=f"mobility_{column_type}",
    precursor_mz_column=f"mz_{column_type}",
    fragment_mz_column=f"mz_{column_type}",
)

# Score the selected candidates with debug=True; the call returns a pair that
# is unpacked into `features_df` and `fragments_df`.
thread_count = workflow.config["general"]["thread_count"]
features_df, fragments_df = candidate_scoring(
    candidates_df,
    thread_count=thread_count,
    debug=True,
)

## Multistep Search

In [None]:
from alphadia.search_plan import SearchPlan

# Minimal configuration for the search plan: only the raw files and the
# library are specified here.
config = dict(
    raw_paths=raw_data_path_list,
    library_path=library_path,
)

# Build the plan against the same output folder and execute it.
plan = SearchPlan(output_folder, config=config)
plan.run_plan()