In [2]:
# Development convenience: (re)load IPython's autoreload extension and use
# mode 2, so edited project modules are re-imported before each cell runs.
%reload_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

from alphabase.spectral_library.base import SpecLibBase
from alphadia import data, planning
from alphadia.workflow import manager, peptidecentric



In [5]:
# Root folder that holds the raw data, spectral libraries and outputs used by
# this notebook. Set the ALPHADIA_PERFORMANCE_TEST_FOLDER environment variable
# to run on another machine; the original developer path stays as the fallback
# so existing setups keep working unchanged.
performance_test_folder = os.environ.get(
    "ALPHADIA_PERFORMANCE_TEST_FOLDER",
    "/Users/georgwallmann/Documents/data/performance_tests",
)

# Selects which data set is analysed; must be a key of _MODE_LAYOUT below.
MODE = "astral"

# Per-mode file layout, given relative to performance_test_folder.
_MODE_LAYOUT = {
    "astral": {
        "raw_file": "raw_data/astral_lf_dia/20230815_OA1_SoSt_SA_Whisper40_ADIAMA_HeLa_5ng_8Th14ms_FAIMS-40_1900V_noLoopCount_01.raw",
        "output": "outputs/astral_lf_dia",
        "speclib": "libraries/astral/48_fraction_hela_PaSk_orbitrap_ms2.hdf",
    },
    "timstof": {
        "raw_file": "raw_data/timstof_lf_diap/20230502_TIMS05_PaSk_SA_HeLa_21min_diaP_12scans_S2-A3_1_2089.d",
        "output": "outputs/timstof_lf_diaPASEF",
        "speclib": "libraries/timstof/21min_Evosep_HeLa_BR14_48fractions_diaPASEF_py_diAID_2_egs.hdf",
    },
}

# Fail early on a typo in MODE instead of continuing with undefined paths.
if MODE not in _MODE_LAYOUT:
    raise ValueError("Unknown mode")

_layout = _MODE_LAYOUT[MODE]

# raw_files is a list (with a single entry here) because the plan created
# further down receives it as its collection of raw files.
raw_files = [os.path.join(performance_test_folder, _layout["raw_file"])]
output_location = os.path.join(performance_test_folder, _layout["output"])
speclib = os.path.join(performance_test_folder, _layout["speclib"])

In [6]:
# Mass tolerances are tighter for the "astral" mode than for any other mode.
if MODE == "astral":
    ms1_tolerance, ms2_tolerance = 3, 5
else:
    ms1_tolerance, ms2_tolerance = 15, 15

# Overrides for the "general" section: reuse results of earlier runs and
# fix the number of worker threads.
general_overrides = {
    "reuse_calibration": True,
    "reuse_quant": True,
    "thread_count": 10,
    "astral_ms1": False,
}

# Overrides for the "search" section, including the mode-dependent tolerances.
search_overrides = {
    "target_num_candidates": 5,
    "target_ms1_tolerance": ms1_tolerance,
    "target_ms2_tolerance": ms2_tolerance,
    "target_rt_tolerance": 120,
}

# Nested dict that is applied on top of the default configuration
# (the log reports "Applying config update from dict").
config_update = {
    "general": general_overrides,
    "search_initial": {"initial_num_candidates": 2},
    "search": search_overrides,
    "fdr": {"library_grouping": True},
}

plan = planning.Plan(
    output_location,
    raw_files,
    speclib,
    config_update=config_update,
)

0:00:00.000299 [32;20mPROGRESS:       _   _      _         ___ ___   _   [0m
0:00:00.000810 [32;20mPROGRESS:      /_\ | |_ __| |_  __ _|   \_ _| /_\  [0m
0:00:00.001200 [32;20mPROGRESS:     / _ \| | '_ \ ' \/ _` | |) | | / _ \ [0m
0:00:00.001430 [32;20mPROGRESS:    /_/ \_\_| .__/_||_\__,_|___/___/_/ \_\[0m
0:00:00.001851 [32;20mPROGRESS:            |_|                            [0m
0:00:00.002328 [32;20mPROGRESS: [0m
0:00:00.002817 INFO: loading default config from /Users/georgwallmann/Documents/git/alphadia/alphadia/../misc/config/default.yaml
0:00:00.010220 INFO: Applying config update from dict
0:00:00.010736 [32;20mPROGRESS: version: 1.3.2[0m
0:00:00.010906 [32;20mPROGRESS: hostname: Georgs-MacBook-Pro.local[0m
0:00:00.011244 [32;20mPROGRESS: date: 2023-11-14 16:30:39[0m
0:00:00.011553 INFO: Running DynamicLoader
0:00:01.958157 INFO: Running PrecursorInitializer
0:00:01.959985 INFO: Running AnnotateFasta
0:00:01.960559 INFO: Dropping decoys from input library bef

In [7]:
# Walk through every run the plan provides without processing it here; the
# only purpose is to leave raw_name, dia_path and speclib bound to the values
# of the last run (a single raw file is configured above).
# Note: this rebinds `speclib`, which previously held the library file path.
for run_entry in plan.get_run_data():
    raw_name, dia_path, speclib = run_entry

0:00:13.456621 [32;20mPROGRESS: Loading raw file 1/1: 20230815_OA1_SoSt_SA_Whisper40_ADIAMA_HeLa_5ng_8Th14ms_FAIMS-40_1900V_noLoopCount_01[0m


In [9]:
# Create the peptide-centric workflow for the run selected above, attach the
# DIA data and spectral library to it, then run its calibration step.
workflow = peptidecentric.PeptideCentricWorkflow(raw_name, plan.config)
workflow.load(dia_path, speclib)
workflow.calibration()

11it [00:15,  1.43s/it]


None True


0:01:34.889078 INFO: Loaded CalibrationManager from /Users/georgwallmann/Documents/data/performance_tests/outputs/astral_lf_dia/.progress/20230815_OA1_SoSt_SA_Whisper40_ADIAMA_HeLa_5ng_8Th14ms_FAIMS-40_1900V_noLoopCount_01/calibration_manager.pkl
0:01:34.889792 INFO: Initializing CalibrationManager
0:01:34.890751 INFO: Disabling ion mobility calibration
0:01:34.891526 INFO: Loaded OptimizationManager from /Users/georgwallmann/Documents/data/performance_tests/outputs/astral_lf_dia/.progress/20230815_OA1_SoSt_SA_Whisper40_ADIAMA_HeLa_5ng_8Th14ms_FAIMS-40_1900V_noLoopCount_01/optimization_manager.pkl
0:01:34.892314 INFO: Initializing OptimizationManager
0:01:34.893053 [32;20mPROGRESS: Initializing workflow 20230815_OA1_SoSt_SA_Whisper40_ADIAMA_HeLa_5ng_8Th14ms_FAIMS-40_1900V_noLoopCount_01[0m
0:01:34.893844 INFO: Initializing OptimizationManager
0:01:34.894256 INFO: initial parameter: current_epoch = 0
0:01:34.894679 INFO: initial parameter: current_step = 0
0:01:34.895098 INFO: initial

In [10]:
# Work on a random subset of precursors to keep the following cells fast.
N_TEST_PRECURSORS = 1000
# Fixed seed so that re-running the notebook selects the same precursors.
SAMPLE_SEED = 42

precursor_df = workflow.spectral_library.precursor_df
# DataFrame.sample raises when asked for more rows than exist (without
# replacement), so clamp the request to the size of the library.
test_df = precursor_df.sample(
    n=min(N_TEST_PRECURSORS, len(precursor_df)),
    random_state=SAMPLE_SEED,
)

In [12]:
from alphadia import hybridselection

# Start from the default selection config, then layer the workflow's
# "selection_config" section on top of it.
config = hybridselection.HybridCandidateConfig()
config.update(workflow.config["selection_config"])

# Finally apply the values currently held by workflow.com plus the
# shared-ion setting from the "search" section of the workflow config.
selection_overrides = {
    "rt_tolerance": workflow.com.rt_error,
    "mobility_tolerance": workflow.com.mobility_error,
    "candidate_count": workflow.com.num_candidates,
    "precursor_mz_tolerance": workflow.com.ms1_error,
    "fragment_mz_tolerance": workflow.com.ms2_error,
    "exclude_shared_ions": workflow.config["search"]["exclude_shared_ions"],
}
config.update(selection_overrides)

# Suffix shared by all column names handed to the selection below.
column_type = workflow.com.column_type

# Candidate selection runs on the sampled precursors (test_df) only, while
# the fragment table is passed in full.
extraction = hybridselection.HybridCandidateSelection(
    workflow.dia_data.jitclass(),
    test_df,
    workflow.spectral_library.fragment_df,
    config.jitclass(),
    rt_column=f"rt_{column_type}",
    mobility_column=f"mobility_{column_type}",
    precursor_mz_column=f"mz_{column_type}",
    fragment_mz_column=f"mz_{column_type}",
    fwhm_rt=workflow.optimization_manager.fwhm_rt,
    fwhm_mobility=workflow.optimization_manager.fwhm_mobility,
)

n_threads = workflow.config["general"]["thread_count"]
candidates_df = extraction(thread_count=n_threads)

0:03:13.549251 INFO: Duty cycle consists of 76 frames, 1.34 seconds cycle time
0:03:13.549875 INFO: Duty cycle consists of 1 scans, 0.00000 1/K_0 resolution
0:03:13.550268 INFO: FWHM in RT is 3.59 seconds, sigma is 0.57
0:03:13.551165 INFO: FWHM in mobility is 0.000 1/K_0, sigma is 1.00
0:03:14.343229 INFO: Starting candidate selection
100%|██████████| 1000/1000 [00:21<00:00, 46.29it/s]
0:03:37.596133 INFO: Finished candidate selection


In [13]:
from alphadia import plexscoring

# Start from the default scoring config, then layer the workflow's
# "scoring_config" section on top of it.
config = plexscoring.CandidateConfig()
config.update(workflow.config["scoring_config"])

# Apply the mass tolerances currently held by workflow.com and the
# shared-ion setting from the "search" section of the workflow config.
scoring_overrides = {
    "precursor_mz_tolerance": workflow.com.ms1_error,
    "fragment_mz_tolerance": workflow.com.ms2_error,
    "exclude_shared_ions": workflow.config["search"]["exclude_shared_ions"],
}
config.update(scoring_overrides)

# Suffix shared by all column names handed to the scoring below.
column_type = workflow.com.column_type

# Scoring receives the complete precursor and fragment tables of the library.
# They are read through the public precursor_df / fragment_df accessors (the
# same ones the selection cell uses) instead of the private underscore
# attributes.
candidate_scoring = plexscoring.CandidateScoring(
    workflow.dia_data.jitclass(),
    workflow.spectral_library.precursor_df,
    workflow.spectral_library.fragment_df,
    config=config,
    rt_column=f"rt_{column_type}",
    mobility_column=f"mobility_{column_type}",
    precursor_mz_column=f"mz_{column_type}",
    fragment_mz_column=f"mz_{column_type}",
)

# Score the candidates found by the selection step; the call yields one table
# of features and one table of fragments.
n_threads = workflow.config["general"]["thread_count"]
features_df, fragments_df = candidate_scoring(
    candidates_df,
    thread_count=n_threads,
)

0:05:34.568625 INFO: Starting candidate scoring
  numba_func(i, *args)
  numba_func(i, *args)
100%|██████████| 1991/1991 [00:57<00:00, 34.33it/s]
0:06:35.413602 INFO: Finished candidate scoring
