In [None]:
%reload_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
import alphabase.io.hdf
import alphadia.extraction.planning
import alphadia.extraction.calibration
import alphabase.psm_reader
from alphabase.statistics.regression import LOESSRegression
import alphatims.bruker

import alphatims.plotting
import alphatims.bruker
import alphatims.utils
import alphadia.extraction.utils

import os
import logging
import pandas as pd
import numpy as np
import numba as nb

from alphadia.extraction.data import TimsTOFDIA
from alphadia.extraction.calibration import RunCalibration
from alphadia.extraction.planning import (
    Plan
)
from alphadia.extraction.candidateselection import MS1CentricCandidateSelection
from alphadia.extraction.scoring import MS2ExtractionWorkflow
from alphadia.extraction.utils import (
    recalibrate_mz
)
from alphadia.extraction.scoring import fdr_correction
from alphabase.spectral_library.base import SpecLibBase
from alphabase.spectral_library.flat import SpecLibFlat

# Use 'Roboto' as the default matplotlib font family for all figures in this notebook.
# NOTE(review): presumably requires the Roboto font to be installed locally — confirm.
from matplotlib import rcParams
rcParams['font.family'] = 'Roboto'

In [None]:
# Input locations used by the cells below:
#   psm_lib_location - HDF file handed to plan.load_speclib
#   raw_location     - raw acquisition (.d) handed to TimsTOFDIA
#   yaml_file        - YAML file handed to Plan and to RunCalibration.load_yaml
# NOTE(review): the two data paths are absolute paths on the author's machine;
# adjust them before running elsewhere.
psm_lib_location = '/Users/georgwallmann/Documents/data/testing/0_brunner_2022_1ng_extraction/brunner_2022_1ng_rep01.hdf'
raw_location = '/Users/georgwallmann/Documents/data/testing/0_brunner_2022_1ng_extraction/20200827_TIMS04_EVO07_AnBr_1ng_dia_rep01_400s_30min_S1-D1_1_2944.d'
yaml_file = '../../misc/config/default.yaml'

In [None]:
# Construct the Plan from the YAML file, then load the library file into it.
# The calibration loop further down reads plan.speclib.precursor_df and
# plan.speclib.fragment_df, so this cell must run first.
# NOTE(review): the exact effect of dense=True is defined by Plan.load_speclib — confirm there.
plan = Plan(yaml_file)
plan.load_speclib(psm_lib_location, dense=True)


In [None]:
# Create the RunCalibration object and call load_yaml on the same YAML file
# that was used for the Plan. The calibration loop below calls
# calibration.predict / calibration.fit / calibration.get_estimator on it.
# `conf` holds whatever load_yaml returns; it is not read again in the visible cells.
calibration = RunCalibration()
conf = calibration.load_yaml(yaml_file)


In [None]:
# Iterative calibration: repeatedly select candidates, score them, fit the
# precursor and fragment calibration on confident hits, and tighten the search
# tolerances until the MS1 and MS2 tolerances reach their targets.
profiling = []


# Raw acquisition that candidate selection and MS2 extraction read from.
dia_data = TimsTOFDIA(raw_location)

# Tolerances used for the current pass; they are overwritten at the end of
# every iteration (ppm for MS1/MS2, seconds for RT, per the log message below).
initial_ms1_error = 80
initial_ms2_error = 120
initial_rt_error = 30

# Lower bounds the tolerances are clamped to; reaching the MS1 and MS2 targets
# ends the loop.
target_ms1_error = 10
target_ms2_error = 15
target_rt_error = 10


# Mobility tolerance is kept fixed; the re-estimation is commented out below.
mobility_99 = 0.03

# Safety cap. The tolerances are clamped to >= their targets, so the while
# condition never becomes false on its own, and the equality-based break is
# never hit if the fitted 70% estimate stays above a target. With the 0.6
# shrink factor a converging run needs roughly 6-7 passes, so 20 is generous.
max_iterations = 20

iteration = 0

while initial_ms1_error >= target_ms1_error or iteration < 1:
    logging.info(f'Starting Iteration {iteration}, RT error {initial_rt_error:.2f}s,MS1 error {initial_ms1_error:.2f} ppm, MS2 error {initial_ms2_error:.2f} ppm')
    precursors_flat, fragments_flat = plan.speclib.precursor_df, plan.speclib.fragment_df

    # Apply the current calibration to both tables.
    # NOTE(review): presumably this is what provides the '*_calibrated' columns
    # selected from iteration 1 onwards — confirm in RunCalibration.predict.
    calibration.predict(precursors_flat, 'precursor')
    calibration.predict(fragments_flat, 'fragment')


    # First pass: search on the library columns and keep two candidates per
    # precursor. Later passes: search on the calibrated columns, one candidate.
    if iteration == 0:
        column_type = 'library'
        num_candidates = 2
    else:
        column_type = 'calibrated'
        num_candidates = 1

    extraction = MS1CentricCandidateSelection(
        dia_data,
        precursors_flat, 
        rt_column = f'rt_{column_type}',
        mobility_column = f'mobility_{column_type}',
        precursor_mz_column = f'mz_{column_type}',
        rt_tolerance=initial_rt_error,
        mobility_tolerance=mobility_99,
        num_candidates=num_candidates,
        num_isotopes=2,
        mz_tolerance=initial_ms1_error,
    )
    candidates_df = extraction()
    
            
    # Drop candidates whose 'fraction_nonzero' is not strictly positive.
    candidates_filtered = candidates_df[candidates_df['fraction_nonzero'] > 0.0]
    # Note: `extraction` is rebound here from the candidate selection to the
    # MS2 workflow; the name is kept because later cells may refer to it.
    extraction = MS2ExtractionWorkflow(
        dia_data,
        precursors_flat, 
        candidates_filtered,
        fragments_flat,
        coarse_mz_calibration = False,
        rt_column = f'rt_{column_type}',
        mobility_column = f'mobility_{column_type}',
        precursor_mz_column = f'mz_{column_type}',
        fragment_mz_column = f'mz_{column_type}',
        precursor_mass_tolerance=initial_ms1_error,
        fragment_mass_tolerance=initial_ms2_error,
    )
    
    
    features_df = extraction()
    # Copy decoy / charge / nAA from the precursor table, using the feature's
    # 'index' column as a positional index into precursors_flat.
    features_df['decoy'] = precursors_flat['decoy'].values[features_df['index'].values]
    features_df['charge'] = precursors_flat['charge'].values[features_df['index'].values]
    features_df['nAA'] = precursors_flat['nAA'].values[features_df['index'].values]

    
    features_df = fdr_correction(features_df)

    # Only features below the 0.01 q-value cut-off are used for fitting.
    feature_filtered = features_df[features_df['qval'] < 0.01]
    logging.info(f'Found {len(feature_filtered):,} features with qval < 0.01')
    
    calibration.fit(feature_filtered,'precursor', plot=True)
    # First element of the estimator's ci(..., 0.7) result, evaluated on ALL
    # features (not only the q-value filtered ones).
    # NOTE(review): presumably the 70% error interval of the fit — confirm in the estimator.
    m1_70 = calibration.get_estimator('precursor', 'mz').ci(features_df, 0.7)[0]
    rt_70 = calibration.get_estimator('precursor', 'rt').ci(features_df, 0.7)[0]

    fragment_calibration_df = alphadia.extraction.scoring.unpack_fragment_info(feature_filtered)
    # NOTE(review): ascending=True followed by head(10000) keeps the 10,000
    # LOWEST-intensity fragments. If the most intense fragments were intended,
    # this should be ascending=False — left unchanged pending confirmation.
    fragment_calibration_df = fragment_calibration_df.sort_values(by=['intensity'], ascending=True).head(10000)

    calibration.fit(fragment_calibration_df,'fragment', plot=True)
    m2_70 = calibration.get_estimator('fragment', 'mz').ci(fragment_calibration_df, 0.7)[0]

    #rt_99 = calibration.get_estimator('precursor', 'rt').ci(features_df, 0.99)[0]
    #mobility_99 = calibration.get_estimator('precursor', 'mobility').ci(features_df, 0.99)[0]

    # The pass that just finished already ran at the target MS1/MS2 tolerances,
    # so the calibration is final. The RT tolerance is not part of this check.
    if initial_ms1_error == target_ms1_error and initial_ms2_error == target_ms2_error:
        logging.info(f'Ending iteration {iteration}, target_reached')
        break

    # Next tolerance = largest of: fitted 70% estimate, the target floor, and
    # 60% of the current tolerance (limits how fast the window may shrink).
    initial_ms1_error = max(m1_70, target_ms1_error, initial_ms1_error * 0.6)
    initial_ms2_error = max(m2_70, target_ms2_error, initial_ms2_error * 0.6)
    initial_rt_error = max(rt_70, target_rt_error, initial_rt_error * 0.6)

    iteration += 1

    # Stop instead of looping forever when the targets are never reached.
    if iteration >= max_iterations:
        logging.warning(
            f'Stopping after {iteration} iterations without reaching the target tolerances '
            f'(RT error {initial_rt_error:.2f}s, MS1 error {initial_ms1_error:.2f} ppm, '
            f'MS2 error {initial_ms2_error:.2f} ppm)'
        )
        break

  