In [None]:
#| default_exp mass_spec.mass_calibration

# Mass calibration

In [None]:
#| export
from sklearn.neighbors import KNeighborsRegressor
import pandas as pd
import numpy as np

In [None]:
#| export

def get_fragment_median(start_end_idxes:tuple, frag_df:pd.DataFrame):
    """Median of all non-NaN values in a row range of `frag_df`.

    Args:
        start_end_idxes: A pair `(start_idx, end_idx)` selecting rows
            `start_idx:end_idx` of `frag_df` (end exclusive). Any iterable
            of two numbers works, e.g. a row handed over by
            `DataFrame.apply(axis=1)`. Float bounds are truncated to int.
        frag_df: Table whose values are pooled over all columns of the
            selected rows.

    Returns:
        The median of the non-NaN values in the selected rows, or `0.0`
        when the range is empty or contains only NaN.
    """
    start_idx, end_idx = start_end_idxes
    # Rows coming from `apply(axis=1)` are floats whenever the frame also
    # holds float columns; numpy refuses float slice bounds.
    window = frag_df.values[int(start_idx):int(end_idx)]
    # Dropping NaNs explicitly avoids the RuntimeWarning that np.nanmedian
    # emits for empty / all-NaN input.
    observed = window[~np.isnan(window)]
    if observed.size == 0:
        return 0.0
    return np.median(observed)

def calibrate_one(start_end_shift, frag_df):
    """Subtract a mass shift from a row range of `frag_df`, in place.

    Args:
        start_end_shift: A triple `(start_idx, end_idx, mass_shift)`. The
            indices select rows `start_idx:end_idx` (end exclusive) and are
            truncated to int, because rows handed over by
            `DataFrame.apply(axis=1)` arrive as floats.
        frag_df: Table that is modified in place; every column of the
            selected rows is reduced by `mass_shift`.

    Returns:
        None.
    """
    start_idx, end_idx, mass_shift = start_end_shift
    # Write through `.iloc` rather than `frag_df.values`: `.values` is only
    # a writable view for single-dtype frames without Copy-on-Write. For
    # mixed dtypes the shift would be applied to a temporary copy and lost,
    # and under Copy-on-Write the array is read-only.
    frag_df.iloc[int(start_idx):int(end_idx)] -= mass_shift

class MassCalibratorForRT_KNN:
    """Fragment mass calibration as a function of `rt`, using kNN regression.

    `fit` learns the per-PSM median fragment mass error as a function of
    the `rt` column; `calibrate` predicts that error for each PSM and
    subtracts it from the PSM's fragment rows.
    """
    def __init__(self, n_neighbors=5):
        """
        Args:
            n_neighbors: Number of neighbours for the
                `KNeighborsRegressor`. Defaults to 5.
        """
        self._n_neighbors = n_neighbors
        self.model = KNeighborsRegressor(n_neighbors)

    def fit(self, psm_df:pd.DataFrame, mass_error_df:pd.DataFrame):
        """Fit the regressor on `rt` -> median fragment mass error.

        Args:
            psm_df: Must provide the columns `rt`, `frag_start_idx` and
                `frag_end_idx`; the two index columns select each PSM's
                rows in `mass_error_df`.
            mass_error_df: Fragment mass errors. Infinite entries are
                ignored when the medians are computed; the frame passed
                in is not modified by this method.
        """
        # Mask both infinities, not only +inf, so neither can skew a median.
        mass_error_df = mass_error_df.replace([np.inf, -np.inf], np.nan)
        # A kNN query cannot ask for more neighbours than there are training
        # samples; clamp so that small PSM tables still predict.
        self.model.set_params(
            n_neighbors=max(1, min(self._n_neighbors, len(psm_df)))
        )
        median_merrs = psm_df[['frag_start_idx','frag_end_idx']].apply(
            get_fragment_median, axis=1, frag_df=mass_error_df
        ).values
        self.model.fit(
            psm_df.rt.values.reshape((-1,1)),
            median_merrs.reshape(-1,1)
        )

    def calibrate(self, 
        psm_df:pd.DataFrame, mass_error_df:pd.DataFrame
    )->pd.DataFrame:
        """Subtract the predicted mass shift from each PSM's fragment rows.

        Args:
            psm_df: Must provide the columns `rt`, `frag_start_idx` and
                `frag_end_idx`. A `frag_mass_shift` column holding the
                predicted shift is added to (or overwritten in) this frame.
            mass_error_df: Fragment mass errors; handed to `calibrate_one`
                for every PSM, which applies the shift to this same object.

        Returns:
            The `mass_error_df` object that was passed in.
        """
        psm_df['frag_mass_shift'] = self.model.predict(
            psm_df.rt.values.reshape((-1,1))
        ).reshape(-1)
        # `apply` is used purely for its side effect on `mass_error_df`;
        # its result carries no information and is discarded.
        psm_df[['frag_start_idx','frag_end_idx','frag_mass_shift']].apply(
            calibrate_one, axis=1, frag_df=mass_error_df
        )
        return mass_error_df

In [None]:
# Toy example: five PSMs at distinct retention times, each owning the
# fragment rows frag_start_idx:frag_end_idx (end exclusive).
psm_df = pd.DataFrame({
    'rt': [1.0,2.0,3.0,4.0,5.0],
    'frag_start_idx': [0,1,3,4,6],
    'frag_end_idx': [1,3,4,6,8],
})
# The np.inf entry checks that infinite errors are left out of the median.
frag_df = pd.DataFrame({
    'b': [1.0,2,3,4,5,6,7,8],
    'y':[0.2,np.inf,2,2,4,5,8,9],
})
# With a single neighbour each PSM is predicted from itself, so the shift
# equals that PSM's own median error: 0.6, 2, 3, 5 and 8.
calibrator = MassCalibratorForRT_KNN(1)
calibrator.fit(psm_df, frag_df)
frag_df = calibrator.calibrate(psm_df, frag_df)

# Pin the result so a test run of the notebook catches regressions.
np.testing.assert_allclose(
    frag_df.values,
    np.array([
        [ 0.4, -0.4],
        [ 0.0, np.inf],
        [ 1.0,  0.0],
        [ 1.0, -1.0],
        [ 0.0, -1.0],
        [ 1.0,  0.0],
        [-1.0,  0.0],
        [ 0.0,  1.0],
    ]),
)
frag_df

Unnamed: 0,b,y
0,0.4,-0.4
1,0.0,inf
2,1.0,0.0
3,1.0,-1.0
4,0.0,-1.0
5,1.0,0.0
6,-1.0,0.0
7,0.0,1.0
