In [None]:
#default_exp model.model_shop

In [None]:
%reload_ext autoreload
%autoreload 2

In [None]:
#export
import torch
import peptdeep.model.building_block as building_block
from peptdeep.model.model_interface import ModelInterface
from peptdeep.model.featurize import (
    get_ascii_indices, get_batch_mod_feature
)
import pandas as pd
import numpy as np

ASCII_NUM=128

# Scalar Regression Models for a Given Amino Acid Sequence

In [None]:
#export

class ScalarRegression_LSTM_Model_for_AASeq(torch.nn.Module):
    """
    Sequence-only scalar regressor built as one `torch.nn.Sequential`:
    ASCII embedding -> SeqCNN -> dropout -> SeqLSTM -> SeqAttentionSum
    -> dropout -> Linear(hidden_dim, 64) -> GELU -> Linear(64, 1).

    Parameters
    ----------
    hidden_dim : int
        Size passed to the LSTM, the attention-sum layer and the first
        linear layer. The embedding and the CNN are created with
        `hidden_dim//4`.
    n_lstm_layers : int
        Forwarded to `building_block.SeqLSTM` as `rnn_layer`.
    dropout : float
        Probability of the single `torch.nn.Dropout` module, which is
        inserted at two positions of the stack.
    **kwargs
        Accepted and ignored.
    """
    def __init__(self,
        *,
        hidden_dim=256,
        n_lstm_layers=4,
        dropout=0.1,
        **kwargs,
    ):
        super().__init__()
        self.dropout = torch.nn.Dropout(dropout)

        embedding_dim = hidden_dim//4
        # NOTE(review): SeqCNN is built with hidden_dim//4 while the LSTM
        # expects hidden_dim inputs -- presumably SeqCNN widens its input
        # fourfold; confirm in building_block.
        stack = [
            building_block.ascii_embedding(embedding_dim),
            building_block.SeqCNN(embedding_dim),
            self.dropout,
            building_block.SeqLSTM(
                hidden_dim, hidden_dim, rnn_layer=n_lstm_layers
            ),
            building_block.SeqAttentionSum(hidden_dim),
            self.dropout,
            torch.nn.Linear(hidden_dim, 64),
            torch.nn.GELU(),
            torch.nn.Linear(64, 1),
        ]
        self.nn = torch.nn.Sequential(*stack)

    def forward(self, aa_x):
        """
        Run the stack on `aa_x` and squeeze the trailing size-1 axis
        produced by the final `Linear(64, 1)`.
        """
        prediction = self.nn(aa_x)
        return prediction.squeeze(-1)

class ScalarRegression_Transformer_Model_for_AASeq(torch.nn.Module):
    """
    Sequence-only scalar regressor: ASCII embedding -> transformer with
    positional encoder -> SeqAttentionSum -> PReLU -> dropout ->
    Linear(hidden_dim, 1).

    Parameters
    ----------
    hidden_dim : int
        Size used for the embedding, the transformer and the output layer.
    nlayers : int
        Forwarded to the transformer building block.
    output_attentions : bool
        Forwarded to the transformer building block; when true, `forward`
        stores the second element of the transformer output in
        `self.attentions`.
    dropout : float
        Dropout probability, used here and forwarded to the transformer.
    **kwargs
        Accepted and ignored.
    """
    def __init__(self,
        *,
        hidden_dim = 256,
        nlayers = 4,
        output_attentions=False,
        dropout = 0.1,
        **kwargs,
    ):
        """
        Model based on a transformer Architecture from 
        Huggingface's BertEncoder class.
        """
        super().__init__()

        self.dropout = torch.nn.Dropout(dropout)

        # Kept inside a Sequential so parameter names stay `input_nn.0.*`.
        embedding = building_block.ascii_embedding(hidden_dim)
        self.input_nn = torch.nn.Sequential(embedding)

        self.output_attentions = output_attentions

        self.hidden_nn = building_block.HFace_Transformer_with_PositionalEncoder(
            hidden_dim,
            nlayers=nlayers,
            dropout=dropout,
            output_attentions=output_attentions,
        )

        head = [
            building_block.SeqAttentionSum(hidden_dim),
            torch.nn.PReLU(),
            self.dropout,
            torch.nn.Linear(hidden_dim, 1),
        ]
        self.output_nn = torch.nn.Sequential(*head)

    @property
    def output_attentions(self)->bool:
        """Whether `forward` keeps the attentions returned by `hidden_nn`."""
        return self._output_attentions

    @output_attentions.setter
    def output_attentions(self, val:bool):
        # NOTE(review): only this wrapper's flag is updated; whether
        # `hidden_nn` also needs to be told is not visible here -- confirm.
        self._output_attentions = val

    def forward(self, aa_x):
        """
        Embed `aa_x`, run the transformer, pool and project.

        Side effect: sets `self.attentions` to element 1 of the transformer
        output when `output_attentions` is true, otherwise to None.
        """
        embedded = self.dropout(self.input_nn(aa_x))

        encoded = self.hidden_nn(embedded)
        self.attentions = encoded[1] if self.output_attentions else None
        hidden = self.dropout(encoded[0])

        prediction = self.output_nn(hidden)
        return prediction.squeeze(1)

class ScalarRegression_ModelInterface_for_AASeq(ModelInterface):
    """
    `ModelInterface` subclass for scalar regression from the amino-acid
    sequence alone.

    Features come from the `sequence` column, targets from
    `detected_property`, and `predicted_property` is registered as the
    prediction column. The loss is `torch.nn.L1Loss`.
    """
    def __init__(self,
        dropout=0.1,
        model_class:torch.nn.Module=ScalarRegression_LSTM_Model_for_AASeq, #model defined above
        device:str='gpu',
        **kwargs,
    ):
        """
        Parameters
        ----------
        dropout : float
            Passed to `self.build` together with `**kwargs`.
        model_class : torch.nn.Module subclass
            Model class handed to `self.build`.
        device : str
            Forwarded to `ModelInterface.__init__`.
        """
        super().__init__(device=device)
        self.build(model_class, dropout=dropout, **kwargs)
        self.loss_func = torch.nn.L1Loss() # for regression

    def _prepare_predict_data_df(self,
        precursor_df:pd.DataFrame,
    ):
        """
        Register the prediction column and initialise it to 0.

        `precursor_df` is modified in place and kept as `self.predict_df`.
        """
        column = 'predicted_property'
        self._predict_column_in_df = column
        precursor_df[column] = 0.
        self.predict_df = precursor_df

    def _get_features_from_batch_df(self,
        batch_df: pd.DataFrame,
        **kwargs,
    ):
        """Return the ASCII indices of `batch_df['sequence']` as a long tensor."""
        sequences = batch_df['sequence'].values.astype('U')
        ascii_indices = get_ascii_indices(sequences)
        return self._as_tensor(ascii_indices, dtype=torch.long)

    def _get_targets_from_batch_df(self,
        batch_df: pd.DataFrame,
        **kwargs
    ) -> torch.Tensor:
        """Return `batch_df['detected_property']` as a float32 tensor."""
        targets = batch_df['detected_property'].values
        return self._as_tensor(targets, dtype=torch.float32)

#### Building an RT model from sequences only, based on `ScalarRegression_LSTM_Model_for_AASeq`

In [None]:
from peptdeep.model.rt import IRT_PEPTIDE_DF

# Work on a copy so the table imported from peptdeep is not modified.
IRT_PEPTIDE_DF = IRT_PEPTIDE_DF.copy()

# Min-max scale the iRT values to [0, 1] and use them as regression target.
irt = IRT_PEPTIDE_DF['irt']
irt_min, irt_max = irt.min(), irt.max()
IRT_PEPTIDE_DF['detected_property'] = (irt - irt_min)/(irt_max - irt_min)

model = ScalarRegression_ModelInterface_for_AASeq(
    model_class=ScalarRegression_LSTM_Model_for_AASeq,
)
model.train(IRT_PEPTIDE_DF, epoch=20)
model.predict(IRT_PEPTIDE_DF)

Unnamed: 0,sequence,pep_name,irt,mods,mod_sites,nAA,detected_property,predicted_property
0,LGGNEQVTR,RT-pep a,-24.92,,,9,0.0,0.0
1,GAGSSEPVTGLDAK,RT-pep b,0.0,,,14,0.199488,0.203749
2,VEATFGVDESNAK,RT-pep c,12.39,,,13,0.298671,0.254368
3,YILAGVENSK,RT-pep d,19.79,,,10,0.357909,0.398172
4,TPVISGGPYEYR,RT-pep e,28.71,,,12,0.429315,0.508226
5,TPVITGAPYEYR,RT-pep f,33.38,,,12,0.466699,0.472749
6,DGLDAASYYAPVR,RT-pep g,42.26,,,13,0.537784,0.602823
7,ADVTPADFSEWSK,RT-pep h,54.62,,,13,0.636728,0.633547
8,GTFIIDPGGVIR,RT-pep i,70.52,,,12,0.764009,0.941932
9,GTFIIDPAAVIR,RT-pep k,87.23,,,12,0.897775,1.046042


#### Building an RT model from sequences only, based on `ScalarRegression_Transformer_Model_for_AASeq`

In [None]:
from peptdeep.model.rt import IRT_PEPTIDE_DF

# Work on a copy so the table imported from peptdeep is not modified.
IRT_PEPTIDE_DF = IRT_PEPTIDE_DF.copy()

# Min-max scale the iRT values to [0, 1] and use them as regression target.
irt = IRT_PEPTIDE_DF['irt']
irt_min, irt_max = irt.min(), irt.max()
IRT_PEPTIDE_DF['detected_property'] = (irt - irt_min)/(irt_max - irt_min)

model = ScalarRegression_ModelInterface_for_AASeq(
    model_class=ScalarRegression_Transformer_Model_for_AASeq,
)
model.train(IRT_PEPTIDE_DF, epoch=20)
model.predict(IRT_PEPTIDE_DF)

Unnamed: 0,sequence,pep_name,irt,mods,mod_sites,nAA,detected_property,predicted_property
0,LGGNEQVTR,RT-pep a,-24.92,,,9,0.0,0.005624
1,GAGSSEPVTGLDAK,RT-pep b,0.0,,,14,0.199488,0.076221
2,VEATFGVDESNAK,RT-pep c,12.39,,,13,0.298671,0.163718
3,YILAGVENSK,RT-pep d,19.79,,,10,0.357909,0.205459
4,TPVISGGPYEYR,RT-pep e,28.71,,,12,0.429315,0.233966
5,TPVITGAPYEYR,RT-pep f,33.38,,,12,0.466699,0.262998
6,DGLDAASYYAPVR,RT-pep g,42.26,,,13,0.537784,0.323232
7,ADVTPADFSEWSK,RT-pep h,54.62,,,13,0.636728,0.407841
8,GTFIIDPGGVIR,RT-pep i,70.52,,,12,0.764009,0.554401
9,GTFIIDPAAVIR,RT-pep k,87.23,,,12,0.897775,0.605493


# Binary Classification Models for a Given Amino Acid Sequence

In [None]:
#export
class BinaryClassification_LSTM_Model_for_AASeq(
    ScalarRegression_LSTM_Model_for_AASeq
):
    """
    Binary classifier from the amino-acid sequence alone: the parent
    regression network followed by a sigmoid.
    """
    def __init__(self, 
        *,
        hidden_dim=256,
        n_lstm_layers=4,
        dropout=0.1,
        **kwargs,
    ):
        """
        Parameters
        ----------
        hidden_dim, n_lstm_layers, dropout
            Forwarded unchanged to the parent constructor.
        **kwargs
            Also forwarded to the parent constructor, as the other
            BinaryClassification_* models in this file do.
        """
        super().__init__(
            hidden_dim=hidden_dim,
            n_lstm_layers=n_lstm_layers,
            dropout=dropout,
            **kwargs,
        )

    def forward(self, aa_x):
        """Return the sigmoid of the parent's output for `aa_x`."""
        x = super().forward(aa_x)
        return torch.sigmoid(x)

class BinaryClassification_Transformer_Model_for_AASeq(
    ScalarRegression_Transformer_Model_for_AASeq
):
    """
    Binary classifier from the amino-acid sequence alone: the parent
    transformer regression network followed by a sigmoid.
    """
    def __init__(self,
        *,
        hidden_dim = 256,
        nlayers = 4,
        output_attentions=False,
        dropout = 0.1,
        **kwargs,
    ):
        """
        Model based on a transformer Architecture from 
        Huggingface's BertEncoder class.

        All arguments, including `**kwargs`, are forwarded to the parent
        constructor.
        """
        super().__init__(
            hidden_dim=hidden_dim,
            nlayers=nlayers,
            output_attentions=output_attentions,
            dropout=dropout,
            **kwargs,
        )

    def forward(self, aa_x):
        """Return the sigmoid of the parent's output for `aa_x`."""
        return torch.sigmoid(super().forward(aa_x))

class BinaryClassification_ModelInterface_for_AASeq(ModelInterface):
    """
    `ModelInterface` subclass for binary classification from the
    amino-acid sequence alone.

    Features come from the `sequence` column, targets from `detected_prob`,
    and `predicted_prob` is registered as the prediction column. The loss
    is `torch.nn.BCELoss`.
    """
    def __init__(self,
        dropout=0.1,
        model_class:torch.nn.Module=BinaryClassification_LSTM_Model_for_AASeq, #model defined above
        device:str='gpu',
        **kwargs,
    ):
        """
        Build the classification model.

        Parameters
        ----------
        dropout : float
            Passed to `self.build` together with `**kwargs`.
        model_class : torch.nn.Module subclass
            Model class handed to `self.build`.
        device : str
            Forwarded to `ModelInterface.__init__`.
        """
        super().__init__(device=device)
        self.build(model_class, dropout=dropout, **kwargs)
        self.loss_func = torch.nn.BCELoss() # for binary classification

    def _prepare_predict_data_df(self,
        precursor_df:pd.DataFrame,
    ):
        """
        Register the prediction column and initialise it to 0.

        `precursor_df` is modified in place and kept as `self.predict_df`.
        """
        column = 'predicted_prob'
        self._predict_column_in_df = column
        precursor_df[column] = 0.
        self.predict_df = precursor_df

    def _get_features_from_batch_df(self,
        batch_df: pd.DataFrame,
        **kwargs,
    ):
        """Return the ASCII indices of `batch_df['sequence']` as a long tensor."""
        sequences = batch_df['sequence'].values.astype('U')
        ascii_indices = get_ascii_indices(sequences)
        return self._as_tensor(ascii_indices, dtype=torch.long)

    def _get_targets_from_batch_df(self,
        batch_df: pd.DataFrame,
        **kwargs
    ) -> torch.Tensor:
        """Return `batch_df['detected_prob']` as a float32 tensor."""
        labels = batch_df['detected_prob'].values
        return self._as_tensor(labels, dtype=torch.float32)

#### A sequence classification model using `BinaryClassification_LSTM_Model_for_AASeq`

In [None]:
from peptdeep.model.rt import IRT_PEPTIDE_DF

# `assign` returns a new frame, so the table imported from peptdeep is
# not modified. Toy labels: index labels 0..5 are positives, the rest 0.
IRT_PEPTIDE_DF = IRT_PEPTIDE_DF.assign(detected_prob=0)
IRT_PEPTIDE_DF.loc[:5, 'detected_prob'] = 1

model = BinaryClassification_ModelInterface_for_AASeq(
    model_class=BinaryClassification_LSTM_Model_for_AASeq,
)
model.train(IRT_PEPTIDE_DF, epoch=20)
model.predict(IRT_PEPTIDE_DF)

Unnamed: 0,sequence,pep_name,irt,mods,mod_sites,nAA,detected_prob,predicted_prob
0,LGGNEQVTR,RT-pep a,-24.92,,,9,1,0.9891
1,GAGSSEPVTGLDAK,RT-pep b,0.0,,,14,1,0.98889
2,VEATFGVDESNAK,RT-pep c,12.39,,,13,1,0.989772
3,YILAGVENSK,RT-pep d,19.79,,,10,1,0.987212
4,TPVISGGPYEYR,RT-pep e,28.71,,,12,1,0.987834
5,TPVITGAPYEYR,RT-pep f,33.38,,,12,1,0.983028
6,DGLDAASYYAPVR,RT-pep g,42.26,,,13,0,0.38874
7,ADVTPADFSEWSK,RT-pep h,54.62,,,13,0,0.386249
8,GTFIIDPGGVIR,RT-pep i,70.52,,,12,0,0.395431
9,GTFIIDPAAVIR,RT-pep k,87.23,,,12,0,0.38482


#### A sequence classification model using `BinaryClassification_Transformer_Model_for_AASeq`

In [None]:
from peptdeep.model.rt import IRT_PEPTIDE_DF

# `assign` returns a new frame, so the table imported from peptdeep is
# not modified. Toy labels: index labels 0..5 are positives, the rest 0.
IRT_PEPTIDE_DF = IRT_PEPTIDE_DF.assign(detected_prob=0)
IRT_PEPTIDE_DF.loc[:5, 'detected_prob'] = 1

model = BinaryClassification_ModelInterface_for_AASeq(
    model_class=BinaryClassification_Transformer_Model_for_AASeq,
)
model.train(IRT_PEPTIDE_DF, epoch=10)
model.predict(IRT_PEPTIDE_DF)

Unnamed: 0,sequence,pep_name,irt,mods,mod_sites,nAA,detected_prob,predicted_prob
0,LGGNEQVTR,RT-pep a,-24.92,,,9,1,0.994008
1,GAGSSEPVTGLDAK,RT-pep b,0.0,,,14,1,0.987759
2,VEATFGVDESNAK,RT-pep c,12.39,,,13,1,0.947251
3,YILAGVENSK,RT-pep d,19.79,,,10,1,0.993353
4,TPVISGGPYEYR,RT-pep e,28.71,,,12,1,0.988444
5,TPVITGAPYEYR,RT-pep f,33.38,,,12,1,0.9823
6,DGLDAASYYAPVR,RT-pep g,42.26,,,13,0,0.050207
7,ADVTPADFSEWSK,RT-pep h,54.62,,,13,0,0.03554
8,GTFIIDPGGVIR,RT-pep i,70.52,,,12,0,0.013822
9,GTFIIDPAAVIR,RT-pep k,87.23,,,12,0,0.010823


# Scalar Regression Models for Given Amino Acid Sequence and Site-specific PTMs

In [None]:
#export
class ScalarRegression_LSTM_Model_for_ModAASeq(torch.nn.Module):
    """
    Scalar regressor for sequences with modifications: the
    `Encoder_AsciiAA_Mod_CNN_LSTM_AttnSum` building block followed by
    dropout -> Linear(hidden_dim, 64) -> GELU -> Linear(64, 1).

    Parameters
    ----------
    hidden_dim : int
        Size passed to the encoder and to the first linear layer.
    n_lstm_layers : int
        Forwarded to the encoder.
    dropout : float
        Probability of the dropout layer in front of the output head.
    **kwargs
        Accepted and ignored.
    """
    def __init__(self,
        *,
        hidden_dim=256,
        n_lstm_layers=4,
        dropout=0.1,
        **kwargs,
    ):
        super().__init__()
        self.dropout = torch.nn.Dropout(dropout)

        self.encoder_nn = building_block.Encoder_AsciiAA_Mod_CNN_LSTM_AttnSum(
            hidden_dim, n_lstm_layers=n_lstm_layers,
        )

        head = [
            self.dropout,
            torch.nn.Linear(hidden_dim, 64),
            torch.nn.GELU(),
            torch.nn.Linear(64, 1),
        ]
        self.output_nn = torch.nn.Sequential(*head)

    def forward(self, aa_x, mod_x):
        """
        Encode `(aa_x, mod_x)`, apply the output head and squeeze the
        trailing size-1 axis produced by the final `Linear(64, 1)`.
        """
        encoded = self.encoder_nn(aa_x, mod_x)
        prediction = self.output_nn(encoded)
        return prediction.squeeze(-1)

class ScalarRegression_Transformer_Model_for_ModAASeq(torch.nn.Module):
    """
    Scalar regressor for sequences with modifications: `AA_Mod_Embedding`
    -> transformer with positional encoder -> SeqAttentionSum -> PReLU
    -> dropout -> Linear(hidden_dim, 1).

    Parameters
    ----------
    hidden_dim : int
        Size used for the embedding, the transformer and the output layer.
    nlayers : int
        Forwarded to the transformer building block.
    output_attentions : bool
        Forwarded to the transformer building block; when true, `forward`
        stores the second element of the transformer output in
        `self.attentions`.
    dropout : float
        Dropout probability, used here and forwarded to the transformer.
    **kwargs
        Accepted and ignored.
    """
    def __init__(self,
        *,
        hidden_dim = 256,
        nlayers = 4,
        output_attentions=False,
        dropout = 0.1,
        **kwargs,
    ):
        """
        Model based on a transformer Architecture from 
        Huggingface's BertEncoder class.
        """
        super().__init__()

        self.dropout = torch.nn.Dropout(dropout)

        self.input_nn = building_block.AA_Mod_Embedding(hidden_dim)

        self._output_attentions = output_attentions

        self.hidden_nn = building_block.HFace_Transformer_with_PositionalEncoder(
            hidden_dim,
            nlayers=nlayers,
            dropout=dropout,
            output_attentions=output_attentions,
        )

        head = [
            building_block.SeqAttentionSum(hidden_dim),
            torch.nn.PReLU(),
            self.dropout,
            torch.nn.Linear(hidden_dim, 1),
        ]
        self.output_nn = torch.nn.Sequential(*head)

    @property
    def output_attentions(self)->bool:
        """Whether `forward` keeps the attentions returned by `hidden_nn`."""
        return self._output_attentions

    @output_attentions.setter
    def output_attentions(self, val:bool):
        # NOTE(review): only this wrapper's flag is updated; whether
        # `hidden_nn` also needs to be told is not visible here -- confirm.
        self._output_attentions = val

    def forward(self,
        aa_indices,
        mod_x,
    ):
        """
        Embed `(aa_indices, mod_x)`, run the transformer, add a scaled
        copy of the embedding back in, then pool and project.

        Side effect: sets `self.attentions` to element 1 of the transformer
        output when `output_attentions` is true, otherwise to None.
        """
        embedded = self.dropout(self.input_nn(aa_indices, mod_x))

        encoded = self.hidden_nn(embedded)
        self.attentions = encoded[1] if self.output_attentions else None

        # Residual connection: the embedding is re-added with weight 0.2.
        mixed = encoded[0]+embedded*0.2
        hidden = self.dropout(mixed)

        prediction = self.output_nn(hidden)
        return prediction.squeeze(1)

class ScalarRegression_ModelInterface_for_ModAASeq(ModelInterface):
    """
    `ModelInterface` subclass for scalar regression from the amino-acid
    sequence plus its modification features.

    Features are the ASCII indices of the `sequence` column together with
    the output of `get_batch_mod_feature`; targets come from
    `detected_property`, and `predicted_property` is registered as the
    prediction column. The loss is `torch.nn.L1Loss`.
    """
    def __init__(self,
        dropout=0.1,
        model_class:torch.nn.Module=ScalarRegression_LSTM_Model_for_ModAASeq, #model defined above
        device:str='gpu',
        **kwargs,
    ):
        """
        Parameters
        ----------
        dropout : float
            Passed to `self.build` together with `**kwargs`.
        model_class : torch.nn.Module subclass
            Model class handed to `self.build`.
        device : str
            Forwarded to `ModelInterface.__init__`.
        """
        super().__init__(device=device)
        self.build(model_class, dropout=dropout, **kwargs)
        self.loss_func = torch.nn.L1Loss() # for regression

    def _prepare_predict_data_df(self,
        precursor_df:pd.DataFrame,
    ):
        """
        Register the prediction column and initialise it to 0.

        `precursor_df` is modified in place and kept as `self.predict_df`.
        """
        column = 'predicted_property'
        self._predict_column_in_df = column
        precursor_df[column] = 0.
        self.predict_df = precursor_df

    def _get_features_from_batch_df(self,
        batch_df: pd.DataFrame,
        **kwargs,
    ):
        """
        Return `(aa_indices, mod_x)`: the ASCII indices of the sequences
        as a long tensor and the modification features of the batch.
        """
        sequences = batch_df['sequence'].values.astype('U')
        aa_indices = self._as_tensor(
            get_ascii_indices(sequences), dtype=torch.long
        )

        mod_features = get_batch_mod_feature(batch_df)
        mod_x = self._as_tensor(mod_features)

        return aa_indices, mod_x

    def _get_targets_from_batch_df(self,
        batch_df: pd.DataFrame,
        **kwargs
    ) -> torch.Tensor:
        """Return `batch_df['detected_property']` as a float32 tensor."""
        targets = batch_df['detected_property'].values
        return self._as_tensor(targets, dtype=torch.float32)

#### Scalar regression model (RT) with modified AA sequences using `ScalarRegression_LSTM_Model_for_ModAASeq`

In [None]:
from peptdeep.model.rt import IRT_PEPTIDE_DF

# Work on a copy so the table imported from peptdeep is not modified.
IRT_PEPTIDE_DF = IRT_PEPTIDE_DF.copy()

# Min-max scale the iRT values to [0, 1] and use them as regression target.
irt = IRT_PEPTIDE_DF['irt']
irt_min, irt_max = irt.min(), irt.max()
IRT_PEPTIDE_DF['detected_property'] = (irt - irt_min)/(irt_max - irt_min)

model = ScalarRegression_ModelInterface_for_ModAASeq(
    model_class=ScalarRegression_LSTM_Model_for_ModAASeq,
)
model.train(IRT_PEPTIDE_DF, epoch=20)
model.predict(IRT_PEPTIDE_DF)

Unnamed: 0,sequence,pep_name,irt,mods,mod_sites,nAA,detected_property,predicted_property
0,LGGNEQVTR,RT-pep a,-24.92,,,9,0.0,0.050365
1,GAGSSEPVTGLDAK,RT-pep b,0.0,,,14,0.199488,0.201225
2,VEATFGVDESNAK,RT-pep c,12.39,,,13,0.298671,0.198412
3,YILAGVENSK,RT-pep d,19.79,,,10,0.357909,0.311826
4,TPVISGGPYEYR,RT-pep e,28.71,,,12,0.429315,0.323035
5,TPVITGAPYEYR,RT-pep f,33.38,,,12,0.466699,0.358762
6,DGLDAASYYAPVR,RT-pep g,42.26,,,13,0.537784,0.512773
7,ADVTPADFSEWSK,RT-pep h,54.62,,,13,0.636728,0.683389
8,GTFIIDPGGVIR,RT-pep i,70.52,,,12,0.764009,0.685461
9,GTFIIDPAAVIR,RT-pep k,87.23,,,12,0.897775,0.898756


#### Scalar regression model (RT) with modified AA sequences using `ScalarRegression_Transformer_Model_for_ModAASeq`

In [None]:
from peptdeep.model.rt import IRT_PEPTIDE_DF

# Work on a copy so the table imported from peptdeep is not modified.
IRT_PEPTIDE_DF = IRT_PEPTIDE_DF.copy()

# Min-max scale the iRT values to [0, 1] and use them as regression target.
irt = IRT_PEPTIDE_DF['irt']
irt_min, irt_max = irt.min(), irt.max()
IRT_PEPTIDE_DF['detected_property'] = (irt - irt_min)/(irt_max - irt_min)

model = ScalarRegression_ModelInterface_for_ModAASeq(
    model_class=ScalarRegression_Transformer_Model_for_ModAASeq,
)
model.train(IRT_PEPTIDE_DF, epoch=10)
model.predict(IRT_PEPTIDE_DF)

Unnamed: 0,sequence,pep_name,irt,mods,mod_sites,nAA,detected_property,predicted_property
0,LGGNEQVTR,RT-pep a,-24.92,,,9,0.0,0.27944
1,GAGSSEPVTGLDAK,RT-pep b,0.0,,,14,0.199488,0.473362
2,VEATFGVDESNAK,RT-pep c,12.39,,,13,0.298671,0.414256
3,YILAGVENSK,RT-pep d,19.79,,,10,0.357909,0.41998
4,TPVISGGPYEYR,RT-pep e,28.71,,,12,0.429315,0.618031
5,TPVITGAPYEYR,RT-pep f,33.38,,,12,0.466699,0.682028
6,DGLDAASYYAPVR,RT-pep g,42.26,,,13,0.537784,0.658774
7,ADVTPADFSEWSK,RT-pep h,54.62,,,13,0.636728,0.615138
8,GTFIIDPGGVIR,RT-pep i,70.52,,,12,0.764009,0.969858
9,GTFIIDPAAVIR,RT-pep k,87.23,,,12,0.897775,0.978745


# Binary Classification Models for Given Amino Acid Sequence and Site-specific PTMs

In [None]:
#export
class BinaryClassification_LSTM_Model_for_ModAASeq(
    ScalarRegression_LSTM_Model_for_ModAASeq
):
    """
    Binary classifier for sequences with modifications: the parent
    regression network followed by a sigmoid.
    """
    def __init__(self,
        *,
        hidden_dim=256,
        n_lstm_layers=4,
        dropout=0.1,
        **kwargs,
    ):
        """All arguments, including `**kwargs`, go to the parent constructor."""
        super().__init__(
            hidden_dim=hidden_dim,
            n_lstm_layers=n_lstm_layers,
            dropout=dropout,
            **kwargs,
        )

    def forward(self, aa_x, mod_x):
        """Return the sigmoid of the parent's output for `(aa_x, mod_x)`."""
        return torch.sigmoid(super().forward(aa_x, mod_x))

class BinaryClassification_Transformer_Model_for_ModAASeq(
    ScalarRegression_Transformer_Model_for_ModAASeq
):
    """
    Binary classifier for sequences with modifications: the parent
    transformer regression network followed by a sigmoid.

    The `output_attentions` property is inherited from the parent class;
    the identical override that used to live here was redundant.
    """
    def __init__(self,
        *,
        hidden_dim = 256,
        nlayers = 4,
        output_attentions=False,
        dropout = 0.1,
        **kwargs,
    ):
        """
        Model based on a transformer Architecture from 
        Huggingface's BertEncoder class.

        All arguments, including `**kwargs`, are forwarded to the parent
        constructor.
        """
        super().__init__(
            nlayers=nlayers,
            hidden_dim=hidden_dim,
            output_attentions=output_attentions,
            dropout=dropout,
            **kwargs
        )

    def forward(self, 
        aa_indices, 
        mod_x,
    ):
        """Return the sigmoid of the parent's output for `(aa_indices, mod_x)`."""
        x = super().forward(aa_indices, mod_x)
        return torch.sigmoid(x)

class BinaryClassification_ModelInterface_for_ModAASeq(ModelInterface):
    """
    `ModelInterface` subclass for binary classification from the
    amino-acid sequence plus its modification features.

    Features are the ASCII indices of the `sequence` column together with
    the output of `get_batch_mod_feature`; targets come from
    `detected_prob`, and `predicted_prob` is registered as the prediction
    column. The loss is `torch.nn.BCELoss`.
    """
    def __init__(self, 
        dropout=0.1,
        model_class:torch.nn.Module=BinaryClassification_LSTM_Model_for_ModAASeq, #model defined above
        device:str='gpu',
        **kwargs,
    ):
        """
        Parameters
        ----------
        dropout : float
            Passed to `self.build` together with `**kwargs`.
        model_class : torch.nn.Module subclass
            Model class handed to `self.build`.
        device : str
            Forwarded to `ModelInterface.__init__`.
        """
        super().__init__(device=device)
        self.build(
            model_class,
            dropout=dropout,
            **kwargs
        )
        self.loss_func = torch.nn.BCELoss() # for binary classification

    def _prepare_predict_data_df(self,
        precursor_df:pd.DataFrame,
    ):
        """
        Register the prediction column and initialise it to 0.

        `precursor_df` is modified in place and kept as `self.predict_df`.
        """
        self._predict_column_in_df = 'predicted_prob'
        precursor_df[self._predict_column_in_df] = 0.
        self.predict_df = precursor_df

    def _get_features_from_batch_df(self, 
        batch_df: pd.DataFrame,
        **kwargs,
    ):
        """
        Return `(aa_indices, mod_x)`: the ASCII indices of the sequences
        as a long tensor and the modification features of the batch.

        `**kwargs` is accepted (and ignored) so the signature matches the
        other model interfaces in this file and extra keyword arguments
        from a caller do not raise `TypeError`.
        """
        aa_indices = self._as_tensor(
            get_ascii_indices(
                batch_df['sequence'].values.astype('U')
            ),
            dtype=torch.long
        )
        
        mod_x = self._as_tensor(
            get_batch_mod_feature(
                batch_df
            )
        )

        return aa_indices, mod_x

    def _get_targets_from_batch_df(self, 
        batch_df: pd.DataFrame,
        **kwargs
    ) -> torch.Tensor:
        """Return `batch_df['detected_prob']` as a float32 tensor."""
        return self._as_tensor(
            batch_df['detected_prob'].values, 
            dtype=torch.float32
        )

#### Binary classification model with modified AA sequences using `BinaryClassification_LSTM_Model_for_ModAASeq`

In [None]:
from peptdeep.model.rt import IRT_PEPTIDE_DF

# `assign` returns a new frame, so the table imported from peptdeep is
# not modified. Toy labels: index labels 0..5 are positives, the rest 0.
IRT_PEPTIDE_DF = IRT_PEPTIDE_DF.assign(detected_prob=0)
IRT_PEPTIDE_DF.loc[:5, 'detected_prob'] = 1

model = BinaryClassification_ModelInterface_for_ModAASeq(
    model_class=BinaryClassification_LSTM_Model_for_ModAASeq,
)
model.train(IRT_PEPTIDE_DF, epoch=20)
model.predict(IRT_PEPTIDE_DF)

Unnamed: 0,sequence,pep_name,irt,mods,mod_sites,nAA,detected_prob,predicted_prob
0,LGGNEQVTR,RT-pep a,-24.92,,,9,1,0.986498
1,GAGSSEPVTGLDAK,RT-pep b,0.0,,,14,1,0.984277
2,VEATFGVDESNAK,RT-pep c,12.39,,,13,1,0.987346
3,YILAGVENSK,RT-pep d,19.79,,,10,1,0.985844
4,TPVISGGPYEYR,RT-pep e,28.71,,,12,1,0.986121
5,TPVITGAPYEYR,RT-pep f,33.38,,,12,1,0.985664
6,DGLDAASYYAPVR,RT-pep g,42.26,,,13,0,0.354859
7,ADVTPADFSEWSK,RT-pep h,54.62,,,13,0,0.367485
8,GTFIIDPGGVIR,RT-pep i,70.52,,,12,0,0.361229
9,GTFIIDPAAVIR,RT-pep k,87.23,,,12,0,0.358054


#### Binary classification model with modified AA sequences using `BinaryClassification_Transformer_Model_for_ModAASeq`

In [None]:
from peptdeep.model.rt import IRT_PEPTIDE_DF

# `assign` returns a new frame, so the table imported from peptdeep is
# not modified. Toy labels: index labels 0..5 are positives, the rest 0.
IRT_PEPTIDE_DF = IRT_PEPTIDE_DF.assign(detected_prob=0)
IRT_PEPTIDE_DF.loc[:5, 'detected_prob'] = 1

model = BinaryClassification_ModelInterface_for_ModAASeq(
    model_class=BinaryClassification_Transformer_Model_for_ModAASeq,
)
model.train(IRT_PEPTIDE_DF, epoch=10)
model.predict(IRT_PEPTIDE_DF)

Unnamed: 0,sequence,pep_name,irt,mods,mod_sites,nAA,detected_prob,predicted_prob
0,LGGNEQVTR,RT-pep a,-24.92,,,9,1,0.994057
1,GAGSSEPVTGLDAK,RT-pep b,0.0,,,14,1,0.993531
2,VEATFGVDESNAK,RT-pep c,12.39,,,13,1,0.991094
3,YILAGVENSK,RT-pep d,19.79,,,10,1,0.994169
4,TPVISGGPYEYR,RT-pep e,28.71,,,12,1,0.992342
5,TPVITGAPYEYR,RT-pep f,33.38,,,12,1,0.992439
6,DGLDAASYYAPVR,RT-pep g,42.26,,,13,0,0.007014
7,ADVTPADFSEWSK,RT-pep h,54.62,,,13,0,0.021107
8,GTFIIDPGGVIR,RT-pep i,70.52,,,12,0,0.007093
9,GTFIIDPAAVIR,RT-pep k,87.23,,,12,0,0.006821
