In [1]:
#default_exp model.model_shop

In [2]:
%reload_ext autoreload
%autoreload 2

In [3]:
#export
import torch
import peptdeep.model.building_block as building_block
from peptdeep.model.model_interface import ModelInterface
from peptdeep.model.featurize import (
    get_ascii_indices, get_batch_mod_feature
)
import pandas as pd
import numpy as np

ASCII_NUM=128

# Scalar Regression Models for a Given Amino Acid Sequence

In [4]:
#export

class ScalarRegression_LSTM_Model_for_AASeq(torch.nn.Module):
    """
    Embedding + CNN + LSTM + attention-sum network that produces one
    scalar per input sequence.

    Parameters
    ----------
    hidden_dim : int, optional
        Size used for the LSTM, the attention-sum layer and the input of
        the first linear layer. The embedding and CNN stages are built
        with ``hidden_dim//4``. Defaults to 256.
    n_lstm_layers : int, optional
        Forwarded to ``building_block.SeqLSTM`` as ``rnn_layer``.
        Defaults to 4.
    dropout : float, optional
        Probability of the single ``torch.nn.Dropout`` module, which is
        inserted at two positions of the stack. Defaults to 0.1.
    **kwargs
        Accepted but not used.
    """
    def __init__(self,
        *,
        hidden_dim=256,
        n_lstm_layers=4,
        dropout=0.1,
        **kwargs,
    ):
        super().__init__()
        self.dropout = torch.nn.Dropout(dropout)

        embed_dim = hidden_dim//4
        # The list order is both the construction order and the order in
        # which `torch.nn.Sequential` applies the layers.
        layers = [
            building_block.ascii_embedding(embed_dim),
            building_block.SeqCNN(embed_dim),
            self.dropout,
            building_block.SeqLSTM(
                hidden_dim, hidden_dim, rnn_layer=n_lstm_layers
            ),
            building_block.SeqAttentionSum(hidden_dim),
            self.dropout,
            torch.nn.Linear(hidden_dim, 64),
            torch.nn.GELU(),
            torch.nn.Linear(64, 1),
        ]
        self.nn = torch.nn.Sequential(*layers)

    def forward(self, aa_x):
        """Apply the layer stack to `aa_x` and squeeze the last dimension of the result."""
        prediction = self.nn(aa_x)
        return prediction.squeeze(-1)

class ScalarRegression_Transformer_Model_for_AASeq(torch.nn.Module):
    """
    Transformer network that produces one scalar per input sequence.

    The encoder is ``building_block.HFace_Transformer_with_PositionalEncoder``
    (noted by the original author as based on Huggingface's BertEncoder).

    Parameters
    ----------
    hidden_dim : int, optional
        Size of the embedding, the transformer and the output head.
        Defaults to 256.
    nlayers : int, optional
        Forwarded to the transformer block. Defaults to 4.
    output_attentions : bool, optional
        Forwarded to the transformer block; when true, `forward` stores
        the second element of the transformer output in
        `self.attentions`. Defaults to False.
    dropout : float, optional
        Probability for the shared ``torch.nn.Dropout`` module; also
        forwarded to the transformer block. Defaults to 0.1.
    **kwargs
        Accepted but not used.
    """
    def __init__(self,
        *,
        hidden_dim = 256,
        nlayers = 4,
        output_attentions=False,
        dropout = 0.1,
        **kwargs,
    ):
        """Build the embedding, the transformer encoder and the output head."""
        super().__init__()

        self.dropout = torch.nn.Dropout(dropout)

        embedding = building_block.ascii_embedding(hidden_dim)
        self.input_nn = torch.nn.Sequential(embedding)

        self._output_attentions = output_attentions

        self.hidden_nn = building_block.HFace_Transformer_with_PositionalEncoder(
            hidden_dim,
            nlayers=nlayers,
            dropout=dropout,
            output_attentions=output_attentions,
        )

        head_layers = (
            building_block.SeqAttentionSum(hidden_dim),
            torch.nn.PReLU(),
            self.dropout,
            torch.nn.Linear(hidden_dim, 1),
        )
        self.output_nn = torch.nn.Sequential(*head_layers)

    @property
    def output_attentions(self):
        """Flag deciding whether `forward` keeps the attentions."""
        return self._output_attentions

    @output_attentions.setter
    def output_attentions(self, val:bool):
        # Keep the transformer block in sync with the local flag.
        self._output_attentions = val
        self.hidden_nn.output_attentions = val

    def forward(self, aa_x):
        """
        Embed `aa_x`, run the transformer and reduce to one value per sequence.

        Side effect: `self.attentions` is set to element 1 of the
        transformer output when `output_attentions` is true, else to None.
        """
        embedded = self.dropout(self.input_nn(aa_x))

        encoded = self.hidden_nn(embedded)
        self.attentions = encoded[1] if self.output_attentions else None

        hidden = self.dropout(encoded[0])
        return self.output_nn(hidden).squeeze(1)

class ScalarRegression_ModelInterface_for_AASeq(ModelInterface):
    """
    `ModelInterface` subclass for scalar regression on amino acid sequences.

    Features are read from the `sequence` column, targets from
    `target_value`, and predictions are written to `target_value_pred`.
    The loss function is ``torch.nn.L1Loss``.
    """
    def __init__(self, 
        dropout=0.1,
        model_class:torch.nn.Module=ScalarRegression_LSTM_Model_for_AASeq, #model defined above
        device:str='gpu',
        **kwargs,
    ):
        """
        Parameters
        ----------
        dropout : float, optional
            Forwarded to `self.build`. Defaults to 0.1.
        model_class : torch.nn.Module, optional
            Model class handed to `self.build`.
        device : str, optional
            Forwarded to ``ModelInterface.__init__``. Defaults to 'gpu'.
        **kwargs
            Extra keyword arguments forwarded to `self.build`.
        """
        super().__init__(device=device)
        self.build(model_class, dropout=dropout, **kwargs)
        # L1 (absolute error) loss for regression
        self.loss_func = torch.nn.L1Loss()

    def _prepare_predict_data_df(self,
        precursor_df:pd.DataFrame,
    ):
        """Add a zero-filled prediction column to `precursor_df` and keep the frame as `self.predict_df`."""
        pred_column = 'target_value_pred'
        self._predict_column_in_df = pred_column
        precursor_df[pred_column] = 0.
        self.predict_df = precursor_df

    def _get_features_from_batch_df(self, 
        batch_df: pd.DataFrame,
        **kwargs,
    ):
        """Return the `sequence` column of `batch_df` as a long tensor of `get_ascii_indices` output."""
        sequences = batch_df['sequence'].values.astype('U')
        ascii_indices = get_ascii_indices(sequences)
        return self._as_tensor(ascii_indices, dtype=torch.long)

    def _get_targets_from_batch_df(self, 
        batch_df: pd.DataFrame,
        **kwargs
    ) -> torch.Tensor:
        """Return the `target_value` column of `batch_df` as a float32 tensor."""
        target_values = batch_df['target_value'].values
        return self._as_tensor(target_values, dtype=torch.float32)

#### Building an RT model from sequences only, based on `ScalarRegression_LSTM_Model_for_AASeq`

In [5]:
from peptdeep.model.rt import IRT_PEPTIDE_DF

# Work on a copy of the imported frame; the columns added below go onto the copy.
IRT_PEPTIDE_DF = IRT_PEPTIDE_DF.copy()

# Min-max scale `irt` to obtain the regression target.
irt_min = IRT_PEPTIDE_DF.irt.min()
irt_max = IRT_PEPTIDE_DF.irt.max()
IRT_PEPTIDE_DF['target_value'] = (IRT_PEPTIDE_DF.irt - irt_min) / (irt_max - irt_min)

model = ScalarRegression_ModelInterface_for_AASeq(
    model_class=ScalarRegression_LSTM_Model_for_AASeq,
)
model.train(IRT_PEPTIDE_DF, epoch=20)
model.predict(IRT_PEPTIDE_DF)

Unnamed: 0,sequence,pep_name,irt,mods,mod_sites,nAA,target_value,target_value_pred
0,LGGNEQVTR,RT-pep a,-24.92,,,9,0.0,0.0
1,GAGSSEPVTGLDAK,RT-pep b,0.0,,,14,0.199488,0.226986
2,VEATFGVDESNAK,RT-pep c,12.39,,,13,0.298671,0.226745
3,YILAGVENSK,RT-pep d,19.79,,,10,0.357909,0.354989
4,TPVISGGPYEYR,RT-pep e,28.71,,,12,0.429315,0.484007
5,TPVITGAPYEYR,RT-pep f,33.38,,,12,0.466699,0.547663
6,DGLDAASYYAPVR,RT-pep g,42.26,,,13,0.537784,0.668599
7,ADVTPADFSEWSK,RT-pep h,54.62,,,13,0.636728,0.710436
8,GTFIIDPGGVIR,RT-pep i,70.52,,,12,0.764009,0.89976
9,GTFIIDPAAVIR,RT-pep k,87.23,,,12,0.897775,1.030823


#### Building an RT model from sequences only, based on `ScalarRegression_Transformer_Model_for_AASeq`

In [6]:
from peptdeep.model.rt import IRT_PEPTIDE_DF

# Work on a copy of the imported frame; the columns added below go onto the copy.
IRT_PEPTIDE_DF = IRT_PEPTIDE_DF.copy()

# Min-max scale `irt` to obtain the regression target.
irt_min = IRT_PEPTIDE_DF.irt.min()
irt_max = IRT_PEPTIDE_DF.irt.max()
IRT_PEPTIDE_DF['target_value'] = (IRT_PEPTIDE_DF.irt - irt_min) / (irt_max - irt_min)

model = ScalarRegression_ModelInterface_for_AASeq(
    model_class=ScalarRegression_Transformer_Model_for_AASeq,
)
model.train(IRT_PEPTIDE_DF, epoch=20)
model.predict(IRT_PEPTIDE_DF)

Unnamed: 0,sequence,pep_name,irt,mods,mod_sites,nAA,target_value,target_value_pred
0,LGGNEQVTR,RT-pep a,-24.92,,,9,0.0,0.284259
1,GAGSSEPVTGLDAK,RT-pep b,0.0,,,14,0.199488,0.570077
2,VEATFGVDESNAK,RT-pep c,12.39,,,13,0.298671,0.600805
3,YILAGVENSK,RT-pep d,19.79,,,10,0.357909,0.545549
4,TPVISGGPYEYR,RT-pep e,28.71,,,12,0.429315,0.625066
5,TPVITGAPYEYR,RT-pep f,33.38,,,12,0.466699,0.642667
6,DGLDAASYYAPVR,RT-pep g,42.26,,,13,0.537784,0.648241
7,ADVTPADFSEWSK,RT-pep h,54.62,,,13,0.636728,0.898216
8,GTFIIDPGGVIR,RT-pep i,70.52,,,12,0.764009,0.907477
9,GTFIIDPAAVIR,RT-pep k,87.23,,,12,0.897775,0.967381


# Binary Classification Models for a Given Amino Acid Sequence

In [7]:
#export
class BinaryClassification_LSTM_Model_for_AASeq(torch.nn.Module):
    """
    Binary classifier that applies a sigmoid to the output of
    `ScalarRegression_LSTM_Model_for_AASeq`.

    Parameters
    ----------
    hidden_dim : int, optional
        Forwarded to the wrapped model. Defaults to 256.
    n_lstm_layers : int, optional
        Forwarded to the wrapped model. Defaults to 4.
    dropout : float, optional
        Forwarded to the wrapped model. Defaults to 0.1.
    **kwargs
        Accepted but not forwarded.
    """
    def __init__(self, 
        *,
        hidden_dim=256,
        n_lstm_layers=4,
        dropout=0.1,
        **kwargs,
    ):
        super().__init__()

        # `input_dim` is not a named parameter of the wrapped model; it is
        # taken up by that model's `**kwargs`.
        regressor = ScalarRegression_LSTM_Model_for_AASeq(
            hidden_dim=hidden_dim,
            input_dim=ASCII_NUM,
            n_lstm_layers=n_lstm_layers,
            dropout=dropout,
        )
        self.nn = regressor

    def forward(self, aa_x):
        """Return the sigmoid of the wrapped model's output for `aa_x`."""
        logits = self.nn(aa_x)
        return torch.sigmoid(logits)

class BinaryClassification_Transformer_Model_for_AASeq(torch.nn.Module):
    """
    Binary classifier that applies a sigmoid to the output of
    `ScalarRegression_Transformer_Model_for_AASeq`.

    Parameters
    ----------
    hidden_dim : int, optional
        Forwarded to the wrapped model. Defaults to 256.
    nlayers : int, optional
        Forwarded to the wrapped model. Defaults to 4.
    output_attentions : bool, optional
        Forwarded to the wrapped model. Defaults to False.
    dropout : float, optional
        Forwarded to the wrapped model. Defaults to 0.1.
    **kwargs
        Extra keyword arguments forwarded to the wrapped model.
    """
    def __init__(self,
        *,
        hidden_dim = 256,
        nlayers = 4,
        output_attentions=False,
        dropout = 0.1,
        **kwargs,
    ):
        """
        Model based on a transformer Architecture from 
        Huggingface's BertEncoder class.
        """
        super().__init__()

        self.nn =  ScalarRegression_Transformer_Model_for_AASeq(
            nlayers=nlayers,
            input_dim=ASCII_NUM,
            hidden_dim=hidden_dim,
            output_attentions=output_attentions,
            dropout=dropout,
            **kwargs,
        )

    @property
    def output_attentions(self):
        """Attention-output flag of the wrapped model."""
        # This wrapper has no `_output_attentions`/`hidden_nn` attributes of
        # its own; the flag and the transformer block live on `self.nn`.
        return self.nn.output_attentions

    @output_attentions.setter
    def output_attentions(self, val:bool):
        # The wrapped model's own setter updates its transformer block.
        self.nn.output_attentions = val

    def forward(self, aa_x):
        """Return the sigmoid of the wrapped model's output for `aa_x`."""
        return torch.sigmoid(self.nn(aa_x))

class BinaryClassification_ModelInterface_for_AASeq(ModelInterface):
    """
    `ModelInterface` subclass for binary classification of amino acid sequences.

    Features are read from the `sequence` column, targets from
    `target_prob`, and predictions are written to `target_prob_pred`.
    The loss function is ``torch.nn.BCELoss``.
    """
    def __init__(self, 
        dropout=0.1,
        model_class:torch.nn.Module=BinaryClassification_LSTM_Model_for_AASeq, #model defined above
        device:str='gpu',
        **kwargs,
    ):
        """
        Parameters
        ----------
        dropout : float, optional
            Forwarded to `self.build`. Defaults to 0.1.
        model_class : torch.nn.Module, optional
            Model class handed to `self.build`.
        device : str, optional
            Forwarded to ``ModelInterface.__init__``. Defaults to 'gpu'.
        **kwargs
            Extra keyword arguments forwarded to `self.build`.
        """
        super().__init__(device=device)
        self.build(model_class, dropout=dropout, **kwargs)
        # binary cross-entropy loss for binary classification
        self.loss_func = torch.nn.BCELoss()

    def _prepare_predict_data_df(self,
        precursor_df:pd.DataFrame,
    ):
        """Add a zero-filled prediction column to `precursor_df` and keep the frame as `self.predict_df`."""
        pred_column = 'target_prob_pred'
        self._predict_column_in_df = pred_column
        precursor_df[pred_column] = 0.
        self.predict_df = precursor_df

    def _get_features_from_batch_df(self, 
        batch_df: pd.DataFrame,
        **kwargs,
    ):
        """Return the `sequence` column of `batch_df` as a long tensor of `get_ascii_indices` output."""
        sequences = batch_df['sequence'].values.astype('U')
        ascii_indices = get_ascii_indices(sequences)
        return self._as_tensor(ascii_indices, dtype=torch.long)

    def _get_targets_from_batch_df(self, 
        batch_df: pd.DataFrame,
        **kwargs
    ) -> torch.Tensor:
        """Return the `target_prob` column of `batch_df` as a float32 tensor."""
        target_probs = batch_df['target_prob'].values
        return self._as_tensor(target_probs, dtype=torch.float32)

#### A sequence classification model using `BinaryClassification_LSTM_Model_for_AASeq`

In [8]:
from peptdeep.model.rt import IRT_PEPTIDE_DF

# Copy the imported frame and add an all-zero label column to the copy.
IRT_PEPTIDE_DF = IRT_PEPTIDE_DF.copy().assign(target_prob=0)
# `.loc` slices by label and includes the end label, so labels up to and including 5 become class 1.
IRT_PEPTIDE_DF.loc[:5, 'target_prob'] = 1

model = BinaryClassification_ModelInterface_for_AASeq(
    model_class=BinaryClassification_LSTM_Model_for_AASeq,
)
model.train(IRT_PEPTIDE_DF, epoch=20)
model.predict(IRT_PEPTIDE_DF)

Unnamed: 0,sequence,pep_name,irt,mods,mod_sites,nAA,target_prob,target_prob_pred
0,LGGNEQVTR,RT-pep a,-24.92,,,9,1,0.992084
1,GAGSSEPVTGLDAK,RT-pep b,0.0,,,14,1,0.991962
2,VEATFGVDESNAK,RT-pep c,12.39,,,13,1,0.992317
3,YILAGVENSK,RT-pep d,19.79,,,10,1,0.991497
4,TPVISGGPYEYR,RT-pep e,28.71,,,12,1,0.99156
5,TPVITGAPYEYR,RT-pep f,33.38,,,12,1,0.98658
6,DGLDAASYYAPVR,RT-pep g,42.26,,,13,0,0.365364
7,ADVTPADFSEWSK,RT-pep h,54.62,,,13,0,0.36772
8,GTFIIDPGGVIR,RT-pep i,70.52,,,12,0,0.37918
9,GTFIIDPAAVIR,RT-pep k,87.23,,,12,0,0.365868


#### A sequence classification model using `BinaryClassification_Transformer_Model_for_AASeq`

In [9]:
from peptdeep.model.rt import IRT_PEPTIDE_DF

# Copy the imported frame and add an all-zero label column to the copy.
IRT_PEPTIDE_DF = IRT_PEPTIDE_DF.copy().assign(target_prob=0)
# `.loc` slices by label and includes the end label, so labels up to and including 5 become class 1.
IRT_PEPTIDE_DF.loc[:5, 'target_prob'] = 1

model = BinaryClassification_ModelInterface_for_AASeq(
    model_class=BinaryClassification_Transformer_Model_for_AASeq,
)
model.train(IRT_PEPTIDE_DF, epoch=10)
model.predict(IRT_PEPTIDE_DF)

Unnamed: 0,sequence,pep_name,irt,mods,mod_sites,nAA,target_prob,target_prob_pred
0,LGGNEQVTR,RT-pep a,-24.92,,,9,1,0.993336
1,GAGSSEPVTGLDAK,RT-pep b,0.0,,,14,1,0.987852
2,VEATFGVDESNAK,RT-pep c,12.39,,,13,1,0.968527
3,YILAGVENSK,RT-pep d,19.79,,,10,1,0.991404
4,TPVISGGPYEYR,RT-pep e,28.71,,,12,1,0.979454
5,TPVITGAPYEYR,RT-pep f,33.38,,,12,1,0.955795
6,DGLDAASYYAPVR,RT-pep g,42.26,,,13,0,0.012674
7,ADVTPADFSEWSK,RT-pep h,54.62,,,13,0,0.126706
8,GTFIIDPGGVIR,RT-pep i,70.52,,,12,0,0.017562
9,GTFIIDPAAVIR,RT-pep k,87.23,,,12,0,0.012213


# Scalar Regression Models for Given Amino Acid Sequence and Site-specific PTMs

In [10]:
#export
class ScalarRegression_LSTM_Model_for_ModAASeq(torch.nn.Module):
    """
    Scalar regression network for amino acid sequences with modifications.

    The inputs are encoded by
    ``building_block.Encoder_AsciiAA_Mod_CNN_LSTM_AttnSum`` and reduced
    to a single value by a small feed-forward head.

    Parameters
    ----------
    hidden_dim : int, optional
        Size passed to the encoder and used as input width of the head.
        Defaults to 256.
    n_lstm_layers : int, optional
        Forwarded to the encoder. Defaults to 4.
    dropout : float, optional
        Probability of the dropout layer at the start of the head.
        Defaults to 0.1.
    **kwargs
        Accepted but not used.
    """
    def __init__(self, 
        *,
        hidden_dim=256,
        n_lstm_layers=4,
        dropout=0.1,
        **kwargs,
    ):
        super().__init__()
        self.dropout = torch.nn.Dropout(dropout)

        self.encoder_nn = building_block.Encoder_AsciiAA_Mod_CNN_LSTM_AttnSum(
            hidden_dim, n_lstm_layers=n_lstm_layers
        )

        head_layers = [
            self.dropout,
            torch.nn.Linear(hidden_dim, 64),
            torch.nn.GELU(),
            torch.nn.Linear(64, 1),
        ]
        self.output_nn = torch.nn.Sequential(*head_layers)

    def forward(self, aa_x, mod_x):
        """Encode `aa_x` with `mod_x`, apply the head and squeeze the last dimension."""
        encoded = self.encoder_nn(aa_x, mod_x)
        prediction = self.output_nn(encoded)
        return prediction.squeeze(-1)

class ScalarRegression_Transformer_Model_for_ModAASeq(torch.nn.Module):
    """
    Transformer network that produces one scalar per modified sequence.

    The encoder is ``building_block.HFace_Transformer_with_PositionalEncoder``
    (noted by the original author as based on Huggingface's BertEncoder).

    Parameters
    ----------
    hidden_dim : int, optional
        Size of the embedding, the transformer and the output head.
        Defaults to 256.
    nlayers : int, optional
        Forwarded to the transformer block. Defaults to 4.
    output_attentions : bool, optional
        Forwarded to the transformer block; when true, `forward` stores
        the second element of the transformer output in
        `self.attentions`. Defaults to False.
    dropout : float, optional
        Probability for the shared ``torch.nn.Dropout`` module; also
        forwarded to the transformer block. Defaults to 0.1.
    **kwargs
        Accepted but not used.
    """
    def __init__(self,
        *,
        hidden_dim = 256,
        nlayers = 4,
        output_attentions=False,
        dropout = 0.1,
        **kwargs,
    ):
        """Build the embedding, the transformer encoder and the output head."""
        super().__init__()

        self.dropout = torch.nn.Dropout(dropout)

        self.input_nn = building_block.AA_Mod_Embedding(hidden_dim)

        self._output_attentions = output_attentions

        self.hidden_nn = building_block.HFace_Transformer_with_PositionalEncoder(
            hidden_dim,
            nlayers=nlayers,
            dropout=dropout,
            output_attentions=output_attentions,
        )

        head_layers = (
            building_block.SeqAttentionSum(hidden_dim),
            torch.nn.PReLU(),
            self.dropout,
            torch.nn.Linear(hidden_dim, 1),
        )
        self.output_nn = torch.nn.Sequential(*head_layers)

    @property
    def output_attentions(self):
        """Flag deciding whether `forward` keeps the attentions."""
        return self._output_attentions

    @output_attentions.setter
    def output_attentions(self, val:bool):
        # Keep the transformer block in sync with the local flag.
        self._output_attentions = val
        self.hidden_nn.output_attentions = val

    def forward(self, 
        aa_indices, 
        mod_x,
    ):
        """
        Embed the inputs, run the transformer and reduce to one value per sequence.

        Side effect: `self.attentions` is set to element 1 of the
        transformer output when `output_attentions` is true, else to None.
        """
        embedded = self.dropout(self.input_nn(aa_indices, mod_x))

        encoded = self.hidden_nn(embedded)
        self.attentions = encoded[1] if self.output_attentions else None

        # The embedding is added back to the transformer output, scaled by 0.2.
        mixed = encoded[0]+embedded*0.2
        hidden = self.dropout(mixed)

        return self.output_nn(hidden).squeeze(1)

class ScalarRegression_ModelInterface_for_ModAASeq(ModelInterface):
    """
    `ModelInterface` subclass for scalar regression on modified sequences.

    Features are the ASCII indices of the `sequence` column plus the
    output of `get_batch_mod_feature`; targets come from `target_value`
    and predictions are written to `target_value_pred`. The loss
    function is ``torch.nn.L1Loss``.
    """
    def __init__(self, 
        dropout=0.1,
        model_class:torch.nn.Module=ScalarRegression_LSTM_Model_for_ModAASeq, #model defined above
        device:str='gpu',
        **kwargs,
    ):
        """
        Parameters
        ----------
        dropout : float, optional
            Forwarded to `self.build`. Defaults to 0.1.
        model_class : torch.nn.Module, optional
            Model class handed to `self.build`.
        device : str, optional
            Forwarded to ``ModelInterface.__init__``. Defaults to 'gpu'.
        **kwargs
            Extra keyword arguments forwarded to `self.build`.
        """
        super().__init__(device=device)
        self.build(model_class, dropout=dropout, **kwargs)
        # L1 (absolute error) loss for regression
        self.loss_func = torch.nn.L1Loss()

    def _prepare_predict_data_df(self,
        precursor_df:pd.DataFrame,
    ):
        """Add a zero-filled prediction column to `precursor_df` and keep the frame as `self.predict_df`."""
        pred_column = 'target_value_pred'
        self._predict_column_in_df = pred_column
        precursor_df[pred_column] = 0.
        self.predict_df = precursor_df

    def _get_features_from_batch_df(self, 
        batch_df: pd.DataFrame,
        **kwargs,
    ):
        """Return `(aa_indices, mod_x)` tensors built from `batch_df`."""
        sequences = batch_df['sequence'].values.astype('U')
        aa_indices = self._as_tensor(
            get_ascii_indices(sequences), dtype=torch.long
        )

        mod_features = get_batch_mod_feature(batch_df)
        mod_x = self._as_tensor(mod_features)

        return aa_indices, mod_x

    def _get_targets_from_batch_df(self, 
        batch_df: pd.DataFrame,
        **kwargs
    ) -> torch.Tensor:
        """Return the `target_value` column of `batch_df` as a float32 tensor."""
        target_values = batch_df['target_value'].values
        return self._as_tensor(target_values, dtype=torch.float32)

#### Scalar regression model (RT) with modified AA sequences using `ScalarRegression_LSTM_Model_for_ModAASeq`

In [11]:
from peptdeep.model.rt import IRT_PEPTIDE_DF

# Work on a copy of the imported frame; the columns added below go onto the copy.
IRT_PEPTIDE_DF = IRT_PEPTIDE_DF.copy()

# Min-max scale `irt` to obtain the regression target.
irt_min = IRT_PEPTIDE_DF.irt.min()
irt_max = IRT_PEPTIDE_DF.irt.max()
IRT_PEPTIDE_DF['target_value'] = (IRT_PEPTIDE_DF.irt - irt_min) / (irt_max - irt_min)

model = ScalarRegression_ModelInterface_for_ModAASeq(
    model_class=ScalarRegression_LSTM_Model_for_ModAASeq,
)
model.train(IRT_PEPTIDE_DF, epoch=20)
model.predict(IRT_PEPTIDE_DF)

Unnamed: 0,sequence,pep_name,irt,mods,mod_sites,nAA,target_value,target_value_pred
0,LGGNEQVTR,RT-pep a,-24.92,,,9,0.0,0.023022
1,GAGSSEPVTGLDAK,RT-pep b,0.0,,,14,0.199488,0.153369
2,VEATFGVDESNAK,RT-pep c,12.39,,,13,0.298671,0.217336
3,YILAGVENSK,RT-pep d,19.79,,,10,0.357909,0.334083
4,TPVISGGPYEYR,RT-pep e,28.71,,,12,0.429315,0.331022
5,TPVITGAPYEYR,RT-pep f,33.38,,,12,0.466699,0.449149
6,DGLDAASYYAPVR,RT-pep g,42.26,,,13,0.537784,0.50023
7,ADVTPADFSEWSK,RT-pep h,54.62,,,13,0.636728,0.556495
8,GTFIIDPGGVIR,RT-pep i,70.52,,,12,0.764009,0.725871
9,GTFIIDPAAVIR,RT-pep k,87.23,,,12,0.897775,0.913588


#### Scalar regression model (RT) with modified AA sequences using `ScalarRegression_Transformer_Model_for_ModAASeq`

In [12]:
from peptdeep.model.rt import IRT_PEPTIDE_DF

# Work on a copy of the imported frame; the columns added below go onto the copy.
IRT_PEPTIDE_DF = IRT_PEPTIDE_DF.copy()

# Min-max scale `irt` to obtain the regression target.
irt_min = IRT_PEPTIDE_DF.irt.min()
irt_max = IRT_PEPTIDE_DF.irt.max()
IRT_PEPTIDE_DF['target_value'] = (IRT_PEPTIDE_DF.irt - irt_min) / (irt_max - irt_min)

model = ScalarRegression_ModelInterface_for_ModAASeq(
    model_class=ScalarRegression_Transformer_Model_for_ModAASeq,
)
model.train(IRT_PEPTIDE_DF, epoch=10)
model.predict(IRT_PEPTIDE_DF)

Unnamed: 0,sequence,pep_name,irt,mods,mod_sites,nAA,target_value,target_value_pred
0,LGGNEQVTR,RT-pep a,-24.92,,,9,0.0,0.100091
1,GAGSSEPVTGLDAK,RT-pep b,0.0,,,14,0.199488,0.350402
2,VEATFGVDESNAK,RT-pep c,12.39,,,13,0.298671,0.372654
3,YILAGVENSK,RT-pep d,19.79,,,10,0.357909,0.352173
4,TPVISGGPYEYR,RT-pep e,28.71,,,12,0.429315,0.462331
5,TPVITGAPYEYR,RT-pep f,33.38,,,12,0.466699,0.509501
6,DGLDAASYYAPVR,RT-pep g,42.26,,,13,0.537784,0.588974
7,ADVTPADFSEWSK,RT-pep h,54.62,,,13,0.636728,0.630189
8,GTFIIDPGGVIR,RT-pep i,70.52,,,12,0.764009,0.70591
9,GTFIIDPAAVIR,RT-pep k,87.23,,,12,0.897775,0.840132


# Binary Classification Models for Given Amino Acid Sequence and Site-specific PTMs

In [13]:
#export
class BinaryClassification_LSTM_Model_for_ModAASeq(torch.nn.Module):
    """
    Binary classifier that applies a sigmoid to the output of
    `ScalarRegression_LSTM_Model_for_ModAASeq`.

    Parameters
    ----------
    hidden_dim : int, optional
        Forwarded to the wrapped model. Defaults to 256.
    n_lstm_layers : int, optional
        Forwarded to the wrapped model. Defaults to 4.
    dropout : float, optional
        Forwarded to the wrapped model. Defaults to 0.1.
    **kwargs
        Accepted but not forwarded.
    """
    def __init__(self, 
        *,
        hidden_dim=256,
        n_lstm_layers=4,
        dropout=0.1,
        **kwargs,
    ):
        super().__init__()
        regressor = ScalarRegression_LSTM_Model_for_ModAASeq(
            hidden_dim=hidden_dim,
            n_lstm_layers=n_lstm_layers,
            dropout=dropout,
        )
        self.nn = regressor

    def forward(self, aa_x, mod_x):
        """Return the sigmoid of the wrapped model's output for `(aa_x, mod_x)`."""
        logits = self.nn(aa_x, mod_x)
        return torch.sigmoid(logits)

class BinaryClassification_Transformer_Model_for_ModAASeq(torch.nn.Module):
    """
    Binary classifier that applies a sigmoid to the output of
    `ScalarRegression_Transformer_Model_for_ModAASeq`.

    Parameters
    ----------
    hidden_dim : int, optional
        Forwarded to the wrapped model. Defaults to 256.
    nlayers : int, optional
        Forwarded to the wrapped model. Defaults to 4.
    output_attentions : bool, optional
        Forwarded to the wrapped model. Defaults to False.
    dropout : float, optional
        Forwarded to the wrapped model. Defaults to 0.1.
    **kwargs
        Extra keyword arguments forwarded to the wrapped model.
    """
    def __init__(self,
        *,
        hidden_dim = 256,
        nlayers = 4,
        output_attentions=False,
        dropout = 0.1,
        **kwargs,
    ):
        """
        Model based on a transformer Architecture from 
        Huggingface's BertEncoder class.
        """
        super().__init__()
        self.nn = ScalarRegression_Transformer_Model_for_ModAASeq(
            nlayers=nlayers,
            hidden_dim=hidden_dim,
            output_attentions=output_attentions,
            dropout=dropout,
            **kwargs
        )

    @property
    def output_attentions(self):
        """Attention-output flag of the wrapped model."""
        # This wrapper has no `_output_attentions`/`hidden_nn` attributes of
        # its own; the flag and the transformer block live on `self.nn`.
        return self.nn.output_attentions

    @output_attentions.setter
    def output_attentions(self, val:bool):
        # The wrapped model's own setter updates its transformer block.
        self.nn.output_attentions = val

    def forward(self, 
        aa_indices, 
        mod_x,
    ):
        """Return the sigmoid of the wrapped model's output for `(aa_indices, mod_x)`."""
        return torch.sigmoid(self.nn(aa_indices, mod_x))

class BinaryClassification_ModelInterface_for_ModAASeq(ModelInterface):
    """
    `ModelInterface` subclass for binary classification of modified sequences.

    Features are the ASCII indices of the `sequence` column plus the
    output of `get_batch_mod_feature`; targets come from `target_prob`
    and predictions are written to `target_prob_pred`. The loss
    function is ``torch.nn.BCELoss``.
    """
    def __init__(self, 
        dropout=0.1,
        model_class:torch.nn.Module=BinaryClassification_LSTM_Model_for_ModAASeq, #model defined above
        device:str='gpu',
        **kwargs,
    ):
        """
        Parameters
        ----------
        dropout : float, optional
            Forwarded to `self.build`. Defaults to 0.1.
        model_class : torch.nn.Module, optional
            Model class handed to `self.build`.
        device : str, optional
            Forwarded to ``ModelInterface.__init__``. Defaults to 'gpu'.
        **kwargs
            Extra keyword arguments forwarded to `self.build`.
        """
        super().__init__(device=device)
        self.build(
            model_class,
            dropout=dropout,
            **kwargs
        )
        self.loss_func = torch.nn.BCELoss() # for binary classification

    def _prepare_predict_data_df(self,
        precursor_df:pd.DataFrame,
    ):
        """Add a zero-filled prediction column to `precursor_df` and keep the frame as `self.predict_df`."""
        self._predict_column_in_df = 'target_prob_pred'
        precursor_df[self._predict_column_in_df] = 0.
        self.predict_df = precursor_df

    def _get_features_from_batch_df(self, 
        batch_df: pd.DataFrame,
        **kwargs,
    ):
        """
        Return `(aa_indices, mod_x)` tensors built from `batch_df`.

        `**kwargs` is accepted (and ignored) so the signature matches the
        other model interfaces in this module.
        """
        aa_indices = self._as_tensor(
            get_ascii_indices(
                batch_df['sequence'].values.astype('U')
            ),
            dtype=torch.long
        )
        
        mod_x = self._as_tensor(
            get_batch_mod_feature(
                batch_df
            )
        )

        return aa_indices, mod_x

    def _get_targets_from_batch_df(self, 
        batch_df: pd.DataFrame,
        **kwargs
    ) -> torch.Tensor:
        """Return the `target_prob` column of `batch_df` as a float32 tensor."""
        return self._as_tensor(
            batch_df['target_prob'].values, 
            dtype=torch.float32
        )

#### Binary classification model with modified AA sequences using `BinaryClassification_LSTM_Model_for_ModAASeq`

In [14]:
from peptdeep.model.rt import IRT_PEPTIDE_DF

# Copy the imported frame and add an all-zero label column to the copy.
IRT_PEPTIDE_DF = IRT_PEPTIDE_DF.copy().assign(target_prob=0)
# `.loc` slices by label and includes the end label, so labels up to and including 5 become class 1.
IRT_PEPTIDE_DF.loc[:5, 'target_prob'] = 1

model = BinaryClassification_ModelInterface_for_ModAASeq(
    model_class=BinaryClassification_LSTM_Model_for_ModAASeq,
)
model.train(IRT_PEPTIDE_DF, epoch=20)
model.predict(IRT_PEPTIDE_DF)

Unnamed: 0,sequence,pep_name,irt,mods,mod_sites,nAA,target_prob,target_prob_pred
0,LGGNEQVTR,RT-pep a,-24.92,,,9,1,0.99091
1,GAGSSEPVTGLDAK,RT-pep b,0.0,,,14,1,0.98939
2,VEATFGVDESNAK,RT-pep c,12.39,,,13,1,0.990863
3,YILAGVENSK,RT-pep d,19.79,,,10,1,0.990191
4,TPVISGGPYEYR,RT-pep e,28.71,,,12,1,0.987697
5,TPVITGAPYEYR,RT-pep f,33.38,,,12,1,0.990062
6,DGLDAASYYAPVR,RT-pep g,42.26,,,13,0,0.37155
7,ADVTPADFSEWSK,RT-pep h,54.62,,,13,0,0.37255
8,GTFIIDPGGVIR,RT-pep i,70.52,,,12,0,0.379109
9,GTFIIDPAAVIR,RT-pep k,87.23,,,12,0,0.372425


#### Binary classification model with modified AA sequences using `BinaryClassification_Transformer_Model_for_ModAASeq`

In [15]:
from peptdeep.model.rt import IRT_PEPTIDE_DF

# Copy the imported frame and add an all-zero label column to the copy.
IRT_PEPTIDE_DF = IRT_PEPTIDE_DF.copy().assign(target_prob=0)
# `.loc` slices by label and includes the end label, so labels up to and including 5 become class 1.
IRT_PEPTIDE_DF.loc[:5, 'target_prob'] = 1

model = BinaryClassification_ModelInterface_for_ModAASeq(
    model_class=BinaryClassification_Transformer_Model_for_ModAASeq,
)
model.train(IRT_PEPTIDE_DF, epoch=10)
model.predict(IRT_PEPTIDE_DF)

Unnamed: 0,sequence,pep_name,irt,mods,mod_sites,nAA,target_prob,target_prob_pred
0,LGGNEQVTR,RT-pep a,-24.92,,,9,1,0.994696
1,GAGSSEPVTGLDAK,RT-pep b,0.0,,,14,1,0.971599
2,VEATFGVDESNAK,RT-pep c,12.39,,,13,1,0.990369
3,YILAGVENSK,RT-pep d,19.79,,,10,1,0.994455
4,TPVISGGPYEYR,RT-pep e,28.71,,,12,1,0.987329
5,TPVITGAPYEYR,RT-pep f,33.38,,,12,1,0.984665
6,DGLDAASYYAPVR,RT-pep g,42.26,,,13,0,0.033939
7,ADVTPADFSEWSK,RT-pep h,54.62,,,13,0,0.09114
8,GTFIIDPGGVIR,RT-pep i,70.52,,,12,0,0.014022
9,GTFIIDPAAVIR,RT-pep k,87.23,,,12,0,0.011016
