## Building your own models for CCS prediction

In [None]:
# IPython autoreload, mode 2: re-import all loaded modules before each cell
# executes, so edits to library source take effect without a kernel restart.
%reload_ext autoreload
%autoreload 2

In [None]:
from peptdeep.model.featurize import (
    get_batch_aa_indices, 
    get_batch_mod_feature
)

from peptdeep.settings import model_const

import peptdeep.model.model_interface as model_base
import peptdeep.model.building_block as building_block

# Number of entries in model_const['mod_elements'].
# NOTE(review): presumably the width of the per-site modification feature
# vector; it is not referenced by the cells shown in this notebook -- confirm
# whether it is still needed.
mod_feature_size = len(model_const['mod_elements'])

import torch
import pandas as pd

In [None]:
class CCS_LSTM_Module(torch.nn.Module):
    """CCS regression network: a sequence encoder followed by a linear decoder.

    The encoder receives amino-acid indices, modification features and
    charges. Its (dropout-regularised) output is concatenated with the charge
    column and passed to the decoder, which emits one value per precursor.

    Parameters
    ----------
    dropout : float
        Probability used by the dropout layer placed after the encoder.
        Defaults to 0.1.
    """

    def __init__(self, dropout=0.1):
        super().__init__()

        # Size handed to the encoder; the decoder input is one wider (see below).
        hidden_size = 128

        self.dropout = torch.nn.Dropout(dropout)
        self.ccs_encoder = building_block.Encoder_26AA_Mod_Charge_CNN_LSTM_AttnSum(
            hidden_size
        )
        # "+ 1" accounts for the charge column appended in `forward`.
        self.ccs_decoder = building_block.Decoder_Linear(hidden_size + 1, 1)

    def forward(self, aa_indices, mod_x, charges):
        """Predict one CCS value per row of the batch.

        Parameters
        ----------
        aa_indices
            Amino-acid index tensor, forwarded to the encoder.
        mod_x
            Modification feature tensor, forwarded to the encoder.
        charges
            Charge tensor; used by the encoder and also concatenated to the
            encoder output along dim 1, so it must be 2-D with a matching
            first dimension.

        Returns
        -------
        torch.Tensor
            Decoder output with dim 1 squeezed away.
        """
        encoded = self.ccs_encoder(aa_indices, mod_x, charges)
        encoded = self.dropout(encoded)
        decoder_input = torch.cat([encoded, charges], dim=1)
        prediction = self.ccs_decoder(decoder_input)
        return prediction.squeeze(1)

In [None]:
class CCS_ModelInterface(model_base.ModelInterface):
    """Training/prediction interface for a CCS model.

    Wires a torch module into `model_base.ModelInterface` by supplying the
    loss function, the target/prediction column names, and the conversion
    from a precursor DataFrame batch to model inputs and targets.

    Parameters
    ----------
    model_class : torch.nn.Module
        The model *class* (not an instance) passed to `self.build`.
        Defaults to `CCS_LSTM_Module`.
    dropout : float
        Forwarded to `self.build` as the `dropout` keyword. Defaults to 0.1.
    """

    def __init__(self,
        model_class:torch.nn.Module=CCS_LSTM_Module,
        dropout=0.1,
    ):
        super().__init__()
        self.build(
            model_class,
            dropout=dropout,
        )
        self.loss_func = torch.nn.L1Loss()
        # Column read as the training target / column written by prediction.
        self._target_column_to_train = 'ccs'
        self._target_column_to_predict = 'ccs_pred'

    def _prepare_predict_data_df(self,
        precursor_df:pd.DataFrame,
    ) -> None:
        """Initialise the prediction column and remember the DataFrame.

        The prediction column is set to 0.0 directly on `precursor_df`
        (the caller's object is modified in place), and that same object is
        stored as `self.predict_df`.
        """
        precursor_df[self._target_column_to_predict] = 0.
        self.predict_df = precursor_df

    def _get_features_from_batch_df(self,
        batch_df: pd.DataFrame,
    ) -> tuple:
        """Convert one DataFrame batch into the model's input tensors.

        Reads the 'sequence' and 'charge' columns directly; `batch_df` is
        also handed as a whole to `get_batch_mod_feature`.

        Returns
        -------
        tuple
            `(aa_indices, mod_x, charges)` in the positional order expected
            by the model's `forward`.
        """
        aa_indices = torch.LongTensor(
            get_batch_aa_indices(
                batch_df['sequence'].values.astype('U')
            )
        )

        mod_x = torch.Tensor(
            get_batch_mod_feature(
                batch_df
            )
        )

        # Shape (batch, 1) so it can be concatenated to 2-D features; the
        # charge is scaled by 0.1 (presumably to keep it in a small numeric
        # range -- confirm).
        charges = torch.Tensor(
            batch_df['charge'].values
        ).unsqueeze(1)*0.1

        return aa_indices, mod_x, charges

    def _get_targets_from_batch_df(self,
        batch_df: pd.DataFrame,
    ) -> torch.Tensor:
        """Return the training targets of the batch as a 1-D tensor.

        The column is taken from `self._target_column_to_train` rather than
        a hard-coded name, so changing the target column on the instance is
        honoured here as well.
        """
        return torch.Tensor(
            batch_df[self._target_column_to_train].values
        )

### Testing the CCS model

### Prepare training data

In [None]:
from peptdeep.model.rt import irt_pep

# Virtual (made-up) CCS targets for training: row i simply gets float(i).
irt_pep['ccs'] = pd.RangeIndex(len(irt_pep)).to_numpy().astype('float64')
# Every peptide gets the same charge value.
irt_pep['charge'] = 2
irt_pep

Unnamed: 0,sequence,pep_name,irt,mods,mod_sites,nAA,ccs,charge
0,LGGNEQVTR,RT-pep a,-24.92,,,9,0.0,2
1,GAGSSEPVTGLDAK,RT-pep b,0.0,,,14,1.0,2
2,VEATFGVDESNAK,RT-pep c,12.39,,,13,2.0,2
3,YILAGVENSK,RT-pep d,19.79,,,10,3.0,2
4,TPVISGGPYEYR,RT-pep e,28.71,,,12,4.0,2
5,TPVITGAPYEYR,RT-pep f,33.38,,,12,5.0,2
6,DGLDAASYYAPVR,RT-pep g,42.26,,,13,6.0,2
7,ADVTPADFSEWSK,RT-pep h,54.62,,,13,7.0,2
8,GTFIIDPGGVIR,RT-pep i,70.52,,,12,8.0,2
9,GTFIIDPAAVIR,RT-pep k,87.23,,,12,9.0,2


In [None]:
# Build the interface with its defaults (CCS_LSTM_Module, dropout=0.1).
ccs_model = CCS_ModelInterface()

Device `gpu` is not available, set to `cpu`


### Test the untrained model

### Test if training works

In [None]:
# Fit on the `irt_pep` table with epoch=100; verbose output is switched off.
ccs_model.train(irt_pep, epoch=100, verbose=False)

### Test if the model fits the virtual CCS values

In [None]:
# Predict on the same peptides used for training; the displayed table gains a
# `ccs_pred` column that can be compared with `ccs`.
ccs_model.predict(irt_pep)

Unnamed: 0,sequence,pep_name,irt,mods,mod_sites,nAA,ccs,charge,ccs_pred
0,LGGNEQVTR,RT-pep a,-24.92,,,9,0.0,2,0.0
1,GAGSSEPVTGLDAK,RT-pep b,0.0,,,14,1.0,2,1.068118
2,VEATFGVDESNAK,RT-pep c,12.39,,,13,2.0,2,2.132658
3,YILAGVENSK,RT-pep d,19.79,,,10,3.0,2,3.344304
4,TPVISGGPYEYR,RT-pep e,28.71,,,12,4.0,2,4.412459
5,TPVITGAPYEYR,RT-pep f,33.38,,,12,5.0,2,5.313079
6,DGLDAASYYAPVR,RT-pep g,42.26,,,13,6.0,2,7.875914
7,ADVTPADFSEWSK,RT-pep h,54.62,,,13,7.0,2,7.989055
8,GTFIIDPGGVIR,RT-pep i,70.52,,,12,8.0,2,8.029768
9,GTFIIDPAAVIR,RT-pep k,87.23,,,12,9.0,2,8.443041
