In [1]:
# Setup Chunk
import numpy as np
import pandas as pd

# Supress Warnings 
import warnings 
warnings.simplefilter(action='ignore')

In [2]:
# Data Read
train_clinical = pd.read_csv("/kaggle/input/amp-parkinsons-disease-progression-prediction/train_clinical_data.csv")
train_protein = pd.read_csv("/kaggle/input/amp-parkinsons-disease-progression-prediction/train_proteins.csv")
train_peptide = pd.read_csv("/kaggle/input/amp-parkinsons-disease-progression-prediction/train_peptides.csv")

In [3]:
train_clinical.head()

Unnamed: 0,visit_id,patient_id,visit_month,updrs_1,updrs_2,updrs_3,updrs_4,upd23b_clinical_state_on_medication
0,55_0,55,0,10.0,6.0,15.0,,
1,55_3,55,3,10.0,7.0,25.0,,
2,55_6,55,6,8.0,10.0,34.0,,
3,55_9,55,9,8.0,9.0,30.0,0.0,On
4,55_12,55,12,10.0,10.0,41.0,0.0,On


# Data Cleaning

In [4]:
# Data cleaning packages 
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import KNNImputer

In [5]:
# Data cleaning recipe. 

def recipe(clinical, protein, peptide, verbose = True, rep = True):
    if verbose: print('Preprocessing Steps')
    
    # Peptitde Abundance / Protein Expression 
    pep_over_pro = pd.merge(protein, peptide, 
                        on =['visit_id', 'visit_month', 'patient_id', 
                            'UniProt']) # joins the peptide and protein dataset

    # Creates feature in new column
    pep_over_pro['pep_per_pro'] = pep_over_pro['PeptideAbundance'] / pep_over_pro['NPX']
    
    if verbose: print('1. Add Peptide/Protein as new feature.')
    
    # Pivot the data to wide format. 
    pep_over_pro = pep_over_pro.drop(['patient_id', 'visit_month'], axis = 1).pivot(
    index = ['visit_id'], columns = ['Peptide'], values = ['pep_per_pro'])
    
    # Cleans erronous levels for smooth merge. 
    pep_over_pro.columns = pep_over_pro.columns.droplevel()
    pep_over_pro.reset_index()

    train = pd.merge(clinical, pep_over_pro, on = 'visit_id', 
                 how = 'left') # left join
    
    train = train.set_index('visit_id') # removes as feature, but in rowname for tracking
    
    # Drop med_status as a predictor (DOES NOT APPEAR IN TEST DATA)
    if rep:
        train = train.drop(['upd23b_clinical_state_on_medication'], axis = 1)
    
        if verbose: print('2. Dropped med_status as a predictor.')
        
    # Drop patient_id as a predictor 
    train = train.drop(['patient_id'], axis = 1)
    
    if verbose: print('3. Dropped patient_id as a predictor.')
    
    ## KNN Imputation ## 
    
    # Drops the response if it training data.
    if rep: 
        response = train[['updrs_1', 'updrs_2', 'updrs_3', 'updrs_4']]
        train = train.drop(['updrs_1', 'updrs_2', 'updrs_3', 'updrs_4'], axis = 1)
    
    # Drops the categorical variables. 
    cats = train[['visit_month']]
    train = train.drop(['visit_month'], axis = 1)
    
    # Standardize numeric features. 
    scalar = StandardScaler()
    train = pd.DataFrame(scalar.fit_transform(train), columns = train.columns,
                        index = train.index)
    
    if verbose: print('4. Normalized numeric predictors.')
    
    # Add back in cats 
    train = train.join(cats)
        
    # Apply KNN imputation 
    imputer = KNNImputer(n_neighbors = 5)
    train = pd.DataFrame(imputer.fit_transform(train), columns = train.columns, 
                        index = train.index)
    if rep: 
        response = pd.DataFrame(imputer.fit_transform(response), 
                                columns = response.columns, 
                                index = response.index)
    
    if verbose: print('5. KNN Imputation')
    
    # Add back in repsonse if training data. 
    if rep: 
        train = train.join(response)
    
    return train 

In [6]:
# Test recipe 
train_clinical_fresh = pd.read_csv("/kaggle/input/amp-parkinsons-disease-progression-prediction/train_clinical_data.csv")
clean_test = recipe(train_clinical_fresh, train_protein, train_peptide, rep = True)
clean_test.head()

Preprocessing Steps
1. Add Peptide/Protein as new feature.
2. Dropped med_status as a predictor.
3. Dropped patient_id as a predictor.
4. Normalized numeric predictors.
5. KNN Imputation


Unnamed: 0_level_0,AADDTWEPFASGK,AAFGQGSGPIMLDEVQC(UniMod_4)TGTEASLADC(UniMod_4)K,AAFTEC(UniMod_4)C(UniMod_4)QAADK,AANEVSSADVK,AATGEC(UniMod_4)TATVGKR,AATVGSLAGQPLQER,AAVYHHFISDGVR,ADDKETC(UniMod_4)FAEEGK,ADDKETC(UniMod_4)FAEEGKK,ADDLGKGGNEESTKTGNAGSR,...,YVNKEIQNAVNGVK,YWGVASFLQK,YYC(UniMod_4)FQGNQFLR,YYTYLIMNK,YYWGGQYTWDMAK,visit_month,updrs_1,updrs_2,updrs_3,updrs_4
visit_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
55_0,-0.097394,-0.611465,-0.482302,0.0,0.555718,0.388845,-0.055969,-1.188915,0.74708,0.138628,...,-0.235448,0.130248,-0.254317,-0.447802,-0.635128,0.0,10.0,6.0,15.0,2.8
55_3,-0.474096,-1.122953,-0.650454,0.0,-0.309951,0.157413,1.547536,0.143963,-0.820133,0.55196,...,0.933085,2.673593,1.413008,0.661591,0.246301,3.0,10.0,7.0,25.0,3.0
55_6,-0.236887,-0.887478,-0.043868,0.0,1.775712,0.125236,-0.082369,-1.187405,1.221974,0.10326,...,-0.578775,0.200504,-0.661579,-0.292764,-0.010148,6.0,8.0,10.0,34.0,3.4
55_9,0.249475,-1.713341,-0.572966,0.0,-0.329779,-0.103017,0.411495,-0.068773,-0.922437,0.856256,...,0.533902,0.592032,1.609423,-2.001862,-0.192085,9.0,8.0,9.0,30.0,0.0
55_12,-0.313668,-0.911808,-0.261373,0.0,0.110614,0.979999,-0.402378,-1.176289,0.379146,0.15599,...,0.226678,0.498383,-0.876679,0.069697,0.19149,12.0,10.0,10.0,41.0,0.0


## Auto-encoder

In [7]:
# Packages 
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout

In [8]:
# Extract the peptide/protein data to be reduced. 
clean_pep_pro = clean_test.iloc[:, :clean_test.shape[1] - 5]

In [9]:
# Encoder Network
input_shape = [clean_pep_pro.shape[1]]
latent_shape = 25

encoder = tf.keras.Sequential()
encoder.add(Dense(500, input_shape = input_shape, activation = 'relu'))
encoder.add(Dropout(0.2))
encoder.add(Dense(250, activation = 'relu'))
encoder.add(Dropout(0.2))
encoder.add(Dense(latent_shape, activation = 'relu'))
encoder.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 500)               484500    
                                                                 
 dropout (Dropout)           (None, 500)               0         
                                                                 
 dense_1 (Dense)             (None, 250)               125250    
                                                                 
 dropout_1 (Dropout)         (None, 250)               0         
                                                                 
 dense_2 (Dense)             (None, 25)                6275      
                                                                 
Total params: 616,025
Trainable params: 616,025
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Decoder Network
decoder = tf.keras.Sequential()
decoder.add(Dense(50, input_shape = encoder.output_shape, activation = 'relu'))
decoder.add(Dropout(0.2))
decoder.add(Dense(250, activation = 'relu'))
decoder.add(Dropout(0.2))
decoder.add(Dense(500, activation = 'relu'))
decoder.add(Dropout(0.2))
decoder.add(Dense(input_shape[0]))
decoder.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, None, 50)          1300      
                                                                 
 dropout_2 (Dropout)         (None, None, 50)          0         
                                                                 
 dense_4 (Dense)             (None, None, 250)         12750     
                                                                 
 dropout_3 (Dropout)         (None, None, 250)         0         
                                                                 
 dense_5 (Dense)             (None, None, 500)         125500    
                                                                 
 dropout_4 (Dropout)         (None, None, 500)         0         
                                                                 
 dense_6 (Dense)             (None, None, 968)        

In [11]:
# Auto-encoder Network
visit = keras.Input(shape = input_shape)
latent_vector = encoder(visit)
output = decoder(latent_vector)

auto_encoder = keras.Model(inputs = visit, outputs = output)
auto_encoder.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 968)]             0         
                                                                 
 sequential (Sequential)     (None, 25)                616025    
                                                                 
 sequential_1 (Sequential)   (None, None, 968)         624518    
                                                                 
Total params: 1,240,543
Trainable params: 1,240,543
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Compile and train model 
auto_encoder.compile('adam', loss = 'mse')

auto_encoder.fit(clean_pep_pro, clean_pep_pro, epochs = 100, 
                 validation_split = 0.2)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7ff0004b6530>

In [13]:
clean_encoded = encoder.predict(clean_pep_pro)
clean_encoded = pd.DataFrame(clean_encoded)
clean_encoded



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,15,16,17,18,19,20,21,22,23,24
0,22.402472,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,13.421409,...,0.0,0.0,0.0,0.0,8.016436,0.0,0.0,0.000000,0.000000,0.0
1,10.349184,0.0,7.378607,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,2.912897,0.000000,0.0
2,22.181145,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,12.408401,...,0.0,0.0,0.0,0.0,5.808912,0.0,0.0,0.000000,0.000000,0.0
3,0.000000,0.0,5.505663,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.0
4,9.662017,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,9.810909,...,0.0,0.0,0.0,0.0,4.154107,0.0,0.0,0.000000,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2610,10.253488,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,5.464534,...,0.0,0.0,0.0,0.0,2.050546,0.0,0.0,0.000000,0.000000,0.0
2611,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.0
2612,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,1.681651,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.0
2613,6.589236,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,4.912424,0.0


In [14]:
train_clean_encoded = clean_test.iloc[:, 968:]
train_clean_encoded = train_clean_encoded.join(
    clean_encoded.set_index(clean_test.index))
train_clean_encoded.head()

Unnamed: 0_level_0,visit_month,updrs_1,updrs_2,updrs_3,updrs_4,0,1,2,3,4,...,15,16,17,18,19,20,21,22,23,24
visit_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
55_0,0.0,10.0,6.0,15.0,2.8,22.402472,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,8.016436,0.0,0.0,0.0,0.0,0.0
55_3,3.0,10.0,7.0,25.0,3.0,10.349184,0.0,7.378607,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.912897,0.0,0.0
55_6,6.0,8.0,10.0,34.0,3.4,22.181145,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,5.808912,0.0,0.0,0.0,0.0,0.0
55_9,9.0,8.0,9.0,30.0,0.0,0.0,0.0,5.505663,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
55_12,12.0,10.0,10.0,41.0,0.0,9.662017,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,4.154107,0.0,0.0,0.0,0.0,0.0


# Model Training

In [15]:
# Model Imports
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV

In [16]:
# Tuning Grid 
xgb_grid = {'n_estimators': [100, 200],
            'max_depth': [3, 10],
            'min_samples_split': [2],
            'learning_rate': [0.01, 0.015],
            'loss': ['absolute_error']}

In [17]:
# Empty dictionary 
models = {}
# Response variable names
target = ["updrs_1", "updrs_2", "updrs_3", "updrs_4"]

X = train_clean_encoded.iloc[:, 5:].join(
    train_clean_encoded['visit_month'])

# X.head()

# Loop to train a model for each of response
for u in target:
    
    y = train_clean_encoded[u]
    
    # Model
    xgb = GridSearchCV(estimator = GradientBoostingRegressor(), param_grid = xgb_grid, 
        cv = 2, n_jobs = 30, verbose = 2)
    
    trained = xgb.fit(X.values, y)
    
    print(trained.best_params_)
    
    models[u] = trained.best_estimator_

Fitting 2 folds for each of 8 candidates, totalling 16 fits




{'learning_rate': 0.01, 'loss': 'absolute_error', 'max_depth': 3, 'min_samples_split': 2, 'n_estimators': 100}
Fitting 2 folds for each of 8 candidates, totalling 16 fits




{'learning_rate': 0.015, 'loss': 'absolute_error', 'max_depth': 3, 'min_samples_split': 2, 'n_estimators': 100}
Fitting 2 folds for each of 8 candidates, totalling 16 fits
{'learning_rate': 0.01, 'loss': 'absolute_error', 'max_depth': 3, 'min_samples_split': 2, 'n_estimators': 100}
Fitting 2 folds for each of 8 candidates, totalling 16 fits
{'learning_rate': 0.01, 'loss': 'absolute_error', 'max_depth': 10, 'min_samples_split': 2, 'n_estimators': 200}


# Predictions

In [18]:
X.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,16,17,18,19,20,21,22,23,24,visit_month
visit_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
55_0,22.402472,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.421409,...,0.0,0.0,0.0,8.016436,0.0,0.0,0.0,0.0,0.0,0.0
55_3,10.349184,0.0,7.378607,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2.912897,0.0,0.0,3.0
55_6,22.181145,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.408401,...,0.0,0.0,0.0,5.808912,0.0,0.0,0.0,0.0,0.0,6.0
55_9,0.0,0.0,5.505663,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0
55_12,9.662017,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.810909,...,0.0,0.0,0.0,4.154107,0.0,0.0,0.0,0.0,0.0,12.0


In [19]:
def prepare_features(clinical, protein, peptides):
    
    # Match input format.
    clinical = clinical[['patient_id', 'visit_month', 'visit_id']]
    protein = protein.drop('group_key', axis = 1, errors = 'ignore')
    peptides = peptides.drop('group_key', axis = 1, errors = 'ignore')
    
    # Apply recipe. 
    clean = recipe(clinical, protein, peptides, rep = False)
    
    # Drop duplicate rows.
    clean = clean[~clean.index.duplicated(keep='first')]
    
    # Add features not seen during testing as 0.
    test_add = np.setdiff1d(clean_test.columns, 
                            clean.columns).tolist()
        
    temp = pd.DataFrame(0, 
                        columns = test_add, 
                        index = clean.index)
    clean = clean.join(temp)
    
    # Drop any features not seen during training.
    train_drops = np.setdiff1d(clean.columns, 
                               clean_test.columns).tolist()
    clean = clean.drop(train_drops, axis = 1)
    
    # Apply encoder
    clean_pep_pro = clean.iloc[:, :clean.shape[1] - 5]
    clean_pep_pro = encoder.predict(clean_pep_pro)
    clean_encoded = pd.DataFrame(clean_pep_pro)
    clean_encoded = clean_encoded.set_index(clean.index)
    
    clean = clean_encoded.join(clean['visit_month'])

    return clean

In [20]:
#test = prepare_features(train_clinical, train_protein, train_peptide)
#test.head()

In [21]:
# Prediction and Formatting Function 
def get_prediction(clinical, protein, peptides, models):
    
    for u in target:
        # Add results column 
        clinical['result_' + str(u)] = 0
        
        # Get predictors
        X = prepare_features(clinical, protein, peptides)
        
        # Make predictions 
        clinical['result_' + str(u)] = models[u].predict(X.values)
        
        preds = models[u].predict(X.values)
        
        #print(preds.shape)
        #print(clinical.shape)
        
    # Formatting 
    
    # Empty DF to store results. 
    result = pd.DataFrame()
    
    # Loop over months 
    for m in [0, 6, 12, 24]:
        # Loop over responses 
        for u in [1, 2, 3, 4]:

            temp = clinical[["visit_id", "result_updrs_" + str(u)]]
            temp["prediction_id"] = temp["visit_id"] + "_updrs_" + str(u) + "_plus_" + str(m) + "_months"
            temp["rating"] = temp["result_updrs_" + str(u)]
            temp = temp [['prediction_id', 'rating']]

            result = pd.concat([result, temp])            
    result = result.drop_duplicates(subset=['prediction_id', 'rating'])

    return result

In [22]:
#train_preds = get_prediction(train_clinical, train_protein, train_peptide, models)
#train_preds

In [23]:
import amp_pd_peptide_310
amp_pd_peptide_310.make_env.func_dict['__called__'] = False
env = amp_pd_peptide_310.make_env()
iter_test = env.iter_test()

In [24]:
for (test_clinical, test_peptides, test_proteins, sample_submission) in iter_test:
    
    test_clinical = test_clinical.drop_duplicates(subset = ['visit_id'])
    
    results = get_prediction(test_clinical, test_proteins, test_peptides, models)

    results['rating'] = results['rating'].apply(np.round)
    
    print(results.head())
    
    env.predict(results)

This version of the API is not optimized and should not be used to estimate the runtime of your code on the hidden test set.
Preprocessing Steps
1. Add Peptide/Protein as new feature.
3. Dropped patient_id as a predictor.
4. Normalized numeric predictors.
5. KNN Imputation
Preprocessing Steps
1. Add Peptide/Protein as new feature.
3. Dropped patient_id as a predictor.
4. Normalized numeric predictors.
5. KNN Imputation
Preprocessing Steps
1. Add Peptide/Protein as new feature.
3. Dropped patient_id as a predictor.
4. Normalized numeric predictors.
5. KNN Imputation
Preprocessing Steps
1. Add Peptide/Protein as new feature.
3. Dropped patient_id as a predictor.
4. Normalized numeric predictors.
5. KNN Imputation
                   prediction_id  rating
0   3342_0_updrs_1_plus_0_months     6.0
4  50423_0_updrs_1_plus_0_months     6.0
0   3342_0_updrs_2_plus_0_months     4.0
4  50423_0_updrs_2_plus_0_months     4.0
0   3342_0_updrs_3_plus_0_months    18.0
Preprocessing Steps
1. Add Peptid