In [1]:
# Load IPython's autoreload extension.
%load_ext autoreload

# Mode 2: edits to imported modules are picked up without restarting the kernel.
%autoreload 2

In [19]:
import numpy as np
import pandas as pd
import dlomix
from dlomix import constants, data, eval, layers, models, pipelines, reports, utils
from dlomix.models import PrositIntensityPredictor
from dlomix.losses import masked_spectral_distance, masked_pearson_correlation_distance
import tensorflow as tf
from tensorflow import keras
import wandb
from wandb.keras import WandbCallback
import keras_nlp
import seaborn as sns
# Show the public (non-underscore) attributes exposed by the dlomix package.
print([name for name in dir(dlomix) if not name.startswith("_")])

['META_DATA', 'constants', 'data', 'eval', 'layers', 'losses', 'models', 'pipelines', 'reports', 'utils']


In [3]:
import os
# Expose only the GPU with CUDA index 1 to this process.
# NOTE(review): this has to take effect before TensorFlow first initialises its
# GPUs, yet the import cell above already imports tensorflow - confirm that the
# ordering still works on a fresh kernel (Restart & Run All).
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [4]:
# Let TensorFlow allocate GPU memory on demand instead of reserving it up front.
physical_devices = tf.config.list_physical_devices('GPU')
if not physical_devices:
    # Indexing physical_devices[0] would raise IndexError on a CPU-only machine
    # (or when CUDA_VISIBLE_DEVICES hides every GPU); report it and carry on.
    print("No GPU visible to TensorFlow - running on CPU.")
for gpu in physical_devices:
    # Memory growth has to be configured identically on every visible GPU,
    # so apply it to all of them rather than only to the first one.
    tf.config.experimental.set_memory_growth(gpu, enable=True)

In [5]:
# Weights & Biases project name; passed to wandb.init(project=...) further down.
project_name = "transforming-prosit"

In [17]:
from dlomix.data import IntensityDataset

# CSV with the combined training/validation examples.
# NOTE(review): this URL points at the `main` branch, while the test-set URL used
# later in this notebook points at `tasks/intensity` - confirm both are intended.
TRAIN_DATAPATH = 'https://raw.githubusercontent.com/wilhelm-lab/dlomix-resources/main/example_datasets/Intensity/proteomeTools_train_val.csv'
BATCH_SIZE = 64

# The resulting object provides `train_data` and `val_data`, which are fed to model.fit below.
int_data = IntensityDataset(
    data_source=TRAIN_DATAPATH,
    seq_length=30,
    collision_energy_col='collision_energy',
    batch_size=BATCH_SIZE,
    val_ratio=0.2,
    test=False,
)

In [7]:
# Start a W&B run called "baseline" inside the `project_name` project.
wandb.init(project=project_name, name='baseline')

# Baseline Prosit intensity predictor.
model = PrositIntensityPredictor(
    seq_length=30,
    embedding_output_dim=16,
    recurrent_layers_sizes=(256, 512),
)

[34m[1mwandb[0m: Currently logged in as: [33mmamisashvili-lizi[0m ([33mprosit-compms[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [8]:
# Adam optimiser with a fixed learning rate of 1e-3.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

# Train on masked spectral distance; track masked Pearson correlation distance as a metric.
model.compile(
    optimizer=optimizer,
    loss=masked_spectral_distance,
    metrics=[masked_pearson_correlation_distance],
)

In [9]:
%%time
history = model.fit(int_data.train_data, validation_data=int_data.val_data,
                    epochs=1, callbacks=[WandbCallback()])





2023-07-24 13:16:37.614068: I tensorflow/stream_executor/cuda/cuda_blas.cc:1614] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2023-07-24 13:16:38.658512: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8801




[34m[1mwandb[0m: [32m[41mERROR[0m Can't save model in the h5py format. The model will be saved as as an W&B Artifact in the 'tf' format.






INFO:tensorflow:Assets written to: /cmnfs/home/l.mamisashvili/transforming-prosit/notebooks/wandb/run-20230724_131628-72s63o41/files/model-best/assets


INFO:tensorflow:Assets written to: /cmnfs/home/l.mamisashvili/transforming-prosit/notebooks/wandb/run-20230724_131628-72s63o41/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/cmnfs/home/l.mamisashvili/transforming-prosit/notebooks/wandb/run-20230724_131628-72s63o41/files/model-best)... Done. 0.2s


CPU times: user 23.3 s, sys: 1.32 s, total: 24.6 s
Wall time: 25 s


In [10]:
# Print the model's layers with their output shapes and parameter counts.
model.summary()

Model: "prosit_intensity_predictor"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 string_lookup (StringLookup  multiple                 0         
 )                                                               
                                                                 
 embedding (Embedding)       multiple                  352       
                                                                 
 sequential (Sequential)     multiple                  4096      
                                                                 
 sequential_1 (Sequential)   (None, 30, 512)           1996800   
                                                                 
 sequential_2 (Sequential)   (None, 29, 512)           1576806   
                                                                 
 encoder_att (AttentionLayer  multiple                 542       
 )                                      

In [11]:
# Persist the trained model to the ./prosit_model directory
# (the log output below shows the assets being written there).
model.save("./prosit_model")









































































INFO:tensorflow:Assets written to: ./prosit_model/assets


INFO:tensorflow:Assets written to: ./prosit_model/assets


### Test Model

In [18]:
# create the dataset object for test data
from dlomix.data import IntensityDataset

# Held-out test CSV.
# NOTE(review): this URL points at the `tasks/intensity` branch, while the training
# URL earlier in this notebook points at `main` - confirm both are intended.
TEST_DATAPATH = 'https://raw.githubusercontent.com/wilhelm-lab/dlomix-resources/tasks/intensity/example_datasets/Intensity/proteomeTools_test.csv'

test_int_data = IntensityDataset(
    data_source=TEST_DATAPATH,
    seq_length=30,
    collision_energy_col='collision_energy',
    batch_size=32,
    test=True,
)

In [13]:
# Restore the trained model by rebuilding the architecture and loading its weights.
#
# Deserialising the whole subclassed model with
#   tf.keras.models.load_model("./prosit_model", custom_objects={...})
# failed in this notebook with
#   TypeError: Dimension value must be integer or None or have an __index__ method,
#   got value 'TensorShape([None, 1])'
# so the SavedModel directory written by model.save("./prosit_model") is used only
# as a weight checkpoint. The constructor arguments must match those used for training.
load_model = PrositIntensityPredictor(
    seq_length=30,
    embedding_output_dim=16,
    recurrent_layers_sizes=(256, 512),
)

# load_weights accepts a SavedModel directory and reads its variables checkpoint.
# expect_partial() silences warnings about checkpoint entries that have no
# counterpart in the freshly built model (e.g. saved optimizer slots).
load_model.load_weights("./prosit_model").expect_partial()

# Re-attach the loss and metric used during training so that evaluate()
# reports the same quantities.
load_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss=masked_spectral_distance,
    metrics=[masked_pearson_correlation_distance],
)