In [1]:
import numpy as np
import pandas as pd
import dlomix
from dlomix import constants, data, eval, layers, models, pipelines, reports, utils
from dlomix.models import PrositIntensityPredictor
from dlomix.losses import masked_spectral_distance, masked_pearson_correlation_distance
from tensorflow.keras.layers.experimental import preprocessing
from dlomix.constants import ALPHABET_UNMOD
from dlomix.layers.attention import AttentionLayer, DecoderAttentionLayer
import tensorflow as tf
from tensorflow import keras
import wandb
# from prosit import  PrositIntensityPredictor
from wandb.keras import WandbCallback
import keras_nlp
# Show which public (non-underscore) names the dlomix package exposes.
public_names = [attr for attr in dir(dlomix) if not attr.startswith("_")]
print(public_names)

2023-05-25 13:27:18.298781: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


['META_DATA', 'constants', 'data', 'eval', 'layers', 'losses', 'models', 'pipelines', 'reports', 'utils']


In [2]:
# Load IPython's autoreload extension.
%load_ext autoreload

# Mode 2: pick up edits to imported modules live, without restarting the kernel.
%autoreload 2

In [3]:
import os

# Expose only the GPU with index 1 to CUDA.
# NOTE(review): this runs after `import tensorflow`; it presumably has to take
# effect before TensorFlow first initialises its GPU devices — confirm that no
# earlier cell touches the GPU.
os.environ.update(CUDA_VISIBLE_DEVICES="1")

In [4]:
# Enable on-demand GPU memory allocation ("memory growth") for TensorFlow.
physical_devices = tf.config.list_physical_devices('GPU')

# Iterate instead of indexing [0]: the list is empty on a CPU-only machine or
# when CUDA_VISIBLE_DEVICES matches no device, and indexing would raise
# IndexError. Applying the flag to every visible GPU also keeps the setting
# consistent across devices, as the TensorFlow GPU guide recommends.
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, enable=True)

In [5]:
# policy = keras.mixed_precision.Policy("mixed_float16")
# keras.mixed_precision.set_global_policy(policy)

In [6]:
# Project name for Weights & Biases runs (see the commented-out wandb.init call).
project_name = "transforming-prosit"

In [7]:
from dlomix.data import IntensityDataset

# Remote CSV used as the data source for the intensity dataset.
TRAIN_DATAPATH = 'https://raw.githubusercontent.com/wilhelm-lab/dlomix-resources/main/example_datasets/Intensity/proteomeTools_train_val.csv'
BATCH_SIZE = 64

# Collect the dataset settings in one place so they are easy to scan and tweak.
dataset_options = dict(
    data_source=TRAIN_DATAPATH,
    seq_length=30,
    collision_energy_col='collision_energy',
    batch_size=BATCH_SIZE,
    val_ratio=0.2,
    test=False,
)
int_data = IntensityDataset(**dataset_options)

2023-05-25 13:27:39.884763: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 41964 MB memory:  -> device: 0, name: NVIDIA A40, pci bus id: 0000:81:00.0, compute capability: 8.6


In [8]:
# Prosit-like intensity model assembled with the Keras functional API:
# sequence encoder (embedding + GRUs + attention), gated by a dense projection
# of the meta data, followed by a GRU decoder with attention.

SEQ_LENGTH = 30                 # width of the sequence input / embedding input_length
DECODER_STEPS = SEQ_LENGTH - 1  # 29 decoder time steps (presumably one per bond
                                # between adjacent residues — TODO confirm)
OUTPUTS_PER_STEP = 6            # units of the time-distributed output Dense

# ---- inputs -----------------------------------------------------------------
# The sequence is fed straight into a StringLookup with a string vocabulary, so
# the Input must be declared as a string tensor. With the default float32 dtype
# the graph still builds, but string sequences cannot be fed to the model.
in_sequence = tf.keras.layers.Input(shape=(SEQ_LENGTH,), dtype=tf.string)
# NOTE(review): a 6-wide collision energy and a 1-wide precursor charge look
# swapped compared to the usual scalar energy / one-hot charge layout — confirm
# against what IntensityDataset actually yields. Both are concatenated below,
# so the graph builds either way.
# NOTE(review): the inputs are unnamed; if the dataset yields a dict of
# features, the Input layers presumably need matching `name=` values — verify.
in_collision_energy = tf.keras.layers.Input(shape=(6,))
in_precursor_charge = tf.keras.layers.Input(shape=(1,))

# ---- meta data branch -------------------------------------------------------
meta = tf.keras.layers.Concatenate(name="meta_in")([in_collision_energy, in_precursor_charge])
# Project to 512 units so it can be multiplied element-wise with the encoder output.
meta = tf.keras.layers.Dense(512, name="meta_dense")(meta)
meta = tf.keras.layers.Dropout(0.2, name="meta_dense_do")(meta)

# ---- sequence encoder -------------------------------------------------------
# tf.keras.layers.StringLookup is the same layer as the deprecated
# `experimental.preprocessing.StringLookup` alias.
x = tf.keras.layers.StringLookup(vocabulary=list(ALPHABET_UNMOD.keys()))(in_sequence)
x = tf.keras.layers.Embedding(
            # + 2 leaves headroom above the vocabulary for the lookup's extra index/indices
            input_dim=len(ALPHABET_UNMOD) + 2,
            output_dim=16,
            input_length=SEQ_LENGTH,
        )(x)
x = tf.keras.layers.Bidirectional(
                    tf.keras.layers.GRU(
                        units=256, return_sequences=True
                    )
                )(x)
x = tf.keras.layers.Dropout(rate=0.2)(x)
x = tf.keras.layers.GRU(units=512, return_sequences=True)(x)
x = tf.keras.layers.Dropout(rate=0.2)(x)
# Presumably collapses the time axis to one vector per sequence; RepeatVector
# below requires a 2-D input.
x = AttentionLayer(name="encoder_att")(x)

# ---- combine with meta data and decode --------------------------------------
# Despite the layer name "add_meta", the combination is an element-wise product.
x = tf.keras.layers.Multiply(name="add_meta")([x, meta])
x = tf.keras.layers.RepeatVector(DECODER_STEPS, name="repeat")(x)

x = tf.keras.layers.GRU(
                    units=512,
                    return_sequences=True,
                    name="decoder")(x)
x = tf.keras.layers.Dropout(0.2)(x)
x = DecoderAttentionLayer(DECODER_STEPS)(x)
x = tf.keras.layers.TimeDistributed(
                    tf.keras.layers.Dense(OUTPUTS_PER_STEP), name="time_dense"
                )(x)
x = tf.keras.layers.LeakyReLU(name="activation")(x)
# One flat output vector per example.
x = tf.keras.layers.Flatten(name="out")(x)

model = tf.keras.Model(inputs=[in_sequence, in_collision_energy, in_precursor_charge], outputs=x)

2023-05-25 13:28:11.926596: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-05-25 13:28:11.927617: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-05-25 13:28:11.928342: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

In [9]:
# Print the layer-by-layer summary (output shapes, parameter counts, connections).
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 30)]         0           []                               
                                                                                                  
 string_lookup (StringLookup)   (None, 30)           0           ['input_1[0][0]']                
                                                                                                  
 embedding (Embedding)          (None, 30, 16)       352         ['string_lookup[0][0]']          
                                                                                                  
 bidirectional (Bidirectional)  (None, 30, 512)      420864      ['embedding[0][0]']              
                                                                                              

In [9]:
# wandb.init(project=project_name, name='test_on_csv')

In [10]:
# Adam optimizer; the learning rate is pulled out so it is easy to tune.
LEARNING_RATE = 1e-3
optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)

In [11]:
# Optimise the masked spectral distance and additionally report the masked
# Pearson correlation distance as a metric.
model.compile(
    optimizer=optimizer,
    loss=masked_spectral_distance,
    metrics=[masked_pearson_correlation_distance],
)

In [30]:
# Scratch experiment: push a (30, 16) input through a keras_nlp transformer
# encoder block and then through a multi-head attention layer.
in_ = tf.keras.layers.Input(shape=(30, 16))

encoder_block = keras_nlp.layers.TransformerEncoder(
    intermediate_dim=512,
    num_heads=4,
    dropout=0.1,
    layer_norm_epsilon=1e-5,
)
encoded = encoder_block(in_)

# The same tensor is passed as query and value, i.e. self-attention;
# output_shape=512 sets the size of the last output dimension.
self_attention = tf.keras.layers.MultiHeadAttention(
    num_heads=2,
    key_dim=2,
    dropout=0.2,
    output_shape=512,
)
z = self_attention(encoded, encoded)

In [31]:
# Inspect the symbolic output shape of the attention experiment.
z.shape

TensorShape([None, 30, 512])

In [23]:
# NOTE(review): `y` is not defined in any cell visible here, and this cell's
# execution count (23) is out of sequence — presumably a leftover from an
# earlier session. It would raise NameError on a fresh Restart & Run All;
# confirm and remove, or define `y` explicitly.
y.shape

TensorShape([None, 30, 512])