In [1]:
# Enable IPython's autoreload extension for this kernel.
%load_ext autoreload

# Mode 2: reload imported modules before each cell executes, so edits to local
# packages are picked up without restarting the kernel.
%autoreload 2

In [40]:
import time
import tensorflow as tf
import wandb
import pandas as pd
from prosit_t.wandb_agent.train_utils import get_proteometools_data
import seaborn as sns 
from dlomix.losses import masked_spectral_distance, masked_pearson_correlation_distance
import os
import matplotlib.pyplot as plt
from prosit_t.models import PrositTransformerV2
from dlomix.models import PrositIntensityPredictor
import numpy as np
import tensorflow.keras.backend as K
from prosit_t.inference.visualization import compare_spectra, compare_multiple_spectra

In [20]:
import plotly.io as pio
# Make "iframe" the default plotly renderer for every figure shown in this session.
# NOTE(review): presumably chosen so figures display in this notebook front end — confirm.
pio.renderers.default = "iframe"
import plotly.express as px

In [4]:
# Restrict this process to CUDA device "0".
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})
# Optional (disabled): ask TensorFlow to enable memory growth on the first GPU.
# physical_devices = tf.config.list_physical_devices("GPU")
# tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)

In [5]:
# Parquet files backing each split.
# NOTE(review): the "val" entry points at a *_test.parquet file — confirm this is intended.
data_source = {
    "train": "/cmnfs/proj/prosit/Transformer/first_pool_train.parquet",
    "val": "/cmnfs/proj/prosit/Transformer/first_pool_test.parquet",
}

# Settings handed to the data loader: dataset name, file locations,
# fragmentation method, batch size and sequence length.
data_config = {
    "dataset": "proteometools",
    "data_source": data_source,
    "fragmentation": "HCD",
    "batch_size": 1024,
    "seq_length": 30,
}

In [6]:
# Weights & Biases project under which this notebook's run is created.
project_name = "transforming-prosit-first-pool"
# Start a W&B run; the returned handle is used later in the notebook to fetch
# model artifacts through `run.use_artifact(...)`.
run = wandb.init(project=project_name)

[34m[1mwandb[0m: Currently logged in as: [33mmamisashvili-lizi[0m ([33mprosit-compms[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [7]:
# The loader returns two values; only the second is kept (as `val_data`).
# NOTE(review): the discarded first value is presumably the training split — confirm.
_, val_data = get_proteometools_data(data_config)

2023-09-18 07:58:58.711321: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 43640 MB memory:  -> device: 0, name: NVIDIA A40, pci bus id: 0000:21:00.0, compute capability: 8.6


In [8]:
# Download the transformer model artifact (pinned to alias v144) through the active W&B run.
artifact_path = "prosit-compms/transforming-prosit-first-pool/model-hardy-pine-74:v144"
artifact = run.use_artifact(artifact_path)
model_dir = artifact.download()

# Architecture settings for the transformer.
# NOTE(review): presumably these must equal the values used when the weights were trained — confirm.
transformer_hparams = dict(
    seq_length=30,
    embedding_output_dim=64,
    num_heads=8,
    num_transformers=6,
    dense_dim_factor=4,
)
transformer = PrositTransformerV2(**transformer_hparams)

# Restore weights from the downloaded artifact directory; the returned load
# status object is left as the cell's displayed value.
transformer.load_weights(model_dir)

[34m[1mwandb[0m:   5 of 5 files downloaded.  
2023-09-18 07:59:17.431045: W tensorflow/core/util/tensor_slice_reader.cc:97] Could not open ./artifacts/model-hardy-pine-74:v144: FAILED_PRECONDITION: artifacts/model-hardy-pine-74:v144; Is a directory: perhaps your file is in a different file format and you need to use a different restore operator?


<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7f34aa814c10>

In [9]:
# Download the baseline model artifact (pinned to alias v19) through the active W&B run.
baseline_path = "prosit-compms/transforming-prosit-first-pool/model-classic-star-15:v19"
baseline_artifact = run.use_artifact(baseline_path)
baseline_dir = baseline_artifact.download()

# Architecture settings for the baseline intensity predictor.
# NOTE(review): presumably these must equal the values used when the weights were trained — confirm.
baseline_hparams = dict(
    seq_length=30,
    embedding_output_dim=16,
    recurrent_layers_sizes=(256, 512),
)
baseline = PrositIntensityPredictor(**baseline_hparams)

# Restore weights from the downloaded artifact directory; the returned load
# status object is left as the cell's displayed value.
baseline.load_weights(baseline_dir)

[34m[1mwandb[0m:   4 of 4 files downloaded.  
2023-09-18 07:59:18.708653: W tensorflow/core/util/tensor_slice_reader.cc:97] Could not open ./artifacts/model-classic-star-15:v19: FAILED_PRECONDITION: artifacts/model-classic-star-15:v19; Is a directory: perhaps your file is in a different file format and you need to use a different restore operator?


<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7f34aaf41dc0>

In [10]:
# Pull exactly one (inputs, targets) batch from the validation dataset.
# `next(iter(...))` fetches the element directly instead of materialising a
# one-item list, and the `None` sentinel turns an empty dataset into an
# explicit, descriptive error rather than an opaque IndexError.
first_batch = next(iter(val_data.take(1)), None)
if first_batch is None:
    raise ValueError("val_data yielded no batches; cannot select an example batch")
batch_x, batch_y = first_batch

In [11]:
# Run both models on the same input batch so their predictions can be compared
# row-for-row against `batch_y`.
pred = transformer.predict(batch_x)
baseline_pred = baseline.predict(batch_x)

2023-09-18 07:59:20.001097: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:693] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "GPU" vendor: "NVIDIA" model: "NVIDIA A40" frequency: 1740 num_cores: 84 environment { key: "architecture" value: "8.6" } environment { key: "cuda" value: "11080" } environment { key: "cudnn" value: "8600" } num_registers: 65536 l1_cache_size: 24576 l2_cache_size: 6291456 shared_memory_size_per_multiprocessor: 102400 memory_size: 45760577536 bandwidth: 696096000 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }




2023-09-18 07:59:20.629724: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:606] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.




2023-09-18 07:59:23.576347: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8801




In [44]:
# Position (within the batch) of the example to inspect.
idx = 456

# Target spectrum for that example, followed by each model's prediction and
# the label under which it is plotted.
reference_spectrum = batch_y[idx]
predicted_spectra = [pred[idx], baseline_pred[idx]]
model_labels = ["Transformer", "Baseline"]
compare_multiple_spectra(reference_spectrum, predicted_spectra, model_labels)

In [48]:
# Display the "sequence" input of the selected example (row `idx` of the batch).
batch_x["sequence"][idx]

<tf.Tensor: shape=(30,), dtype=string, numpy=
array([b'E', b'S', b'S', b'I', b'I', b'A', b'P', b'A', b'P', b'A', b'E',
       b'D', b'V', b'D', b'T', b'P', b'P', b'R', b'', b'', b'', b'', b'',
       b'', b'', b'', b'', b'', b'', b''], dtype=object)>

In [49]:
# Display the "collision_energy" input of the selected example (row `idx` of the batch).
batch_x["collision_energy"][idx]

<tf.Tensor: shape=(1,), dtype=float64, numpy=array([0.29139948])>

In [52]:
# Display the "precursor_charge" input of the selected example (row `idx` of the batch).
# NOTE(review): the recorded output looks one-hot encoded over 6 positions — confirm.
batch_x["precursor_charge"][idx]

<tf.Tensor: shape=(6,), dtype=float32, numpy=array([0., 1., 0., 0., 0., 0.], dtype=float32)>

In [61]:
# List every example in the batch whose collision energy matches the target value.
#
# The target literal appears to be copied from a rounded repr of a float64
# (8 decimals), so an exact `==` against the stored values will generally never
# match. Compare within an absolute tolerance instead; `rtol=0.0` keeps the
# window fixed at `atol` so nearby-but-different energies are not reported.
target_ce = 0.29139948
# Flatten to 1-D so each entry lines up with one batch row.
ce_values = np.asarray(batch_x["collision_energy"], dtype=np.float64).reshape(-1)
matching_rows = np.flatnonzero(np.isclose(ce_values, target_ce, rtol=0.0, atol=1e-7))
for row in matching_rows:
    # Print the batch row index together with the full-precision stored value.
    print(row, ce_values[row])