In [1]:
# Load IPython's autoreload extension and switch it to mode 2 so that edits to
# imported project modules are picked up without restarting the kernel.
%load_ext autoreload

%autoreload 2

In [23]:
import tensorflow as tf
import wandb
import pandas as pd
from prosit_t.wandb_agent.train_utils import get_proteometools_data
from dlomix.losses import masked_spectral_distance
import os
from prosit_t.models import PrositTransformerV2
from dlomix.models import PrositIntensityPredictor
import numpy as np
import tensorflow.keras.backend as K
from prosit_t.inference.visualization import compare_spectra, compare_multiple_spectra

In [3]:
import plotly.io as pio
# Make "iframe" plotly's default renderer for every figure shown in this notebook.
pio.renderers.default = "iframe"
import plotly.express as px

In [5]:
# Restrict this process to GPU 0. NOTE(review): this only takes effect if it runs
# before TensorFlow initializes its GPU devices, so keep this cell ahead of any
# cell that touches the GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
physical_devices = tf.config.list_physical_devices("GPU")
# Turn on memory growth for every visible GPU. Iterating instead of indexing
# physical_devices[0] keeps the cell from raising IndexError on a machine where
# no GPU is visible (the list is simply empty and the loop does nothing).
for device in physical_devices:
    tf.config.experimental.set_memory_growth(device, enable=True)

In [6]:
# Settings handed to get_proteometools_data: which parquet files back the
# train/val splits, plus the fragmentation, batch size and sequence length
# entries the loader is configured with.
data_config = dict(
    dataset="proteometools",
    data_source=dict(
        train="/cmnfs/proj/prosit/Transformer/first_pool_train.parquet",
        val="/cmnfs/proj/prosit/Transformer/first_pool_test.parquet",
    ),
    fragmentation="HCD",
    batch_size=1024,
    seq_length=30,
)

In [7]:
# Start a Weights & Biases run in the project that holds the model artifacts;
# the returned `run` handle is what later cells use to fetch those artifacts.
project_name = "transforming-prosit-first-pool"
run = wandb.init(
    project=project_name,
)

[34m[1mwandb[0m: Currently logged in as: [33mmamisashvili-lizi[0m ([33mprosit-compms[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [8]:
# Only the validation split is used in this notebook. The unused first element
# is bound to a named throwaway rather than `_`, because assigning to `_` in
# IPython shadows the built-in "last output" variable for the rest of the session.
_train_data, val_data = get_proteometools_data(data_config)

2023-09-26 09:15:46.473210: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 43640 MB memory:  -> device: 0, name: NVIDIA A40, pci bus id: 0000:21:00.0, compute capability: 8.6


In [32]:
# Download the transformer checkpoint artifact from W&B and restore its weights
# into a freshly constructed model.
artifact_path = "prosit-compms/transforming-prosit-first-pool/model-swept-monkey-124:v80"
artifact = run.use_artifact(artifact_path)
model_dir = artifact.download()
# NOTE(review): these hyperparameters presumably have to match the ones the
# checkpoint was trained with - confirm against the W&B run config.
transformer = PrositTransformerV2(
    # Read the sequence length from the data config instead of repeating the
    # literal, so the model and the dataset cannot silently disagree.
    seq_length=data_config["seq_length"],
    embedding_output_dim=64,
    num_heads=8,
    num_transformers=6,
    dense_dim_factor=4
)
transformer.load_weights(model_dir)

[34m[1mwandb[0m:   5 of 5 files downloaded.  
2023-09-26 10:21:27.708700: W tensorflow/core/util/tensor_slice_reader.cc:97] Could not open ./artifacts/model-swept-monkey-124:v80: FAILED_PRECONDITION: artifacts/model-swept-monkey-124:v80; Is a directory: perhaps your file is in a different file format and you need to use a different restore operator?


<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7f048b9480a0>

In [10]:
# Download the baseline (PrositIntensityPredictor) checkpoint artifact from W&B
# and restore its weights into a freshly constructed model.
baseline_path = "prosit-compms/transforming-prosit-first-pool/model-classic-star-15:v19"
baseline_artifact = run.use_artifact(baseline_path)
baseline_dir = baseline_artifact.download()

# NOTE(review): these hyperparameters presumably have to match the ones the
# checkpoint was trained with - confirm against the W&B run config.
baseline = PrositIntensityPredictor(
    # Read the sequence length from the data config instead of repeating the
    # literal, so the model and the dataset cannot silently disagree.
    seq_length=data_config["seq_length"],
    embedding_output_dim=16,
    recurrent_layers_sizes=(256, 512),
)

baseline.load_weights(baseline_dir)

[34m[1mwandb[0m:   4 of 4 files downloaded.  
2023-09-26 09:20:30.019527: W tensorflow/core/util/tensor_slice_reader.cc:97] Could not open ./artifacts/model-classic-star-15:v19: FAILED_PRECONDITION: artifacts/model-classic-star-15:v19; Is a directory: perhaps your file is in a different file format and you need to use a different restore operator?


<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7ef84773fb50>

In [33]:
# Which validation batch, and which sample inside that batch, to inspect.
batch_idx = 0
sample_idx = 456

# Pull exactly one batch out of the validation dataset. next(iter(...)) avoids
# building a throwaway list, and a batch_idx past the end of the dataset is
# reported with an explicit message instead of a bare "list index out of range".
try:
    batch_x, batch_y = next(iter(val_data.skip(batch_idx).take(1)))
except StopIteration:
    raise IndexError(
        f"batch_idx={batch_idx} is beyond the last batch of val_data"
    ) from None

# Score both models on the same batch; the losses are rounded to three decimals
# because they are only used for display below.
transformer_pred = transformer.predict(batch_x)
transformer_loss = masked_spectral_distance(batch_y, transformer_pred)
transformer_loss = tf.round(transformer_loss * 1000) / 1000

baseline_pred = baseline.predict(batch_x)
baseline_loss = masked_spectral_distance(batch_y, baseline_pred)
baseline_loss = tf.round(baseline_loss * 1000) / 1000

# Plot the selected sample's target spectrum against both predictions, passing
# each model's label and its loss for that sample.
compare_multiple_spectra(
    batch_y[sample_idx],
    [transformer_pred[sample_idx], baseline_pred[sample_idx]],
    ["Transformer", "Baseline"],
    [transformer_loss[sample_idx], baseline_loss[sample_idx]]
)



2023-09-26 10:21:33.860443: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:693] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "GPU" vendor: "NVIDIA" model: "NVIDIA A40" frequency: 1740 num_cores: 84 environment { key: "architecture" value: "8.6" } environment { key: "cuda" value: "11080" } environment { key: "cudnn" value: "8600" } num_registers: 65536 l1_cache_size: 24576 l2_cache_size: 6291456 shared_memory_size_per_multiprocessor: 102400 memory_size: 45760577536 bandwidth: 696096000 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }




In [34]:
# Which validation batch, and which sample inside that batch, to inspect.
batch_idx = 552
sample_idx = 396

# Pull exactly one batch out of the validation dataset. next(iter(...)) avoids
# building a throwaway list, and a batch_idx past the end of the dataset is
# reported with an explicit message instead of a bare "list index out of range".
try:
    batch_x, batch_y = next(iter(val_data.skip(batch_idx).take(1)))
except StopIteration:
    raise IndexError(
        f"batch_idx={batch_idx} is beyond the last batch of val_data"
    ) from None

# Score both models on the same batch; the losses are rounded to three decimals
# because they are only used for display below.
transformer_pred = transformer.predict(batch_x)
transformer_loss = masked_spectral_distance(batch_y, transformer_pred)
transformer_loss = tf.round(transformer_loss * 1000) / 1000

baseline_pred = baseline.predict(batch_x)
baseline_loss = masked_spectral_distance(batch_y, baseline_pred)
baseline_loss = tf.round(baseline_loss * 1000) / 1000

# Plot the selected sample's target spectrum against both predictions, passing
# each model's label and its loss for that sample.
compare_multiple_spectra(
    batch_y[sample_idx],
    [transformer_pred[sample_idx], baseline_pred[sample_idx]],
    ["Transformer", "Baseline"],
    [transformer_loss[sample_idx], baseline_loss[sample_idx]]
)

