In [1]:
# Load IPython's autoreload extension so edited project modules are re-imported
# without restarting the kernel.
%load_ext autoreload

# Mode 2: reload all modules (except those excluded with %aimport) before each
# cell is executed.
%autoreload 2

In [95]:
from prosit_t.eval import prosit_transformer_eval
import wandb
import tensorflow as tf
import plotly.io as pio
import os
from prosit_t.wandb_agent.train_utils import get_proteometools_data
from prosit_t.models import PrositTransformerV2
import pandas as pd
from dlomix.models import PrositIntensityPredictor

In [9]:
# Select Plotly's "iframe" renderer as the default used by every `.show()` call
# later in the notebook.
pio.renderers.default = "iframe"

In [10]:
# Expose only physical GPU 2 to this process. The variable has to be set before
# TensorFlow first enumerates its devices, i.e. before the call below.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
physical_devices = tf.config.list_physical_devices("GPU")

# Indexing physical_devices[0] would raise IndexError when no GPU is visible
# (for example on a machine without a device index 2), so report that case
# explicitly instead of crashing.
if not physical_devices:
    print("No GPU visible to TensorFlow; skipping GPU memory-growth setup.")

# Let TensorFlow allocate GPU memory on demand rather than reserving it all
# up front. Iterating covers every visible device and is a no-op when empty.
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, enable=True)

In [11]:
# Settings handed to get_proteometools_data() further down.
# Note that the "val" split points at the *test* parquet of the first pool.
data_config = dict(
    dataset="proteometools",
    data_source=dict(
        train="/cmnfs/proj/prosit/Transformer/first_pool_train.parquet",
        val="/cmnfs/proj/prosit/Transformer/first_pool_test.parquet",
    ),
    fragmentation="HCD",
    batch_size=1024,
    seq_length=30,
)

In [12]:
# Open a Weights & Biases run in the project that stores the model artifacts
# referenced below; `run` is passed to load_model() in later cells.
project_name = "transforming-prosit-first-pool"
run = wandb.init(
    project=project_name,
)

[34m[1mwandb[0m: Currently logged in as: [33mmamisashvili-lizi[0m ([33mprosit-compms[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [15]:
# Only the validation split is used in this notebook. The first returned element
# is bound to a named throwaway rather than `_`, because in IPython `_` holds the
# previous cell output and assigning to it stops IPython from updating it.
_train_data, val_data = get_proteometools_data(data_config)

2023-09-25 09:55:17.462963: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 43640 MB memory:  -> device: 0, name: NVIDIA A40, pci bus id: 0000:e2:00.0, compute capability: 8.6


In [22]:
# W&B artifact (project / name:version) holding the transformer checkpoint.
artifact_path = "prosit-compms/transforming-prosit-first-pool/model-hardy-pine-74:v144"

# Build the architecture first; load_model() then receives the run, the artifact
# path and this model instance (presumably restoring the stored weights into
# it — confirm in prosit_transformer_eval.load_model).
transformer = PrositTransformerV2(
    seq_length=30,
    embedding_output_dim=64,
    num_heads=16,
    num_transformers=6,
    dense_dim_factor=4,
)
transformer = prosit_transformer_eval.load_model(run, artifact_path, transformer)

[34m[1mwandb[0m:   5 of 5 files downloaded.  
2023-09-25 09:56:24.544700: W tensorflow/core/util/tensor_slice_reader.cc:97] Could not open ./artifacts/model-hardy-pine-74:v144: FAILED_PRECONDITION: artifacts/model-hardy-pine-74:v144; Is a directory: perhaps your file is in a different file format and you need to use a different restore operator?


In [23]:
# Evaluation size: number of validation batches to take and the batch size
# passed to compute_losses(). The batch size mirrors data_config["batch_size"]
# and has to be kept in sync with it by hand.
num_batches, batch_size = 600, 1024

In [77]:
# Restrict the validation dataset to the first `num_batches` batches, convert
# that subset to a list with the project helper and wrap it in a DataFrame.
val_subset = val_data.take(num_batches)
data_list = prosit_transformer_eval.dataset_to_list(val_subset)
df = pd.DataFrame(data_list)

In [78]:
# Run the project's post-processing on the frame. The later plotting cells
# stratify on "sequence_length", "precursor_charge_int" and
# "collision_energy_range" — presumably these are derived here; confirm in
# process_df.
# NOTE: `df` is rebound to the processed result, so re-running this cell feeds
# an already processed frame back into process_df.
df = df.pipe(prosit_transformer_eval.process_df)

In [79]:
# Evaluate the transformer on the validation data with the same batch count
# used to build `df`. The result is stored as a column of `df` in the next
# cell, so it is presumably one loss value per row — TODO confirm.
transformer_loss = prosit_transformer_eval.compute_losses(
    transformer,
    val_data,
    num_batches,
    batch_size,
)









In [80]:
# Attach the transformer losses as a column of `df`.
# NOTE(review): this relies on compute_losses() yielding values in the same
# order and count as the rows produced by dataset_to_list() — confirm.
df["Transformer_loss"] = transformer_loss

In [81]:
# W&B artifact (project / name:version) holding the baseline checkpoint.
baseline_path = "prosit-compms/transforming-prosit-first-pool/model-classic-star-15:v45"

# Same two-step pattern as for the transformer: instantiate the architecture,
# then hand it to load_model() together with the run and artifact path.
baseline = PrositIntensityPredictor(
    seq_length=30,
    embedding_output_dim=16,
    recurrent_layers_sizes=(256, 512),
)
baseline = prosit_transformer_eval.load_model(run, baseline_path, baseline)

[34m[1mwandb[0m:   4 of 4 files downloaded.  
2023-09-25 13:07:21.502644: W tensorflow/core/util/tensor_slice_reader.cc:97] Could not open ./artifacts/model-classic-star-15:v45: FAILED_PRECONDITION: artifacts/model-classic-star-15:v45; Is a directory: perhaps your file is in a different file format and you need to use a different restore operator?


In [82]:
# Evaluate the baseline with exactly the same data, batch count and batch size
# as the transformer so that both loss columns describe the same samples.
baseline_loss = prosit_transformer_eval.compute_losses(
    baseline,
    val_data,
    num_batches,
    batch_size,
)









In [83]:
# Attach the baseline losses as a column of `df`, next to "Transformer_loss".
# NOTE(review): this relies on compute_losses() yielding values in the same
# order and count as the rows of `df` — confirm.
df["Baseline_loss"] = baseline_loss

In [84]:
# Compare both models' loss distributions, grouped by the "sequence_length"
# column of `df`.
loss_columns = ["Baseline_loss", "Transformer_loss"]
violin_sequence_length = prosit_transformer_eval.violin_plot_per_feature_val(
    df, loss_columns, "sequence_length",
    title="Spectral Distance Stratified on Sequence Length",
)
violin_sequence_length.show()

In [85]:
# Compare both models' loss distributions, grouped by the
# "precursor_charge_int" column of `df`.
loss_columns = ["Baseline_loss", "Transformer_loss"]
violin_charge = prosit_transformer_eval.violin_plot_per_feature_val(
    df, loss_columns, "precursor_charge_int",
    title="Spectral Distance Stratified on Precursor Charge",
)
violin_charge.show()

In [86]:
# Compare both models' loss distributions, grouped by the
# "collision_energy_range" column of `df`.
loss_columns = ["Baseline_loss", "Transformer_loss"]
violin_ce = prosit_transformer_eval.violin_plot_per_feature_val(
    df, loss_columns, "collision_energy_range",
    title="Spectral Distance Stratified on Collision Energy Ranges",
)
violin_ce.show()

In [98]:
# Histograms of the transformer loss, one per value of "precursor_charge_int".
# The trailing 2 and 3 are passed positionally; presumably they are the subplot
# grid rows/columns — confirm against histogram_per_feature_val's signature.
charge_histograms_transformer = prosit_transformer_eval.histogram_per_feature_val(
    df,
    "Transformer_loss",
    "precursor_charge_int",
    2,
    3
)
# Show the figure created above. The original cell called .show() on
# `charge_histograms`, a name never defined in this notebook, which raises
# NameError on a fresh kernel (or shows a stale figure from hidden state).
charge_histograms_transformer.show()

In [99]:
# Histograms of the transformer loss, one per value of "sequence_length".
# The trailing 4 and 6 are passed positionally; presumably they are the subplot
# grid rows/columns — confirm against histogram_per_feature_val's signature.
seq_length_histograms_transformer = prosit_transformer_eval.histogram_per_feature_val(
    df, "Transformer_loss", "sequence_length", 4, 6
)
seq_length_histograms_transformer.show()