In [2]:
import onnx

from deeplc import DeepLC
import tensorflow as tf
import tf2onnx
import numpy as np





In [3]:
dlc = DeepLC()
keras_model = dlc.model
keras_model

['D:\\OneDrive - UGent\\Python Codes\\3Projects\\DeepLC_ONNX\\deeplc\\mods/full_hc_PXD005573_pub_1fd8363d9af9dcad3be7553c39396960.keras']

In [4]:
model = tf.keras.models.load_model(keras_model[0])
model.input_shape

[(None, 60, 6), (None, 30, 6), (None, 55), (None, 60, 20)]

In [5]:
onnx_model_path = "deeplc.onnx"
spec = [
    tf.TensorSpec([None, 60, 6], tf.float32, name="input_1"),
    tf.TensorSpec([None, 30, 6], tf.float32, name="input_2"),
    tf.TensorSpec([None, 55], tf.float32, name="input_3"),
    tf.TensorSpec([None, 60, 20], tf.float32, name="input_4"),
]

onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature=spec, opset=13, output_path=onnx_model_path)


rewriter <function rewrite_constant_fold at 0x000002F3075BF380>: exception `np.cast` was removed in the NumPy 2.0 release. Use `np.asarray(arr, dtype=dtype)` instead.


In [6]:
from onnx2torch import convert
import torch

onnx_model = onnx.load(onnx_model_path)
pytorch_onnx_model = convert(onnx_model)

In [7]:
input_tensors = (
    torch.randn(1, 60, 6),  # Input 1
    torch.randn(1, 30, 6),  # Input 2
    torch.randn(1, 55),     # Input 3
    torch.randn(1, 60, 20)  # Input 4
)

# Run PyTorch model
output = pytorch_onnx_model(*input_tensors)
print("PyTorch Model Output:", output)

PyTorch Model Output: tensor([[26.4123]], grad_fn=<AddBackward0>)


In [8]:
import numpy as np
import onnxruntime as ort
from deeplc import DeepLC
from psm_utils.psm import PSM
from psm_utils.psm_list import PSMList
from psm_utils.io.peptide_record import peprec_to_proforma

# Initialize DeepLC
dlc = DeepLC()

# Example peptide sequences with modifications
peptides = ["AAGPSLSHTSGGTQSK", "AAINQKLIETGER", "AANDAGYFNDEMAPIEVKTK"]
modifications = ["", "6|Acetyl", "12|Oxidation|18|Acetyl"]
identifiers = ["peptide1", "peptide2", "peptide3"]

# Convert peptide + modifications into ProForma notation
list_of_psms = [
    PSM(peptidoform=peprec_to_proforma(seq, mod), spectrum_id=ident)
    for seq, mod, ident in zip(peptides, modifications, identifiers)
]

# Convert to a PSMList
psm_list = PSMList(psm_list=list_of_psms)

# Extract numerical features using DeepLC
feature_dict = dlc.do_f_extraction_psm_list(psm_list)

# Ensure extracted features match ONNX expected shape
input_1 = np.stack(list(feature_dict["matrix"].values())).astype(np.float32)  # [batch, 60, 6]
input_2 = np.stack(list(feature_dict["matrix_sum"].values())).astype(np.float32)  # [batch, 30, 6]
input_3 = np.stack(list(feature_dict["matrix_all"].values())).astype(np.float32)  # [batch, ?] (Fix applied below)
input_4 = np.stack(list(feature_dict["pos_matrix"].values())).astype(np.float32)  # [batch, ?] (Fix applied below)

# 🔥 Fix `input_3`: Ensure it has exactly 55 features
expected_input_3_dim = 55
if input_3.shape[1] != expected_input_3_dim:
    padded_input_3 = np.zeros((input_3.shape[0], expected_input_3_dim), dtype=np.float32)
    padded_input_3[:, :input_3.shape[1]] = input_3  # Fill with available data
    input_3 = padded_input_3

# 🔥 Fix `input_4`: Ensure it is **3D with shape [batch, 60, 20]**
expected_input_4_shape = (input_4.shape[0], 60, 20)

# 🛠️ Ensure input_4 is **at least 2D**
if input_4.ndim == 2:
    input_4 = np.expand_dims(input_4, axis=-1)  # Convert [batch, X] → [batch, X, 1]

# 🛠️ Now, reshape or pad to exactly `[batch, 60, 20]`
padded_input_4 = np.zeros(expected_input_4_shape, dtype=np.float32)

# Find minimum matching dimensions
min_dim1 = min(input_4.shape[1], 60)
min_dim2 = min(input_4.shape[2], 20)

# Fill with available data
padded_input_4[:, :min_dim1, :min_dim2] = input_4[:, :min_dim1, :min_dim2]
input_4 = padded_input_4  # Replace with corrected array

# Load ONNX model
onnx_model_path = "deeplc.onnx"
ort_session = ort.InferenceSession(onnx_model_path)

# Prepare inputs for ONNX
onnx_inputs = {
    "input_1": input_1,  # [batch, 60, 6]
    "input_2": input_2,  # [batch, 30, 6]
    "input_3": input_3,  # [batch, 55]  ✅ Now correctly sized
    "input_4": input_4   # [batch, 60, 20]  ✅ Now correctly sized
}

# Run ONNX model
onnx_outputs = ort_session.run(None, onnx_inputs)

# Print output predictions
print("ONNX Model Output:", onnx_outputs)


ONNX Model Output: [array([[-18.030432],
       [ 43.959698],
       [ 58.209183]], dtype=float32)]


In [19]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
import onnx
import onnxruntime as ort
from onnx2torch import convert
from sklearn.model_selection import train_test_split

# Import DeepLC and related utilities for feature extraction
from deeplc import DeepLC
from psm_utils.psm import PSM
from psm_utils.psm_list import PSMList
from psm_utils.io.peptide_record import peprec_to_proforma
import copy

# Set device for PyTorch computations
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

csv_file = "ATLANTIS_SILICA_fixed_mods.csv"
data = pd.read_csv(csv_file, keep_default_na=False)

# Create a list of PSM objects from the CSV data.
psm_list = PSMList(psm_list=[
    PSM(peptidoform=peprec_to_proforma(row["seq"], row["modifications"]), spectrum_id=str(idx))
    for idx, row in data.iterrows()
])
dlc = DeepLC()
feature_dict = dlc.do_f_extraction_psm_list(psm_list)

input_1 = np.stack(list(feature_dict["matrix"].values())).astype(np.float32)
input_2 = np.stack(list(feature_dict["matrix_sum"].values())).astype(np.float32)
input_3 = np.stack(list(feature_dict["matrix_all"].values())).astype(np.float32)
input_4 = np.stack(list(feature_dict["pos_matrix"].values())).astype(np.float32)


expected_input_3_dim = 55
if input_3.shape[1] != expected_input_3_dim:
    padded_input_3 = np.zeros((input_3.shape[0], expected_input_3_dim), dtype=np.float32)
    padded_input_3[:, :input_3.shape[1]] = input_3
    input_3 = padded_input_3

# Ensure input_4 is a 3D tensor with shape [batch, 60, 20].
expected_input_4_shape = (input_4.shape[0], 60, 20)
if input_4.ndim == 2:
    input_4 = np.expand_dims(input_4, axis=-1)
padded_input_4 = np.zeros(expected_input_4_shape, dtype=np.float32)
min_dim1 = min(input_4.shape[1], 60)
min_dim2 = min(input_4.shape[2], 20)
padded_input_4[:, :min_dim1, :min_dim2] = input_4[:, :min_dim1, :min_dim2]
input_4 = padded_input_4

y = data["tr"].values.astype(np.float32)

(train_input_1, test_input_1,
 train_input_2, test_input_2,
 train_input_3, test_input_3,
 train_input_4, test_input_4,
 y_train, y_test) = train_test_split(
    input_1, input_2, input_3, input_4, y, test_size=0.2, random_state=42
)

class DeepLCDataset(Dataset):
    def __init__(self, in1, in2, in3, in4, targets):
        self.in1 = in1
        self.in2 = in2
        self.in3 = in3
        self.in4 = in4
        self.targets = targets

    def __len__(self):
        return self.in1.shape[0]

    def __getitem__(self, idx):
        return (
            torch.tensor(self.in1[idx]),
            torch.tensor(self.in2[idx]),
            torch.tensor(self.in3[idx]),
            torch.tensor(self.in4[idx]),
            torch.tensor(self.targets[idx])
        )

train_dataset = DeepLCDataset(train_input_1, train_input_2, train_input_3, train_input_4, y_train)
test_dataset = DeepLCDataset(test_input_1, test_input_2, test_input_3, test_input_4, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [23]:
import copy

onnx_model_path = "deeplc.onnx"
onnx_model = onnx.load(onnx_model_path)
pretrained_model = convert(onnx_model)  # Convert ONNX model to PyTorch
fine_tuned_model = copy.deepcopy(pretrained_model)  # Create a copy for fine-tuning
pretrained_model.to(device)
fine_tuned_model.to(device)

# Note the optimizer now updates fine_tuned_model's parameters.
optimizer = optim.Adam(fine_tuned_model.parameters(), lr=1e-4)
loss_fn = nn.MSELoss()
num_epochs = 10

# Evaluation before transfer learning (using pretrained_model)
pretrained_model.eval()
initial_test_loss = 0.0
with torch.no_grad():
    for batch in test_loader:
        in1, in2, in3, in4, targets = batch
        in1 = in1.to(device)
        in2 = in2.to(device)
        in3 = in3.to(device)
        in4 = in4.to(device)
        targets = targets.to(device).view(-1, 1)
        outputs = pretrained_model(in1, in2, in3, in4)
        loss = loss_fn(outputs, targets)
        initial_test_loss += loss.item()
initial_test_loss /= len(test_loader)
print(f"Test Loss Before Transfer Learning: {initial_test_loss:.4f}")

# Fine-tuning the fine_tuned_model
fine_tuned_model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    for batch in train_loader:
        in1, in2, in3, in4, targets = batch
        in1 = in1.to(device)
        in2 = in2.to(device)
        in3 = in3.to(device)
        in4 = in4.to(device)
        targets = targets.to(device).view(-1, 1)

        optimizer.zero_grad()
        outputs = fine_tuned_model(in1, in2, in3, in4)
        loss = loss_fn(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    avg_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}")


Test Loss Before Transfer Learning: 1961.7255
Epoch 1/10, Loss: 97.4451
Epoch 2/10, Loss: 6.4018
Epoch 3/10, Loss: 4.5538
Epoch 4/10, Loss: 3.5531
Epoch 5/10, Loss: 2.8361
Epoch 6/10, Loss: 2.3910
Epoch 7/10, Loss: 2.0271
Epoch 8/10, Loss: 1.7389
Epoch 9/10, Loss: 1.5321
Epoch 10/10, Loss: 1.3355


In [24]:
fine_tuned_model.eval()
test_loss = 0.0
with torch.no_grad():
    for batch in test_loader:
        in1, in2, in3, in4, targets = batch
        in1 = in1.to(device)
        in2 = in2.to(device)
        in3 = in3.to(device)
        in4 = in4.to(device)
        targets = targets.to(device).view(-1, 1)
        outputs = fine_tuned_model(in1, in2, in3, in4)
        loss = loss_fn(outputs, targets)
        test_loss += loss.item()
avg_test_loss = test_loss / len(test_loader)
print(f"Test Loss: {avg_test_loss:.4f}")

Test Loss: 1.8959


In [25]:
pretrained_model.eval()
test_loss = 0.0
with torch.no_grad():
    for batch in test_loader:
        in1, in2, in3, in4, targets = batch
        in1 = in1.to(device)
        in2 = in2.to(device)
        in3 = in3.to(device)
        in4 = in4.to(device)
        targets = targets.to(device).view(-1, 1)
        outputs = pretrained_model(in1, in2, in3, in4)
        loss = loss_fn(outputs, targets)
        test_loss += loss.item()
avg_test_loss = test_loss / len(test_loader)
print(f"Test Loss: {avg_test_loss:.4f}")

Test Loss: 1961.7255
