In [1]:
from proteobench.modules.denovo.denovo_lfq_DDA_HCD import DDAHCDDeNovoModule
from proteobench.io.parsing.parse_settings import ParseSettingsBuilder
from proteobench.io.parsing.parse_denovo import load_input_file
from proteobench.score.denovo.denovoscores import DenovoScores
from proteobench.datapoint.denovo_datapoint import DenovoDatapoint
from proteobench.io.params.adanovo import extract_params as extract_params_adanovo
import pandas as pd

from psm_utils import Peptidoform
import os

import numpy as np
from typing import List, Dict, Tuple

In [2]:
# Root of the ProteoBench checkout that holds the test data and settings.
# Set the PROTEOBENCH_ROOT environment variable to run this notebook from a
# different checkout; the fallback is the location originally hard-coded here.
proteobench_root = os.environ.get(
    'PROTEOBENCH_ROOT',
    '/home/samva/Doctorate/Benchmarking/de_novo_module/ProteoBench',
)

# All inputs are derived from the single root above.
root = os.path.join(proteobench_root, 'test', 'data', 'denovo')
path_to_config = os.path.join(
    proteobench_root, 'test', 'params', 'denovo', 'adanovo', 'config.yaml'
)
path_to_gt = os.path.join(
    proteobench_root,
    'proteobench', 'io', 'parsing', 'io_parse_settings',
    'denovo', 'lfq', 'DDA', 'HCD', 'module_settings.csv',
)

# Module instance used throughout the notebook (constructed with an empty token).
module = DDAHCDDeNovoModule(token='')

In [3]:
# Extract the AdaNovo run parameters from the config file
user_input = extract_params_adanovo(file_path=path_to_config)

In [4]:
# Call the module's `benchmarking` entry point on the AdaNovo test file,
# passing the parsed parameters as a plain dict and no previous datapoints.
module.benchmarking(
    input_file_loc=os.path.join(
        root, 'adanovo', 'denovo_benchmark_test.adanovo.mztab'
    ),
    input_format='AdaNovo',
    user_input=vars(user_input),
    all_datapoints=None,
)

{'peptide': {'exact': {'precision': 0.3738888888888889, 'recall': 0.3738888888888889, 'coverage': 1.0}, 'mass': {'precision': 0.672, 'recall': 0.672, 'coverage': 1.0}}, 'aa': {'exact': {'precision': 0.8266411727214786, 'recall': np.float64(0.8228027666417023), 'coverage': np.float64(0.9953566236398079)}, 'mass': {'precision': 0.8266411727214786, 'recall': np.float64(0.8228027666417023), 'coverage': np.float64(0.9953566236398079)}}}


(      spectrum_id                    proforma  \
 0               0                     AEEHANR   
 1               1                  GNAGGLHHHR   
 2               2                 HHVLHHQTVDK   
 3               3                      HHNLLR   
 4               4                    HHSTLPQK   
 ...           ...                         ...   
 8995         8995               GWSTDGVPTDETR   
 8996         8996  AVEQAVNSGALAVN[UNIMOD:7]NR   
 8997         8997                VDSDDSTGSLFR   
 8998         8998                SNLEYMVGDESK   
 8999         8999            AVEQAVNSGALAVDNR   
 
                                             peptidoform     score  \
 0     ((A, None), (E, None), (E, None), (H, None), (...  0.988760   
 1     ((G, None), (N, None), (A, None), (G, None), (...  0.996438   
 2     ((H, None), (H, None), (V, None), (L, None), (...  0.986840   
 3     ((H, None), (H, None), (N, None), (L, None), (...  0.998590   
 4     ((H, None), (H, None), (S, None), (T, Non

In [3]:
# Step 1: read the raw AdaNovo result file
input_df = load_input_file(
    input_path=os.path.join(
        root, 'adanovo', 'denovo_benchmark_test.adanovo.mztab'
    ),
    input_format='AdaNovo',
)

# Step 2: build the AdaNovo parser from the module's own settings
parse_settings = ParseSettingsBuilder(
    parse_settings_dir=module.parse_settings_dir,
    module_id=module.module_id,
).build_parser('AdaNovo')

# Step 3: convert the raw table to the standard format
# (the result also carries the ground-truth peptidoforms)
sf = parse_settings.convert_to_standard_format(df=input_df)

In [4]:
# Inspect the `peptidoform_ground_truth` entry stored under key 0 of the
# standard-format table.
sf['peptidoform_ground_truth'][0]

Peptidoform('AEEHANR')

In [None]:

# Load the ground-truth table and turn its peptidoform strings into
# `Peptidoform` objects under a name that cannot clash with the predictions.
gt = pd.read_csv(path_to_gt).rename(columns={'peptidoform': 'peptidoform_gt'})
gt['peptidoform_gt'] = gt['peptidoform_gt'].apply(Peptidoform)

# Prepare the de novo predictions for the merge: distinct peptidoform column
# name and an integer `spectrum_id` join key.
sf = sf.rename(columns={'peptidoform': 'peptidoform_dn'})
sf['spectrum_id'] = sf['spectrum_id'].astype(int)

# `rescoring_features` is selected from the merged table below. Previously it
# was always popped from `sf` first, so the selection raised KeyError unless
# the ground truth supplied a column with that name (and re-running the cell
# failed on the pop itself). Drop the de novo copy only when the ground truth
# already provides the column, which avoids `_x`/`_y` suffixes in the merge.
if 'rescoring_features' in gt.columns:
    sf = sf.drop(columns='rescoring_features', errors='ignore')

# Inner join on the spectrum id, then keep only the columns passed on to the
# scoring step.
input_to_scores = gt.merge(sf, on='spectrum_id')
input_to_scores = input_to_scores.loc[
    :,
    [
        'peptidoform_gt',
        'peptidoform_dn',
        'spectrum_id',
        'score',
        'aa_scores',
        'rescoring_features',
    ],
]

# Instantiate the scorer and derive the intermediate results from the merged table
denovo_scores = DenovoScores()
intermediate = denovo_scores.generate_intermediate(input_to_scores)

# Build the datapoint for this AdaNovo run from the intermediate results
# and the parsed parameters (passed as a plain dict).
current_datapoint = DenovoDatapoint.generate_datapoint(
    intermediate=intermediate, input_format='AdaNovo', user_input=vars(user_input)
)

{'peptide': {'exact': {'precision': 0.3738888888888889, 'recall': 0.3738888888888889, 'coverage': 1.0}, 'mass': {'precision': 0.672, 'recall': 0.672, 'coverage': 1.0}}, 'aa': {'exact': {'precision': 0.8266411727214786, 'recall': np.float64(0.8228027666417023), 'coverage': np.float64(0.9953566236398079)}, 'mass': {'precision': 0.8266411727214786, 'recall': np.float64(0.8228027666417023), 'coverage': np.float64(0.9953566236398079)}}}
