In [8]:
!pip install ipywidgets

import pandas as pd
import ipywidgets as widgets
from IPython.display import display
import requests
from bs4 import BeautifulSoup
import os
import zipfile
from tqdm import tqdm
from pathlib import Path

import json

from collections import defaultdict
import toml

from proteobench.modules.quant.quant_lfq_ion_DDA import DDAQuantIonModule
from proteobench.io.parsing.parse_settings import ParseSettingsBuilder
from proteobench.plotting import plot_quant



In [2]:
# Examples to choose from:
#
# "https://api.github.com/repos/Proteobench/Results_quant_ion_DDA/contents/",
# "https://api.github.com/repos/Proteobench/Results_quant_peptidoform_DIA/contents/",
# "https://api.github.com/repos/Proteobench/Results_subcellprofile_DOMLFQ_protein_DIA_EXPL/contents/",
# "https://api.github.com/repos/Proteobench/Results_quant_peptidoform_DDA/contents/",
# "https://api.github.com/repos/Proteobench/Results_quant_ion_DIA_diaPASEF/contents/",
# "https://api.github.com/repos/Proteobench/Results_quant_ion_DIA/contents/",
# "https://api.github.com/repos/Proteobench/Results_quant_ion_DIA_singlecell/contents/",

repo_url = "https://api.github.com/repos/Proteobench/Results_quant_ion_DDA/contents/"

# List the repository's top-level contents through the GitHub contents API.
response = requests.get(repo_url, timeout=30)

if response.status_code == 200:
    repo_contents = response.json()
else:
    # Best effort: report the failure and continue with an empty listing.
    repo_contents = []
    print(f"Failed to retrieve the repository contents. Status code: {response.status_code}")
    print(repo_url)

# Keep only the JSON entries (one file per submitted data point).
json_entries = [item for item in repo_contents if (item.get("name") or "").endswith(".json")]
file_names = [item["name"] for item in json_entries]

json_files_content = []

for item in json_entries:
    # Use the download URL reported by the API for this very entry, so the file is
    # always fetched from the repository selected in `repo_url` (instead of a
    # hard-coded organisation/repository that may not match the listing).
    url = item.get("download_url")
    if not url:
        print(f"Skipping {item['name']}: no download URL reported by the API")
        continue

    response = requests.get(url, timeout=30)
    response.raise_for_status()  # Raise an error if the request fails

    json_files_content.append(response.json())

# Flatten the (nested) JSON documents into one row per data point.
df = pd.json_normalize(json_files_content)

df.head(5)

Unnamed: 0,id,old_new,software_name,software_version,search_engine,search_engine_version,ident_fdr_psm,ident_fdr_peptide,ident_fdr_protein,enable_match_between_runs,...,results.2.CV_q75,results.2.CV_q95,results.1.median_abs_epsilon,results.1.mean_abs_epsilon,results.1.variance_epsilon,results.1.nr_prec,results.1.CV_median,results.1.CV_q90,results.1.CV_q75,results.1.CV_q95
0,MaxQuant_20241216_122433,old,MaxQuant,2.5.1.0,Andromeda,,,0.01,0.01,False,...,0.291818,0.512293,0.199852,0.274547,0.160229,50302,0.202619,0.416519,0.291818,0.512293
1,MaxQuant_20241216_124040,old,MaxQuant,2.3.1.0,Andromeda,,,0.01,0.01,True,...,0.312472,0.569765,0.207123,0.304261,0.222469,50339,0.215507,0.46077,0.312472,0.569765
2,i2MassChroQ_20241216_124142,old,i2MassChroQ,1.0.16,X! Tandem,X! Tandem Alanine (2017.2.1.4),0.008998,0.011963,0.009873,False,...,0.269583,0.61368,0.211233,0.329852,0.255909,77949,0.130385,0.477048,0.269583,0.61368
3,AlphaPept_20241217_071436,old,AlphaPept,0.5.0,AlphaPept,0.5.0,,0.01,0.01,True,...,0.279738,0.600532,0.224729,0.350489,0.334572,59868,0.164423,0.465654,0.279738,0.600532
4,MaxQuant_20241216_122819,old,MaxQuant,1.5.8.2,Andromeda,,,0.01,0.01,True,...,0.317542,0.597572,0.209998,0.324032,0.276706,49679,0.217079,0.475874,0.317542,0.597572


In [3]:
# Directory to save the unzipped files
unzip_dir = "temp_results"
os.makedirs(unzip_dir, exist_ok=True)

# Maps each intermediate hash to the local directory holding its extracted files.
hash_vis_dirs = {}

# Loop over each hash and download the corresponding zip file, unzip, and remove it
for ihash in df["intermediate_hash"]:
    result_path = os.path.join(unzip_dir, ihash)

    # Only a directory that actually contains files counts as cached; an empty
    # directory is the leftover of an earlier failed download and must be retried.
    if os.path.isdir(result_path) and os.listdir(result_path):
        hash_vis_dirs[ihash] = result_path
        continue

    url = f"https://proteobench.cubimed.rub.de/datasets/{ihash}/{ihash}_data.zip"

    # Send the GET request to download the file
    response = requests.get(url, timeout=60)

    if response.status_code != 200:
        print(f"Failed to download {ihash}.zip - Status code: {response.status_code}")
        continue

    # Create the target directory only once there is something to put into it.
    os.makedirs(result_path, exist_ok=True)

    # Temporary file path for the zip file
    zip_file_path = os.path.join(result_path, f"{ihash}.zip")

    # Write the zip file to disk
    with open(zip_file_path, 'wb') as f:
        f.write(response.content)

    try:
        # Now unzip the file into the desired directory
        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
            zip_ref.extractall(result_path)
    except zipfile.BadZipFile:
        print(f"Failed to unzip {ihash}.zip - not a valid zip archive")
        continue
    finally:
        # Remove the temporary archive whether or not extraction succeeded.
        os.remove(zip_file_path)

    hash_vis_dirs[ihash] = result_path
    print(f"Downloaded, unzipped, and deleted {ihash}.zip")

In [4]:
# Create a SelectMultiple widget with names as options.
# Each option's value is the row's index *label* as yielded by df.iterrows().
row_selector = widgets.SelectMultiple(
    options=[(f"{row['id']} (hash: {row['intermediate_hash']}, submission comments: {row['submission_comments']})", idx) for idx, row in df.iterrows()],
    description='Select Rows:',
    rows=10,  # Number of visible rows in the widget
    layout=widgets.Layout(width='50%')  # Adjust layout as needed
)

# Button to confirm selection
button = widgets.Button(description='Filter Rows')

# Output widget to display the filtered DataFrame
output = widgets.Output()


# Callback for filtering rows
def on_button_click(b):
    """Store the selected rows in the global ``filtered_df`` and display them.

    Parameters
    ----------
    b : widgets.Button
        The button that triggered the callback (unused).

    Notes
    -----
    If nothing is selected, a message is shown and any previously stored
    ``filtered_df`` is left untouched, so later cells never receive an empty frame.
    """
    global filtered_df  # Store filtered DataFrame globally
    selected_labels = list(row_selector.value)
    with output:
        output.clear_output()
        if not selected_labels:
            print("No rows selected - select at least one row and click the button again.")
            return
        # The option values are index labels, so look them up by label (.loc),
        # not by position (.iloc).
        filtered_df = df.loc[selected_labels]
        print("Filtered DataFrame:")
        display(filtered_df)


# Attach callback
button.on_click(on_button_click)

# Display the widgets
display(row_selector, button, output)

SelectMultiple(description='Select Rows:', layout=Layout(width='50%'), options=(('MaxQuant_20241216_122433 (ha…

Button(description='Filter Rows', style=ButtonStyle())

Output()

In [10]:
def _first_file_with_prefix(directory, prefix):
    """Return the path of the first regular file in ``directory`` whose name starts with ``prefix``.

    Candidates are sorted by name so the choice is deterministic.

    Raises
    ------
    FileNotFoundError
        If no regular file in ``directory`` starts with ``prefix``.
    """
    candidates = sorted(
        name
        for name in os.listdir(directory)
        if name.startswith(prefix) and os.path.isfile(os.path.join(directory, name))
    )
    if not candidates:
        raise FileNotFoundError(f"No file starting with '{prefix}' found in {directory}")
    return os.path.join(directory, candidates[0])


# The selection is made interactively in the widget cell; fail with a clear
# message when this cell is run before any row has been chosen.
if "filtered_df" not in globals() or filtered_df.empty:
    raise RuntimeError("No submission selected: pick a row in the selector above and click 'Filter Rows' first.")

# Only the first selected submission is analysed.
selected_hash = filtered_df["intermediate_hash"].iloc[0]
if selected_hash not in hash_vis_dirs:
    raise FileNotFoundError(f"No downloaded data available for hash {selected_hash}; re-run the download cell.")

location = hash_vis_dirs[selected_hash]
software_name = filtered_df["software_name"].iloc[0]

# Locate the files that start with 'input_file' / 'param', ignoring their extensions
matching_file = _first_file_with_prefix(location, 'input_file')
matching_file_params = _first_file_with_prefix(location, 'param')

# Every key that is looked up but not set resolves to an empty string.
user_config = defaultdict(lambda: "")

module_obj = DDAQuantIonModule(token="")
# NOTE(review): `results_df` is not used further in this cell; the call is kept in
# case later cells rely on it - confirm whether it is still needed.
results_df = module_obj.obtain_all_data_points(all_datapoints=None)

results_performance, all_datapoints, result_df = module_obj.benchmarking(
    matching_file, software_name, user_input=user_config, all_datapoints=[]
)

fig1 = plot_quant.PlotDataPoint.plot_CV_violinplot(results_performance)
fig1.show()

# NOTE(review): this path is relative to the current working directory - it
# presumably expects the notebook to be run from its own folder; confirm.
parse_settings = ParseSettingsBuilder(
                parse_settings_dir="../../proteobench/io/parsing/io_parse_settings/Quant/lfq/DDA/ion/", module_id="quant_lfq_DDA_ion"
            ).build_parser(software_name)

fig2 = plot_quant.PlotDataPoint.plot_fold_change_histogram(results_performance,parse_settings.species_expected_ratio())
fig2.show()