# PGFinder Interactive Notebook

This notebook is a basic user interface to allow researchers less familiar with the command line to run PGFinder. Some compromises have been made to make it run as a Jupyter notebook on the free myBinder platform.

To use the code to analyse your data you must work from top to bottom on this notebook, following instructions as you go. The first step is to execute all of the "cells" in the notebook so they are ready for your input. To do this...

**Click *Kernel*>*Restart & Run All* on the menu, above.**

If you have any problems or suggestions, or would like to contribute a mass database, please raise an [issue here](https://github.com/Mesnage-Org/pgfinder/issues). This notebook runs on the latest release. Please review the [release notes](https://github.com/Mesnage-Org/pgfinder/releases).

In [1]:
import base64
import codecs
import io
import os
import uuid
import ipywidgets as widgets
from ipywidgets import HTML
from IPython.display import display
from ipysheet import from_dataframe
import pandas as pd

from pgfinder import matching, pgio, validation

# Get list of modifications
allowed_mods = validation.allowed_modifications()

# Get built in mass lists
mass_lists_path = './data/masses'
mass_lists = os.listdir(mass_lists_path)
mass_lists.append('Upload Custom')

# Main analysis function
def analysis(b):
    # Upload deconvoluted file
    # This widget returns a dictionary of details of uploaded files
    # However, the widget is restricted to allow only one file
    # Hence `.value[0]`
    print(f"~#~#~#~#~#~#~#~#~#~# data_uploader :\n{data_uploader}")
    print(f"~#~#~#~#~#~#~#~#~#~# data_uploader.children :\n{data_uploader.children}")
    print(f"~#~#~#~#~#~#~#~#~#~# data_uploader.children[0] :\n{data_uploader.children[0]}")
    print(f"~#~#~#~#~#~#~#~#~#~# data_uploader.children[0].value :\n{data_uploader.children[0].value}")
    print(f"~#~#~#~#~#~#~#~#~#~# data_uploader.children[0].value[0] :\n{data_uploader.children[0].value[0]}")
    
    uploaded_df = pgio.ms_upload_reader(data_uploader.children[0].value[0])
    
    # Load mass list
    if rb_masses.value == 'Upload Custom':
        theo_masses = pgio.theo_masses_upload_reader(mass_uploader.children[0].value[0])
    else:
        csv_filepath = os.path.join(mass_lists_path, rb_masses.value)
        theo_masses = pgio.theo_masses_reader(csv_filepath)

    # Load ppm value
    user_ppm = ppm_tol.value

    # Load time delta value
    user_time_delta = time_delta.value

    # Make sure mod list is a list
    mod_list = list(selector_mods.value)
    
    results = matching.data_analysis(uploaded_df, theo_masses, user_time_delta, mod_list, user_ppm)
    
    # Make the download button
    results_csv_str = results.to_csv()
    filename = pgio.default_filename()
    results_csv_str = pgio.dataframe_to_csv_metadata(output_dataframe=results)
    b64 = base64.b64encode(results_csv_str.encode())
    payload = b64.decode()

    html_buttons = '''<html>
    <head>
    <meta name="viewport" content="width=device-width, initial-scale=1">
    </head>
    <body>
    <a download="{filename}" href="data:text/csv;base64,{payload}" download>
    <button class="p-Widget jupyter-widgets jupyter-button widget-button mod-warning">Download File</button>
    </a>
    </body>
    </html>
    '''

    html_button = html_buttons.format(payload=payload,filename=filename)
    display(HTML(html_button))
    
# Define widgets

# A composite widget for picking a file and displaying its name
def named_file_upload(accept, description):
    file_upload = widgets.FileUpload(
        accept = accept, 
        description = description,
        multiple = False,
        layout = big_button
    )
    file_name = widgets.Label(value="No file selected...")
    def handle_file_upload(file):
        file_name.value = file["new"][0]["name"]
    file_upload.observe(handle_file_upload, names='value')
    return widgets.HBox([file_upload, file_name])

# Layout for a bigger button
big_button = widgets.Layout(width='auto')

# Style for wider description
wide_style = {'description_width': 'initial'}

# Deconvoluted Data file upload
data_uploader = named_file_upload('.txt,.ftrs', 'Upload Deconvoluted Data')

# Modifcation selector
selector_mods = widgets.SelectMultiple(
    options = allowed_mods,
    description = 'Modification',
    disabled = False
)

# Mass library selector
rb_masses = widgets.RadioButtons(
    options = mass_lists,
    description = 'Mass List',
    disabled = False
)

# Mass library file upload
mass_uploader = named_file_upload('.csv', 'Upload Mass Library')

# Set PPM tolerance
ppm_tol = widgets.BoundedFloatText(
    value = 10.0,
    min = 1,
    max = 100,
    step = 0.1,
    description = 'Set ppm tolerance',
    disabled = False,
    style = wide_style
    )

# Set time delta for in source clean up
time_delta = widgets.BoundedFloatText(
    value = 0.5,
    min = 0,
    max = 100,
    step = 0.01,
    description = 'Set time delta value',
    disabled = False,
    style = wide_style
)

# Analysis button
button = widgets.Button(description="Run Analysis")
button.on_click(analysis)

[Tue, 25 Apr 2023 12:38:05] [INFO    ] [pgfinder] Loaded parameters from file : config/parameters.yaml
[Tue, 25 Apr 2023 12:38:05] [INFO    ] [pgfinder] All parameters converted to decimal


## Step 1: Upload Deconvoluted Data
Click *Upload* to upload a `.txt` file output by MaxQuant ([example file](https://github.com/Mesnage-Org/pgfinder/raw/master/data/maxquant_test_data.txt)), or an `.ftrs` file.

In [None]:
str("something.txt")

In [2]:
display(data_uploader)

HBox(children=(FileUpload(value=(), accept='.txt,.ftrs', description='Upload Deconvoluted Data', layout=Layout…

## Step 2: Select Modifications
Select modifications (Hold down control / command and click to select mulitple items.)

In [3]:
display(selector_mods) 

SelectMultiple(description='Modification', options=('Sodium', 'Potassium', 'Anh', 'DeAc', 'DeAc_Anh', 'Nude', …

## Step 3: Select or Upload Mass Library

### Select

In [4]:
display(rb_masses)

RadioButtons(description='Mass List', options=('e_coli_monomer_masses.csv', 'c_diff_monomer_masses.csv', 'Uplo…

### (Optional) Upload Custom Mass Library
[Example mass library file.](https://raw.githubusercontent.com/Mesnage-Org/pgfinder/master/data/masses/e_coli_monomer_masses.csv)

In [5]:
display(mass_uploader)

HBox(children=(FileUpload(value=(), accept='.csv', description='Upload Mass Library', layout=Layout(width='aut…

## Step 4: Set PPM tolerance

In [6]:
display(ppm_tol)

BoundedFloatText(value=10.0, description='Set ppm tolerance', min=1.0, step=0.1, style=DescriptionStyle(descri…

## Step 5: Set time window for in-source decay and salt adduct clean up

In [7]:
display(time_delta)

BoundedFloatText(value=0.5, description='Set time delta value', step=0.01, style=DescriptionStyle(description_…

## Step 6: Run Analysis
Click run analysis.
After the analysis is complete, a download button will appear.

In [8]:
display(button)

Button(description='Run Analysis', style=ButtonStyle())

~#~#~#~#~#~#~#~#~#~# data_uploader :
HBox(children=(FileUpload(value=({'name': 'WT.ftrs', 'type': '', 'size': 11436032, 'content': <memory at 0x7fb2e18a9840>, 'last_modified': datetime.datetime(2023, 4, 25, 9, 5, 51, 33000, tzinfo=datetime.timezone.utc)},), accept='.txt,.ftrs', description='Upload Deconvoluted Data', layout=Layout(width='auto')), Label(value='WT.ftrs')))
~#~#~#~#~#~#~#~#~#~# data_uploader.children :
(FileUpload(value=({'name': 'WT.ftrs', 'type': '', 'size': 11436032, 'content': <memory at 0x7fb2e18a9840>, 'last_modified': datetime.datetime(2023, 4, 25, 9, 5, 51, 33000, tzinfo=datetime.timezone.utc)},), accept='.txt,.ftrs', description='Upload Deconvoluted Data', layout=Layout(width='auto')), Label(value='WT.ftrs'))
~#~#~#~#~#~#~#~#~#~# data_uploader.children[0] :
FileUpload(value=({'name': 'WT.ftrs', 'type': '', 'size': 11436032, 'content': <memory at 0x7fb2e18a9840>, 'last_modified': datetime.datetime(2023, 4, 25, 9, 5, 51, 33000, tzinfo=datetime.timezone.utc)},), acc

HTML(value='<html>\n    <head>\n    <meta name="viewport" content="width=device-width, initial-scale=1">\n    …

~#~#~#~#~#~#~#~#~#~# data_uploader :
HBox(children=(FileUpload(value=({'name': 'features_2023-04-25.txt', 'type': 'text/plain', 'size': 853143, 'content': <memory at 0x7fb2e18aa080>, 'last_modified': datetime.datetime(2023, 4, 25, 12, 5, 37, 427000, tzinfo=datetime.timezone.utc)},), accept='.txt,.ftrs', description='Upload Deconvoluted Data', layout=Layout(width='auto')), Label(value='features_2023-04-25.txt')))
~#~#~#~#~#~#~#~#~#~# data_uploader.children :
(FileUpload(value=({'name': 'features_2023-04-25.txt', 'type': 'text/plain', 'size': 853143, 'content': <memory at 0x7fb2e18aa080>, 'last_modified': datetime.datetime(2023, 4, 25, 12, 5, 37, 427000, tzinfo=datetime.timezone.utc)},), accept='.txt,.ftrs', description='Upload Deconvoluted Data', layout=Layout(width='auto')), Label(value='features_2023-04-25.txt'))
~#~#~#~#~#~#~#~#~#~# data_uploader.children[0] :
FileUpload(value=({'name': 'features_2023-04-25.txt', 'type': 'text/plain', 'size': 853143, 'content': <memory at 0x7fb2e18aa

KeyError: "['mwMonoisotopic', 'rt', 'rt_length', 'maxIntensity'] not in index"