===================================
 # PEAKS DETECTION #
===================================

Read the chromatogram, generate a peak table and identify the compounds using the NIST database. 

In [2]:
# code 
import os
import ipywidgets as widgets
from IPython.display import display
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import Process
from identification import sample_identification

# Example file name: G0-1-120123.cdf

# Volume locations come from the process environment (presumably populated
# from a .env file -- confirm against the deployment setup). Each value is
# None when its variable is not set.
docker_volume_path = os.environ.get('DOCKER_VOLUME_PATH')
host_volume_path = os.environ.get('HOST_VOLUME_PATH')


def parse_abs_thresholds(change):
    """Parse the comma-separated thresholds typed in the text field.

    Used as an observer callback: ``change['new']`` holds the new text.

    Args:
        change: Mapping-like change notification with a ``'new'`` entry
            containing the raw text.

    Returns:
        The list of parsed floats, an empty list when the field is blank,
        or ``None`` when at least one item is not a number (an error
        message is printed in that case).
    """
    raw = change['new']
    # A blank field means "no thresholds entered"; it is not a typing error,
    # so do not report one when the user clears the box.
    if not raw or not raw.strip():
        return []
    try:
        values = [float(item.strip()) for item in raw.split(",")]
    except ValueError:
        print("Erreur : Veuillez entrer des nombres séparés par des virgules.")
        return None
    print("Valeurs converties :", values)
    return values

def update_ABS_THRESHOLDS(change):
    """Enable or disable the threshold and cluster inputs for the new mode.

    Both widgets are disabled when the new value is 'tic' and enabled for
    any other value.
    """
    locked = change.new == 'tic'
    for control in (ABS_THRESHOLDS, cluster):
        control.disabled = locked

def bold_widget(value, widget):
    """Return an HBox holding a bold HTML label followed by *widget*."""
    label = widgets.HTML(value=f'<b>{value}</b>')
    row = [label, widget]
    return widgets.HBox(row)


# Shared style so that long descriptions are not truncated.
style = dict(description_width='initial')

# Empty box with a green border; it is displayed between parameter groups.
vbox = widgets.VBox(layout=widgets.Layout(border='2px solid green'))

# Default folders, derived from the volume path read from the environment.
default_path_input = f'{docker_volume_path}/input/'
default_path_output = f'{docker_volume_path}/output/'

# Title shown at the top of the form.
txt = widgets.HTML('<H1>Choisissez vos parametres:</H1>')

# Input folder.
path = widgets.Text(value=default_path_input)
pbox = bold_widget("Path", path)

# Optional comma-separated list of file names.
files = widgets.Text(placeholder="ex: file1.cdf, file2.cdf")
fbox = bold_widget("Files", files)

# Output folder.
OUTPUT_PATH = widgets.Text(
    value=default_path_output,
    description="<b>Output path</b>",
    style=style,
)

# Modulation time slider.
mod_time = widgets.FloatSlider(
    value=1.25,
    min=1,
    max=8,
    step=0.01,
    description="<b>Modulation time:</b>",
    style=style,
)



# Peak detection method selector.
method = widgets.RadioButtons(
    description='<b>Method</b>',
    options=['persistent_homology', 'peak_local_max', 'LoG', 'DoG', 'DoH'],
    value='persistent_homology',
    disabled=False,
)

# Detection mode selector; 'tic' is the default.
mode = widgets.RadioButtons(
    description='<b>Mode</b>',
    options=['tic', 'mass_per_mass', '3D'],
    value='tic',
    disabled=False,
)

# Threshold slider ("seuil").
seuil = widgets.IntSlider(
    value=5,
    min=0,
    max=10,
    description="<b>Seuil</b>",
)

# Minimum hit probability slider.
hit_prob_min = widgets.IntSlider(
    value=15,
    min=0,
    max=30,
    description="<b>Minimum hit probability</b> [a modifier]",
    style=style,
)

# Heading for the options that only apply outside 'tic' mode.  # TODO
txt_abs_threshold_cluster = widgets.HTML("If mass_per_mass or 3D mode:")

# Free-text list of absolute thresholds. It starts disabled and is toggled
# by the observer registered on the mode selector.
ABS_THRESHOLDS = widgets.Text(
    description="Seuils absolus(liste)",
    value=None,
    disabled=True,
    style=style,
)
# Parse the text each time its value changes.
ABS_THRESHOLDS.observe(parse_abs_thresholds, names='value')

# Cluster option; also starts disabled.
cluster = widgets.Checkbox(
    description='<b>cluster</b>',
    value=False,
    disabled=True,
)

# Enable or disable the two widgets above whenever the mode changes.
mode.observe(update_ABS_THRESHOLDS, names='value')

# --- peak_local_max -------------------------------------------------------
txt_plm = widgets.HTML("Peak local max parameter: ")
txt1 = "The minimal allowed distance separating peaks."
txt2 = "To find the maximum number of peaks, use min_distance=1."
txt_plm_description = widgets.HTML(f"({txt1} {txt2})")
min_distance = widgets.IntSlider(
    value=1,
    min=0,
    max=30,
    step=1,
    description="<b>Minimal distance</b> [a modifier]",
    style=style,
)

# --- DoG ------------------------------------------------------------------
txt_dog = widgets.HTML("DoG parameter: ")
txt3 = (
    "The ratio between the standard deviation of Gaussian Kernels used for"
    " computing the Difference of Gaussians."
)
txt_dog_description = widgets.HTML(txt3)
# NOTE(review): the slider allows ratios down to 0; if the value is handed
# straight to a Difference-of-Gaussians routine, ratios <= 1 may be
# rejected -- confirm against the identification code.
sigma_ratio = widgets.FloatSlider(
    value=1.6,
    min=0,
    max=2,
    step=0.01,
    description="<b>sigma ratio:</b>",
    style=style,
)

# --- LoG and DoH ----------------------------------------------------------
# The heading previously read "LoG and DoG", although DoG has its own
# section above and this group is the LoG/DoH one (see the variable names).
txt_log_doh = widgets.HTML("LoG and DoH parameter")
txt4 = (
    "(The number of intermediate values of standard deviations to consider"
    " between min_sigma (1) and max_sigma (30))"
)
txt_log_doh_description = widgets.HTML(txt4)
num_sigma = widgets.IntSlider(
    value=10,
    min=1,
    max=30,
    description="<b>sigma:</b>",
)

# Checkbox for the "formated spectra" option; ticked by default.
formated_spectra = widgets.Checkbox(
    description='<b>formated spectra</b>',
    value=True,
    disabled=False,
)

# Minimum match factor slider, from 0 to 1000.
match_factor_min = widgets.IntSlider(
    value=700,
    min=0,
    max=1000,
    step=1,
    description="<b>Match factor min</b> [a modifier]",
)



def get_files_from_folder(path):
    """Return the names of the ``.cdf`` files located directly in *path*.

    Args:
        path: Folder to scan.

    Returns:
        A list of file names (not full paths) sorted alphabetically, or an
        empty list when *path* is not an existing directory.
    """
    if not os.path.isdir(path):
        return []
    # os.listdir gives entries in arbitrary order, so sort for a reproducible
    # processing order. Keep regular files only: a sub-folder whose name ends
    # in ".cdf" is not a sample.
    return sorted(
        name for name in os.listdir(path)
        if name.endswith(".cdf") and os.path.isfile(os.path.join(path, name))
    )

import multiprocessing
# Module-level event object. Nothing in the code shown here sets or waits on
# it -- presumably reserved for a stop/cancel feature (TODO confirm).
stop_event = multiprocessing.Event()

def analyse(path, files_list, output_path, mod_time, method, mode, seuil,
            hit_prob_min, ABS_THRESHOLDS, cluster, min_distance,
            sigma_ratio, num_sigma, formated_spectra, match_factor_min,
            max_workers=4):
    """Run ``sample_identification`` on each requested file in parallel.

    Args:
        path: Folder containing the input files.
        files_list: File names relative to *path*, or ``None`` to process
            every ``.cdf`` file found in *path*.
        output_path: Folder passed to ``sample_identification``; it is
            created when it does not exist.
        mod_time, method, mode, seuil, hit_prob_min, ABS_THRESHOLDS, cluster,
        min_distance, sigma_ratio, num_sigma, formated_spectra,
        match_factor_min: Forwarded unchanged to ``sample_identification``.
        max_workers: Maximum number of worker processes (default 4).

    Returns:
        None. Progress, errors and each job's result are printed.

    Raises:
        Whatever a job raised; it is re-raised when that job's result is
        collected.
    """
    if not path:
        print("Erreur : Aucun chemin sélectionné.")
        return
    if files_list is None:
        files_list = get_files_from_folder(path)
    if not os.path.exists(output_path):
        # exist_ok avoids a failure if the folder appears between the check
        # and the creation.
        os.makedirs(output_path, exist_ok=True)
        print(f"Created output directory: {output_path}")

    files_list = [file.strip() for file in files_list if file.strip()]
    print(f"Fichiers à analyser : {files_list}")

    # Validate every file before starting any work, so that a bad name does
    # not abort the run after earlier files have already been submitted.
    for file in files_list:
        full_path = os.path.join(path, file)
        if not os.path.isfile(full_path):
            print(f"Erreur : Le fichier '{file}' est introuvable dans '{path}'")
            return
        if not os.access(full_path, os.R_OK):
            print(f"Erreur: Permission refusée pour accéder à '{file}' "
                  f"dans '{path}'")
            return

    if not files_list:
        # Nothing to do; do not start worker processes for an empty job list.
        return

    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        futures = []
        for file in files_list:
            full_path = os.path.join(path, file)
            print(f"Analyzing with path={full_path}, mod_time={mod_time}, cluster={cluster}\n")
            # Append (rather than rebind) so that every job's result is
            # collected below, not only the last one.
            futures.append(
                executor.submit(sample_identification, path, file, output_path,
                                mod_time, method, mode, seuil,
                                hit_prob_min, ABS_THRESHOLDS,
                                cluster, min_distance, sigma_ratio,
                                num_sigma, formated_spectra,
                                match_factor_min)
            )
        for future in futures:
            result = future.result()
            print("Terminé avec résultat :", result)

# Button that launches the analysis; its click handler is registered below.
run_button = widgets.Button(description="Run")
# stop_button = widgets.Button(description="Stop Tasks")

# Output area in which the click handler runs the analysis.
output = widgets.Output()

def on_button_click(b):
    """Handle a click on the Run button.

    Clears the output area, reads the current widget values and starts the
    analysis inside the output area.

    Args:
        b: The clicked button (unused).
    """
    with output:
        output.clear_output()
        # Keep only real names. A blank, whitespace-only or comma-only field
        # means "no explicit selection", which analyse() expects as None so
        # that it scans the input folder instead.
        names = [name.strip() for name in files.value.split(",")]
        files_list = [name for name in names if name] or None
        analyse(path.value, files_list, OUTPUT_PATH.value, mod_time.value,
                method.value, mode.value, seuil.value, hit_prob_min.value,
                ABS_THRESHOLDS.value, cluster.value, min_distance.value,
                sigma_ratio.value, num_sigma.value, formated_spectra.value,
                match_factor_min.value)
        
# Start the analysis when the Run button is clicked.
run_button.on_click(on_button_click)

# Render the form. The bordered `vbox` is shown between parameter groups.
display(
    # General parameters.
    txt, pbox, fbox, OUTPUT_PATH, mod_time, method, mode, seuil,
    hit_prob_min, formated_spectra, match_factor_min,
    vbox,
    # Options for the mass_per_mass and 3D modes.
    txt_abs_threshold_cluster, ABS_THRESHOLDS, cluster,
    vbox,
    # peak_local_max parameter.
    txt_plm, txt_plm_description, min_distance,
    vbox,
    # DoG parameter.
    txt_dog, txt_dog_description, sigma_ratio,
    vbox,
    # LoG / DoH parameter.
    txt_log_doh, txt_log_doh_description, num_sigma,
    vbox,
    # Run control and its output area.
    run_button, output,
)


HTML(value='<H1>Choisissez vos parametres:</H1>')

HBox(children=(HTML(value='<b>Path</b>'), Text(value='/app/data/input/')))

HBox(children=(HTML(value='<b>Files</b>'), Text(value='', placeholder='ex: file1.cdf, file2.cdf')))

Text(value='/app/data/output/', description='<b>Output path</b>', style=TextStyle(description_width='initial')…

FloatSlider(value=1.25, description='<b>Modulation time:</b>', max=8.0, min=1.0, step=0.01, style=SliderStyle(…

RadioButtons(description='<b>Method</b>', options=('persistent_homology', 'peak_local_max', 'LoG', 'DoG', 'DoH…

RadioButtons(description='<b>Mode</b>', options=('tic', 'mass_per_mass', '3D'), value='tic')

IntSlider(value=5, description='<b>Seuil</b>', max=10)

IntSlider(value=15, description='<b>Minimum hit probability</b> [a modifier]', max=30, style=SliderStyle(descr…

Checkbox(value=True, description='<b>formated spectra</b>')

IntSlider(value=700, description='<b>Match factor min</b> [a modifier]', max=1000)

VBox(layout=Layout(border_bottom='2px solid green', border_left='2px solid green', border_right='2px solid gre…

HTML(value='If mass_per_mass or 3D mode:')

Text(value='', description='Seuils absolus(liste)', disabled=True, style=TextStyle(description_width='initial'…

Checkbox(value=False, description='<b>cluster</b>', disabled=True)

VBox(layout=Layout(border_bottom='2px solid green', border_left='2px solid green', border_right='2px solid gre…

HTML(value='Peak local max parameter: ')

HTML(value='(The minimal allowed distance separating peaks. To find the maximum number of peaks, use min_dista…

IntSlider(value=1, description='<b>Minimal distance</b> [a modifier]', max=30, style=SliderStyle(description_w…

VBox(layout=Layout(border_bottom='2px solid green', border_left='2px solid green', border_right='2px solid gre…

HTML(value='DoG parameter: ')

HTML(value='The ratio between the standard deviation of Gaussian Kernels used for computing the Difference of …

FloatSlider(value=1.6, description='<b>sigma ratio:</b>', max=2.0, step=0.01, style=SliderStyle(description_wi…

VBox(layout=Layout(border_bottom='2px solid green', border_left='2px solid green', border_right='2px solid gre…

HTML(value='LoG and DoG parameter')

HTML(value='(The number of intermediate values of standard deviations to consider between min_sigma (1) and ma…

IntSlider(value=10, description='<b>sigma:</b>', max=30, min=1)

VBox(layout=Layout(border_bottom='2px solid green', border_left='2px solid green', border_right='2px solid gre…

Button(description='Run', style=ButtonStyle())

Output()