===================================
 # PEAKS DETECTION #
===================================

Read the chromatogram, generate a peak table and identify the compounds using the NIST database. 

In [None]:
# code 
import os
import ipywidgets as widgets
from IPython.display import display
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import Process
from identification import sample_identification
import netCDF4 as nc

# Example input file name: G0-1-120123.cdf

# Volume locations come from the environment (.env); either is None when unset.
docker_volume_path = os.environ.get('DOCKER_VOLUME_PATH')
host_volume_path = os.environ.get('HOST_VOLUME_PATH')


def parse_abs_thresholds(change):
    """Echo the comma-separated thresholds typed by the user as floats.

    Observer callback: ``change['new']`` holds the new text of the widget.
    The parsed list is only printed, it is not stored anywhere. When any
    item is not a valid number, an error message is printed instead.
    """
    raw_items = change['new'].split(",")
    try:
        values = list(map(float, (item.strip() for item in raw_items)))
    except ValueError:
        print("Erreur : Veuillez entrer des nombres séparés par des virgules.")
        return
    print("Valeurs converties :", values)

def update_ABS_THRESHOLDS(change):
    """Grey out the threshold and cluster inputs while the mode is 'tic'.

    Observer callback: ``change.new`` is the newly selected mode. Both the
    ``ABS_THRESHOLDS`` text box and the ``cluster`` checkbox are disabled
    for 'tic' and enabled for every other mode.
    """
    lock_inputs = bool(change.new == 'tic')
    for control in (ABS_THRESHOLDS, cluster):
        control.disabled = lock_inputs

def bold_widget(value, widget):
    """Return an HBox holding a bold HTML caption followed by ``widget``.

    ``value`` is the caption text; it is wrapped in ``<b>`` tags.
    """
    caption = widgets.HTML(value='<b>{}</b>'.format(value))
    row = [caption, widget]
    return widgets.HBox(row)


# Shared style: let widget descriptions take their natural width.
style = {'description_width': 'initial'}
# Empty green-bordered box; the final display() call repeats it between
# parameter groups.
vbox = widgets.VBox(layout=widgets.Layout(border='2px solid green'))

# Build the default folders from the volume root. When DOCKER_VOLUME_PATH is
# unset, fall back to relative "input/" and "output/" instead of formatting
# None into the path ("None/input/").
_volume_root = docker_volume_path or ''
default_path_input = os.path.join(_volume_root, 'input', '')
default_path_output = os.path.join(_volume_root, 'output', '')


txt = widgets.HTML('<H1>Choisissez vos parametres:</H1>')

# Folder that contains the input files.
w_path = widgets.Text(value=default_path_input)
path = bold_widget("Path", w_path)

# Optional comma-separated list of file names.
w_files = widgets.Text(placeholder="ex: file1.cdf, file2.cdf",)
files = bold_widget("Files", w_files)

# Folder passed to the analysis as its output location.
w_output_path = widgets.Text(value=default_path_output, style=style)
output_path = bold_widget("Output path", w_output_path)

w_filtering_factor = widgets.FloatSlider(value=0.1, min=0, max=1, step=0.1)
filtering_factor = bold_widget("Filtering factor", w_filtering_factor)
# Help text shown under the slider; the <i> and <p> tags are now closed.
filtering_factor_def = widgets.HTML(value="""
                                    <div style="margin-left: 50px;">
                                    <p><i>Filtering factor is a floating-point value between 0 and 1  \
used to filter detected peaks. A peak is retained if its intensity in the  \
chromatogram is greater than the maximum intensity in the chromatogram  \
multiplied by the filtering factor. 
The noise level (sigma) is estimated, then adjusted using the filtering factor to define when a signal \
is considered significant despite the noise. The result is normalized relative to the maximum signal in \
the chromatogram, so that this threshold (dynamic_threshold_fact) is proportional to the overall signal \
amplitude.</i></p></div>""")

# Kept as free text (not a slider); the value is a string.
w_min_persistence = widgets.Text(value="0.02")
min_persistence = bold_widget("Minimum persistence", w_min_persistence)
# Help text for the persistence threshold; the <i> and <p> tags are now closed.
min_persistence_def = widgets.HTML(value="""
                                    <div style="margin-left: 50px;">
                                    <p><i>enter a min_persistence if method="persistent_homology".
min_persistence is a floating-point value used to filter detected peaks based on their topological \
                                   persistence.
It defines the minimum persistence threshold that a peak must exceed to be considered a true signal \
                                   rather than noise.
A small min_persistence will detect many peaks, including weak ones — often noise.
A large min_persistence will retain only the most prominent peaks, effectively filtering out noise, \
                                   but it may also miss subtle or low-intensity signals. Use \
                                   estimation_min_persistence.ipynb in order to estimate it.</i></p></div>""")


# Peak-detection algorithm selector.
method = widgets.RadioButtons(
    description='<b>Method</b>',
    options=['persistent_homology', 'peak_local_max', 'LoG', 'DoG', 'DoH'],
    value='peak_local_max',
    disabled=False,
)

# Processing mode selector; 'tic' keeps the threshold/cluster inputs disabled.
mode = widgets.RadioButtons(
    description='<b>Mode</b>',
    options=['tic', 'mass_per_mass', '3D'],
    value='tic',
    disabled=False,
)

hit_prob_min = widgets.IntSlider(
    description="<b>Minimum hit probability</b> [a modifier]",
    value=15,
    min=0,
    max=30,
    style=style,
)

# Inputs that only apply to the 'mass_per_mass' and '3D' modes.
txt_abs_threshold_cluster = widgets.HTML("If mass_per_mass or 3D mode:")  # TODO
# Both start disabled because the default mode is 'tic'.
ABS_THRESHOLDS = widgets.Text(
    description="Seuils absolus(liste)",
    value=None,
    disabled=True,
    style=style,
)
cluster = widgets.Checkbox(
    description='<b>cluster</b>',
    value=False,
    disabled=True,
)

# Wire the callbacks: echo the parsed thresholds on every edit, and toggle
# the two inputs above whenever the mode changes.
ABS_THRESHOLDS.observe(parse_abs_thresholds, names='value')
mode.observe(update_ABS_THRESHOLDS, names='value')

# --- peak_local_max -------------------------------------------------------
txt_plm = widgets.HTML("Peak local max parameter: ")
txt1 = "The minimal allowed distance separating peaks."
txt2 = "To find the maximum number of peaks, use min_distance=1."
txt_plm_description = widgets.HTML(f"({txt1} {txt2})")
min_distance = widgets.IntSlider(value=1, min=0, max=30, step=1,
                                 description="<b>Minimal distance</b> [a modifier]",
                                 style=style)

# --- DoG --------------------------------------------------------------------
txt_dog = widgets.HTML("DoG parameter: ")
txt3 = "The ratio between the standard deviation of Gaussian Kernels used for"\
        " computing the Difference of Gaussians."
txt_dog_description = widgets.HTML(txt3)
# NOTE(review): the slider allows ratios down to 0; confirm that the detection
# code accepts values <= 1 or tighten the lower bound.
sigma_ratio = widgets.FloatSlider(value=1.6, min=0, max=2, step=0.01,
                                  description="<b>sigma ratio:</b>",
                                  style=style)

# --- LoG / DoH --------------------------------------------------------------
# The label used to read "LoG and DoG"; the number of sigma steps belongs to
# the LoG/DoH group (see the variable names), DoG has its own section above.
txt_log_doh = widgets.HTML("LoG and DoH parameter")
txt4 = "(The number of intermediate values of standard deviations to consider"\
    " between min_sigma (1) and max_sigma (30))"
txt_log_doh_description = widgets.HTML(txt4)
num_sigma = widgets.IntSlider(value=10, min=1, max=30,
                              description="<b>sigma:</b>")

# --- identification options -------------------------------------------------
formated_spectra = widgets.Checkbox(
    value=True,
    description='<b>formated spectra</b>',
    disabled=False,
)

match_factor_min = widgets.IntSlider(value=700, min=0, max=1000, step=1,
                                     description="<b>Match factor min</b> [a modifier]")

def get_files_from_folder(path):
    """List the names in ``path`` that end with ".cdf".

    Returns an empty list when ``path`` is not an existing directory. The
    names are returned without the directory prefix, in ``os.listdir`` order.
    """
    if not os.path.isdir(path):
        return []
    cdf_names = []
    for entry in os.listdir(path):
        if entry.endswith(".cdf"):
            cdf_names.append(entry)
    return cdf_names
    
def get_mod_time(file_path):
    """Return the modulation time (in seconds) matching a CDF file.

    The acquisition type is recognised from the size of the file's
    ``scan_number`` dimension, and a line naming the detected type is printed.

    Args:
        file_path: path of the netCDF file to inspect.

    Returns:
        The modulation time associated with the recognised scan count.

    Raises:
        ValueError: if the scan count matches no known acquisition type.
            Previously this case fell through to ``return mod_time`` with the
            variable unbound and crashed with ``UnboundLocalError``.
    """
    # scan_number -> (modulation time, message printed for the user)
    known_acquisitions = {
        328125: (1.25, "type de donnees: G0/plasma"),
        540035: (1.7, "type de donnnees: air expire"),
    }

    # The context manager closes the dataset; it used to be left open.
    with nc.Dataset(file_path, 'r') as data:
        scan_number = data.dimensions['scan_number'].size

    if scan_number not in known_acquisitions:
        raise ValueError(
            f"scan_number non reconnu ({scan_number}) pour '{file_path}'")

    mod_time, label = known_acquisitions[scan_number]
    print(label)
    return mod_time


def analyse(path, files_list, output_path, method, mode, filtering_factor,
            hit_prob_min, ABS_THRESHOLDS, cluster, min_distance,
            sigma_ratio, num_sigma, formated_spectra, match_factor_min):
    """Run ``sample_identification`` on each requested file, one at a time.

    Progress and errors are reported with ``print``; nothing is returned.

    Args:
        path: folder that contains the input files. An empty value aborts.
        files_list: file names relative to ``path``, or None to take every
            ".cdf" file found in ``path``. Names are stripped and blank
            entries are ignored.
        output_path: output folder; created when it does not exist.
        method, mode, filtering_factor, hit_prob_min, ABS_THRESHOLDS, cluster,
        min_distance, sigma_ratio, num_sigma, formated_spectra,
        match_factor_min: forwarded unchanged to ``sample_identification``.

    Processing stops at the first file that is missing or unreadable; files
    handled before it are not rolled back.
    """
    if not path:
        print("Erreur : Aucun chemin sélectionné.")
        return
    if files_list is None:
        files_list = get_files_from_folder(path)
    if not os.path.exists(output_path):
        # exist_ok guards against the folder appearing between the check
        # above and the creation.
        os.makedirs(output_path, exist_ok=True)
        print(f"Created output directory: {output_path}")

    files_list = [file.strip() for file in files_list if file.strip()]
    if not files_list:
        # Without this guard the function announced a successful analysis
        # although nothing had been processed.
        print("Erreur : Aucun fichier à analyser.")
        return
    print(f"Fichiers à analyser : {files_list}")

    # TODO: run the files in parallel with ProcessPoolExecutor. The worker
    # count still has to be chosen, e.g. min(os.cpu_count(), len(files_list)),
    # keeping an eye on the CPU and memory load of the server.
    for file in files_list:
        full_path = os.path.join(path, file)
        if not os.path.isfile(full_path):
            print(f"Erreur : Le fichier '{file}' est introuvable dans '{path}'")
            return
        if not os.access(full_path, os.R_OK):
            # The message used to contain a long run of spaces caused by a
            # backslash continuation inside the f-string.
            print(f"Erreur: Permission refusée pour accéder à '{file}' dans "
                  f"'{path}'")
            return

        mod_time = get_mod_time(full_path)

        print(f"Analyzing {file} with modulation time = {mod_time} secondes...\n")
        result = sample_identification(
            path, file, output_path, mod_time, method, mode, filtering_factor,
            hit_prob_min, ABS_THRESHOLDS, cluster, min_distance, sigma_ratio,
            num_sigma, formated_spectra, match_factor_min)
        print("Analyse terminée:", result)
    print("Tous les fichiers ont été analysés avec succès.")

run_button = widgets.Button(description="Run")

# Everything printed while the analysis runs is shown in this output area.
output = widgets.Output()


def on_button_click(b):
    """Collect the widget values and launch ``analyse`` inside ``output``.

    Args:
        b: the clicked button (unused).

    The "Files" field is split on commas; when it holds no usable name,
    ``None`` is passed so that ``analyse`` scans the input folder itself.
    """
    with output:
        output.clear_output()
        requested = [name.strip() for name in w_files.value.split(",")]
        files_list = [name for name in requested if name] or None
        # Arguments are passed by keyword so they cannot drift out of step
        # with the signature of analyse().
        # NOTE(review): w_min_persistence.value is not forwarded. analyse()
        # has no parameter for it, and passing it positionally (15 values for
        # 14 parameters) raised TypeError on every click. Thread it through
        # analyse()/sample_identification once they accept it.
        analyse(
            path=w_path.value,
            files_list=files_list,
            output_path=w_output_path.value,
            method=method.value,
            mode=mode.value,
            filtering_factor=w_filtering_factor.value,
            hit_prob_min=hit_prob_min.value,
            ABS_THRESHOLDS=ABS_THRESHOLDS.value,
            cluster=cluster.value,
            min_distance=min_distance.value,
            sigma_ratio=sigma_ratio.value,
            num_sigma=num_sigma.value,
            formated_spectra=formated_spectra.value,
            match_factor_min=match_factor_min.value,
        )


run_button.on_click(on_button_click)

# Render the whole form. `vbox` (an empty bordered box) is repeated between
# parameter groups. `min_persistence_def` is now shown under its input, like
# `filtering_factor_def`; it was built but never displayed.
display(txt, path, files, output_path,
        filtering_factor, filtering_factor_def,
        min_persistence, min_persistence_def,
        method, mode,
        hit_prob_min, formated_spectra, match_factor_min,
        vbox, txt_abs_threshold_cluster, ABS_THRESHOLDS, cluster, vbox,
        txt_plm, txt_plm_description, min_distance, vbox, txt_dog,
        txt_dog_description, sigma_ratio, vbox, txt_log_doh,
        txt_log_doh_description, num_sigma, vbox,
        run_button, output)
