===================================
 # PEAK DETECTION #
===================================

Read the chromatogram, generate a peak table and identify the compounds using the NIST database. 

In [None]:
import os
import sys

# Make the sibling ``src`` folder importable: it is resolved relative to the
# parent of the current working directory (``os.path.abspath('')``).
path_to_scr_folder = os.path.join(os.path.dirname(os.path.abspath('')), 'src')

# Guard the append so that re-running this cell does not keep adding the same
# entry to sys.path.
if path_to_scr_folder not in sys.path:
    sys.path.append(path_to_scr_folder)

In [None]:
import os
import ipywidgets as widgets
from IPython.display import display
from identification import sample_identification
import netCDF4 as nc
import h5py
from ipyfilechooser import FileChooser


class GCGCMSAnalysisUI:
    """
    Widget-based user interface for configuring and running a GC*GC-MS analysis.

    The UI lets the user add folder/file choosers, pick a detection method and
    mode, tune the numeric thresholds and start the analysis. Every selected
    ``.cdf`` / ``.h5`` file is passed to ``sample_identification``.

    Paths are translated between the host and the Docker container using the
    ``HOST_VOLUME_PATH`` and ``DOCKER_VOLUME_PATH`` environment variables; when
    they are not set, paths are used unchanged.
    """

    # Chromatogram file extensions accepted by the analysis (compared lower-cased).
    _EXTENSIONS = ('.cdf', '.h5')

    # scan_number -> (modulation time in seconds, message printed on match).
    _MOD_TIMES = {
        328125: (1.25, "type de donnees: G0/plasma"),
        540035: (1.7, "type de donnnees: air expire"),
    }

    def __init__(self):
        """Initialize the GCMS Analysis UI with default parameters and widgets."""
        # Environment setup (either variable may be unset -> None).
        self.docker_volume_path = os.getenv('DOCKER_VOLUME_PATH')
        self.host_volume_path = os.getenv('HOST_VOLUME_PATH')

        # Fall back to an empty prefix so an unset variable does not end up as
        # the literal text "None" inside the default paths.
        host_root = self.host_volume_path or ''
        self.default_path_input = host_root
        self.default_path_output = os.path.join(host_root, 'output', '')

        self.style = {'description_width': 'initial'}

        # Default values shown in the text inputs.
        # NOTE: _create_widgets() rebinds these four attributes to the labelled
        # widget rows; the editable values then live in the matching w_* widgets.
        self.abs_threshold = "0"
        self.rel_threshold = "0.01"
        self.noise_factor = "1.5"
        self.min_persistence = "0.0002"

        # Parameters forwarded to sample_identification but not exposed in the UI.
        self._min_distance = 1
        self._sigma_ratio = 1.6
        self._num_sigma = 10
        self._min_sigma = 1
        self._max_sigma = 30
        self._overlap = 0.5
        self._match_factor_min = 650
        self._cluster = True
        self._min_samples = 4
        self._eps = 3
        self.formated_spectra = True  # TODO: meaning unclear, kept as forwarded flag
        self._choosers = []

        # Container holding the "Path" label, the add buttons and the choosers.
        self._vbox = widgets.VBox(layout=widgets.Layout(border='2px solid green'))

        self._create_widgets()
        self._setup_callbacks()

    def _add_chooser(self, select_dirs):
        """Append a new FileChooser (folder or file mode) to the path box."""
        # FileChooser needs a real starting directory; use the current working
        # directory when DOCKER_VOLUME_PATH is not set.
        start_dir = self.docker_volume_path or os.getcwd()
        chooser = FileChooser(path=start_dir, select_dirs=select_dirs)
        self._choosers.append(chooser)
        self._vbox.children = (*self._vbox.children, chooser)

    def add_folderchooser(self, b):
        """Button callback: add a chooser that selects a folder."""
        self._add_chooser(select_dirs=True)

    def add_filechooser(self, b):
        """Button callback: add a chooser that selects a single file."""
        self._add_chooser(select_dirs=False)

    def _create_widgets(self):
        """Create all UI widgets."""
        self.txt_title = widgets.HTML('<H1>Choisissez vos parametres:</H1>')

        self.path = widgets.HTML(value='<b>Path</b>')
        # The "add a folder" button opens a folder chooser (its label says so);
        # a second button gives access to the single-file chooser.
        self.add_button = widgets.Button(description="Ajouter un dossier")
        self.add_button.on_click(self.add_folderchooser)
        self.add_file_button = widgets.Button(description="Ajouter un fichier")
        self.add_file_button.on_click(self.add_filechooser)

        self._vbox.children = (self.path, self.add_button, self.add_file_button)

        self.w_output_path = widgets.Text(value=self.default_path_output)
        self.output_path = self._bold_widget("Output path", self.w_output_path)

        label_method = widgets.HTML(value="<b>Method</b>")
        self.r_method = widgets.RadioButtons(
            options=['persistent_homology', 'peak_local_max', 'LoG', 'DoG', 'DoH'],
            value='persistent_homology',
            description='',
            disabled=False)
        self.w_method = widgets.VBox([label_method, self.r_method])

        label_mode = widgets.HTML(value="<b>Mode</b>")
        self.r_mode = widgets.RadioButtons(
            options=['tic', 'mass_per_mass', '3D'],
            value='tic',
            description='',
            disabled=False)
        self.w_mode = widgets.VBox([label_mode, self.r_mode])

        self.w_noise_factor = widgets.Text(value=self.noise_factor)
        self.noise_factor = self._bold_widget("Noise_factor", self.w_noise_factor)
        self.noise_factor_def = widgets.HTML(value="""
                                        <div style="margin-left: 50px;">
                                        <p><i>Noise scaling factor, used to filter detected peaks. \
    A peak is retained if its intensity in the  \
    chromatogram is greater than the maximum intensity in the chromatogram  \
    multiplied by the noise_factor. 
    The noise level (sigma) is estimated, then adjusted using the noise_factor to define when a signal \
    is considered significant despite the noise. The result is normalized relative to the maximum signal in \
    the chromatogram, so that this threshold (dynamic_threshold_fact) is proportional to the overall signal \
    amplitude.</div>""")

        self.w_min_persistence = widgets.Text(value=self.min_persistence)
        self.min_persistence = self._bold_widget("Minimum persistence", self.w_min_persistence)
        self.min_persistence_def = widgets.HTML(value="""
                                        <div style="margin-left: 50px;">
                                        <p><i>enter a min_persistence if method="persistent_homology".
    min_persistence is a floating-point value used to filter detected peaks based on their topological \
                                       persistence.
    It defines the minimum persistence threshold that a peak must exceed to be considered a true signal \
                                       rather than noise.
    A small min_persistence will detect many peaks, including weak ones — often noise.
    A large min_persistence will retain only the most prominent peaks, effectively filtering out noise, \
                                       but it may also miss subtle or low-intensity signals. Use \
                                       estimation_min_persistence.ipynb in order to estimate it.</div>""")

        self.w_abs_threshold = widgets.Text(value=self.abs_threshold, style=self.style)
        self.abs_threshold = self._bold_widget("Absolute threshold", self.w_abs_threshold)
        self.abs_threshold_def = widgets.HTML(value="""
                                        <div style="margin-left: 50px;">
                                        <p><i>Absolute threshold, used to filter detected peaks. \
        A peak is retained if its intensity in the chromatogram is greater than the absolute threshold.</div>""")

        self.w_rel_threshold = widgets.Text(value=self.rel_threshold, style=self.style)
        self.rel_threshold = self._bold_widget("Relative threshold", self.w_rel_threshold)
        self.rel_threshold_def = widgets.HTML(value="""
                                        <div style="margin-left: 50px;">
                                        <p><i>Relative threshold is a floating-point value used to filter \
                                              detected peaks based on their relative intensity.</div>""")

        self.nist = widgets.Checkbox(
            value=True,
            description='Matching with NIST',
            style=self.style,
            disabled=False)

        # Run button and the output area that captures the analysis messages.
        self.run_button = widgets.Button(description="Run")
        self.output = widgets.Output()

    def _setup_callbacks(self):
        """Set up callbacks for interactive widgets."""
        self.run_button.on_click(self._on_button_click)

    def _bold_widget(self, value, widget):
        """Return an HBox pairing a bold HTML label with the given widget."""
        bold = widgets.HTML(value=f'<b>{value}</b>')
        return widgets.HBox([bold, widget])

    @classmethod
    def _is_supported(cls, file_name):
        """Return True if file_name ends with a supported extension (any case)."""
        return os.path.splitext(file_name)[1].lower() in cls._EXTENSIONS

    def _to_docker_path(self, host_path):
        """Replace the host volume prefix of host_path by the Docker one.

        The path is returned unchanged when the volume environment variables
        are not configured.
        """
        if not self.host_volume_path or self.docker_volume_path is None:
            return host_path
        return host_path.replace(self.host_volume_path, self.docker_volume_path, 1)

    def get_all_files_from_folders(self, paths_list):
        """Collect the .cdf / .h5 files designated by a list of paths.

        Each entry may be a folder (searched recursively) or a single file.
        Entries that are neither, and files with another extension, are
        ignored. A file reached several times is returned only once.

        Returns:
            list[str]: file paths, in discovery order.
        """
        all_files = []
        seen = set()

        def _keep(candidate):
            # Deduplicate on the normalised path but return the path as found.
            key = os.path.normpath(candidate)
            if key not in seen:
                seen.add(key)
                all_files.append(candidate)

        for base_path in paths_list:
            if os.path.isfile(base_path):
                if self._is_supported(base_path):
                    _keep(base_path)
                continue
            # os.walk yields nothing for a path that is not a directory.
            for root, _, files in os.walk(base_path):
                for file in files:
                    if self._is_supported(file):
                        _keep(os.path.join(root, file))
        return all_files

    def get_scan_number(self, file_path):
        """Read the number of scans stored in a .h5 or .cdf file.

        Raises:
            ValueError: if the file extension is neither .h5 nor .cdf.
        """
        extension = os.path.splitext(file_path)[1].lower()
        if extension == ".h5":
            with h5py.File(file_path, 'r') as f:
                scan_number = f.attrs['scan_number_size']
        elif extension == ".cdf":
            with nc.Dataset(file_path, 'r') as dt:
                scan_number = dt.dimensions['scan_number'].size
        else:
            raise ValueError("Unsupported file format. Please provide a .h5 or .cdf file.")
        return scan_number

    def get_mod_time(self, file_path):
        """Get modulation time based on scan_number from .h5/.cdf file.

        Raises:
            ValueError: if the scan number does not match a known acquisition
                type (or the file format is unsupported).
        """
        scan_number = self.get_scan_number(file_path)
        for known_scan_number, (mod_time, message) in self._MOD_TIMES.items():
            if scan_number == known_scan_number:
                print(message)
                return mod_time
        # Fail explicitly instead of returning an unbound local.
        raise ValueError(f"scan_number non reconnu: {scan_number}")

    def analyse(self, paths_list, output_path, user_output_path, method, mode, noise_factor,
                min_persistence, abs_threshold, rel_threshold, cluster, min_distance,
                min_sigma, max_sigma, sigma_ratio, num_sigma, formated_spectra, match_factor_min,
                overlap, eps, min_samples, nist):
        """Run the analysis on every .cdf / .h5 file designated by paths_list.

        Args:
            paths_list: folders and/or files to analyse.
            output_path: directory where results are written (created if missing).
            user_output_path: the same directory as typed by the user, used
                only in the message printed when it is created.
            Remaining arguments are forwarded to ``sample_identification``.

        Files that cannot be read, or whose modulation time cannot be
        determined, are reported and skipped.
        """
        if not paths_list:
            print("Erreur : Aucun chemin sélectionné.")
            return

        if not os.path.exists(output_path):
            os.makedirs(output_path, exist_ok=True)
            print(f"Created output directory: {user_output_path}")

        all_files = self.get_all_files_from_folders(paths_list)
        print(f"Fichiers à analyser : {[os.path.basename(f) for f in all_files]}")
        if not all_files:
            print("Aucun fichier .cdf ou .h5 trouvé.")
            return

        skipped = 0
        for full_path in all_files:
            if not os.path.isfile(full_path):
                print(f"Erreur : Le fichier est introuvable : '{full_path}'")
                skipped += 1
                continue
            if not os.access(full_path, os.R_OK):
                print(f"Erreur: Permission refusée pour accéder à '{full_path}'")
                skipped += 1
                continue

            path = os.path.dirname(full_path)
            file = os.path.basename(full_path)
            print(f"Analyzing {file} in path {path}...")

            try:
                mod_time = self.get_mod_time(full_path)
            except (ValueError, KeyError, OSError) as err:
                print(f"Erreur : temps de modulation indéterminé pour '{full_path}' : {err}")
                skipped += 1
                continue
            print(f"Analyzing {file} with modulation time = {mod_time} secondes...\n")

            result = sample_identification(
                path, file,
                output_path,
                mod_time,
                method,
                mode,
                noise_factor,
                abs_threshold,
                rel_threshold,
                cluster,
                min_distance,
                min_sigma,
                max_sigma,
                sigma_ratio,
                num_sigma,
                formated_spectra,
                match_factor_min,
                min_persistence,
                overlap,
                eps,
                min_samples,
                nist
                )
            print("Analyse terminée:", result)

        if skipped:
            print(f"Analyse terminée : {skipped} fichier(s) ignoré(s).")
        else:
            print("Tous les fichiers ont été analysés avec succès.")

    def _on_button_click(self, b):
        """Handle the Run button: gather the selection and start the analysis."""
        # Everything runs inside the Output widget so that every message is
        # shown under the Run button.
        with self.output:
            self.output.clear_output()

            # FileChooser.selected is the full selected path (folder, or folder
            # plus file name); selected_path would only give the folder part.
            picked_paths = [fc.selected for fc in self._choosers if fc.selected]
            selected_files = self.get_all_files_from_folders(picked_paths)

            if not selected_files:
                print("Aucun fichier .cdf ou .h5 sélectionné.")
                return

            print(f"{len(selected_files)} fichiers détectés :")
            for f in selected_files:
                print(f"- {f}")

            try:
                noise_factor = float(self.w_noise_factor.value)
                min_persistence = float(self.w_min_persistence.value)
                abs_threshold = float(self.w_abs_threshold.value)
                rel_threshold = float(self.w_rel_threshold.value)
            except ValueError as err:
                print(f"Erreur : paramètre numérique invalide ({err})")
                return

            user_output_path = self.w_output_path.value
            output_path_for_docker = self._to_docker_path(user_output_path)

            # Single call: analyse() iterates over the files itself.
            self.analyse(selected_files, output_path_for_docker, user_output_path,
                         self.r_method.value, self.r_mode.value, noise_factor,
                         min_persistence,
                         abs_threshold, rel_threshold, self._cluster,
                         self._min_distance,
                         self._min_sigma, self._max_sigma, self._sigma_ratio, self._num_sigma,
                         self.formated_spectra,
                         self._match_factor_min, self._overlap, self._eps, self._min_samples,
                         self.nist.value)

    def display(self):
        """Display the UI."""
        display(self.txt_title, self._vbox, self.output_path, self.w_method, self.w_mode,
                self.nist,
                self.noise_factor, self.noise_factor_def, self.min_persistence, self.min_persistence_def,
                self.abs_threshold, self.abs_threshold_def,
                self.rel_threshold, self.rel_threshold_def,
                self.run_button, self.output)




In [None]:
# Build the analysis UI and render its widgets in this cell's output.
gcms_ui = GCGCMSAnalysisUI()
gcms_ui.display()