===================================
 # PEAK DETECTION #
===================================

Read the chromatogram, generate a peak table and identify the compounds using the NIST database. 

In [None]:
import os, sys
# Make the project's ``src`` folder importable from this notebook.
# ``os.path.abspath('')`` resolves to the current working directory, so the
# folder is looked up as ``<parent of the working directory>/src``.
path_to_scr_folder = os.path.join(os.path.dirname(os.path.abspath('')), 'src')
# Re-running the cell must not stack duplicate entries in sys.path.
if path_to_scr_folder not in sys.path:
    sys.path.append(path_to_scr_folder)

In [None]:
import os
import ipywidgets as widgets
from IPython.display import display
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import Process
from identification import sample_identification
import netCDF4 as nc



class GCGCMSAnalysisUI:
    """
    Widget-based user interface for configuring and running a GC*GC-MS analysis.

    The class builds an ipywidgets form (paths, detection method, mode and
    thresholds) and, when the "Run" button is clicked, converts the entered
    values and forwards them to ``sample_identification`` once per CDF file.

    Paths typed by the user are expressed relative to ``HOST_VOLUME_PATH``;
    before use they are rewritten to ``DOCKER_VOLUME_PATH`` (both read from
    the environment).
    """

    # Known ``scan_number`` dimension sizes mapped to
    # (modulation time in seconds, message printed to the console).
    _MOD_TIME_BY_SCAN_NUMBER = {
        328125: (1.25, "type de donnees: G0/plasma"),
        540035: (1.7, "type de donnnees: air expire"),
    }

    def __init__(self):
        """Initialize the GCMS Analysis UI with default parameters and widgets."""
        # Environment setup (either variable may be unset -> None)
        self.docker_volume_path = os.getenv('DOCKER_VOLUME_PATH')
        self.host_volume_path = os.getenv('HOST_VOLUME_PATH')

        # Fall back to neutral defaults instead of rendering the literal
        # string "None" in the path fields when HOST_VOLUME_PATH is unset.
        host_root = self.host_volume_path or ''
        self.default_path_input = host_root
        self.default_path_output = f'{host_root}/output/' if host_root else 'output/'

        # Default text shown in the free-text numeric fields.
        # NOTE: these four attributes are overwritten in _create_widgets()
        # by the HBox (label + text field) of the same name.
        self.abs_threshold = "0"
        self.rel_threshold = "0.01"
        self.noise_factor = "1.5"
        self.min_persistence = "0.02"
        self.style = {'description_width': 'initial'}

        # private: fixed parameters that are not exposed in the form and are
        # forwarded unchanged to sample_identification()
        self._min_distance = 1
        self._sigma_ratio = 1.6
        self._num_sigma = 10
        self._min_sigma = 1
        self._max_sigma = 30
        self._overlap = 0.5
        self._match_factor_min = 650
        self._vbox = widgets.VBox(layout=widgets.Layout(border='2px solid green'))

        self._create_widgets()
        self._setup_callbacks()

    def _create_widgets(self):
        """Create all UI widgets and store them as instance attributes."""
        # Title
        self.txt_title = widgets.HTML('<H1>Choisissez vos parametres:</H1>')

        # Path inputs: ``w_*`` is the editable widget, the un-prefixed
        # attribute is the labelled HBox that gets displayed.
        self.w_path = widgets.Text(value=self.default_path_input)
        self.path = self._bold_widget("Path", self.w_path)

        self.w_files = widgets.Text(placeholder="ex: file1.cdf, file2.cdf")
        self.files = self._bold_widget("Files", self.w_files)

        self.w_output_path = widgets.Text(value=self.default_path_output)
        self.output_path = self._bold_widget("Output path", self.w_output_path)

        # Peak detection method
        self.w_method = widgets.RadioButtons(
            options=['persistent_homology', 'peak_local_max', 'LoG', 'DoG', 'DoH'],
            value='peak_local_max',
            description='<b>Method</b>',
            disabled=False)
        # self.method = self._bold_widget("Method", self.w_method)

        # Processing mode
        self.w_mode = widgets.RadioButtons(
            options=['tic', 'mass_per_mass', '3D'],
            value='tic',
            description='<b>Mode</b>',
            disabled=False)
        # self.mode = self._bold_widget("Mode", self.w_mode)

        # Noise factor (free text, converted to float on "Run")
        self.w_noise_factor = widgets.Text(value=self.noise_factor)
        self.noise_factor = self._bold_widget("Noise_factor", self.w_noise_factor)
        self.noise_factor_def = widgets.HTML(value="""
                                        <div style="margin-left: 50px;">
                                        <p><i>Noise scaling factor, used to filter detected peaks. \
    A peak is retained if its intensity in the  \
    chromatogram is greater than the maximum intensity in the chromatogram  \
    multiplied by the noise_factor. 
    The noise level (sigma) is estimated, then adjusted using the noise_factor to define when a signal \
    is considered significant despite the noise. The result is normalized relative to the maximum signal in \
    the chromatogram, so that this threshold (dynamic_threshold_fact) is proportional to the overall signal \
    amplitude.</div>""")

        # Minimum persistence (free text, converted to float on "Run")
        self.w_min_persistence = widgets.Text(value=self.min_persistence)
        self.min_persistence = self._bold_widget("Minimum persistence", self.w_min_persistence)
        self.min_persistence_def = widgets.HTML(value="""
                                        <div style="margin-left: 50px;">
                                        <p><i>enter a min_persistence if method="persistent_homology".
    min_persistence is a floating-point value used to filter detected peaks based on their topological \
                                       persistence.
    It defines the minimum persistence threshold that a peak must exceed to be considered a true signal \
                                       rather than noise.
    A small min_persistence will detect many peaks, including weak ones — often noise.
    A large min_persistence will retain only the most prominent peaks, effectively filtering out noise, \
                                       but it may also miss subtle or low-intensity signals. Use \
                                       estimation_min_persistence.ipynb in order to estimate it.</div>""")

        self.hit_prob_min = widgets.IntSlider(
            value=15, min=0, max=30,
            description="<b>Minimum hit probability</b> [a modifier]",
            style=self.style)

        # self.txt_abs_threshold_cluster = widgets.HTML("For mass_per_mass or 3D mode:")

        # Absolute threshold (free text, converted to float on "Run")
        self.w_abs_threshold = widgets.Text(value=self.abs_threshold, style=self.style)
        self.abs_threshold = self._bold_widget("Absolute threshold", self.w_abs_threshold)
        self.abs_threshold_def = widgets.HTML(value="""
                                        <div style="margin-left: 50px;">
                                        <p><i>Absolute threshold, used to filter detected peaks. \
        A peak is retained if its intensity in the chromatogram is greater than the absolute threshold.</div>""")

        # Relative threshold (free text, converted to float on "Run")
        self.w_rel_threshold = widgets.Text(value=self.rel_threshold, style=self.style)
        self.rel_threshold = self._bold_widget("Relative threshold", self.w_rel_threshold)
        self.rel_threshold_def = widgets.HTML(value="""
                                        <div style="margin-left: 50px;">
                                        <p><i>Relative threshold is a floating-point value used to filter \
                                              detected peaks based on their relative intensity.</div>""")

        # Cluster checkbox (created disabled, so it always reports False)
        self.cluster = widgets.Checkbox(
            value=False,
            description='<b>cluster</b>',
            disabled=True)

        # self.txt_plm = widgets.HTML("Peak local max parameter: ")
        # txt1 = "The minimal allowed distance separating peaks."
        # txt2 = "To find the maximum number of peaks, use min_distance=1."
        # self.txt_plm_description = widgets.HTML(f"({txt1} {txt2})")
        # self.min_distance = widgets.IntSlider(
        #     value=1, min=0, max=30, step=1,
        #     description="<b>Minimal distance</b> [a modifier]",
        #     style=self.style)

        # self.txt_dog = widgets.HTML("DoG parameter: ")
        # txt3 = "The ratio between the standard deviation of Gaussian Kernels used for"\
        #         " computing the Difference of Gaussians."
        # self.txt_dog_description = widgets.HTML(txt3)
        # self.sigma_ratio = widgets.FloatSlider(
        #     value=1.6, min=0, max=2, step=0.01,
        #     description="<b>sigma ratio:</b>",
        #     style=self.style)

        # # LoG and DoH parameters
        # self.txt_log_doh = widgets.HTML("LoG and DoG parameter")
        # txt4 = "(The number of intermediate values of standard deviations to consider"\
        #     " between min_sigma (1) and max_sigma (30))"
        # self.txt_log_doh_description = widgets.HTML(txt4)
        # self.num_sigma = widgets.IntSlider(
        #     value=10, min=1, max=30,
        #     description="<b>sigma:</b>")

        # Formatted spectra option
        self.formated_spectra = widgets.Checkbox(
            value=True,
            description='<b>formated spectra</b>',
            disabled=False)

        # # Match factor
        # self.match_factor_min = widgets.IntSlider(
        #     value=700, min=0, max=1000, step=1,
        #     description="<b>Match factor min</b> [a modifier]")

        # Run button and output area (everything printed during a run goes here)
        self.run_button = widgets.Button(description="Run")
        self.output = widgets.Output()

    def _setup_callbacks(self):
        """Set up callbacks for interactive widgets."""
        # self.w_mode.observe(self._update_abs_threshold, names='value')
        # self.abs_threshold.observe(self._parse_abs_thresholds, names='value')
        self.run_button.on_click(self._on_button_click)

    def _bold_widget(self, value, widget):
        """Return an HBox holding a bold HTML label ``value`` followed by ``widget``."""
        bold = widgets.HTML(value=f'<b>{value}</b>')
        return widgets.HBox([bold, widget])

    # def _parse_abs_thresholds(self, change):
    #     """Parse the absolute thresholds entered as a comma-separated list."""
    #     try:
    #         values = [float(x.strip()) for x in change['new'].split(",")]
    #         print("Valeurs converties :", values)
    #     except ValueError:
    #         print("Erreur : Veuillez entrer des nombres séparés par des virgules.")

    # def _update_abs_threshold(self, change):
    #     """Enable or disable ABS_THRESHOLDS and cluster widgets based on selected mode."""
    #     if change.new == 'tic':
    #         self.abs_threshold.disabled = True
    #         self.cluster.disabled = True
    #     else:
    #         self.abs_threshold.disabled = False
    #         self.cluster.disabled = False

    def _to_docker_path(self, user_path):
        """
        Rewrite a host-side path so that it points into the Docker volume.

        The first occurrence of ``host_volume_path`` in ``user_path`` is
        replaced by ``docker_volume_path``. When either value is missing
        (environment variable unset) the path is returned unchanged instead
        of failing inside ``str.replace``.
        """
        host_root = self.host_volume_path
        docker_root = self.docker_volume_path
        if not host_root or docker_root is None:
            return user_path
        return user_path.replace(host_root, docker_root, 1)

    def get_files_from_folder(self, path):
        """Return the names of the entries of ``path`` ending in ".cdf" ([] if not a directory)."""
        if not os.path.isdir(path):
            return []
        return [name for name in os.listdir(path) if name.endswith(".cdf")]

    def _mod_time_from_scan_number(self, scan_number):
        """
        Map a ``scan_number`` dimension size to its modulation time in seconds.

        Prints the detected data type. Raises ``ValueError`` when the size is
        not one of the known acquisition setups.
        """
        try:
            mod_time, message = self._MOD_TIME_BY_SCAN_NUMBER[scan_number]
        except KeyError:
            raise ValueError(f"scan_number non reconnu: {scan_number}") from None
        print(message)
        return mod_time

    def get_mod_time(self, file_path):
        """
        Get the modulation time based on ``scan_number`` from a CDF file.

        Args:
            file_path: path of the netCDF file to inspect.

        Returns:
            The modulation time in seconds (1.25 or 1.7).

        Raises:
            ValueError: if the size of the ``scan_number`` dimension is not
                recognised (previously this surfaced as an UnboundLocalError).
        """
        # The context manager closes the dataset even if the lookup fails.
        with nc.Dataset(file_path, 'r') as data:
            scan_number = data.dimensions['scan_number'].size
        return self._mod_time_from_scan_number(scan_number)

    def analyse(self, path, files_list, output_path, user_output_path, method, mode, noise_factor,
                min_persistence, hit_prob_min, abs_threshold, rel_threshold, cluster, min_distance,
                min_sigma, max_sigma, sigma_ratio, num_sigma, formated_spectra, match_factor_min):
        """
        Run the analysis on the specified files.

        Args:
            path: folder containing the input files (as seen by this process).
            files_list: file names inside ``path``; ``None`` means every
                ".cdf" file found in ``path``.
            output_path: folder receiving the results; created if missing.
            user_output_path: output folder as typed by the user, only used
                in the "created" message.
            Remaining arguments are forwarded unchanged to
            ``sample_identification``.

        Problems (missing path, missing/unreadable file, unknown scan number)
        are printed and stop the run; files already processed stay processed.
        """
        if not path:
            print("Erreur : Aucun chemin sélectionné.")
            return

        if files_list is None:
            files_list = self.get_files_from_folder(path)

        if not os.path.exists(output_path):
            # exist_ok covers the directory appearing between check and creation.
            os.makedirs(output_path, exist_ok=True)
            print(f"Created output directory: {user_output_path}")

        # Drop surrounding blanks and empty entries (e.g. "a.cdf, , b.cdf").
        files_list = [file.strip() for file in files_list if file.strip()]
        print(f"Fichiers à analyser : {files_list}")

        for file in files_list:
            full_path = os.path.join(path, file)
            if not os.path.isfile(full_path):
                print(f"Erreur : Le fichier '{file}' est introuvable dans '{path}'")
                return
            if not os.access(full_path, os.R_OK):
                print(f"Erreur: Permission refusée pour accéder à '{file}' dans '{path}'")
                return

            try:
                mod_time = self.get_mod_time(full_path)
            except ValueError as err:
                # Unknown acquisition setup: report it like the other input errors.
                print(f"Erreur : {err}")
                return

            print(f"Analyzing {file} with modulation time = {mod_time} secondes...\n")
            result = sample_identification(
                path,
                file,
                output_path,
                mod_time,
                method,
                mode,
                noise_factor,
                hit_prob_min,
                abs_threshold,
                rel_threshold,
                cluster,
                min_distance,
                min_sigma,
                max_sigma,
                sigma_ratio,
                num_sigma,
                formated_spectra,
                match_factor_min,
                min_persistence
                )
            print("Analyse terminée:", result)
        print("Tous les fichiers ont été analysés avec succès.")

    def _on_button_click(self, b):
        """
        Handle a click on the "Run" button: read the form and start the analysis.

        Args:
            b: the clicked button (passed by ipywidgets, unused).
        """
        with self.output:
            self.output.clear_output()

            path_for_docker = self._to_docker_path(self.w_path.value)

            user_output_path = self.w_output_path.value
            output_path_for_docker = self._to_docker_path(user_output_path)

            files_list = self.w_files.value.split(",")
            # An empty field means "analyse every CDF file of the folder".
            if files_list == ['']:
                files_list = None

            # The numeric parameters are free-text fields: validate them here
            # so that a typo gives a readable message instead of a traceback.
            raw_numbers = {
                "Noise_factor": self.w_noise_factor.value,
                "Minimum persistence": self.w_min_persistence.value,
                "Absolute threshold": self.w_abs_threshold.value,
                "Relative threshold": self.w_rel_threshold.value,
            }
            numbers = {}
            for label, raw in raw_numbers.items():
                try:
                    numbers[label] = float(raw)
                except ValueError:
                    print(f"Erreur : '{raw}' n'est pas un nombre valide pour '{label}'.")
                    return

            self.analyse(
                path_for_docker, files_list, output_path_for_docker, user_output_path,
                self.w_method.value, self.w_mode.value,
                numbers["Noise_factor"], numbers["Minimum persistence"],
                self.hit_prob_min.value,
                numbers["Absolute threshold"], numbers["Relative threshold"],
                self.cluster.value,
                self._min_distance,
                self._min_sigma, self._max_sigma, self._sigma_ratio, self._num_sigma,
                # Pass the checkbox state, not the widget object (which is
                # always truthy) - same pattern as ``self.cluster.value``.
                self.formated_spectra.value,
                self._match_factor_min)

    def display(self):
        """Display the UI."""
        # self._vbox (an empty green-bordered box) is shown before and after
        # the cluster checkbox - presumably as a visual separator.
        display(self.txt_title, self.path, self.files, self.output_path, self.w_method, self.w_mode,
                self.noise_factor, self.noise_factor_def, self.min_persistence, self.min_persistence_def,
                self.abs_threshold, self.abs_threshold_def,
                self.rel_threshold, self.rel_threshold_def,
                self.hit_prob_min, self.formated_spectra,
                self._vbox, self.cluster, self._vbox,
                self.run_button, self.output)




In [None]:
# Build the analysis form and render it in the notebook; clicking "Run"
# starts the analysis with the values entered in the form.
gcms_ui = GCGCMSAnalysisUI()
gcms_ui.display()