===================================
 # PEAKS DETECTION #
===================================

Read the chromatogram, generate a peak table and identify the compounds using the NIST database. 

In [None]:
import os
import sys

# Make "<parent of the current working directory>/src" importable.
# NOTE(review): presumably this is where the project's `identification`
# module lives -- confirm against the repository layout.
path_to_scr_folder = os.path.join(os.path.dirname(os.path.abspath('')), 'src')
if path_to_scr_folder not in sys.path:
    # Guard so that re-running this cell does not pile up duplicate entries.
    sys.path.append(path_to_scr_folder)

In [None]:
import os
import ipywidgets as widgets
from IPython.display import display
from identification import sample_identification
import netCDF4 as nc
import h5py
from ipyfilechooser import FileChooser
import traceback
from pathlib import Path


class GCGCMSAnalysisUI:
    """Widget-based front end for configuring and running a GC×GC-MS analysis.

    The user adds one or more path choosers, each pointing either at a single
    file or at a folder. Folders are scanned recursively and every file whose
    extension is listed in ``supported_extensions`` is analysed with
    ``sample_identification``.

    Paths shown to the user are expressed relative to ``HOST_VOLUME_PATH``;
    the code itself works on the matching ``DOCKER_VOLUME_PATH`` location.
    """

    # scan_number -> (modulation time in seconds, label printed for the user)
    _MODULATION_TIMES = {
        328125: (1.25, "G0/plasma"),
        540035: (1.7, "air expiré"),
    }
    # Modulation time used when the scan number is not in the table above.
    _DEFAULT_MOD_TIME = 1.0

    def __init__(self):
        """Initialize the GCMS Analysis UI with default parameters and widgets."""
        self._setup_environment()
        self._setup_default_parameters()
        self._setup_style()
        self._initialize_widgets()
        self._create_widgets()
        self._setup_callbacks()

    def _setup_environment(self):
        """Read the volume paths from the environment and derive default paths."""
        self.docker_volume_path = os.getenv('DOCKER_VOLUME_PATH', '/default/docker/path')
        self.host_volume_path = os.getenv('HOST_VOLUME_PATH', '/default/host/path')

        self.default_path_input = self.host_volume_path
        # os.path.join inserts the separator when the host path has no trailing
        # slash; the trailing '' keeps the final slash of the original default.
        self.default_path_output = os.path.join(self.host_volume_path, 'output', '')

    def _setup_default_parameters(self):
        """Initialize default analysis parameters."""
        # Public parameters (configurable via UI). They are stored as strings
        # because they seed Text widgets; _create_parameter_widgets later
        # rebinds these four attributes to the labelled widget rows.
        self.abs_threshold = "0"
        self.rel_threshold = "0.01"
        self.noise_factor = "1.5"
        self.min_persistence = "0.0002"

        # Private parameters (fixed for this UI)
        self._min_distance = 1
        self._sigma_ratio = 1.6
        self._num_sigma = 10
        self._min_sigma = 1
        self._max_sigma = 30
        self._overlap = 0.5
        self._match_factor_min = 650
        self._cluster = True
        self._min_samples = 4
        self._eps = 3
        self.formated_spectra = True  # TODO ?

        # Supported file extensions (lower case, with leading dot)
        self.supported_extensions = ['.cdf', '.h5']

    def _setup_style(self):
        """Set up widget styling."""
        self.style = {'description_width': 'initial'}

    def _initialize_widgets(self):
        """Create the containers that hold the dynamic path choosers."""
        self._choosers = []
        self._vbox = widgets.VBox(layout=widgets.Layout(border='2px solid green'))

    def add_path_chooser(self, b):
        """Add a new path chooser (file or folder) to the interface.

        Args:
            b: The button that triggered the callback (unused).
        """
        # `select_dirs` is not a FileChooser argument and was dropped; listing
        # files as well as folders is controlled by `show_only_dirs` below.
        fc = FileChooser(
            path=self.docker_volume_path,
            title="Select a file (.cdf, .h5) or a folder"
        )

        # Allow both files and folders to be selected, restricted to the volume.
        fc.sandbox_path = self.docker_volume_path
        fc.show_only_dirs = False  # Important: keeps files visible in the list

        self._choosers.append(fc)
        self._update_chooser_display()

    def remove_last_chooser(self, b):
        """Remove the most recently added path chooser, if any.

        Args:
            b: The button that triggered the callback (unused).
        """
        if self._choosers:
            self._choosers.pop()
            self._update_chooser_display()

    def _update_chooser_display(self):
        """Rebuild the chooser container from the current list of choosers."""
        chooser_widgets = []

        for i, fc in enumerate(self._choosers):
            # Separator and 1-based index shown above each chooser
            separator = widgets.HTML(f'<hr><b>Sélection {i+1}:</b>')
            # Short usage hint
            selection_info = widgets.HTML(
                value='<small style="color: #666;">Cliquez sur un fichier ou double-cliquez sur un dossier pour le sélectionner</small>'
            )

            chooser_widgets.extend([separator, selection_info, fc])

        self._vbox.children = (self.path_label, self.button_box, *chooser_widgets)

    def _create_widgets(self):
        """Create all UI widgets."""
        self._create_title_widget()
        self._create_path_widgets()
        self._create_output_widget()
        self._create_method_widgets()
        self._create_parameter_widgets()
        self._create_action_widgets()

    def _create_title_widget(self):
        """Create the title widget."""
        self.txt_title = widgets.HTML('<H1>GC×GC-MS Analysis Configuration</H1>')

    def _create_path_widgets(self):
        """Create the path selection label and the add/remove buttons."""
        # The closing </b> tag was malformed ("/b>") in the original markup.
        self.path_label = widgets.HTML(value='''
            <b>Select files/folders</b><br>
            <i>Sélectionnez des fichiers individuels (.cdf, .h5) ou des dossiers complets.<br>
            Pour les dossiers, tous les fichiers compatibles seront traités récursivement.</i>
        ''')

        self.add_path_button = widgets.Button(
            description="Add Path",
            button_style='success',
            icon='plus'
        )

        self.remove_button = widgets.Button(
            description="Remove last Path",
            button_style='warning',
            icon='trash'
        )

        self.add_path_button.on_click(self.add_path_chooser)
        self.remove_button.on_click(self.remove_last_chooser)

        self.button_box = widgets.HBox([
            self.add_path_button,
            self.remove_button
        ])

        self._vbox.children = (self.path_label, self.button_box)

    def _create_output_widget(self):
        """Create the output directory text field with its label."""
        self.w_output_path = widgets.Text(value=self.default_path_output)
        # _bold_widget appends the colon itself, so the label carries none.
        self.output_path = self._bold_widget("Output Directory", self.w_output_path)

    def _create_method_widgets(self):
        """Create the peak detection method and analysis mode selectors."""
        label_method = widgets.HTML(value="<b>Peak Detection Method</b>")
        self.r_method = widgets.RadioButtons(
            options=['persistent_homology', 'peak_local_max', 'LoG', 'DoG', 'DoH'],
            value='persistent_homology',
            description='',
            disabled=False
        )
        self.w_method = widgets.VBox([label_method, self.r_method])

        label_mode = widgets.HTML(value="<b>Analysis Mode</b>")
        self.r_mode = widgets.RadioButtons(
            options=['tic', 'mass_per_mass', '3D'],
            value='tic',
            description='',
            disabled=False
        )
        self.w_mode = widgets.VBox([label_mode, self.r_mode])

    def _create_parameter_widgets(self):
        """Create parameter input widgets.

        Each ``w_*`` attribute is the raw Text widget read at run time. The
        attribute without the prefix is rebound from its default string to the
        labelled row shown by ``display``.
        """
        # Noise factor
        self.w_noise_factor = widgets.Text(value=self.noise_factor)
        self.noise_factor = self._bold_widget("Facteur de Bruit", self.w_noise_factor)
        self.noise_factor_def = self._create_help_text(
            "Noise scaling factor used to filter detected peaks. "
            "A peak is retained if its intensity is greater than the maximum intensity multiplied by this factor."
        )

        # Min persistence
        self.w_min_persistence = widgets.Text(value=self.min_persistence)
        self.min_persistence = self._bold_widget("Persistance Minimale", self.w_min_persistence)
        self.min_persistence_def = self._create_help_text(
            "Minimum topological persistence threshold that a peak must exceed to be considered a true signal rather than noise."
        )

        # Absolute threshold
        self.w_abs_threshold = widgets.Text(value=self.abs_threshold)
        self.abs_threshold = self._bold_widget("Seuil Absolu", self.w_abs_threshold)
        self.abs_threshold_def = self._create_help_text(
            "Absolute threshold used to filter detected peaks based on their raw intensity."
        )

        # Relative threshold
        self.w_rel_threshold = widgets.Text(value=self.rel_threshold)
        self.rel_threshold = self._bold_widget("Seuil Relatif", self.w_rel_threshold)
        self.rel_threshold_def = self._create_help_text(
            "Relative threshold used to filter detected peaks based on their relative intensity."
        )

        # NIST matching
        self.nist = widgets.Checkbox(
            value=True,
            description='Enable NIST Database Matching',
            style=self.style,
            disabled=False
        )

    def _create_action_widgets(self):
        """Create action buttons and output area."""
        self.run_button = widgets.Button(
            description="Run Analysis",
            button_style='primary',
            icon='play'
        )
        self.clear_button = widgets.Button(
            description="Clear Results",
            button_style='info',
            icon='eraser'
        )
        self.output = widgets.Output()

        self.clear_button.on_click(lambda b: self.output.clear_output())

    def _create_help_text(self, text):
        """Return an HTML widget showing ``text`` as indented, italic help."""
        return widgets.HTML(value=f"""
            <div style="margin-left: 20px; font-style: italic; color: #666; font-size: 0.9em;">
                <p>{text}</p>
            </div>
        """)

    def _setup_callbacks(self):
        """Set up callbacks for interactive widgets."""
        self.run_button.on_click(self._on_button_click)

    def _bold_widget(self, label, widget):
        """Return ``widget`` preceded by ``label`` in bold, followed by a colon."""
        bold_label = widgets.HTML(value=f'<b>{label}:</b>')
        return widgets.HBox([bold_label, widget])

    def _validate_parameters(self):
        """Validate the values currently typed into the parameter widgets.

        Returns:
            A list of user-facing error messages; empty when everything is valid.
        """
        errors = []

        # (widget, lower bound, upper bound or None, range message, parse message)
        checks = (
            (self.w_noise_factor, 0.0, None,
             "Le facteur de bruit doit être non-négatif",
             "Le facteur de bruit doit être un nombre valide"),
            (self.w_min_persistence, 0.0, None,
             "La persistance minimale doit être non-négative",
             "La persistance minimale doit être un nombre valide"),
            (self.w_abs_threshold, 0.0, None,
             "Le seuil absolu doit être non-négatif",
             "Le seuil absolu doit être un nombre valide"),
            (self.w_rel_threshold, 0.0, 1.0,
             "Le seuil relatif doit être entre 0 et 1",
             "Le seuil relatif doit être un nombre valide"),
        )

        for widget, low, high, range_msg, number_msg in checks:
            try:
                value = float(widget.value)
            except (TypeError, ValueError):
                errors.append(number_msg)
                continue

            if value != value:
                # NaN is the only float that differs from itself; it would
                # slip through every range comparison below.
                errors.append(number_msg)
            elif value < low or (high is not None and value > high):
                errors.append(range_msg)

        if not self.w_output_path.value.strip():
            errors.append("Le répertoire de sortie ne peut pas être vide")

        return errors

    def get_all_files_from_selections(self):
        """Collect every supported file from all path choosers.

        A chooser with a selected filename contributes that single file (if its
        extension is supported); a chooser with only a directory contributes
        every supported file found recursively below it. Duplicates are dropped.

        Returns:
            A list of absolute file paths as strings, in selection order.
        """
        all_files = []
        processed_paths = set()

        for fc in self._choosers:
            directory = fc.selected_path
            if not directory:
                continue
            filename = fc.selected_filename

            # `selected_path` only holds the directory part of the selection;
            # the filename has to be joined back to obtain the chosen file.
            selected = os.path.join(directory, filename) if filename else directory

            try:
                # Resolve so that duplicates are compared as absolute paths
                selected_path = Path(selected).resolve()

                if str(selected_path) in processed_paths:
                    continue
                processed_paths.add(str(selected_path))

                if filename:
                    if selected_path.suffix.lower() in self.supported_extensions:
                        all_files.append(str(selected_path))
                        print(f"📄 Fichier ajouté: {selected_path.name}")
                    else:
                        print(f"⚠️  Fichier non supporté ignoré: {selected_path.name}")
                        print(f"   Extensions supportées: {', '.join(self.supported_extensions)}")

                else:
                    # A folder was selected
                    print(f"📁 Traitement du dossier: {selected_path}")
                    dir_files = self._get_files_from_directory(selected_path)

                    for f in dir_files:
                        resolved = str(Path(f).resolve())
                        if resolved not in processed_paths:
                            all_files.append(resolved)
                            processed_paths.add(resolved)
                    print(f"   Trouvé {len(dir_files)} fichiers compatibles")

            except Exception as e:
                # Best effort: one broken selection must not block the others.
                print(f"❌ Erreur lors du traitement de la sélection '{selected}': {e}")

        return all_files

    def _get_files_from_directory(self, directory_path):
        """Recursively list the supported files found below a directory.

        Extensions are compared case-insensitively, so ``.CDF`` and ``.Cdf``
        match as well. Each file appears once and the result is sorted.

        Args:
            directory_path: Directory to scan (``Path`` or string).

        Returns:
            A sorted list of file paths as strings; empty if the scan failed.
        """
        wanted = set(self.supported_extensions)

        try:
            matches = {
                str(candidate)
                for candidate in Path(directory_path).rglob('*')
                if candidate.suffix.lower() in wanted and candidate.is_file()
            }
        except OSError as e:
            print(f"❌ Erreur lors du scan du dossier {directory_path}: {e}")
            return []

        return sorted(matches)

    def get_scan_number(self, file_path):
        """Read the scan number stored in an ``.h5`` or ``.cdf`` file.

        Args:
            file_path: Path of the file, as a string.

        Returns:
            The ``scan_number_size`` attribute for ``.h5`` files, or the size of
            the ``scan_number`` dimension for ``.cdf`` files.

        Raises:
            ValueError: If the extension is not supported or the file cannot
                be read.
        """
        lowered = file_path.lower()
        # Reject unknown formats up front so the message is not re-wrapped by
        # the read-error handler below.
        if not lowered.endswith((".h5", ".cdf")):
            raise ValueError("Format de fichier non supporté. Veuillez fournir un fichier .h5 ou .cdf.")

        try:
            if lowered.endswith(".h5"):
                with h5py.File(file_path, 'r') as f:
                    return f.attrs['scan_number_size']
            with nc.Dataset(file_path, 'r') as dt:
                return dt.dimensions['scan_number'].size
        except Exception as e:
            raise ValueError(f"Erreur lors de la lecture du fichier {file_path}: {e}") from e

    def get_mod_time(self, file_path):
        """Return the modulation time (seconds) matching the file's scan number.

        Unknown scan numbers fall back to the default modulation time, with a
        warning printed for the user.

        Raises:
            ValueError: Propagated from ``get_scan_number``.
        """
        scan_number = self.get_scan_number(file_path)

        known = self._MODULATION_TIMES.get(scan_number)
        if known is None:
            print(f"   ⚠️  scan_number inconnu: {scan_number}, utilisation du temps de modulation par défaut")
            return self._DEFAULT_MOD_TIME

        mod_time, data_type = known
        print(f"   Type de données: {data_type}")
        return mod_time

    def analyze_files(self, selected_files, output_path, user_output_path, method, mode,
                      noise_factor, min_persistence, abs_threshold, rel_threshold,
                      cluster, min_distance, min_sigma, max_sigma, sigma_ratio, num_sigma,
                      formated_spectra, match_factor_min, overlap, eps, min_samples, nist):
        """Run ``sample_identification`` on every selected file.

        A failure on one file is reported and counted; the remaining files are
        still processed.

        Args:
            selected_files: Paths of the files to analyse.
            output_path: Directory results are written to (created if missing).
            user_output_path: The same directory as the user typed it; only
                used in messages.
            method, mode, noise_factor, min_persistence, abs_threshold,
            rel_threshold, cluster, min_distance, min_sigma, max_sigma,
            sigma_ratio, num_sigma, formated_spectra, match_factor_min,
            overlap, eps, min_samples, nist: Forwarded unchanged to
                ``sample_identification``.

        Returns:
            True if at least one file was analysed successfully, else False.
        """
        if not selected_files:
            print("❌ Erreur: Aucun fichier sélectionné pour l'analyse.")
            return False

        # Create the output directory if it does not exist yet
        os.makedirs(output_path, exist_ok=True)
        print(f"📁 Répertoire de sortie: {user_output_path}")

        print(f"\n🔍 Début de l'analyse de {len(selected_files)} fichiers:")
        for i, f in enumerate(selected_files, 1):
            print(f"  {i}. {os.path.basename(f)}")

        successful_analyses = 0
        failed_analyses = 0

        for i, full_path in enumerate(selected_files, 1):
            print(f"\n{'='*60}")
            print(f"🔬 Traitement du fichier {i}/{len(selected_files)}: {os.path.basename(full_path)}")
            print(f"{'='*60}")

            try:
                # Basic checks on the file
                if not os.path.isfile(full_path):
                    print(f"❌ Fichier introuvable: {full_path}")
                    failed_analyses += 1
                    continue

                if not os.access(full_path, os.R_OK):
                    print(f"❌ Permission refusée: {full_path}")
                    failed_analyses += 1
                    continue

                path = os.path.dirname(full_path)
                file = os.path.basename(full_path)

                # Look up the modulation time
                mod_time = self.get_mod_time(full_path)
                print(f"⏱️  Temps de modulation: {mod_time} secondes")

                # Run the analysis
                print(f"🚀 Début de l'analyse...")
                result = sample_identification(
                    path, file, output_path, mod_time, method, mode,
                    noise_factor, abs_threshold, rel_threshold, cluster,
                    min_distance, min_sigma, max_sigma, sigma_ratio, num_sigma,
                    formated_spectra, match_factor_min, min_persistence,
                    overlap, eps, min_samples, nist
                )

                print(f"✅ Analyse terminée avec succès!")
                print(f"📊 Résultat: {result}")
                successful_analyses += 1

            except Exception as e:
                # Per-file boundary: report, count, and move on to the next file.
                print(f"❌ Échec de l'analyse pour {full_path}:")
                print(f"   Erreur: {str(e)}")
                failed_analyses += 1
                traceback.print_exc()

        print(f"\n{'='*60}")
        print(f"📊 RÉSUMÉ DE L'ANALYSE")
        print(f"{'='*60}")
        print(f"✅ Réussies: {successful_analyses}")
        print(f"❌ Échouées: {failed_analyses}")
        print(f"📈 Taux de succès: {successful_analyses}/{len(selected_files)} ({100*successful_analyses/len(selected_files):.1f}%)")

        return successful_analyses > 0

    def _to_docker_path(self, host_path):
        """Map a path under the host volume to the matching Docker volume path.

        Only a leading host-volume prefix is rewritten; any other path is
        returned unchanged.
        """
        if host_path.startswith(self.host_volume_path):
            return self.docker_volume_path + host_path[len(self.host_volume_path):]
        return host_path

    def _on_button_click(self, b):
        """Validate the inputs, collect the files and run the analysis.

        Everything printed here is captured by the output widget.

        Args:
            b: The button that triggered the callback (unused).
        """
        with self.output:
            self.output.clear_output()
            print("🚀 Initialisation de l'analyse GC×GC-MS...")

            # Validate the parameters
            errors = self._validate_parameters()
            if errors:
                print("❌ Échec de la validation des paramètres:")
                for error in errors:
                    print(f"  • {error}")
                return

            # Gather every selected file
            print("\n📂 Collecte des fichiers depuis les sélections...")
            selected_files = self.get_all_files_from_selections()

            if not selected_files:
                print("❌ Aucun fichier compatible (.cdf ou .h5) trouvé dans les sélections.")
                print("💡 Veuillez sélectionner des fichiers ou des dossiers contenant des fichiers .cdf ou .h5.")
                return

            print(f"\n✅ {len(selected_files)} fichiers compatibles trouvés")

            # Prepare the paths; validation checked the stripped value, so the
            # stripped value is also the one that gets used.
            user_output_path = self.w_output_path.value.strip()
            output_path_for_docker = self._to_docker_path(user_output_path)

            try:
                # Run the analysis
                success = self.analyze_files(
                    selected_files, output_path_for_docker, user_output_path,
                    self.r_method.value, self.r_mode.value,
                    float(self.w_noise_factor.value),
                    float(self.w_min_persistence.value),
                    float(self.w_abs_threshold.value),
                    float(self.w_rel_threshold.value),
                    self._cluster, self._min_distance, self._min_sigma,
                    self._max_sigma, self._sigma_ratio, self._num_sigma,
                    self.formated_spectra, self._match_factor_min,
                    self._overlap, self._eps, self._min_samples,
                    self.nist.value
                )

                if success:
                    print("\n🎉 Processus d'analyse terminé avec succès!")
                    print(f"📁 Résultats sauvegardés dans: {user_output_path}")
                else:
                    print("\n⚠️  Processus d'analyse terminé avec des erreurs.")
                    print("🔍 Vérifiez les messages ci-dessus pour les détails.")

            except Exception as e:
                # Top-level UI boundary: show the failure instead of losing it.
                print(f"\n❌ Erreur fatale pendant l'analyse: {e}")
                traceback.print_exc()

    def display(self):
        """Display the complete UI."""
        display(
            self.txt_title,
            self._vbox,
            self.output_path,
            widgets.HBox([self.w_method, self.w_mode]),
            self.nist,
            self.noise_factor,
            self.noise_factor_def,
            self.min_persistence,
            self.min_persistence_def,
            self.abs_threshold,
            self.abs_threshold_def,
            self.rel_threshold,
            self.rel_threshold_def,
            widgets.HBox([self.run_button, self.clear_button]),
            self.output
        )

In [None]:
# Build the analysis UI and render its widgets in this cell's output.
gcms_ui = GCGCMSAnalysisUI()
gcms_ui.display()