In [None]:
import numpy as np
import projection
import mass_spec
import pyms
import requests
import time
import sdjson
import json
from typing import List, Tuple
from nist_utils.reference_data import ReferenceData
from pyms_nist_search.search_result import SearchResult
import read_chroma
import peak_detection


def hit_list_with_ref_data_from_json(json_data: str) -> List[Tuple[SearchResult, ReferenceData]]:
    """
    Decode a JSON payload into ``(SearchResult, ReferenceData)`` pairs.

    :param json_data: JSON text holding a sequence of two-item entries. The
        first item of each entry is expanded as keyword arguments into
        ``SearchResult``, the second into ``ReferenceData``.

    :return: One ``(SearchResult, ReferenceData)`` tuple per entry, in the
        order the entries appear in the payload.
    """

    return [
        (SearchResult(**hit_fields), ReferenceData(**ref_fields))
        for hit_fields, ref_fields in json.loads(json_data)
    ]


def full_search_with_ref_data(
        mass_spectrum,
        n_hits: int = 20,
        max_retries: int = 240,
        retry_delay: float = 0.5,
        ) -> List[Tuple[SearchResult, ReferenceData]]:
    """
    Perform a Full Spectrum Search of the mass spectral library, including reference data.

    The spectrum is POSTed to the search server at ``http://nist:5001``.
    Connection errors are retried up to ``max_retries`` times, sleeping
    ``retry_delay`` seconds between attempts.

    :param mass_spectrum: The mass spectrum to search against the library.
    :param n_hits: The number of hits to return.
    :param max_retries: Maximum number of connection attempts before giving up.
    :param retry_delay: Seconds to wait after a failed connection attempt.

    :return: List of tuples containing possible identities
        for the mass spectrum, and the reference data.

    :raises TypeError: If ``mass_spectrum`` is not a ``pyms.Spectrum.MassSpectrum``.
    :raises requests.exceptions.HTTPError: If the server answers with an error status.
    :raises TimeoutError: If every connection attempt fails.
    """

    if not isinstance(mass_spectrum, pyms.Spectrum.MassSpectrum):
        raise TypeError("`mass_spectrum` must be a pyms.Spectrum.MassSpectrum object.")

    url = f"http://nist:5001/search/spectrum_with_ref_data/?n_hits={n_hits}"
    # Serialise once instead of on every retry.
    # NOTE(review): the already-serialised string is handed to `json=`, as in
    # the original code; presumably the server expects this form - confirm.
    payload = sdjson.dumps(mass_spectrum)

    last_error = None
    for _ in range(max_retries):
        # Only the network call can raise ConnectionError, so keep the try body minimal.
        try:
            res = requests.post(url, json=payload)
        except requests.exceptions.ConnectionError as err:
            last_error = err
            time.sleep(retry_delay)
        else:
            # Fail with a clear HTTP error rather than an opaque parsing
            # failure when the server replies with an error status.
            res.raise_for_status()
            return hit_list_with_ref_data_from_json(res.text)

    raise TimeoutError("Unable to communicate with the search server.") from last_error


def matching_nist_lib_from_chromato_cube_new(
        chromato_obj, chromato_cube, coordinates, mod_time,
        match_factor_min):
    """
    Search the library for the mass spectrum of every peak and label the peak.

    For each entry of ``coordinates`` the spectrum is read from
    ``chromato_cube``, the single best library hit is requested, and the hit
    is kept only if its match factor is at least ``match_factor_min``.
    Peaks without an accepted hit are named ``Analyte1``, ``Analyte2``, ...

    :param chromato_obj: ``(chromato, time_rn, spectra_obj)`` triple, where
        ``spectra_obj`` is either a 6-item sequence whose last two items are
        ``range_min`` and ``range_max``, or the ``(range_min, range_max)``
        pair itself.
    :param chromato_cube: Passed to
        ``mass_spec.read_spectrum_from_chromato_cube`` to read each spectrum.
    :param coordinates: Peak coordinates; also forwarded to
        ``projection.matrix_to_chromato``.
    :param mod_time: Forwarded to ``projection.matrix_to_chromato``.
    :param match_factor_min: Minimum match factor for a hit to be accepted.

    :return: One ``[[c0, c1], match_data, coord]`` entry per peak, where
        ``c0``/``c1`` come from ``projection.matrix_to_chromato`` and
        ``match_data`` is a dict with the keys ``spectra``, ``casno``,
        ``compound_name``, ``compound_formula``, ``hit_prob``,
        ``match_factor`` and ``reverse_match_factor``.
    """

    start = time.time()
    chromato, time_rn, spectra_obj = chromato_obj
    coordinates_in_chromato = projection.matrix_to_chromato(
        coordinates, time_rn, mod_time, chromato.shape)

    # spectra_obj comes in two layouts; only the mass range bounds are needed.
    try:
        (_, _, _, _, range_min, range_max) = spectra_obj
    except ValueError:
        range_min, range_max = spectra_obj

    mass_values = np.linspace(
        range_min, range_max, range_max - range_min + 1).astype(int)

    matches = []
    nb_analyte = 0
    for i, coord in enumerate(coordinates):

        int_values = mass_spec.read_spectrum_from_chromato_cube(
            coord, chromato_cube=chromato_cube)
        mass_spectrum = pyms.Spectrum.MassSpectrum(mass_values, int_values)

        hits = full_search_with_ref_data(mass_spectrum, n_hits=1)
        if hits:
            search_result, ref_data = hits[0]
            print(f"Best hit: {search_result.name}, {search_result.cas} "
                  f"with match_factor:{search_result.match_factor}.")
        else:
            # An empty hit list is treated like a hit below the threshold
            # instead of failing with IndexError.
            search_result = ref_data = None

        match_data = {
            'spectra': int_values,
            'casno': '',
            'compound_name': '',
            'compound_formula': '',
            'hit_prob': '',
            'match_factor': '',
            'reverse_match_factor': ''
            }
        if (search_result is not None
                and search_result.match_factor >= match_factor_min):
            match_data.update({
                'casno': search_result.cas,
                'compound_name': search_result.name,
                'compound_formula': ref_data.formula,
                'hit_prob': search_result.hit_prob,
                'match_factor': search_result.match_factor,
                'reverse_match_factor': search_result.reverse_match_factor
                })
        else:
            # Unidentified compound: give it a sequential placeholder name.
            nb_analyte += 1
            match_data['compound_name'] = f'Analyte{nb_analyte}'

        matches.append([[(coordinates_in_chromato[i][0]),
                       (coordinates_in_chromato[i][1])], match_data, coord])

    elapsed = time.time() - start
    print(f"Matching NIST library took {elapsed:.2f} seconds")

    return matches


def matching_nist_lib_from_chromato_cube_old(
        chromato_obj, chromato_cube, coordinates, mod_time, match_factor_min=800):
    """
    Reference implementation of the per-peak library matching.

    Reads the spectrum of every peak from ``chromato_cube``, runs a library
    search with the default number of hits and keeps the first hit when its
    match factor reaches ``match_factor_min``; other peaks are named
    ``Analyte<n>`` with empty identification fields.

    :param chromato_obj: ``(chromato, time_rn, spectra_obj)`` triple, where
        ``spectra_obj`` is either a 6-item sequence ending with the mass range
        bounds or the ``(range_min, range_max)`` pair itself.
    :param chromato_cube: Passed to
        ``mass_spec.read_spectrum_from_chromato_cube``.
    :param coordinates: Peak coordinates; also forwarded to
        ``projection.matrix_to_chromato``.
    :param mod_time: Forwarded to ``projection.matrix_to_chromato``.
    :param match_factor_min: Minimum match factor for a hit to be accepted.

    :return: One ``[[c0, c1], data, coord]`` entry per peak.
    """
    tic, retention_times, spectra_info = chromato_obj
    chromato_coords = projection.matrix_to_chromato(
        coordinates, retention_times, mod_time, tic.shape)

    # Only the two mass range bounds are needed from either layout.
    try:
        (_, _, _, _, mz_low, mz_high) = spectra_info
    except ValueError:
        mz_low, mz_high = spectra_info

    mz_axis = np.linspace(mz_low, mz_high, mz_high - mz_low + 1).astype(int)

    results = []
    unknown_count = 0
    print("nb_peaks: ", len(coordinates))
    for idx, peak in enumerate(coordinates):
        intensities = mass_spec.read_spectrum_from_chromato_cube(
            peak, chromato_cube=chromato_cube)
        hits = full_search_with_ref_data(
            pyms.Spectrum.MassSpectrum(mz_axis, intensities))

        # Read every field of the first hit up front.
        top_hit = hits[0][0]
        cas = top_hit.cas
        name = top_hit.name
        formula = hits[0][1].formula
        probability = top_hit.hit_prob
        factor = top_hit.match_factor
        reverse_factor = top_hit.reverse_match_factor

        if factor < match_factor_min:
            # Below the threshold: placeholder name, blank identification.
            unknown_count += 1
            record = {
                'compound_name': 'Analyte' + str(unknown_count),
                'casno': '',
                'compound_formula': '',
                'hit_prob': '',
                'match_factor': '',
                'reverse_match_factor': '',
                'spectra': intensities,
            }
        else:
            record = {
                'casno': cas,
                'compound_name': name,
                'compound_formula': formula,
                'hit_prob': probability,
                'match_factor': factor,
                'reverse_match_factor': reverse_factor,
                'spectra': intensities,
            }

        results.append(
            [[chromato_coords[idx][0], chromato_coords[idx][1]], record, peak])

    print("nb match:")
    print(len(coordinates))
    return results


# Input file and the mod_time value passed to the reader and to both
# matching functions below.
filename = "/app/data/A-F-028-817822-droite-ReCIVA.h5"
mod_time = 1.7

# Load the data with pre-processing enabled; the five returned values feed
# peak detection and the two matching runs.
chromato_tic, time_rn, chromato_cube, sigma, mass_range = (
        read_chroma.read_chromato_and_chromato_cube(filename, mod_time,
                                                    pre_process=True
                                                    ))
# Settings forwarded as keyword arguments to peak_detection.peak_detection.
abs_threshold = 0
rel_threshold = 0.01
noise_factor = 1.5
min_persistence = 0.02

min_distance = 1
sigma_ratio = 1.6
num_sigma = 10
min_sigma = 1
max_sigma = 30
overlap = 0.5
# NOTE(review): match_factor_min is not referenced anywhere in the visible
# code; both matching calls below pass a literal 800 instead. Confirm which
# threshold is intended.
match_factor_min = 650

cluster = True
min_samples = 4
eps = 3
method = "persistent_homology"
mode = "tic"

# Detect peaks; the resulting coordinates are shared by both matching runs.
coordinates = peak_detection.peak_detection(
        (chromato_tic, time_rn, mass_range),
        chromato_cube=chromato_cube,
        sigma=sigma,
        noise_factor=noise_factor,
        abs_threshold=abs_threshold,
        rel_threshold=rel_threshold,
        method=method,
        mode=mode,
        cluster=cluster,
        min_distance=min_distance,
        min_sigma=min_sigma,
        max_sigma=max_sigma,
        sigma_ratio=sigma_ratio,
        num_sigma=num_sigma,
        min_persistence=min_persistence,
        overlap=overlap,
        eps=eps,
        min_samples=min_samples)

# Run the new and the old implementation on identical inputs so that their
# results can be compared entry by entry.
matches_new = matching_nist_lib_from_chromato_cube_new(
     (chromato_tic, time_rn, mass_range), chromato_cube, coordinates, mod_time, match_factor_min=800)

matches_old = matching_nist_lib_from_chromato_cube_old(
     (chromato_tic, time_rn, mass_range), chromato_cube, coordinates, mod_time, match_factor_min=800)


def compare_matches(m1, m2):
    """
    Compare two match entries and describe how they differ.

    Each entry is a three-item sequence ``[coords, data, coord]``; only
    ``coords`` and the ``data`` dict are compared, the third item is ignored.

    :param m1: First entry.
    :param m2: Second entry.

    :return: ``(True, "")`` when the coordinates and every data value agree,
        otherwise ``(False, message)`` where ``message`` describes either the
        coordinate mismatch or the differing data keys.
    """
    coords1, data1, _ = m1
    coords2, data2, _ = m2

    if coords1 != coords2:
        return False, f"⛔️ Coords diffèrent: {coords1} vs {coords2}"

    # Visit the keys of both dicts (first-seen order) so that a key present
    # only in data2 is reported as well.
    all_keys = list(data1) + [key for key in data2 if key not in data1]

    diffs = {}
    for key in all_keys:
        val1 = data1.get(key)
        val2 = data2.get(key)

        # If either side is an array, `!=` would yield an element-wise array
        # whose truth value is ambiguous; compare with array_equal instead.
        if isinstance(val1, np.ndarray) or isinstance(val2, np.ndarray):
            if not np.array_equal(val1, val2):
                diffs[key] = 'Différence sur spectre'
        elif val1 != val2:
            diffs[key] = (val1, val2)

    if diffs:
        return False, f"⚠️ Différences sur : {diffs}"

    return True, ""


# Compare the results of both implementations entry by entry.
print("\n🔍 Comparaison des résultats NIST:\n")
for i, pair in enumerate(zip(matches_new, matches_old)):
    m1, m2 = pair
    identical, detail = compare_matches(m1, m2)
    if not identical:
        print(f"[{i}] ❌ Différence pour le pic {m1[0]} → {detail}")
        continue
    print(f"[{i}] ✅ Match identique pour le pic {m1[0]}")

# zip() stops at the shorter list, so a length mismatch (one version
# filtering more than the other) is reported separately.
n_new, n_old = len(matches_new), len(matches_old)
if n_new != n_old:
    print(f"\n⚠️ Les longueurs sont différentes: V1={n_new}, V2={n_old}")



chromato read 1.8106443881988525 s
--- 43.77129411697388 seconds --- to compute full spectra centroid
full spectra computed 44.75844168663025 s
baseline corrected
Best hit: Acetone, 67-64-1with match_factor:919.
res[0][0].name: Acetone
Best hit: Disiloxane, hexamethyl-, 107-46-0with match_factor:925.
res[0][0].name: Disiloxane, hexamethyl-
Best hit: Isopropyl Alcohol, 67-63-0with match_factor:963.
res[0][0].name: Isopropyl Alcohol
Best hit: Cyclopropane, ethylidene-, 18631-83-9with match_factor:952.
res[0][0].name: Cyclopropane, ethylidene-
Best hit: Benzene, 71-43-2with match_factor:956.
res[0][0].name: Benzene
Best hit: n-Hexane, 110-54-3with match_factor:925.
res[0][0].name: n-Hexane
Best hit: Nonanal, 124-19-6with match_factor:912.
res[0][0].name: Nonanal
Best hit: Cyclotetrasiloxane, octamethyl-, 556-67-2with match_factor:880.
res[0][0].name: Cyclotetrasiloxane, octamethyl-
Best hit: Acetone, 67-64-1with match_factor:923.
res[0][0].name: Acetone
Best hit: Acetone, 67-64-1with matc