In [1]:
import os
import numpy as np
import pandas as pd
from pathlib import Path
import pickle
from typing import Dict, List, Tuple, Optional, Union
import matplotlib.pyplot as plt

def update_hyperspectral_loader_class():
    """
    Monkey-patch HyperspectralDataLoader.apply_spectral_cutoff with a dual cutoff.

    The replacement method is assigned on the class itself, so it has to be
    installed before the loader's apply_spectral_cutoff is used. A confirmation
    line is printed once the patch is in place.
    """
    from HyperspectralDataLoader import HyperspectralDataLoader

    def new_apply_spectral_cutoff(self, data, wavelengths, excitation):
        """
        Drop emission bands hit by the Rayleigh or the second-order cutoff.

        Bands below ``excitation + self.cutoff_offset`` are removed, and so are
        bands within ``self.cutoff_offset`` of ``2 * excitation`` (both band
        edges included). The mask is applied along the third axis of ``data``.

        Returns:
            Tuple of (filtered data, list of the wavelengths that were kept).
        """
        emission = np.array(wavelengths)
        offset = self.cutoff_offset  # one offset drives both cutoffs

        # Cutoff boundaries
        rayleigh_cutoff = excitation + offset
        second_order_min = 2 * excitation - offset
        second_order_max = 2 * excitation + offset

        # A band survives when it is at/after the Rayleigh cutoff AND lies
        # strictly outside the second-order window
        past_rayleigh = emission >= rayleigh_cutoff
        outside_second_order = (emission < second_order_min) | (emission > second_order_max)
        keep_mask = past_rayleigh & outside_second_order

        # Third dimension holds the emission wavelengths
        filtered_data = data[:, :, keep_mask]
        filtered_wavelengths = emission[keep_mask].tolist()

        if self.verbose:
            print(f"Applied dual cutoff for excitation {excitation}nm")
            print(f"Removed wavelengths below {rayleigh_cutoff}nm (Rayleigh cutoff)")
            print(f"Removed wavelengths between {second_order_min}nm and {second_order_max}nm (second-order cutoff)")
            print(f"Original data shape: {data.shape}, filtered shape: {filtered_data.shape}")

        return filtered_data, filtered_wavelengths

    # Install the replacement on the class so every instance picks it up
    HyperspectralDataLoader.apply_spectral_cutoff = new_apply_spectral_cutoff

    print("HyperspectralDataLoader.apply_spectral_cutoff method has been updated to apply dual cutoff")


def read_laser_power_excel(excel_path: str) -> Dict[float, float]:
    """
    Read laser power measurements from an Excel file.

    The columns "Excitation Wavelength (nm)" and "Average Power (W)" are used
    when both are present; otherwise the first two columns are taken as
    (wavelength, power).

    Args:
        excel_path: Path to the Excel file containing excitation wavelengths and power measurements

    Returns:
        Dictionary mapping excitation wavelengths to power values (both as
        float). Rows in which the wavelength or the power cell is empty are
        skipped. If a wavelength occurs more than once, the last row wins.

    Raises:
        ValueError: If the sheet has fewer than two columns, or (from
            ``float()``) if a cell holds text that is not a number.
    """
    # Read the Excel file
    df = pd.read_excel(excel_path)

    # Pick the wavelength/power columns, by name when possible
    expected_columns = ("Excitation Wavelength (nm)", "Average Power (W)")
    if all(name in df.columns for name in expected_columns):
        excitation_col, power_col = expected_columns
    elif len(df.columns) >= 2:
        # If column names don't match, use the first two columns
        excitation_col, power_col = df.columns[0], df.columns[1]
    else:
        raise ValueError("Excel file doesn't have expected columns")

    # Blank or partially filled rows (common at the end of a sheet) would
    # otherwise become NaN keys/values and corrupt max/min/mean downstream
    measurements = df[[excitation_col, power_col]].dropna()

    return {
        float(excitation): float(power)
        for excitation, power in zip(measurements[excitation_col], measurements[power_col])
    }


def normalize_hyperspectral_data_by_laser_power(
    data_dict: Dict,
    laser_powers: Dict[float, float],
    reference_type: Union[str, float] = 'max',
    output_file: Optional[str] = None
) -> Dict:
    """
    Normalize hyperspectral data based on laser powers from Excel file.

    Each cube is multiplied by ``reference_power / laser_power`` of its
    excitation, which compensates for variations in laser power.

    Args:
        data_dict: Dictionary containing hyperspectral data. The cubes are read
            from ``data_dict['data'][<excitation>]['cube']``; every excitation
            key must be convertible with ``float()``.
        laser_powers: Dictionary mapping excitation wavelengths to power values.
            Entries whose wavelength or power is not finite, or whose power is
            not strictly positive, are reported and ignored because they
            cannot serve as a divisor.
        reference_type: Type of reference to use ('max', 'min', 'mean' - taken
            over the usable powers - or a number used directly as the
            reference power)
        output_file: Path to save the normalized data pickle file (optional).
            When given, the diagnostic plot is saved next to it with a
            ``.png`` suffix.

    Returns:
        Deep copy of ``data_dict`` with the cubes scaled, the factor stored per
        excitation under 'laser_power_normalization_factor', and the settings
        stored in ``metadata['laser_power_normalization']``. Excitations
        without a usable laser power are left unscaled.

    Raises:
        ValueError: If ``reference_type`` is not recognised, or if 'max',
            'min' or 'mean' is requested but no usable laser power exists.
    """
    print("Normalizing hyperspectral data based on laser power...")

    # Create a deep copy of the data to avoid modifying the original
    import copy
    normalized_data = copy.deepcopy(data_dict)

    # Keep only powers that can be divided by; a single zero or NaN reading
    # must not abort (or silently distort) the whole normalization
    usable_powers = {}
    for wavelength, power in laser_powers.items():
        if np.isfinite(wavelength) and np.isfinite(power) and power > 0:
            usable_powers[wavelength] = power
        else:
            print(f"  ⚠ Ignoring unusable laser power {power!r} W for excitation {wavelength}nm")

    reference_power = _resolve_reference_power(usable_powers, reference_type)

    # Store the normalization information in metadata
    if 'metadata' not in normalized_data:
        normalized_data['metadata'] = {}

    normalized_data['metadata']['laser_power_normalization'] = {
        'reference_type': reference_type,
        'reference_power': reference_power,
        'laser_powers': laser_powers
    }

    # Plot powers and normalization factors for visualization. The figure is
    # intentionally left open so the notebook can display it.
    _plot_laser_power_normalization(usable_powers, reference_power, reference_type)

    # Save the plot if output file is provided
    if output_file:
        plot_file = str(Path(output_file).with_suffix('.png'))
        plt.savefig(plot_file)
        print(f"Normalization plot saved to: {plot_file}")

    # Normalize each data cube
    print("Normalizing data cubes...")
    for ex_str in data_dict['data'].keys():
        excitation = float(ex_str)

        # Check if we have a usable laser power for this excitation
        if excitation in usable_powers:
            laser_power = usable_powers[excitation]

            # Calculate normalization factor: reference_power / laser_power
            normalization_factor = reference_power / laser_power

            # Scale from the caller's cube; the deep copy keeps the input intact
            original_cube = data_dict['data'][ex_str]['cube']
            normalized_data['data'][ex_str]['cube'] = original_cube * normalization_factor

            # Store normalization factor next to the cube
            normalized_data['data'][ex_str]['laser_power_normalization_factor'] = normalization_factor

            print(f"  Normalized excitation {ex_str}nm (Power: {laser_power:.8f} W, Factor: {normalization_factor:.4f})")
        else:
            print(f"  ⚠ No laser power data for excitation {ex_str}nm")

    # Save the normalized data if output file is provided
    if output_file:
        with open(output_file, 'wb') as f:
            pickle.dump(normalized_data, f)
        print(f"Normalized data saved to {output_file}")

    return normalized_data


def _resolve_reference_power(usable_powers, reference_type):
    """Return the reference power selected by ``reference_type`` and report it."""
    if reference_type in ('max', 'min', 'mean') and not usable_powers:
        raise ValueError(
            f"Cannot compute '{reference_type}' reference: no finite, positive laser power values available."
        )

    if reference_type == 'max':
        reference_power = max(usable_powers.values())
        print(f"Using maximum laser power as reference: {reference_power:.8f} W")
    elif reference_type == 'min':
        reference_power = min(usable_powers.values())
        print(f"Using minimum laser power as reference: {reference_power:.8f} W")
    elif reference_type == 'mean':
        reference_power = sum(usable_powers.values()) / len(usable_powers)
        print(f"Using mean laser power as reference: {reference_power:.8f} W")
    elif isinstance(reference_type, (int, float)):
        reference_power = float(reference_type)
        print(f"Using provided power as reference: {reference_power:.8f} W")
    else:
        raise ValueError("Invalid reference_type. Use 'max', 'min', 'mean', or a float value.")

    return reference_power


def _plot_laser_power_normalization(usable_powers, reference_power, reference_type):
    """Draw laser power and normalization factor against excitation wavelength."""
    plt.figure(figsize=(12, 10))

    wavelengths = sorted(usable_powers.keys())
    powers = [usable_powers[w] for w in wavelengths]
    factors = [reference_power / p for p in powers]

    # Plot 1: Original laser powers
    plt.subplot(2, 1, 1)
    plt.plot(wavelengths, powers, 'o-', color='blue')
    plt.xlabel('Excitation Wavelength (nm)')
    plt.ylabel('Laser Power (W)')
    plt.title('Laser Power vs Excitation Wavelength')
    plt.grid(True)

    # Plot 2: Normalization factors
    plt.subplot(2, 1, 2)
    plt.plot(wavelengths, factors, 'o-', color='red')
    plt.xlabel('Excitation Wavelength (nm)')
    plt.ylabel('Normalization Factor')
    plt.title(f'Normalization Factors ({reference_type} reference)')
    plt.grid(True)

    plt.tight_layout()


def process_with_dual_cutoff_and_power_normalization(
    data_path: str,
    metadata_path: str,
    laser_power_excel: str,
    cutoff_offset: int = 30,
    reference_types: List[str] = ['max', 'min', 'mean'],
    output_dir: Optional[str] = None
) -> Dict[str, str]:
    """
    Process hyperspectral data with both Rayleigh and second-order cutoffs
    and normalize by laser power from Excel file using multiple reference types.

    For every reference type a normalized pickle, a diagnostic plot and a
    parquet dataframe are written into ``output_dir``.

    Args:
        data_path: Path to the directory containing .im3 files
        metadata_path: Path to the Excel file with exposure metadata
        laser_power_excel: Path to the Excel file with laser power measurements
        cutoff_offset: Offset in nm for both Rayleigh and second-order cutoffs
        reference_types: List of reference types for normalization ('max', 'min', 'mean')
        output_dir: Directory to save the output files (default: 'processed_data');
            created if it does not exist

    Returns:
        Dictionary mapping 'cutoff' to the dual-cutoff pickle and each
        reference type to its normalized pickle path
    """
    # First update the HyperspectralDataLoader class to use our new dual cutoff method
    update_hyperspectral_loader_class()

    # Import every loader helper up front so a missing name fails before the
    # expensive load instead of midway through the normalization loop
    from HyperspectralDataLoader import (
        HyperspectralDataLoader,
        load_data_and_create_df,
        save_dataframe,
    )

    # The reference types are walked twice below (processing + summary), so
    # take a private list; this also works for one-shot iterables
    reference_types = list(reference_types)

    # Create output directory if needed
    output_dir = Path("processed_data") if output_dir is None else Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1. Process data with dual cutoff
    print(f"Processing data with dual cutoff (offset: {cutoff_offset}nm)...")

    loader = HyperspectralDataLoader(
        data_path=data_path,
        metadata_path=metadata_path,
        cutoff_offset=cutoff_offset,
        use_fiji=True,
        verbose=True
    )

    # This will now use our patched method that applies both cutoffs
    loader.load_data(apply_cutoff=True)

    # Summary of the processed data
    loader.print_summary()

    # Save the data with the dual cutoff
    cutoff_file = output_dir / f"data_dual_cutoff_{cutoff_offset}nm.pkl"
    loader.save_to_pkl(str(cutoff_file))

    print(f"Data with dual cutoff (offset: {cutoff_offset}nm) saved to: {cutoff_file}")

    # 2. Normalize by laser power using different reference types
    print("\nNormalizing data based on laser power...")

    # Load laser powers from Excel file
    laser_powers = read_laser_power_excel(laser_power_excel)
    print(f"Found laser powers for {len(laser_powers)} excitation wavelengths")

    # Load the data we just saved (a pickle this function wrote itself;
    # never point pickle.load at untrusted files)
    with open(cutoff_file, 'rb') as f:
        data_dict = pickle.load(f)

    # Process with each reference type
    output_files = {'cutoff': str(cutoff_file)}

    for ref_type in reference_types:
        print(f"\nNormalizing with {ref_type} reference...")
        power_file = output_dir / f"data_dual_cutoff_{cutoff_offset}nm_power_normalized_{ref_type}.pkl"

        # The normalized data is written to power_file; the returned copy is
        # not needed here because the dataframe is built from the pickle
        normalize_hyperspectral_data_by_laser_power(
            data_dict,
            laser_powers,
            reference_type=ref_type,
            output_file=str(power_file)
        )

        output_files[ref_type] = str(power_file)

        # Create dataframe from power normalized data. with_suffix only swaps
        # the file extension, whereas str.replace would also rewrite a ".pkl"
        # occurring in a directory name.
        df_power = load_data_and_create_df(str(power_file))
        power_parquet = str(power_file.with_suffix('.parquet'))
        save_dataframe(df_power, power_parquet)

        print(f"Power normalized dataframe ({ref_type} reference) saved to: {power_parquet}")

    print(f"\nProcessing complete!")
    print(f"1. Data with dual cutoff (offset: {cutoff_offset}nm): {cutoff_file}")

    for ref_type in reference_types:
        print(f"2. Data normalized by laser power ({ref_type} reference): {output_files[ref_type]}")

    return output_files

In [None]:
# Inputs for the Kiwi dataset
data_path = '../Data/Kiwi'
metadata_path = '../Data/Kiwi/metadata.xlsx'
laser_power_excel = "../Data/Kiwi/TLS Scans/wavelength_power_data.xlsx"
reference_types = ['max', 'min', 'mean']

# NOTE(review): unlike the input paths, output_dir has no '../' prefix, so it
# resolves against the current working directory - confirm this is intended
output_files = process_with_dual_cutoff_and_power_normalization(
    data_path=data_path,
    metadata_path=metadata_path,
    laser_power_excel=laser_power_excel,
    cutoff_offset=30,
    reference_types=reference_types,
    output_dir='Data/Kiwi Experiment/pickles'
)

print("\nCreating dataframes...")

from HyperspectralDataLoader import load_data_and_create_df, save_dataframe

# output_files maps every reference type to its normalized pickle; take the
# pickle path from there (no `power_file` exists at notebook scope otherwise)
for ref_type in reference_types:
    power_file = output_files[ref_type]
    df_power = load_data_and_create_df(power_file)
    power_parquet = str(Path(power_file).with_suffix('.parquet'))
    save_dataframe(df_power, power_parquet)

    print(f"Power normalized dataframe ({ref_type} reference) saved to: {power_parquet}")

In [None]:
# NOTE(review): this cell runs the same full pipeline as the previous cell;
# keeping only one of the two avoids loading and normalizing everything twice.

# Inputs for the Kiwi dataset
data_path = '../Data/Kiwi'
metadata_path = '../Data/Kiwi/metadata.xlsx'
laser_power_excel = "../Data/Kiwi/TLS Scans/wavelength_power_data.xlsx"
reference_types = ['max', 'min', 'mean']

# NOTE(review): unlike the input paths, output_dir has no '../' prefix, so it
# resolves against the current working directory - confirm this is intended
output_files = process_with_dual_cutoff_and_power_normalization(
    data_path=data_path,
    metadata_path=metadata_path,
    laser_power_excel=laser_power_excel,
    cutoff_offset=30,
    reference_types=reference_types,
    output_dir='Data/Kiwi Experiment/pickles'
)

print("\nCreating dataframes...")

from HyperspectralDataLoader import load_data_and_create_df, save_dataframe

# output_files maps every reference type to its normalized pickle; take the
# pickle path from there (no `power_file` exists at notebook scope otherwise)
for ref_type in reference_types:
    power_file = output_files[ref_type]
    df_power = load_data_and_create_df(power_file)
    power_parquet = str(Path(power_file).with_suffix('.parquet'))
    save_dataframe(df_power, power_parquet)

    print(f"Power normalized dataframe ({ref_type} reference) saved to: {power_parquet}")