In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
from HyperspectralDataLoader import HyperspectralDataLoader
import pandas as pd
from pathlib import Path
import pickle
from typing import Dict, List, Tuple, Optional, Union

data_path = '../Data/Kiwi'
metadata_path = '../Data/Kiwi/metadata.xlsx'

loader = HyperspectralDataLoader(
    data_path=data_path,
    metadata_path=metadata_path,
    cutoff_offset=20,
    use_fiji=True,
    verbose=True
)

Initializing ImageJ (Fiji)...


In [2]:
loader.load_data(apply_cutoff=True)

loader.print_summary()

loader.save_to_pkl('kiwi_processed.pkl')

Loading 300.im3 ...


Operating in headless mode - the original ImageJ will have limited functionality.


Loading 310.im3 ...
Loading 320.im3 ...
Loading 330.im3 ...
Loading 340.im3 ...
Loading 350.im3 ...
Loading 360.im3 ...
Loading 370.im3 ...
Loading 380.im3 ...
Loading 390.im3 ...
Loading 400.im3 ...
Loading 410.im3 ...
Loading 420.im3 ...
Loading 430.im3 ...
Loading 440.im3 ...
Loading 450.im3 ...
Loading 460.im3 ...
Loading 470.im3 ...
Loading 480.im3 ...
Loading 490.im3 ...
Loading 500.im3 ...
Processing data with cutoff offset: 30nm...
Applied cutoff for excitation 300.0nm
Removed wavelengths between 570.0nm and 630.0nm
Original data shape: (1024, 1392, 31), filtered shape: (1024, 1392, 24)
Applied cutoff for excitation 310.0nm
Removed wavelengths between 590.0nm and 650.0nm
Original data shape: (1024, 1392, 31), filtered shape: (1024, 1392, 24)
Applied cutoff for excitation 320.0nm
Removed wavelengths between 610.0nm and 670.0nm
Original data shape: (1024, 1392, 31), filtered shape: (1024, 1392, 24)
Applied cutoff for excitation 330.0nm
Removed wavelengths between 630.0nm and 690.

In [None]:
def create_excitation_emission_dataframe(data_dict: Dict,
                                        sample_size: Optional[int] = None) -> pd.DataFrame:
    """
    Transform 4D hyperspectral data into a 2D dataframe.

    Args:
        data_dict: Dictionary containing hyperspectral data
        sample_size: Optional number of random pixels to sample (for large datasets)

    Returns:
        DataFrame with x, y coordinates and intensity values for each valid excitation-emission combination
    """
    # First, collect all valid excitation-emission combinations
    valid_combinations = []
    all_excitations = []

    # Check what excitations we actually have in the data
    for ex_str in data_dict['data'].keys():
        excitation = float(ex_str)
        all_excitations.append(excitation)

        # Get the valid emission wavelengths for this excitation
        emissions = data_dict['data'][ex_str]['wavelengths']

        # Add all valid combinations to our list
        for emission in emissions:
            col_name = f"{int(emission)}-{int(excitation)}"
            valid_combinations.append((excitation, emission, col_name))

    print(f"Found {len(all_excitations)} excitation wavelengths")
    print(f"Generated {len(valid_combinations)} valid excitation-emission combinations")

    # Create an empty dataframe with x, y coordinates
    # First, determine the dimensions of our data
    first_ex = str(all_excitations[0])
    cube_shape = data_dict['data'][first_ex]['cube'].shape
    height, width = cube_shape[0], cube_shape[1]

    print(f"Image dimensions: {height} x {width} pixels")

    # Initialize the dataframe with columns for x and y coordinates
    total_pixels = height * width

    # Create coordinate arrays - this is the correct way to flatten spatial dimensions
    # Create a meshgrid of coordinates
    y_coords, x_coords = np.mgrid[0:height, 0:width]

    # Flatten the coordinates
    x_coords = x_coords.flatten()
    y_coords = y_coords.flatten()

    # Create initial dataframe with coordinates
    df = pd.DataFrame({
        'x': x_coords,
        'y': y_coords
    })

    # If sample_size is provided, take a random sample of pixels
    if sample_size is not None and sample_size < len(df):
        df = df.sample(n=sample_size, random_state=42)
        print(f"Sampled {sample_size} pixels out of {total_pixels}")

    print(f"Created initial dataframe with {len(df)} rows")

    # Now, fill in the intensity values for each valid combination
    for excitation, emission, col_name in valid_combinations:
        # Get the data cube for this excitation
        ex_str = str(excitation)
        cube = data_dict['data'][ex_str]['cube']
        wavelengths = data_dict['data'][ex_str]['wavelengths']

        # Find the index of this emission wavelength
        try:
            em_idx = wavelengths.index(emission)

            # Extract the intensity values for this emission wavelength
            # For the sampled rows only
            if sample_size is not None and sample_size < total_pixels:
                # Get the x, y coordinates of the sampled pixels
                sampled_coords = df[['x', 'y']].values
                # Extract intensity values for these coordinates
                intensities = [cube[y, x, em_idx] for x, y in zip(sampled_coords[:, 0], sampled_coords[:, 1])]
                df[col_name] = intensities
            else:
                # Extract for all pixels - flatten in the same order as the coordinates
                intensities = cube[:, :, em_idx].flatten()
                df[col_name] = intensities

        except ValueError:
            # This emission wavelength doesn't exist for this excitation
            # We're skipping it as requested instead of adding NaN values
            continue

    print(f"Final dataframe has {len(df.columns)} columns")
    return df

def load_data_and_create_df(pickle_file: str, sample_size: Optional[int] = None) -> pd.DataFrame:
    """
    Load data from pickle file and create the dataframe

    Args:
        pickle_file: Path to the pickle file
        sample_size: Optional number of random pixels to sample

    Returns:
        Transformed dataframe
    """
    # Load the data
    with open(pickle_file, 'rb') as f:
        data_dict = pickle.load(f)

    # Create the dataframe
    return create_excitation_emission_dataframe(data_dict, sample_size)

def save_dataframe(df: pd.DataFrame, output_file: str) -> None:
    """Save the dataframe to a file"""
    print(f"Saving dataframe to {output_file}")

    # Determine file extension and save accordingly
    ext = Path(output_file).suffix
    if ext == '.csv':
        df.to_csv(output_file, index=False)
    elif ext == '.parquet':
        df.to_parquet(output_file, index=False)
    elif ext == '.pkl' or ext == '.pickle':
        df.to_pickle(output_file)
    else:
        print(f"Unrecognized extension {ext}, saving as pickle")
        df.to_pickle(output_file)

    print(f"Saved dataframe with {len(df)} rows and {len(df.columns)} columns")

In [None]:
pickle_file = "Data/Normalized/kiwi_processed_normalized_exposure_up.pkl"  # Update with your file path

# 2. Create the dataframe (sample 1000 pixels for large datasets)
df = load_data_and_create_df(pickle_file)

# 3. Save the result
save_dataframe(df, "Data/parquet-data/kiwi_processed_normalized_exposure_up.parquet")

# 4. Show a sample of the result
print("\nSample of the dataframe:")
print(df.head())

# 5. Show some statistics
print("\nDataframe statistics:")
print(f"Total rows (pixels): {len(df)}")
print(f"Total columns: {len(df.columns)}")
print(f"Memory usage: {df.memory_usage().sum() / 1024 / 1024:.2f} MB")

In [7]:
pickle_file = "Data/Normalized/kiwi_processed_normalized_exposure_down.pkl"  # Update with your file path

# 2. Create the dataframe (sample 1000 pixels for large datasets)
df = load_data_and_create_df(pickle_file)

# 3. Save the result
save_dataframe(df, "Data/parquet-data/kiwi_processed_normalized_exposure_down.parquet")

# 4. Show a sample of the result
print("\nSample of the dataframe:")
print(df.head())

# 5. Show some statistics
print("\nDataframe statistics:")
print(f"Total rows (pixels): {len(df)}")
print(f"Total columns: {len(df.columns)}")
print(f"Memory usage: {df.memory_usage().sum() / 1024 / 1024:.2f} MB")

Found 21 excitation wavelengths
Generated 549 valid excitation-emission combinations
Image dimensions: 1024 x 1392 pixels
Created initial dataframe with 1425408 rows


  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name] = intensities
  df[col_name]

Final dataframe has 551 columns
Saving dataframe to kiwi_processed_normalized_exposure_down.parquet
Saved dataframe with 1425408 rows and 551 columns

Sample of the dataframe:
   x  y  420-300  430-300  440-300  450-300  460-300  470-300  480-300  \
0  0  0      4.8      0.0      7.2     11.2      7.2     15.2      0.0   
1  1  0      0.0      0.0     11.2      4.8     32.0      0.0     17.6   
2  2  0      0.0     13.6      0.0      0.0      7.2      0.0      4.8   
3  3  0      0.0      0.8      0.0     28.0     46.4     21.6     13.6   
4  4  0      8.8      0.0      0.0     17.6      7.2     25.6      8.8   

   490-300  ...  630-500  640-500  650-500  660-500  670-500  680-500  \
0      0.8  ...     14.0      1.0     17.0      0.0     48.0      0.0   
1      0.0  ...     35.0      0.0     24.0     19.0      0.0      0.0   
2     15.2  ...      0.0      0.0      0.0     17.0      0.0     19.0   
3      0.0  ...     32.0      9.0      0.0      0.0      4.0      0.0   
4     15.2  ..

In [5]:
import numpy as np
import pandas as pd
from typing import Dict, List, Tuple, Optional, Union, Any

def get_intensity(data_dict: Dict, x: int, y: int,
                 excitation: float, emission: float) -> float:
    """
    Extract the intensity value for a specific pixel and wavelength combination
    from the original hyperspectral data.

    Args:
        data_dict: The original hyperspectral data dictionary
        x: X coordinate (column) of the pixel
        y: Y coordinate (row) of the pixel
        excitation: Excitation wavelength
        emission: Emission wavelength

    Returns:
        The intensity value at the specified position

    Raises:
        ValueError: If the excitation or emission wavelength is not found
    """
    # Convert excitation to string key
    ex_str = str(excitation)

    # Check if excitation exists in the data
    if ex_str not in data_dict['data']:
        raise ValueError(f"Excitation wavelength {excitation}nm not found in data")

    # Get the data cube and wavelengths for this excitation
    cube = data_dict['data'][ex_str]['cube']
    wavelengths = data_dict['data'][ex_str]['wavelengths']

    # Check if the pixel coordinates are within bounds
    height, width, _ = cube.shape
    if x < 0 or x >= width or y < 0 or y >= height:
        raise ValueError(f"Pixel coordinates ({x},{y}) out of bounds for image of size {width}x{height}")

    # Find the emission wavelength index
    try:
        em_idx = wavelengths.index(emission)
    except ValueError:
        raise ValueError(f"Emission wavelength {emission}nm not found for excitation {excitation}nm")

    # Return the intensity value
    return cube[y, x, em_idx]

def validate_dataframe(data_dict: Dict, df: pd.DataFrame, num_samples: int = 10) -> bool:
    """
    Validate the transformed dataframe against the original data by comparing
    random samples.

    Args:
        data_dict: The original hyperspectral data dictionary
        df: The transformed dataframe
        num_samples: Number of random samples to validate

    Returns:
        True if all validations pass, False otherwise
    """
    print(f"Validating dataframe with {num_samples} random samples...")

    # Get a list of all excitation-emission combination columns
    combination_cols = [col for col in df.columns if '-' in col]

    # Function to extract excitation and emission from column name
    def parse_column_name(col_name):
        emission, excitation = map(float, col_name.split('-'))
        return emission, excitation

    # Randomly select rows and columns to validate
    np.random.seed(42)  # For reproducibility
    sample_indices = np.random.randint(0, len(df), num_samples)

    all_passed = True

    for idx in sample_indices:
        row = df.iloc[idx]
        x, y = int(row['x']), int(row['y'])

        # Randomly select a combination column
        col = np.random.choice(combination_cols)
        emission, excitation = parse_column_name(col)

        # Get the value from the dataframe
        df_value = row[col]

        try:
            # Get the value from the original data
            original_value = get_intensity(data_dict, x, y, excitation, emission)

            # Compare values (allow for small floating-point differences)
            if abs(df_value - original_value) < 1e-6:
                print(f"✓ Validation passed for pixel ({x},{y}), Ex={excitation}nm, Em={emission}nm: {df_value} == {original_value}")
            else:
                print(f"✗ Validation failed for pixel ({x},{y}), Ex={excitation}nm, Em={emission}nm: {df_value} != {original_value}")
                all_passed = False

        except ValueError as e:
            print(f"⚠ Validation error: {e}")
            all_passed = False

    if all_passed:
        print("All validations passed! The dataframe transformation is correct.")
    else:
        print("Some validations failed. Please check your transformation code.")

    return all_passed

def compare_specific_pixel(data_dict: Dict, df: pd.DataFrame, x: int, y: int,
                         excitation: float, emission: float) -> None:
    """
    Compare a specific pixel's value between the original data and the transformed dataframe.

    Args:
        data_dict: The original hyperspectral data dictionary
        df: The transformed dataframe
        x: X coordinate of the pixel
        y: Y coordinate of the pixel
        excitation: Excitation wavelength
        emission: Emission wavelength
    """
    # Find the row in the dataframe for this pixel
    pixel_row = df[(df['x'] == x) & (df['y'] == y)]

    if len(pixel_row) == 0:
        print(f"Pixel ({x},{y}) not found in the dataframe")
        return

    # Get the column name for this excitation-emission combination
    col_name = f"{int(emission)}-{int(excitation)}"

    if col_name not in df.columns:
        print(f"Column {col_name} not found in the dataframe")
        return

    # Get the value from the dataframe
    df_value = pixel_row[col_name].values[0]

    try:
        # Get the value from the original data
        original_value = get_intensity(data_dict, x, y, excitation, emission)

        # Compare values
        if abs(df_value - original_value) < 1e-6:
            print(f"✓ Values match for pixel ({x},{y}), Ex={excitation}nm, Em={emission}nm")
            print(f"  Original value: {original_value}")
            print(f"  Dataframe value: {df_value}")
        else:
            print(f"✗ Values do not match for pixel ({x},{y}), Ex={excitation}nm, Em={emission}nm")
            print(f"  Original value: {original_value}")
            print(f"  Dataframe value: {df_value}")

    except ValueError as e:
        print(f"⚠ Error: {e}")

def visualize_spectrum_comparison(data_dict: Dict, df: pd.DataFrame, x: int, y: int,
                                excitation: float) -> None:
    """
    Visualize and compare the emission spectrum for a specific pixel and excitation
    between the original data and the transformed dataframe.

    Args:
        data_dict: The original hyperspectral data dictionary
        df: The transformed dataframe
        x: X coordinate of the pixel
        y: Y coordinate of the pixel
        excitation: Excitation wavelength
    """
    import matplotlib.pyplot as plt

    # Convert excitation to string key
    ex_str = str(excitation)

    # Check if excitation exists in the data
    if ex_str not in data_dict['data']:
        print(f"Excitation wavelength {excitation}nm not found in data")
        return

    # Get the wavelengths and data cube for this excitation
    wavelengths = data_dict['data'][ex_str]['wavelengths']
    cube = data_dict['data'][ex_str]['cube']

    # Extract the spectrum from the original data
    original_spectrum = cube[y, x, :]

    # Find the row in the dataframe for this pixel
    pixel_row = df[(df['x'] == x) & (df['y'] == y)]

    if len(pixel_row) == 0:
        print(f"Pixel ({x},{y}) not found in the dataframe")
        return

    # Extract values from the dataframe for this excitation
    df_values = []
    df_wavelengths = []

    for emission in wavelengths:
        col_name = f"{int(emission)}-{int(excitation)}"
        if col_name in df.columns:
            df_values.append(pixel_row[col_name].values[0])
            df_wavelengths.append(emission)

    # Plot the comparison
    plt.figure(figsize=(10, 6))
    plt.plot(wavelengths, original_spectrum, 'o-', label='Original Data')
    plt.plot(df_wavelengths, df_values, 'x--', label='Transformed DataFrame')
    plt.xlabel('Emission Wavelength (nm)')
    plt.ylabel('Intensity')
    plt.title(f'Spectrum Comparison for Pixel ({x},{y}) at Excitation {excitation}nm')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

    # Calculate some statistics
    original_mean = np.mean(original_spectrum)
    df_mean = np.mean(df_values)

    print(f"Original data mean intensity: {original_mean:.2f}")
    print(f"DataFrame mean intensity: {df_mean:.2f}")
    print(f"Difference: {abs(original_mean - df_mean):.2f}")

In [6]:
pickle_file = "Data/pickle-data/kiwi_processed.pkl"  # Update with your file path
with open(pickle_file, 'rb') as f:
    data_dict = pickle.load(f)

df_file = "Data/parquet-data/hyperspectral_2d.parquet"  # Update with your file path
df = pd.read_parquet(df_file)

# Validate the dataframe (check 20 random samples)
validate_dataframe(data_dict, df, num_samples=20)

# Check a specific pixel
x, y = 100, 200  # Example coordinates
excitation = 350.0
emission = 480.0

compare_specific_pixel(data_dict, df, x, y, excitation, emission)

# Visualize a spectrum comparison
visualize_spectrum_comparison(data_dict, df, x, y, excitation)

Validating dataframe with 20 random samples...
✓ Validation passed for pixel (1358,829), Ex=410.0nm, Em=570.0nm: 79.0 == 79.0
✓ Validation passed for pixel (1183,510), Ex=420.0nm, Em=630.0nm: 120.0 == 120.0
✓ Validation passed for pixel (947,987), Ex=480.0nm, Em=580.0nm: 48.0 == 48.0
✓ Validation passed for pixel (155,871), Ex=450.0nm, Em=520.0nm: 24.0 == 24.0
✓ Validation passed for pixel (397,52), Ex=440.0nm, Em=500.0nm: 9.0 == 9.0
✓ Validation passed for pixel (712,187), Ex=370.0nm, Em=610.0nm: 0.0 == 0.0
✓ Validation passed for pixel (365,19), Ex=400.0nm, Em=550.0nm: 0.0 == 0.0
✓ Validation passed for pixel (893,348), Ex=360.0nm, Em=570.0nm: 6.0 == 6.0
✓ Validation passed for pixel (196,172), Ex=470.0nm, Em=490.0nm: 241.0 == 241.0
✓ Validation passed for pixel (1306,1020), Ex=410.0nm, Em=620.0nm: 24.0 == 24.0
✓ Validation passed for pixel (510,54), Ex=300.0nm, Em=700.0nm: 32.0 == 32.0
✓ Validation passed for pixel (905,11), Ex=390.0nm, Em=620.0nm: 53.0 == 53.0
✓ Validation passed f

In [13]:
import numpy as np
import pickle
from pathlib import Path
import copy
from typing import Dict, List, Tuple, Optional, Union

def normalize_hyperspectral_data(
    data_dict: Dict,
    reference_type: str = 'min',
    output_file: Optional[str] = None
) -> Dict:
    """
    Normalize hyperspectral data based on exposure time.

    Args:
        data_dict: Dictionary containing hyperspectral data with exposure time in metadata
        reference_type: Type of reference exposure time ('min', 'max', or float value)
        output_file: Path to save the normalized data pickle file (optional)

    Returns:
        Dictionary containing normalized hyperspectral data
    """
    print(f"Normalizing hyperspectral data using {reference_type} exposure as reference...")

    # Create a deep copy of the data to avoid modifying the original
    normalized_data = copy.deepcopy(data_dict)

    # Extract exposure times for each excitation wavelength
    exposure_times = {}

    for ex_str in data_dict['data'].keys():
        # Try to get exposure time from different possible locations in the data structure
        if 'raw' in data_dict['data'][ex_str] and 'expos_val' in data_dict['data'][ex_str]['raw']:
            exposure_times[ex_str] = data_dict['data'][ex_str]['raw']['expos_val']
        elif 'expos_val' in data_dict['data'][ex_str]:
            exposure_times[ex_str] = data_dict['data'][ex_str]['expos_val']

    if not exposure_times:
        raise ValueError("Could not find exposure time information in the data")

    print(f"Found exposure times for {len(exposure_times)} excitation wavelengths")

    # Determine the reference exposure time
    if reference_type == 'min':
        reference_exposure = min(exposure_times.values())
        print(f"Using minimum exposure time as reference: {reference_exposure}")
    elif reference_type == 'max':
        reference_exposure = max(exposure_times.values())
        print(f"Using maximum exposure time as reference: {reference_exposure}")
    elif isinstance(reference_type, (int, float)):
        reference_exposure = float(reference_type)
        print(f"Using provided exposure time as reference: {reference_exposure}")
    else:
        raise ValueError("Invalid reference_type. Use 'min', 'max', or a float value.")

    # Store the normalization information in metadata
    if 'metadata' not in normalized_data:
        normalized_data['metadata'] = {}

    normalized_data['metadata']['normalization'] = {
        'reference_type': reference_type,
        'reference_exposure': reference_exposure,
        'original_exposures': exposure_times
    }

    # Normalize each data cube
    print("Normalizing data cubes...")
    for ex_str, exposure in exposure_times.items():
        # Calculate normalization factor: E₁/E₂
        normalization_factor = reference_exposure / exposure

        # Apply normalization to the data cube
        original_cube = data_dict['data'][ex_str]['cube']

        # Normalize: I_ij^norm = I_ij × (E₁/E₂)
        normalized_data['data'][ex_str]['cube'] = original_cube * normalization_factor

        # Store normalization factor in metadata
        normalized_data['data'][ex_str]['normalization_factor'] = normalization_factor

        print(f"  Normalized excitation {ex_str}nm (Exposure: {exposure}, Factor: {normalization_factor:.4f})")

    # Save the normalized data if output file is provided
    if output_file:
        with open(output_file, 'wb') as f:
            pickle.dump(normalized_data, f)
        print(f"Normalized data saved to {output_file}")

    return normalized_data

def print_exposure_info(data_dict: Dict) -> None:
    """
    Print exposure time information from the data dictionary.

    Args:
        data_dict: Dictionary containing hyperspectral data
    """
    print("\nExposure Time Information:")

    exposure_times = {}

    for ex_str in data_dict['data'].keys():
        # Try to get exposure time from different possible locations
        if 'raw' in data_dict['data'][ex_str] and 'expos_val' in data_dict['data'][ex_str]['raw']:
            exposure_times[ex_str] = data_dict['data'][ex_str]['raw']['expos_val']
        elif 'expos_val' in data_dict['data'][ex_str]:
            exposure_times[ex_str] = data_dict['data'][ex_str]['expos_val']

    if not exposure_times:
        print("No exposure time information found in the data")
        return

    # Convert to sorted list of tuples
    sorted_exposures = sorted([(float(ex), exp) for ex, exp in exposure_times.items()])

    print(f"{'Excitation (nm)':<15} {'Exposure Time':<15}")
    print("-" * 30)

    for ex, exp in sorted_exposures:
        print(f"{ex:<15.1f} {exp:<15}")

    print("\nSummary:")
    print(f"Minimum exposure: {min(exposure_times.values())}")
    print(f"Maximum exposure: {max(exposure_times.values())}")
    print(f"Ratio max/min: {max(exposure_times.values()) / min(exposure_times.values()):.2f}")

def normalize_and_save_both_versions(
    input_file: str,
    output_dir: Optional[str] = None
) -> Tuple[Dict, Dict]:
    """
    Load data, normalize it using both min and max exposure times, and save both versions.

    Args:
        input_file: Path to the input pickle file
        output_dir: Directory to save the output files (default: same as input file)

    Returns:
        Tuple of (up_normalized_data, down_normalized_data)
    """
    # Load the data
    print(f"Loading data from {input_file}...")
    with open(input_file, 'rb') as f:
        data_dict = pickle.load(f)

    # Print exposure information
    print_exposure_info(data_dict)

    # Set up output directory
    input_path = Path(input_file)
    if output_dir is None:
        output_dir = input_path.parent
    else:
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

    # Create output file names
    base_name = input_path.stem
    up_output_file = output_dir / f"{base_name}_normalized_exposure_up.pkl"
    down_output_file = output_dir / f"{base_name}_normalized_exposure_down.pkl"

    # Normalize up (using max exposure as reference)
    up_normalized_data = normalize_hyperspectral_data(
        data_dict,
        reference_type='max',
        output_file=str(up_output_file)
    )

    # Normalize down (using min exposure as reference)
    down_normalized_data = normalize_hyperspectral_data(
        data_dict,
        reference_type='min',
        output_file=str(down_output_file)
    )

    print("\nNormalization complete!")
    print(f"Up-normalized data (max exposure reference) saved to: {up_output_file}")
    print(f"Down-normalized data (min exposure reference) saved to: {down_output_file}")

    return up_normalized_data, down_normalized_data

In [14]:
normalize_and_save_both_versions('Data/pickle-data/kiwi_processed.pkl', 'Data/Kiwi Experiment/pickles')

Loading data from kiwi_processed.pkl...

Exposure Time Information:
Excitation (nm) Exposure Time  
------------------------------
300.0           5000.0         
310.0           5000.0         
320.0           5000.0         
330.0           5000.0         
340.0           5000.0         
350.0           5000.0         
360.0           5000.0         
370.0           5000.0         
380.0           5000.0         
390.0           5000.0         
400.0           5000.0         
410.0           5000.0         
420.0           5000.0         
430.0           5000.0         
440.0           5000.0         
450.0           4000.0         
460.0           4000.0         
470.0           4000.0         
480.0           4000.0         
490.0           4000.0         
500.0           4000.0         

Summary:
Minimum exposure: 4000.0
Maximum exposure: 5000.0
Ratio max/min: 1.25
Normalizing hyperspectral data using max exposure as reference...
Found exposure times for 21 excitation wavelengths


({'data': {'300.0': {'cube': array([[[  6.,   0.,   9., ...,   0.,   0.,  30.],
            [  0.,   0.,  14., ...,   0.,  17.,   0.],
            [  0.,  17.,   0., ...,   0.,  11.,  24.],
            ...,
            [  0.,  19.,   0., ...,  50.,  55.,  11.],
            [ 35.,   0.,   9., ...,  66.,  30.,  40.],
            [ 27.,  19.,   0., ..., 127.,  84.,  48.]],
    
           [[ 14.,  22.,   1., ...,  19.,   9.,  24.],
            [ 17.,   0.,  30., ...,   4.,   0.,   0.],
            [ 17.,   0.,  19., ...,   9.,   0.,  11.],
            ...,
            [ 45.,   0.,  11., ...,   9.,  35.,  60.],
            [  9.,  50.,   0., ...,  58.,  30.,  22.],
            [ 22.,  24.,  50., ..., 174., 138.,  50.]],
    
           [[ 11.,  11.,   0., ...,  22.,  22.,   0.],
            [ 17.,  32.,  19., ...,   0.,  30.,   4.],
            [  4.,  11.,   0., ...,   0.,  14.,   6.],
            ...,
            [ 17.,   0.,  50., ...,  30.,  58.,  55.],
            [ 22.,  30.,  37., .