# Loading the Raw Data Pipeline

Imports

In [1]:
import os
import pickle
import warnings
from pathlib import Path
from typing import Dict, List, Tuple, Optional, Union

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from HyperspectralDataLoader import (
    HyperspectralDataLoader,
    load_data_and_create_df,
    normalize_and_save_both_versions,
    save_dataframe,
)

warnings.filterwarnings("ignore")

Experiment Data Loading

In [4]:
# Where the raw Kiwi acquisition (the *.im3 cubes) and its metadata workbook live.
data_path = '../Data/Kiwi'
metadata_path = '../Data/Kiwi/metadata.xlsx'

# Loader options kept together so they are easy to spot and tune.
loader_options = {
    # The recorded load log reports "cutoff offset: 20nm"; for 300 nm excitation
    # it removed the 580-620 nm emission band.
    # NOTE(review): presumably a +/-20 nm window around twice the excitation
    # wavelength -- confirm in HyperspectralDataLoader.
    'cutoff_offset': 20,
    # The recorded output shows ImageJ (Fiji) being initialised when this is on.
    'use_fiji': True,
    # Emit progress messages while loading.
    'verbose': True,
}

loader = HyperspectralDataLoader(
    data_path=data_path,
    metadata_path=metadata_path,
    **loader_options,
)

Initializing ImageJ (Fiji)...


In [5]:
# Destination of the pickled experiment. The string handed to save_to_pkl is
# unchanged; only the directory check below is new.
# NOTE(review): inputs are read from '../Data/Kiwi' but results are written
# under 'Data/...' relative to the working directory -- confirm this is intended.
kiwi_pickle_path = 'Data/Kiwi Experiment/pickles/KiwiData.pkl'

# Create the output directory *before* the expensive load so a missing folder
# cannot waste the whole run at the final save step. This is a no-op when the
# directory already exists (or if save_to_pkl creates it itself -- not visible here).
Path(kiwi_pickle_path).parent.mkdir(parents=True, exist_ok=True)

# Read every excitation cube and apply the cutoff filtering configured on the loader.
loader.load_data(apply_cutoff=True)

# Print the loader's own summary of what was read.
loader.print_summary()

# Persist the loaded experiment so later cells can start from the pickle.
loader.save_to_pkl(kiwi_pickle_path)

Loading 300.im3 ...


Operating in headless mode - the original ImageJ will have limited functionality.


Loading 310.im3 ...
Loading 320.im3 ...
Loading 330.im3 ...
Loading 340.im3 ...
Loading 350.im3 ...
Loading 360.im3 ...
Loading 370.im3 ...
Loading 380.im3 ...
Loading 390.im3 ...
Loading 400.im3 ...
Loading 410.im3 ...
Loading 420.im3 ...
Loading 430.im3 ...
Loading 440.im3 ...
Loading 450.im3 ...
Loading 460.im3 ...
Loading 470.im3 ...
Loading 480.im3 ...
Loading 490.im3 ...
Loading 500.im3 ...
Processing data with cutoff offset: 20nm...
Applied cutoff for excitation 300.0nm
Removed wavelengths between 580.0nm and 620.0nm
Original data shape: (1024, 1392, 31), filtered shape: (1024, 1392, 26)
Applied cutoff for excitation 310.0nm
Removed wavelengths between 600.0nm and 640.0nm
Original data shape: (1024, 1392, 31), filtered shape: (1024, 1392, 26)
Applied cutoff for excitation 320.0nm
Removed wavelengths between 620.0nm and 660.0nm
Original data shape: (1024, 1392, 31), filtered shape: (1024, 1392, 26)
Applied cutoff for excitation 330.0nm
Removed wavelengths between 640.0nm and 680.

Data Transformation - 2D Parquet - Spatial Data + Intensity Spectrum

In [6]:
# Turn the pickled experiment into a flat table (one row per pixel) and store
# it as parquet next to the pickle.
pickle_file = "Data/Kiwi Experiment/pickles/KiwiData.pkl"
df = load_data_and_create_df(pickle_file)
save_dataframe(df, "Data/Kiwi Experiment/pickles/KiwiData.parquet")

# Peek at the first rows to sanity-check the column layout.
print("\nSample of the dataframe:")
print(df.head())

# Size report: row/column counts plus the shallow memory footprint in MiB.
n_rows, n_cols = df.shape
memory_mb = df.memory_usage().sum() / 1024 ** 2

print("\nDataframe statistics:")
print(f"Total rows (pixels): {n_rows}")
print(f"Total columns: {n_cols}")
print(f"Memory usage: {memory_mb:.2f} MB")

Found 21 excitation wavelengths
Generated 562 valid excitation-emission combinations
Image dimensions: 1024 x 1392 pixels
Created initial dataframe with 1425408 rows
Final dataframe has 564 columns
Saving dataframe to Data/Kiwi Experiment/pickles/KiwiData.parquet
Saved dataframe with 1425408 rows and 564 columns

Sample of the dataframe:
   x  y  420-300  430-300  440-300  450-300  460-300  470-300  480-300  \
0  0  0      6.0      0.0      9.0     14.0      9.0     19.0      0.0   
1  1  0      0.0      0.0     14.0      6.0     40.0      0.0     22.0   
2  2  0      0.0     17.0      0.0      0.0      9.0      0.0      6.0   
3  3  0      0.0      1.0      0.0     35.0     58.0     27.0     17.0   
4  4  0     11.0      0.0      0.0     22.0      9.0     32.0     11.0   

   490-300  ...  630-500  640-500  650-500  660-500  670-500  680-500  \
0      1.0  ...     14.0      1.0     17.0      0.0     48.0      0.0   
1      0.0  ...     35.0      0.0     24.0     19.0      0.0      0.0

Normalizing by Exposure Time

In [3]:
# Exposure-time normalisation of the pickled experiment.
# `normalize_and_save_both_versions` is imported in the notebook's top import
# cell, so this cell no longer carries its own import.
#
# The call takes the source pickle and an output directory and returns two
# paths; the recorded run shows them ending in `_normalized_exposure_up.pkl`
# and `_normalized_exposure_down.pkl`.
# NOTE(review): presumably "up" scales to the maximum exposure and "down" to the
# minimum -- confirm in HyperspectralDataLoader.
up_file_path, down_file_path = normalize_and_save_both_versions(
    'Data/Kiwi Experiment/pickles/KiwiData.pkl',
    'Data/Kiwi Experiment/pickles/',
)

Loading data from Data/Kiwi Experiment/pickles/KiwiData.pkl...

Exposure Time Information:
Excitation (nm) Exposure Time  
------------------------------
300.0           5000.0         
310.0           5000.0         
320.0           5000.0         
330.0           5000.0         
340.0           5000.0         
350.0           5000.0         
360.0           5000.0         
370.0           5000.0         
380.0           5000.0         
390.0           5000.0         
400.0           5000.0         
410.0           5000.0         
420.0           5000.0         
430.0           5000.0         
440.0           5000.0         
450.0           4000.0         
460.0           4000.0         
470.0           4000.0         
480.0           4000.0         
490.0           4000.0         
500.0           4000.0         

Summary:
Minimum exposure: 4000.0
Maximum exposure: 5000.0
Ratio max/min: 1.25
Normalizing hyperspectral data using max exposure as reference...
Found exposure times for 21 

In [4]:
# Display the two output paths returned by normalize_and_save_both_versions.
up_file_path, down_file_path

(WindowsPath('Data/Kiwi Experiment/pickles/KiwiData_normalized_exposure_up.pkl'),
 WindowsPath('Data/Kiwi Experiment/pickles/KiwiData_normalized_exposure_down.pkl'))

In [7]:
# Build one flat dataframe per exposure-normalised pickle ("up" first, then
# "down"). Both names are bound only after both loads have succeeded.
df_up, df_down = (
    load_data_and_create_df(normalized_pickle)
    for normalized_pickle in (up_file_path, down_file_path)
)

Found 21 excitation wavelengths
Generated 562 valid excitation-emission combinations
Image dimensions: 1024 x 1392 pixels
Created initial dataframe with 1425408 rows
Final dataframe has 564 columns
Found 21 excitation wavelengths
Generated 562 valid excitation-emission combinations
Image dimensions: 1024 x 1392 pixels
Created initial dataframe with 1425408 rows
Final dataframe has 564 columns


In [8]:
# This is the first write into this directory in the notebook, so make sure it
# exists before attempting the two large saves. This is a no-op when the folder
# is already there (or if save_dataframe creates it itself -- not visible here).
# NOTE(review): "parquests" looks like a typo for "parquets"; the name is kept
# as-is because other code may already read from this location.
parquet_dir = Path("Data/Kiwi Experiment/parquests")
parquet_dir.mkdir(parents=True, exist_ok=True)

# Persist both exposure-normalised tables as parquet (paths unchanged).
save_dataframe(df_up, "Data/Kiwi Experiment/parquests/KiwiDataExposureUpNormalized.parquet")
save_dataframe(df_down, "Data/Kiwi Experiment/parquests/KiwiDataExposureDownNormalized.parquet")

Saving dataframe to Data/Kiwi Experiment/parquests/KiwiDataExposureUpNormalized.parquet
Saved dataframe with 1425408 rows and 564 columns
Saving dataframe to Data/Kiwi Experiment/parquests/KiwiDataExposureDownNormalized.parquet
Saved dataframe with 1425408 rows and 564 columns
