# Loading the Raw Data Pipeline

Imports

In [1]:
import os
import pickle
import warnings
from pathlib import Path
from typing import Dict, List, Tuple, Optional, Union

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from HyperspectralDataLoader import (
    HyperspectralDataLoader,
    load_data_and_create_df,
    load_masked_data_and_create_df,
    normalize_and_save_both_versions,
    save_dataframe,
)

warnings.filterwarnings("ignore")

Experiment Data Loading

In [None]:
# Inputs for the loader: the experiment folder and its metadata spreadsheet.
data_path = '../Data/Kiwi'
metadata_path = '../Data/Kiwi/metadata.xlsx'

# Fixed loader options, grouped so they can be tuned in one place.
loader_options = {
    "cutoff_offset": 30,
    "use_fiji": True,
    "verbose": True,
}

loader = HyperspectralDataLoader(
    data_path=data_path,
    metadata_path=metadata_path,
    **loader_options,
)

In [None]:
# Destination of the pickled loader contents; later cells read this same file.
# NOTE(review): this path is relative to the working directory, whereas the
# loader inputs use '../Data/...' — confirm both resolve from the same cwd.
raw_pickle_path = 'Data/Kiwi Experiment/pickles/KiwiData.pkl'

# Load with the cutoff applied, report what was read, then persist to disk.
loader.load_data(apply_cutoff=True)
loader.print_summary()
loader.save_to_pkl(raw_pickle_path)

Data Transformation - 2D Parquet - Spatial Data + Intensity Spectrum

In [None]:
pickle_file = "Data/Kiwi Experiment/pickles/KiwiData.pkl"

# Build a dataframe from the pickled data and write it out as parquet.
# NOTE(review): this parquet lands in the "pickles" folder while the later
# cells write to "parquests" — confirm the location is intentional.
df = load_data_and_create_df(pickle_file)
save_dataframe(df, "Data/Kiwi Experiment/pickles/KiwiData.parquet")

# Quick look at the first rows.
print("\nSample of the dataframe:")
print(df.head())

# Size summary: row/column counts and the shallow memory footprint in MB.
row_count, column_count = df.shape
memory_mb = df.memory_usage().sum() / 1024 / 1024

print("\nDataframe statistics:")
print(f"Total rows (pixels): {row_count}")
print(f"Total columns: {column_count}")
print(f"Memory usage: {memory_mb:.2f} MB")

Normalizing by Exposure Time

In [None]:
# Exposure-time normalization of the raw pickle.
# `normalize_and_save_both_versions` is imported in the notebook's import cell
# so that every dependency is declared in one place.
# The call yields two values, unpacked here as the "up" and "down" file paths
# (presumably the two normalized pickles written under the output folder —
# confirm against HyperspectralDataLoader); later cells load both.
up_file_path, down_file_path = normalize_and_save_both_versions(
    'Data/Kiwi Experiment/pickles/KiwiData.pkl',
    'Data/Kiwi Experiment/pickles/',
)

In [None]:
# Display the two paths returned by the normalization step.
(up_file_path, down_file_path)

In [None]:
# Load the "up" file first, then the "down" file; both names are bound only
# after both loads have succeeded.
df_up, df_down = map(load_data_and_create_df, (up_file_path, down_file_path))

In [None]:
# Write each exposure-normalized dataframe to its own parquet file.
for frame, parquet_path in (
    (df_up, "Data/Kiwi Experiment/parquests/KiwiDataExposureUpNormalized.parquet"),
    (df_down, "Data/Kiwi Experiment/parquests/KiwiDataExposureDownNormalized.parquet"),
):
    save_dataframe(frame, parquet_path)

Masked Data Processing

In [5]:
# Masked, non-normalized data: pickle in, parquet out.
# NOTE(review): the masked pickle is not produced anywhere in the visible
# cells — confirm which step creates it.
masked_pickle_file = "Data/Kiwi Experiment/pickles/masked_KiwiData.pkl"
masked_parquet_file = "Data/Kiwi Experiment/parquests/KiwiDataMasked.parquet"

df_masked = load_masked_data_and_create_df(masked_pickle_file)
save_dataframe(df_masked, masked_parquet_file)

Found 21 excitation wavelengths
Generated 562 valid excitation-emission combinations
Image dimensions: 1024 x 1392 pixels
Identified 245642 masked pixels, keeping 1179766 pixels
Created initial dataframe with 1179766 rows
Final dataframe has 564 columns
Saving dataframe to Data/Kiwi Experiment/parquests/KiwiDataMasked.parquet
Saved dataframe with 1179766 rows and 564 columns


In [6]:
# Masked, exposure-"up"-normalized data: pickle in, parquet out.
# This rebinds `masked_pickle_file` and `df_masked` for this variant.
masked_pickle_file = "Data/Kiwi Experiment/pickles/masked_KiwiData_normalized_exposure_up.pkl"
masked_parquet_file = "Data/Kiwi Experiment/parquests/KiwiDataMaskedUpNormalized.parquet"

df_masked = load_masked_data_and_create_df(masked_pickle_file)
save_dataframe(df_masked, masked_parquet_file)

Found 21 excitation wavelengths
Generated 562 valid excitation-emission combinations
Image dimensions: 1024 x 1392 pixels
Identified 245642 masked pixels, keeping 1179766 pixels
Created initial dataframe with 1179766 rows
Final dataframe has 564 columns
Saving dataframe to Data/Kiwi Experiment/parquests/KiwiDataMaskedUpNormalized.parquet
Saved dataframe with 1179766 rows and 564 columns


In [7]:
# Masked, exposure-"down"-normalized data: pickle in, parquet out.
# This rebinds `masked_pickle_file` and `df_masked` for this variant.
masked_pickle_file = "Data/Kiwi Experiment/pickles/masked_KiwiData_normalized_exposure_down.pkl"
masked_parquet_file = "Data/Kiwi Experiment/parquests/KiwiDataMaskedDownNormalized.parquet"

df_masked = load_masked_data_and_create_df(masked_pickle_file)
save_dataframe(df_masked, masked_parquet_file)

Found 21 excitation wavelengths
Generated 562 valid excitation-emission combinations
Image dimensions: 1024 x 1392 pixels
Identified 245642 masked pixels, keeping 1179766 pixels
Created initial dataframe with 1179766 rows
Final dataframe has 564 columns
Saving dataframe to Data/Kiwi Experiment/parquests/KiwiDataMaskedDownNormalized.parquet
Saved dataframe with 1179766 rows and 564 columns
