In [4]:
%load_ext autoreload
%autoreload 2

import spectral_unmixing_tools as el_spectral

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Extract spectra as array

In [6]:
import rasterio
import numpy as np

class ENVIProcessor:
    """Loads an ENVI raster with rasterio and returns its pixel data as an array.

    Attributes:
    - file_path: Path handed to ``rasterio.open``.
    - data: ``None`` until the raster has been read; afterwards the array
      returned by the dataset's ``read()`` call (rasterio documents this as
      a (bands, rows, cols) array when all bands are read).
    - file_type: Always the string "envi".
    """

    def __init__(self, file_path):
        self.file_path = file_path
        self.data = None  # This will hold the raster data array
        self.file_type = "envi"
        # Other attributes...

    def load_data(self):
        """Loads the raster data from the file_path into self.data (all bands)."""
        with rasterio.open(self.file_path) as src:
            self.data = src.read()  # Read all bands

    def get_chunk_from_extent(self, corrections=None, resample=False):
        """Return the full-extent chunk of the raster, flipped along the row axis.

        Parameters:
        - corrections: Currently ignored. Defaults to None instead of a shared
          mutable list.
        - resample: Currently ignored.

        Returns:
        - ``self.data`` reversed along axis 1 (rows). ``np.flip`` returns a
          view, so the result shares memory with ``self.data``.

        The raster is read only if ``self.data`` is still None; call
        ``load_data()`` explicitly to force a re-read from disk.
        """
        if self.data is None:
            self.load_data()  # Ensure data is loaded

        # The chunk covers the whole raster, so the array's own extent is
        # used directly; there is no need to reopen the file for width/height.
        chunk = self.data[:, :, :]

        # Apply any processing to chunk here...
        # For example, to demonstrate, flip chunk vertically
        return np.flip(chunk, axis=1)



# Example usage:
# raster_path points at the ENVI reflectance file to read; change it for your own data.
raster_path = "NIWOT_calibration_flight_08_2020/NEON_D13_NIWO_DP1_20200801_161441_reflectance/NEON_D13_NIWO_DP1_20200801_161441_reflectance"
processor = ENVIProcessor(file_path=raster_path)
# Read the raster and keep the resulting array as `chunk`.
chunk = processor.get_chunk_from_extent(
    resample=False,
    corrections=['some_correction'],
)



In [4]:
## Flatten array into a 2D DataFrame


In [7]:
import pandas as pd
import numpy as np

def flatten_array_to_dataframe(array):
    """
    Turn a (bands, rows, cols) array into a wide, one-row-per-pixel DataFrame.

    Parameters:
    - array: A 3D numpy array of shape (bands, rows, cols).

    Returns:
    - A pandas DataFrame with one column per band ('Band_1', 'Band_2', ...)
      followed by 'Pixel_Row' and 'Pixel_Col'. Pixels appear in row-major
      order (all columns of row 0, then row 1, ...).

    Raises:
    - ValueError: if the input does not have exactly three dimensions.
    """
    shape = array.shape
    if len(shape) != 3:
        raise ValueError("Input array must be 3-dimensional.")

    n_bands, n_rows, n_cols = shape

    # One column label per band, numbered from 1
    band_labels = [f'Band_{band_no}' for band_no in range(1, n_bands + 1)]

    # (bands, rows*cols) -> (rows*cols, bands): each row is one pixel's spectrum
    pixels_by_band = array.reshape(n_bands, -1).T
    df = pd.DataFrame(pixels_by_band, columns=band_labels)

    # Row/column index grids, flattened in the same row-major pixel order
    row_grid, col_grid = np.indices((n_rows, n_cols))
    df['Pixel_Row'] = row_grid.ravel()
    df['Pixel_Col'] = col_grid.ravel()

    return df

# Example usage:
# Assuming 'chunk' is your 3D numpy array of shape (426, 11138, 1031)
# NOTE(review): the next line rebinds `chunk` to freshly generated random values,
# so whatever `chunk` held before this cell ran is no longer used below.
# 426 * 11138 * 1031 float64 values is roughly 39 GB of memory.
chunk = np.random.rand(426, 11138, 1031)  # Example array, replace with your actual data
# Wide table: one row per pixel, one column per band plus Pixel_Row / Pixel_Col
df = flatten_array_to_dataframe(chunk)



In [8]:
# Display the flattened frame (the rendering below is truncated to the first/last rows and columns)
df

Unnamed: 0,Band_1,Band_2,Band_3,Band_4,Band_5,Band_6,Band_7,Band_8,Band_9,Band_10,...,Band_419,Band_420,Band_421,Band_422,Band_423,Band_424,Band_425,Band_426,Pixel_Row,Pixel_Col
0,0.687524,0.304722,0.419995,0.259451,0.507092,0.323172,0.403418,0.410082,0.041743,0.759389,...,0.382299,0.014212,0.008055,0.713550,0.675177,0.513762,0.248871,0.072115,0,0
1,0.718638,0.490737,0.360811,0.531658,0.511925,0.502386,0.924304,0.908558,0.062361,0.543072,...,0.448249,0.347809,0.912210,0.629690,0.046711,0.932032,0.579875,0.617307,0,1
2,0.911718,0.410745,0.934476,0.256145,0.123791,0.206306,0.282671,0.817732,0.919495,0.264951,...,0.284105,0.207181,0.075888,0.470861,0.049957,0.371652,0.781437,0.655946,0,2
3,0.405207,0.791517,0.936675,0.433172,0.866013,0.461116,0.322686,0.013696,0.010301,0.600920,...,0.764502,0.966043,0.137867,0.466293,0.657372,0.682398,0.611508,0.221807,0,3
4,0.063385,0.777884,0.941228,0.666460,0.575846,0.827420,0.859770,0.105069,0.319631,0.696536,...,0.038016,0.638541,0.814261,0.258037,0.573052,0.936515,0.425533,0.659791,0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11483273,0.762910,0.475233,0.448766,0.360571,0.277480,0.073007,0.423881,0.083852,0.307329,0.339650,...,0.202143,0.914050,0.561372,0.528521,0.656066,0.789577,0.642838,0.791538,11137,1026
11483274,0.517250,0.058740,0.460863,0.808285,0.752132,0.039975,0.165822,0.996934,0.440954,0.302505,...,0.252671,0.592800,0.900815,0.566804,0.788629,0.046903,0.734878,0.644062,11137,1027
11483275,0.119028,0.988765,0.295493,0.011601,0.173720,0.642613,0.409197,0.529874,0.447891,0.788607,...,0.692102,0.625139,0.715138,0.082538,0.059340,0.858740,0.892223,0.610468,11137,1028
11483276,0.504178,0.161125,0.412866,0.960469,0.941566,0.294474,0.123558,0.640859,0.229713,0.782693,...,0.919020,0.741212,0.643234,0.022654,0.111536,0.840001,0.001191,0.003219,11137,1029


## Reshape data frame and change labels

In [None]:
import numpy as np

def iterate_flatten_melt_array(array):
    """
    Lazily walk a 3D array and emit one long-format ("melted") record per element.

    Parameters:
    - array: A 3D numpy array of shape (bands, rows, cols).

    Yields:
    - Tuple of (Pixel_Row, Pixel_Col, Band_ID, value), where Band_ID is the
      string 'Band_<n>' (numbered from 1) and value is array[band, row, col].
      All pixels of band 1 are emitted first (row by row), then band 2, etc.
    """
    n_bands, n_rows, n_cols = array.shape

    for band_idx in range(n_bands):
        band_label = f'Band_{band_idx+1}'
        # Emit every pixel of this band in row-major order
        yield from (
            (r, c, band_label, array[band_idx, r, c])
            for r in range(n_rows)
            for c in range(n_cols)
        )

# Example usage
# NOTE(review): this rebinds `chunk` to random values (about 39 GB as float64 at this shape).
chunk = np.random.rand(426, 11138, 1031)  # Replace with your actual data

# To demonstrate or test the generator, you can iterate through a small portion of it
for i, data_point in enumerate(iterate_flatten_melt_array(chunk)):
    print(data_point)
    # The check runs after the print, so 102 tuples (i = 0..101) are shown before stopping
    if i > 100:  # Adjust this condition to control how many items you want to print
        break


In [None]:
import csv

# Open a CSV file for writing (newline='' lets the csv module control line endings)
with open('melted_data.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    # The fourth column is labelled 'Wavelength'; it receives the array value
    # yielded by iterate_flatten_melt_array for each (band, row, col) element.
    writer.writerow(['Pixel_Row', 'Pixel_Col', 'Band_ID', 'Wavelength'])  # Write header

    # Write each data point: one CSV row per array element, so the file
    # grows to bands * rows * cols data rows for the whole of `chunk`.
    for data_point in iterate_flatten_melt_array(chunk):
        writer.writerow(data_point)
import numpy as np
import pandas as pd

def batch_flatten_melt_array(array, batch_size=1000000):
    """
    Generator to iterate over a 3D numpy array and yield batches of "melted" data.

    Parameters:
    - array: A 3D numpy array of shape (bands, rows, cols).
    - batch_size: The number of pixels (row/col positions) covered by each
      batch. Every pixel expands to one DataFrame row per band, so a full
      batch holds batch_size * bands rows. Must be a positive integer.

    Yields:
    - A DataFrame with columns 'Pixel_Row', 'Pixel_Col', 'Band_ID' and
      'Wavelength'. Rows are ordered pixel by pixel (row-major), with all
      bands of a pixel listed consecutively as 'Band_1', 'Band_2', ...
      The 'Wavelength' column holds the value array[band, row, col].

    Raises:
    - ValueError: if the array is not 3-dimensional or batch_size is less
      than 1. Because this is a generator, the error surfaces on the first
      iteration rather than at call time.
    """
    array = np.asarray(array)
    if array.ndim != 3:
        raise ValueError("Input array must be 3-dimensional.")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer.")

    bands, rows, cols = array.shape
    total_pixels = rows * cols
    band_ids = np.array([f'Band_{band+1}' for band in range(bands)], dtype=object)

    for start_index in range(0, total_pixels, batch_size):
        end_index = min(start_index + batch_size, total_pixels)
        n_pixels = end_index - start_index

        # Flat (row-major) pixel numbers of this batch and their row/col positions
        pixel_index = np.arange(start_index, end_index, dtype=np.int64)
        pixel_rows = pixel_index // cols
        pixel_cols = pixel_index % cols

        # Only touch the image rows this batch spans, so a non-contiguous
        # input (e.g. a flipped view) is never copied as a whole.
        first_row = start_index // cols
        last_row = (end_index - 1) // cols
        block = array[:, first_row:last_row + 1, :].reshape(
            bands, (last_row - first_row + 1) * cols
        )
        offset = start_index - first_row * cols
        # (bands, n_pixels) -> (n_pixels, bands) -> flat: bands vary fastest
        values = block[:, offset:offset + n_pixels].T.reshape(-1)

        batch_df = pd.DataFrame({
            'Pixel_Row': np.repeat(pixel_rows, bands),
            'Pixel_Col': np.repeat(pixel_cols, bands),
            'Band_ID': np.tile(band_ids, n_pixels),
            'Wavelength': values,
        })
        yield batch_df

# Example usage
# NOTE(review): this rebinds `chunk` to random values (about 39 GB as float64 at this shape).
chunk = np.random.rand(426, 11138, 1031)  # Replace with your actual data

# Iterate through each batch and process
for i, batch_df in enumerate(batch_flatten_melt_array(chunk)):
    print(f"Processing batch {i+1}")
    # Process the batch_df here
    # For example, you could save each batch to a separate CSV file
    # With the default batch_size of 1000000 pixels and 426 bands, this first
    # file receives 426,000,000 data rows.
    batch_df.to_csv(f'melted_data_batch_{i+1}.csv', index=False)
    if i == 0:  # For demonstration, break after processing the first batch
        break
