In [1]:
import numpy as np
import os
from spectral.io import envi
import pandas as pd
import rasterio
import matplotlib.pyplot as plt

In [None]:

def show_band(hsi_path, band_index=30, title=''):
    """Display one band of a raster file as an image with a colorbar.

    Parameters
    ----------
    hsi_path : str
        Path of the raster (e.g. a BIL file) to open with rasterio. An empty
        path is treated as "not configured yet": a message is printed and
        nothing is opened or plotted.
    band_index : int
        Band number handed to ``dataset.read``.
    title : str
        Title drawn above the image.

    Returns
    -------
    The band array that was displayed, or ``None`` when the path is empty.
    """
    if not hsi_path:
        print(f"No file path set for '{title}', skipping preview.")
        return None

    # The band is fully read into memory, so the dataset can be closed before plotting.
    with rasterio.open(hsi_path) as dataset:
        band_values = dataset.read(band_index)

    plt.imshow(band_values)
    plt.colorbar()
    plt.title(title)
    plt.show()
    return band_values


# Fill in the two file paths below to compare band 30 of both cubes.
hsi_data = ''
band1 = show_band(hsi_data, band_index=30, title='Random Cube')

hsi_data = ''
band = show_band(hsi_data, band_index=30, title='Original')

In [2]:
def calculate_mean_spectrum(image_data):
    """Average the cube over its first two axes.

    For an input of shape (rows, cols, bands) the result holds one mean value
    per band, i.e. it has shape (bands,).
    """
    spatial_axes = (0, 1)
    return np.mean(image_data, axis=spatial_axes)



def reshape_spectrum(mean_spectrum):
    """Return the spectrum viewed as a (1, 1, n_bands) cube.

    n_bands is ``len(mean_spectrum)``, so a 1D spectrum becomes a single-pixel
    image with one value per band.
    """
    n_bands = len(mean_spectrum)
    single_pixel_shape = (1, 1, n_bands)
    return mean_spectrum.reshape(single_pixel_shape)



def save_spectrum(mean_spectrum, hdr_data, output_spec_path, output_hdr_path):
    """Write a mean spectrum as a raw binary .spec file plus an ENVI header.

    Parameters
    ----------
    mean_spectrum : 1D numpy array
        One value per band.
    hdr_data : dict
        ENVI metadata of the source image. It is used as a template only and
        is NOT modified; the caller's dictionary stays intact.
    output_spec_path : str
        Destination of the binary spectrum.
    output_hdr_path : str
        Destination of the accompanying header.

    The binary is always written as little-endian 32-bit floats, and the
    header is updated to describe exactly that layout. Without this, the
    header would keep the source cube's 'data type' / 'byte order' /
    'header offset' even when they do not match the bytes on disk.
    """
    # Reshape to a single-pixel cube and pin the on-disk representation.
    spectrum_cube = np.asarray(reshape_spectrum(mean_spectrum), dtype='<f4')
    spectrum_cube.tofile(output_spec_path)

    # Work on a copy so the source image's metadata is left untouched.
    spec_header = dict(hdr_data)
    spec_header['lines'] = 1
    spec_header['samples'] = 1
    spec_header['bands'] = len(mean_spectrum)
    spec_header['data type'] = 4      # ENVI code for 32-bit float
    spec_header['byte order'] = 0     # ENVI code for little-endian
    spec_header['header offset'] = 0  # the .spec file has no embedded header

    envi.write_envi_header(output_hdr_path, spec_header)



def create_output_structure(input_folder, output_base_folder, current_file):
    """Mirror the sub-folder of ``current_file`` under ``output_base_folder``.

    The folder of ``current_file`` relative to ``input_folder`` is re-created
    below ``output_base_folder`` (if it does not exist yet) and its path is
    returned.
    """
    sub_folder = os.path.dirname(os.path.relpath(current_file, input_folder))
    target_folder = os.path.join(output_base_folder, sub_folder)

    # exist_ok: many files share one folder, so repeated calls must not fail.
    os.makedirs(target_folder, exist_ok=True)
    return target_folder



def process_hsi_images(input_folder, output_base_folder):
    """Walk ``input_folder`` and write the mean spectrum of every .bil image.

    For each ``<name>.bil`` that has a ``<name>.bil.hdr`` next to it, the image
    is loaded, averaged with ``calculate_mean_spectrum`` and saved with
    ``save_spectrum`` as ``<name>.spec`` / ``<name>.spec.hdr`` in the matching
    sub-folder of ``output_base_folder``.

    Images whose header file is missing are reported and skipped instead of
    aborting the whole run.
    """
    for root, _dirs, files in os.walk(input_folder):
        for file in files:
            if not file.endswith('.bil'):
                continue

            # The ENVI header sits next to the binary as <name>.bil.hdr
            bil_path = os.path.join(root, file)
            hdr_path = bil_path + '.hdr'

            if not os.path.exists(hdr_path):
                print(f"Header file missing for {bil_path}, skipping.")
                continue

            # Load the hyperspectral image and reduce it to one spectrum
            img = envi.open(hdr_path, bil_path)
            image_data = img.load()
            mean_spectrum = calculate_mean_spectrum(image_data)

            # Create corresponding output folder structure
            output_folder = create_output_structure(input_folder, output_base_folder, bil_path)

            # Swap only the extension: str.replace would also rewrite any
            # other '.bil' occurrence inside the file name.
            stem = os.path.splitext(file)[0]
            output_spec_path = os.path.join(output_folder, stem + '.spec')
            output_hdr_path = output_spec_path + '.hdr'

            save_spectrum(mean_spectrum, img.metadata, output_spec_path, output_hdr_path)

            print(f"Processed: {bil_path} -> {output_spec_path}")

if __name__ == "__main__":
    # Cubes are read from `input_folder`; the mean spectra go to `output_base_folder`.
    input_folder, output_base_folder = 'masked_datacube_30', 'masked_datacube_30_SM'
    process_hsi_images(input_folder, output_base_folder)

Processed: masked_datacube_30\AE\espece10_acq1_cube_1.bil -> masked_datacube_30_SM\AE\espece10_acq1_cube_1.spec
Processed: masked_datacube_30\AE\espece10_acq1_cube_10.bil -> masked_datacube_30_SM\AE\espece10_acq1_cube_10.spec
Processed: masked_datacube_30\AE\espece10_acq1_cube_11.bil -> masked_datacube_30_SM\AE\espece10_acq1_cube_11.spec
Processed: masked_datacube_30\AE\espece10_acq1_cube_12.bil -> masked_datacube_30_SM\AE\espece10_acq1_cube_12.spec
Processed: masked_datacube_30\AE\espece10_acq1_cube_13.bil -> masked_datacube_30_SM\AE\espece10_acq1_cube_13.spec
Processed: masked_datacube_30\AE\espece10_acq1_cube_14.bil -> masked_datacube_30_SM\AE\espece10_acq1_cube_14.spec
Processed: masked_datacube_30\AE\espece10_acq1_cube_15.bil -> masked_datacube_30_SM\AE\espece10_acq1_cube_15.spec
Processed: masked_datacube_30\AE\espece10_acq1_cube_16.bil -> masked_datacube_30_SM\AE\espece10_acq1_cube_16.spec
Processed: masked_datacube_30\AE\espece10_acq1_cube_17.bil -> masked_datacube_30_SM\AE\esp

In [3]:
# Convert each .spec mean spectrum (with its .spec.hdr header) to a .npy file

def load_and_reshape_spec(spec_file, hdr_file):
    """Load a single-pixel .spec file and return it as a (1, bands) array.

    Parameters
    ----------
    spec_file : str
        Raw binary file, read as 32-bit floats.
    hdr_file : str
        ENVI header providing 'lines', 'samples' and 'bands'.

    Returns
    -------
    numpy array of shape (1, bands), dtype float32.

    Raises
    ------
    ValueError
        If the header does not describe a 1 x 1 image, or if its band count
        differs from the number of values actually stored in the file.
    """
    data = np.fromfile(spec_file, dtype=np.float32)

    hdr_data = envi.read_envi_header(hdr_file)
    lines = int(hdr_data['lines'])
    samples = int(hdr_data['samples'])
    bands = int(hdr_data['bands'])

    if lines != 1 or samples != 1 or bands != data.size:
        # Report both the declared shape and what the file really contains.
        raise ValueError(
            f"Unexpected shape in {spec_file}: header declares "
            f"(lines, samples, bands) = {(lines, samples, bands)} but the file holds "
            f"{data.size} float32 values; expected (1, 1, {data.size})"
        )

    return data.reshape((1, bands))



def create_output_structure(input_folder, output_base_folder, current_file):
    """Create, below ``output_base_folder``, the folder matching ``current_file``.

    Same contract as the identically named function defined in an earlier
    cell of this notebook (this definition shadows it): the folder of
    ``current_file`` relative to ``input_folder`` is mirrored in the output
    tree and the resulting folder path is returned.
    """
    path_inside_input = os.path.relpath(current_file, input_folder)
    mirrored_folder = os.path.join(output_base_folder, os.path.dirname(path_inside_input))
    os.makedirs(mirrored_folder, exist_ok=True)
    return mirrored_folder



def process_spec_files(input_folder, output_base_folder):
    """Walk ``input_folder`` and convert every .spec file to a .npy file.

    Each ``<name>.spec`` with a ``<name>.spec.hdr`` next to it is loaded with
    ``load_and_reshape_spec`` and saved as ``<name>.npy`` in the matching
    sub-folder of ``output_base_folder``. Files without a header, or whose
    content fails validation (ValueError), are reported and skipped.
    """
    for root, _dirs, files in os.walk(input_folder):
        for file in files:
            if not file.endswith('.spec'):
                continue

            spec_file = os.path.join(root, file)
            hdr_file = spec_file + '.hdr'

            if not os.path.exists(hdr_file):
                print(f"Header file missing for {spec_file}, skipping.")
                continue

            try:
                reshaped_data = load_and_reshape_spec(spec_file, hdr_file)
            except ValueError as e:
                print(f"Error processing {spec_file}: {e}")
                continue

            # Create corresponding output folder structure
            output_folder = create_output_structure(input_folder, output_base_folder, spec_file)

            # Swap only the extension: str.replace would also rewrite any
            # other '.spec' occurrence inside the file name.
            npy_name = os.path.splitext(file)[0] + '.npy'
            output_npy_path = os.path.join(output_folder, npy_name)

            np.save(output_npy_path, reshaped_data)

            print(f"Processed: {spec_file} -> {output_npy_path}")



if __name__ == "__main__":
    # .spec files are read from `input_folder`; .npy files go to `output_base_folder`.
    input_folder, output_base_folder = 'masked_datacube_30_SM', 'masked_datacube_30_SM_npy'
    process_spec_files(input_folder, output_base_folder)

Processed: masked_datacube_30_SM\AE\espece10_acq1_cube_1.spec -> masked_datacube_30_SM_npy\AE\espece10_acq1_cube_1.npy
Processed: masked_datacube_30_SM\AE\espece10_acq1_cube_10.spec -> masked_datacube_30_SM_npy\AE\espece10_acq1_cube_10.npy
Processed: masked_datacube_30_SM\AE\espece10_acq1_cube_11.spec -> masked_datacube_30_SM_npy\AE\espece10_acq1_cube_11.npy
Processed: masked_datacube_30_SM\AE\espece10_acq1_cube_12.spec -> masked_datacube_30_SM_npy\AE\espece10_acq1_cube_12.npy
Processed: masked_datacube_30_SM\AE\espece10_acq1_cube_13.spec -> masked_datacube_30_SM_npy\AE\espece10_acq1_cube_13.npy
Processed: masked_datacube_30_SM\AE\espece10_acq1_cube_14.spec -> masked_datacube_30_SM_npy\AE\espece10_acq1_cube_14.npy
Processed: masked_datacube_30_SM\AE\espece10_acq1_cube_15.spec -> masked_datacube_30_SM_npy\AE\espece10_acq1_cube_15.npy
Processed: masked_datacube_30_SM\AE\espece10_acq1_cube_16.spec -> masked_datacube_30_SM_npy\AE\espece10_acq1_cube_16.npy
Processed: masked_datacube_30_SM\A

In [5]:
# Spot-check one converted spectrum: show its values, then its shape.
data = np.load('masked_datacube_30_SM_npy/DC/espece4_acq6_cube_28.npy')
print(data, data.shape, sep='\n')

[[0.3889766  0.3811276  0.37380546 0.34433085 0.29300287 0.23292617
  0.19806662 0.20093787 0.20951054 0.19925512 0.21403487 0.21266802
  0.19800559 0.18837206 0.20815645 0.22351868 0.22870605 0.21558735
  0.21257846 0.23346694 0.24984208 0.25058493 0.23160554 0.22789542
  0.2416039  0.26781183 0.2695791  0.26543674 0.2594728  0.25661802
  0.25206152 0.25285503 0.2535358  0.25389853 0.2567301  0.2632745
  0.26309687 0.27196494 0.28011075 0.27997544 0.29595888 0.28724974
  0.30410627 0.30557066 0.3272229  0.31583455 0.32305542 0.32250246
  0.32868364 0.3270231  0.32804948 0.3248236  0.32772934 0.3290878
  0.33361807 0.34168562 0.357865   0.36678448 0.35570848 0.3419643
  0.34662905 0.32242733 0.31980604 0.3197012  0.70900613 0.7635859
  0.7344197  0.73091024 0.7096442  0.6929136  0.65401024 0.6398882
  0.60726845 0.604925   0.6035048  0.5964794  0.60792536 0.6175351
  0.6078063  0.6269721  0.62038743 0.6133676  0.60920453 0.59172046
  0.61198366 0.60576475 0.61915714 0.6140133  0.604653

In [6]:
# Labels: map every .npy file path to the name of the folder that contains it.
output_directory = 'masked_datacube_30_SM_npy'

# Only the path is needed to derive a label, so the arrays are not loaded here.
labels = {
    os.path.join(root, file): os.path.basename(root)
    for root, _dirs, files in os.walk(output_directory)
    for file in files
    if file.endswith('.npy')
}

# Print all labels
print("Labels assigned based on folder names:")

for npy_file, label in labels.items():
    print(f"{npy_file}: {label}")

Labels assigned based on folder names:
masked_datacube_30_SM_npy\AE\espece10_acq1_cube_1.npy: AE
masked_datacube_30_SM_npy\AE\espece10_acq1_cube_10.npy: AE
masked_datacube_30_SM_npy\AE\espece10_acq1_cube_11.npy: AE
masked_datacube_30_SM_npy\AE\espece10_acq1_cube_12.npy: AE
masked_datacube_30_SM_npy\AE\espece10_acq1_cube_13.npy: AE
masked_datacube_30_SM_npy\AE\espece10_acq1_cube_14.npy: AE
masked_datacube_30_SM_npy\AE\espece10_acq1_cube_15.npy: AE
masked_datacube_30_SM_npy\AE\espece10_acq1_cube_16.npy: AE
masked_datacube_30_SM_npy\AE\espece10_acq1_cube_17.npy: AE
masked_datacube_30_SM_npy\AE\espece10_acq1_cube_18.npy: AE
masked_datacube_30_SM_npy\AE\espece10_acq1_cube_19.npy: AE
masked_datacube_30_SM_npy\AE\espece10_acq1_cube_2.npy: AE
masked_datacube_30_SM_npy\AE\espece10_acq1_cube_20.npy: AE
masked_datacube_30_SM_npy\AE\espece10_acq1_cube_21.npy: AE
masked_datacube_30_SM_npy\AE\espece10_acq1_cube_22.npy: AE
masked_datacube_30_SM_npy\AE\espece10_acq1_cube_23.npy: AE
masked_datacube_30_

In [7]:
# Dump the whole path -> label dictionary (one entry per .npy file, so the output is long).
print(labels)

{'masked_datacube_30_SM_npy\\AE\\espece10_acq1_cube_1.npy': 'AE', 'masked_datacube_30_SM_npy\\AE\\espece10_acq1_cube_10.npy': 'AE', 'masked_datacube_30_SM_npy\\AE\\espece10_acq1_cube_11.npy': 'AE', 'masked_datacube_30_SM_npy\\AE\\espece10_acq1_cube_12.npy': 'AE', 'masked_datacube_30_SM_npy\\AE\\espece10_acq1_cube_13.npy': 'AE', 'masked_datacube_30_SM_npy\\AE\\espece10_acq1_cube_14.npy': 'AE', 'masked_datacube_30_SM_npy\\AE\\espece10_acq1_cube_15.npy': 'AE', 'masked_datacube_30_SM_npy\\AE\\espece10_acq1_cube_16.npy': 'AE', 'masked_datacube_30_SM_npy\\AE\\espece10_acq1_cube_17.npy': 'AE', 'masked_datacube_30_SM_npy\\AE\\espece10_acq1_cube_18.npy': 'AE', 'masked_datacube_30_SM_npy\\AE\\espece10_acq1_cube_19.npy': 'AE', 'masked_datacube_30_SM_npy\\AE\\espece10_acq1_cube_2.npy': 'AE', 'masked_datacube_30_SM_npy\\AE\\espece10_acq1_cube_20.npy': 'AE', 'masked_datacube_30_SM_npy\\AE\\espece10_acq1_cube_21.npy': 'AE', 'masked_datacube_30_SM_npy\\AE\\espece10_acq1_cube_22.npy': 'AE', 'masked_dat

In [10]:
def load_and_concatenate_npy_files(root_dir, label_from='filename'):
    """Collect every .npy file below ``root_dir`` into one labelled DataFrame.

    Parameters
    ----------
    root_dir : str
        Folder that is walked recursively.
    label_from : {'filename', 'folder'}
        'filename' labels a row with the file name without its extension;
        'folder' labels it with the name of the folder containing the file.

    Returns
    -------
    pandas.DataFrame
        One row per file (the array flattened to 1D) plus a 'label' column.
        Folders and files are visited in sorted order, so the row order does
        not depend on how the filesystem enumerates entries.

    Raises
    ------
    ValueError
        If ``label_from`` is not one of the supported values. This is checked
        before any file is read, so a bad value is reported even when
        ``root_dir`` contains no .npy file.
    """
    if label_from not in ('filename', 'folder'):
        raise ValueError("label_from must be either 'filename' or 'folder'")

    data = []
    labels = []

    for root, dirs, files in os.walk(root_dir):
        # Sorting in place makes os.walk descend into sub-folders in a fixed order.
        dirs.sort()

        for file in sorted(files):
            if not file.endswith(".npy"):
                continue

            # Each file contributes one flat row, whatever shape it was stored in.
            data.append(np.load(os.path.join(root, file)).ravel())

            if label_from == 'filename':
                labels.append(os.path.splitext(file)[0])
            else:
                labels.append(os.path.basename(root))

    # Convert the data and labels into a pandas DataFrame
    df = pd.DataFrame(data)
    df['label'] = labels

    return df

# Specify the root directory containing .npy files
root_directory = 'masked_datacube_30_SM_npy'

# Set label_from to either 'filename' or 'folder'
label_source = 'folder'  # alternative: 'filename'

# Get the concatenated data and labels
df = load_and_concatenate_npy_files(root_directory, label_from=label_source)

# Save the final DataFrame both as CSV and as .npy.
# NOTE(review): df holds numeric columns plus the string 'label' column, so
# df.to_numpy() is presumably an object array; the cells that read
# Butterfly_30.npy back pass allow_pickle=True.
df.to_csv("Butterfly_30.csv", index=False)  # Option to save as CSV
np.save("Butterfly_30.npy", df.to_numpy())  # Option to save as .npy file


In [9]:
# Reload the combined table and show its content followed by its shape.
data = np.load('Butterfly_30.npy', allow_pickle=True)
print(data, data.shape, sep='\n')

[[0.0 0.0 0.0 ... 0.0 0.0 'AE']
 [0.0 0.0 0.0 ... 0.0 0.0 'AE']
 [0.0 0.0 0.0 ... 0.0 0.0 'AE']
 ...
 [0.04444156959652901 0.047425538301467896 0.05166005343198776 ...
  0.10204653441905975 0.1023530662059784 'HM']
 [0.2063453495502472 0.22078101336956024 0.2513737082481384 ...
  0.3943129777908325 0.3980599641799927 'HM']
 [0.0783819779753685 0.08229844272136688 0.09022470563650131 ...
  0.1505163609981537 0.14643998444080353 'HM']]
(240, 193)


In [12]:

# Load the combined table written by the earlier cell.
# NOTE: allow_pickle=True lets numpy unpickle objects stored in the file;
# only use it on files produced by this notebook.
data = np.load("Butterfly_30.npy", allow_pickle=True)

# Drop every row that contains at least one value equal to zero
filtered_data = data[~np.any(data == 0, axis=1)]

filtered_npy_path = "Butterfly_30_filtered_data.npy"
filtered_csv_path = "Butterfly_30_filtered_data.csv"

# Save the filtered data as a new .npy file
np.save(filtered_npy_path, filtered_data)

# Save the filtered data as a .csv file
np.savetxt(filtered_csv_path, filtered_data, delimiter=",", fmt='%s')

# Report the names of the files that were actually written
print(f"Filtered data saved as '{filtered_npy_path}' and '{filtered_csv_path}'")


Filtered data saved as 'filtered_data.npy' and 'filtered_data.csv'
