In [None]:
import pandas as pd 
import numpy as np 
import os 
from spectral import *
import spectral.io.envi as envi

This notebook describes the harmonization of different spectral libraries (SLIs) for the CarbonMapper Material Fractional Cover product.

* ECOSpecLib (https://speclib.jpl.nasa.gov/)
* UCSB (https://ecosis.org/package/urban-reflectance-spectra-from-santa-barbara--ca)
* SLUM (https://zenodo.org/record/4263842) 
* PHX2022 (https://github.com/CMLandOcean/MaterialFractionalCover/tree/main/spectral_libraries/PHX2022)

In [None]:
# Open the ENVI image of the target sensor. Only its band metadata
# (img.bands.centers / img.bands.bandwidths) is used below, as the target
# band settings for the spectral resampling of all SLIs.
gao_image_path = "path to ENVI image containing relevant band settings"
img = envi.open(gao_image_path)

# Harmonization

SLIs have different spectral band settings and need to be resampled to the sensor's settings.

## ECOSpecLib

In [None]:
# Harmonize ECOSpecLib: read the header (meta data) and the spectrum of every
# "*.spectrum.txt" file, assign a MaterialClass, resample the spectra to the
# band centers of `img`, and merge everything into `ecospeclib`.

# folder where single textfiles were unzipped to
folder = "path to folder with ecospeclib textfiles '..\\ecospeclib-all\\'"

# Collect meta data from textfiles.
# The per-file frames are gathered in a list and concatenated once:
# DataFrame.append was removed in pandas 2.0 and row-wise appending is quadratic.
meta_frames = []

for f in os.listdir(folder):
    name_parts = f.split(".")
    # only "<...>.spectrum.<ext>" files are spectra; entries without a dot are skipped
    # instead of raising an IndexError
    if len(name_parts) < 2 or name_parts[-2] != "spectrum":
        continue

    # the first 20 lines are "key: value" header lines; the regex separator splits at the first colon
    f_meta = pd.read_csv(os.path.join(folder, f), nrows=20, sep='^([^:]+):', index_col=0, usecols=[1,2], header=None, encoding='latin-1', engine='python')
    f_meta = f_meta.T
    f_meta["Filename"] = f
    meta_frames.append(f_meta)

if not meta_frames:
    raise FileNotFoundError("No '*.spectrum.*' files found in folder: " + folder)

meta = pd.concat(meta_frames, ignore_index=True)

# Clean up table
for col in ['Name', 'Type', 'Class', 'Subclass', 'Particle Size']:
    meta[col] = meta[col].str.strip()

meta["Type"] = meta["Type"].replace({"rock": "Rock", "Vegetation": "PV", "vegetation": "PV", "manmade": "Manmade", "non photosynthetic vegetation": "NPV", "soil": "Soil"})
meta["Class"] = meta["Class"].replace({"tree": "Tree", "bark": "Bark", "branches": "Branches", "flowers": "Flowers",  "leaves": "Leaves",  "lichen": "Lichen",  "needles": "Needles",  "grass": "Grass"})

# add column "MaterialClass" and reclassify endmembers accordingly
# NOTE: the order of these assignments matters - later rules overwrite earlier ones
# (e.g. NPV lichen is first set to "NPV" and then to "EXCLUDED").
meta.loc[:, "MaterialClass"] = ""
meta.loc[meta.Class=="Concrete", "MaterialClass"] = "Concrete"
meta.loc[meta.Subclass=="Brick", "MaterialClass"] = "Brick"
meta.loc[meta.Subclass=="Cement Cinderblock", "MaterialClass"] = "Concrete"
meta.loc[meta.Subclass=="Cinder", "MaterialClass"] = "Other man-made"
# NOTE(review): "Glas" may be a typo for "Glass" - confirm against the Subclass values in the library
meta.loc[meta.Subclass=="Glas", "MaterialClass"] = "Other man-made"
meta.loc[meta.Subclass=="Marble", "MaterialClass"] = "Other man-made"
meta.loc[meta.Subclass=="Paint", "MaterialClass"] = "Other man-made"
meta.loc[(meta.Class=="General Construction Material") & (meta.Subclass=="Wood"), "MaterialClass"] = "NPV"
meta.loc[meta.Class=="Road", "MaterialClass"] = "Asphalt"
meta.loc[meta.Subclass=="Metal", "MaterialClass"] = "Metal"
meta.loc[meta.Subclass=="Roofing Paper", "MaterialClass"] = "Other man-made"
meta.loc[meta.Name=="Asphalt Shingle", "MaterialClass"] = "Asphalt"
meta.loc[(meta.Class=="Roofing Material") & (meta.Subclass=="Rubber"), "MaterialClass"] = "Other man-made"
meta.loc[meta.Name=="Asphalt roofing shingle", "MaterialClass"] = "Asphalt"
meta.loc[meta.Name=="Reddish Asphalt roofing shingle", "MaterialClass"] = "Asphalt"
meta.loc[meta.Name=="Reddish asphalt Shingle", "MaterialClass"] = "Asphalt"
meta.loc[meta.Name=="Slate stone Shingle", "MaterialClass"] = "Natural substrate"
meta.loc[meta.Name=="Terra cotta Tiles", "MaterialClass"] = "Brick"
meta.loc[meta.Type=="NPV", "MaterialClass"] = "NPV"
meta.loc[(meta.Type=="NPV") & (meta.Class=="Lichen"), "MaterialClass"] = "EXCLUDED"
meta.loc[meta.Type=="PV", "MaterialClass"] = "PV"
meta.loc[meta.Type=="Rock", "MaterialClass"] = "Natural substrate"
meta.loc[meta.Type=="Soil", "MaterialClass"] = "Natural substrate"
meta.loc[meta.Class=="Frost", "MaterialClass"] = "EXCLUDED"
meta.loc[meta.Class=="Ice", "MaterialClass"] = "EXCLUDED"
meta.loc[meta.Class=="Snow", "MaterialClass"] = "EXCLUDED"
meta.loc[meta.Class=="Tap Water", "MaterialClass"] = "Water"
meta.loc[meta.Type=="Meteorites", "MaterialClass"] = "EXCLUDED"
meta.loc[meta.Type=="Mineral", "MaterialClass"] = "EXCLUDED"

# add SLI name: ("sli_name" and "index") will later be the composite key to retrieve endmember-specific information from the original tables
meta["sli_name"] = "ECOSpecLib"

# Keep only spectra that contain values below 450 nm (0.450 in the units of the header):
# either the first or the last X value must be below the threshold, because the
# wavelength axis of a file can be stored in ascending or descending order.
meta = meta[(meta["First X Value"].astype(float) < 0.450) | (meta["Last X Value"].astype(float) < 0.450)]

# Collect spectral data from textfiles and resample to GAO
gao_centers = np.array(img.bands.centers)
spectrum_frames = []

for f in meta["Filename"]:

    print(f)

    # read spectrum and wavelengths from file (the 20 header lines are skipped)
    spectrum = pd.read_csv(os.path.join(folder, f), skiprows=20, sep='\t', header=None, index_col=0, encoding='latin-1', engine='python')
    spectrum = spectrum.T

    # get wavelengths and values
    wavelengths = spectrum.columns.values.astype(float)
    # presumably converts percent reflectance to a 0-1 fraction - TODO confirm
    values = spectrum.iloc[0].values.astype(float)/100

    # reverse values and wavelengths in case wavelengths are inverted
    if wavelengths[0] > wavelengths[-1]:
        wavelengths = wavelengths[::-1]
        values = values[::-1]

    # resample to GAO; the factor 1000 presumably converts micrometers to the
    # nanometers used by img.bands.centers - TODO confirm
    # NOTE(review): unlike the other libraries, no fwhm2 (img.bands.bandwidths) is passed here
    band_resampler = BandResampler(wavelengths*1000, gao_centers)
    new_values = band_resampler(values)

    # convert to a one-row dataframe with the band centers as columns
    spectrum = pd.DataFrame(new_values, index=gao_centers).T
    spectrum["Filename"] = f
    spectrum_frames.append(spectrum)

if not spectrum_frames:
    raise ValueError("No ECOSpecLib spectrum left after filtering for values < 450 nm")

spectra = pd.concat(spectrum_frames, ignore_index=True)

# Merge meta data with spectra
ecospeclib = meta.merge(spectra, on="Filename")

## UCSB

In [None]:
# Harmonize the UCSB urban library: split the table into meta data and spectra,
# assign a MaterialClass, resample the spectra to the band settings of `img`,
# and merge everything into `ucsb_gao`.
ucsb = pd.read_csv("path to urban-reflectance-spectra-from-santa-barbara--ca.csv")

# the last 1075 columns hold the spectra, all columns before are meta data
n_spectral_cols = 1075
# .copy() so that adding columns below modifies an independent frame and not a
# slice of `ucsb` (avoids pandas' SettingWithCopyWarning)
meta = ucsb[ucsb.columns[:-n_spectral_cols]].copy()
# presumably rescales reflectance stored as integers (x10000) to 0-1 - TODO confirm
spectra = ucsb[ucsb.columns[-n_spectral_cols:]]/10000

MaterialClass = {'ANNUAL_FORB': 'PV', # herb
          'ASPHALT_GRAVEL': 'Asphalt',
          'ASPHALT_PARKING_LOT': 'Asphalt',
          'ASPHALT_ROAD': 'Asphalt', 
          'ASPHALT_ROOF': 'Asphalt', 
          'BARK': 'NPV', 
          'BOUGAINVILLEA': 'PV',
          'BRICK': 'Brick',
          'BRICK_SIDEWALK': 'Brick', 
          'COMP_SHINGLE_ROOF': 'Other man-made', 
          'CONCRETE_BRIDGE': 'Concrete',
          'CONCRETE_GRAVEL': 'Concrete', 
          'CONCRETE_PARKING_LOT': 'Concrete',
          'CONCRETE_ROAD': 'Concrete', 
          'CONCRETE_SIDEWALK': 'Concrete',
          'CONCRETE_TILE_ROOF': 'Concrete',
          'ENGLISH_IVY': 'PV', 
          'GLASS_ROOF': 'Other man-made', 
          'GRAVEL': 'Natural substrate',
          'GRAVEL_ROOF': 'Natural substrate', 
          'IRGR': 'PV', # grass
          'LICHEN': 'EXCLUDED', 
          'METAL_MANHOLE': 'Metal',
          'METAL_ROOF': 'Metal', 
          'MIRRA': 'PV',
          'NPV': 'NPV', 
          'PAINT': 'Other man-made', 
          'PAINTED_ROOF': 'Other man-made', 
          'PALM': 'PV', 
          'SAND': 'Natural substrate', 
          'SOIL': 'Natural substrate', 
          'STAR_JASMINE': 'PV', 
          'TILE_ROAD': 'Other man-made',
          'TILE_ROOF': 'Other man-made', 
          'WOOD': 'NPV',
          'WOOD_SHINGLE_ROOF': 'NPV'
          }

# add column "MaterialClass" and reclassify endmembers accordingly
# (Level_7 values that are not a key of the dict are kept unchanged by replace)
meta["MaterialClass"] = meta["Level_7"].replace(MaterialClass)

# add SLI name: ("sli_name" and "index") will later be the composite key to retrieve endmember-specific information from the original tables
# (DataFrame.assign returns a new frame, so the column is assigned directly instead)
meta["sli_name"] = "UCSB"

# Get wavelengths and values
wavelengths = spectra.columns.values.astype(float)
values = spectra.values

# resample to GAO
gao_centers = np.array(img.bands.centers)
band_resampler = BandResampler(centers1 = wavelengths, 
                               centers2 = gao_centers, 
                               fwhm1 = None, 
                               fwhm2 = img.bands.bandwidths)     
# the resampler is applied to a (source bands x spectra) array, hence the transpose
new_values = band_resampler(values.T)

# one row per spectrum, one column per band center
spectra = pd.DataFrame(new_values, index = gao_centers).T
# rows are in the same order as `meta`; reuse its index so that the index-based merge lines up
spectra.index = meta.index

ucsb_gao = meta.merge(spectra, how='left', left_index=True, right_index=True)

## SLUM

SLUM is provided as two CSV files (shortwave and infrared spectra) together with a PDF that documents the relevant metadata. We compiled this metadata into a single file (LUMA_SLUM_META.csv), which is available in the GitHub repository (https://github.com/CMLandOcean/MaterialFractionalCover/tree/main/spectral_libraries/SLUM).

In [None]:
# Harmonize SLUM: load the meta table, derive a MaterialClass per endmember,
# resample the shortwave spectra to the band settings of `img`, and join both
# into `slum_gao`.
meta = pd.read_csv("path to \\LUMA_SLUM_META.csv", index_col=0)

# Reclassification rules as (row mask, MaterialClass) pairs.
# They are applied top to bottom, so a later rule overrides an earlier one
# for rows matched by both (e.g. "Granite with cement" after "Granite").
superclass_is_brick = meta.Superclass == "Brick"
superclass_is_shingle = meta.Superclass == "Shingle"
superclass_is_tile = meta.Superclass == "Tile"
class_is_granite = meta.Class == "Granite"
class_is_metal = meta.Class == "Metal"

material_rules = [
    (meta.Superclass == "Asphalt", "Asphalt"),
    (superclass_is_brick & (meta.Class == "Cement brick"), "Concrete"),
    (superclass_is_brick & (meta.Class == "Ceramic brick"), "Brick"),
    (superclass_is_brick & (meta.Material == "Ceramic brick with cement"), "Other man-made"),
    (superclass_is_brick & (meta.Material == "Ceramic brick with paint"), "Other man-made"),
    (superclass_is_brick & (meta.Material == "Ceramic with cement"), "Other man-made"),
    (superclass_is_brick & (meta.Material == "Ceramic with paint"), "Other man-made"),
    (meta.Superclass == "Concrete/Cement", "Concrete"),
    (class_is_granite, "Natural substrate"),
    (class_is_granite & (meta.Material == "Granite with cement"), "Other man-made"),
    (class_is_metal, "Metal"),
    (class_is_metal & (meta.Material == "Metal with paint"), "Metal"),
    (meta.Superclass == "PVC", "Plastic"),
    (meta.Superclass == "Quartzite", "Natural substrate"),
    (superclass_is_shingle, "Natural substrate"),
    (superclass_is_shingle & (meta.Material == "Fiber cement"), "Concrete"),
    (meta.Superclass == "Stone", "Natural substrate"),
    (superclass_is_tile & (meta.Material == "Cement"), "Concrete"),
    (superclass_is_tile & (meta.Material == "Ceramic"), "Brick"),
]

# rows that match no rule keep the empty string
meta["MaterialClass"] = ""
for row_mask, material in material_rules:
    meta.loc[row_mask, "MaterialClass"] = material

# library name; together with the row index it identifies an endmember in the original tables
meta["sli_name"] = "SLUM"

# shortwave spectra, transposed so that every row is one spectrum and every column one wavelength
spectra = pd.read_csv("path to \\LUMA_SLUM_SW.csv", index_col=0).T

wavelengths = spectra.columns.values.astype(float)
idx = spectra.index
values = spectra.values

# resample to the band centers / bandwidths of the target sensor
target_centers = np.array(img.bands.centers)
band_resampler = BandResampler(centers1 = wavelengths,
                               centers2 = target_centers,
                               fwhm1 = None,
                               fwhm2 = img.bands.bandwidths)
new_values = band_resampler(values.T)

# back to one row per spectrum, restore the spectrum IDs as index and divide by 100
resampled = pd.DataFrame(new_values, index = target_centers).T
spectra = resampled.set_index(idx)/100

# join meta data and spectra on their shared index, then move that index into a column
slum_joined = meta.merge(spectra, how='left', left_index=True, right_index=True)
slum_gao = slum_joined.reset_index()

## PHX

### GAO20220716t191531p0000_iacorn_refl_ort

In [None]:
# Build the PHX2022 endmembers: label every extracted spectrum with a
# MaterialClass and average all spectra that share the same name.
phx_gao = pd.read_csv("path to \\GAO20220716t191531p0000_iacorn_refl_ort_sli.csv")

# clean up DataFrame: cut the last two characters off every name
# (presumably a per-sample suffix - TODO confirm), drop the source column, add the library name
phx_gao['name'] = [sample_name[:-2] for sample_name in phx_gao['name']]
phx_gao = phx_gao.drop(columns=['source'])
phx_gao.insert(1, "sli_name", "GAO20220716t191531p0000_iacorn_refl_ort")

# MaterialClass -> names of the endmembers that belong to it
names_by_material = {
    "Water": ["pool1", "pool2", "pool3", "pool4", "pool5", "pool6", "pool7", "pool8",
              "pool9", "pool10", "pool11", "pool12", "pool13", "pool14", "pool15", "pool16"],
    "Plastic": ["blue_tarp", "yellow_tarp", "light_blue_tarp"],
    "Natural substrate": ["reddish_gravel", "playground_sand", "baseball_infield_clay"],
    "Metal": ["red_shipping_container", "grey_bare_metal_roof", "yellow_painted_metal_roof",
              "metal_roof", "red_brown_metal_roof"],
    "Other man-made": ["white_flat_roof"],
    "Brick": ["terracotta_shingle"],
    "Concrete": ["concrete1", "concrete2"],
    "Asphalt": ["asphalt1", "asphalt2", "asphalt3", "asphalt4", "asphalt5", "asphalt6",
                "asphalt7", "asphalt8", "asphalt9", "asphalt10", "asphalt11", "asphalt12"],
}

# add column "MaterialClass"; names that are not listed above keep the empty string
phx_gao["MaterialClass"] = ""
for material, endmember_names in names_by_material.items():
    phx_gao.loc[phx_gao["name"].isin(endmember_names), "MaterialClass"] = material

# take mean spectrum as endmember and divide by 10000
group_keys = ["name","MaterialClass","sli_name"]
mean_spectra = phx_gao.groupby(group_keys).mean()
phx_gao_mean = (mean_spectra/10000).reset_index()

# rename cols: keep the three key columns, label all remaining columns with the band centers of img
key_cols = phx_gao_mean.columns[:3].values
phx_gao_mean.columns = np.append(key_cols, np.array(img.bands.centers))

# Combination & Preparation

## for GAO

In [None]:
# Combine the harmonized libraries into one table, drop unusable endmembers,
# move the spectra to the end of the table and write the result to disk.

# band centers of the target sensor; they are the column labels of the spectra in all four tables
band_centers = np.array(img.bands.centers)

# reset_index results in a new col ("index", or "level_0" if a column "index" already exists)
# that has the original indices from the original tables
combined_sli = pd.concat([slum_gao, ucsb_gao, ecospeclib, phx_gao_mean]).reset_index()
combined_sli = combined_sli.rename(columns={"index": "original_index"})

# get rid of rows with only NaN reflectance
combined_sli = combined_sli[~combined_sli[band_centers].isnull().all(axis=1)]

# get rid of Minerals and EXCLUDED EMs
combined_sli = combined_sli[(combined_sli.Type != "Mineral") & (combined_sli.MaterialClass != "EXCLUDED")]

# optional: get rid of bands that have any NaNs
# combined_sli = combined_sli.drop(columns=[c for c in band_centers if combined_sli[c].isna().any()])

# sort cols so that spectra are the last cols.
# Spectral columns are identified by their band-center label instead of by fixed
# positions (previously 10 meta columns and 428 bands were hard-coded), so the
# ordering stays correct when the number of meta columns or bands changes.
band_labels = set(band_centers.tolist())
meta_cols = [col for col in combined_sli.columns if col not in band_labels]
combined_sli = combined_sli[meta_cols + band_centers.tolist()]

# NOTE: to_csv also writes the row index as an unnamed first column
combined_sli.to_csv("path to \\GAO_SLI.csv")

## for Tanager

In [None]:
# Resample the combined GAO library to the Tanager band settings and save it.

# Tanager image that provides the target band centers and bandwidths.
# NOTE: this re-binds `img`; cells above that are re-run afterwards will see the Tanager image.
img = envi.open("path to ENVI image containing relevant band settings")
tanager_centers = np.array(img.bands.centers)

# combined GAO library written in the previous step
gao_sli = pd.read_csv("path to \\GAO_SLI.csv")

# the spectra are stored in the last 428 columns, everything before is meta data
n_gao_bands = 428
gao_band_cols = gao_sli.columns[-n_gao_bands:]
gao_meta_cols = gao_sli.columns[:-n_gao_bands]
gao_spectra = gao_sli[gao_band_cols]

# resampler from the GAO band centers (read back from the column labels) to the Tanager bands
band_resampler = BandResampler(centers1 = np.array(gao_band_cols.astype(float)),
                               centers2 = tanager_centers,
                               fwhm1 = None,
                               fwhm2 = img.bands.bandwidths)

# apply to the transposed table (bands x spectra), then turn back to one row per spectrum
resampled = band_resampler(gao_spectra.T)
spectra = pd.DataFrame(resampled, index=tanager_centers).T

# attach the Tanager spectra to the unchanged meta columns via the row index
combined_sli = gao_sli[gao_meta_cols].merge(spectra, how='left', left_index=True, right_index=True)

# save to disk
combined_sli.to_csv("path to \\Tanager_SLI.csv")