In [49]:
# Set paths
gpkg_path   = "data/silvertree/occurrence.gpkg"                                     # GBIF/iNat geopackage
cube_path   = "data/AVIRIS_L3/ang20231126t084422_002_L2A_OE_main_27577724_RFL_ORT"  # data file (no .hdr)
hdr_path    = cube_path + ".hdr"                                                    # companion header

In [50]:
# Import python modules
import re
from os import path
import geopandas as gpd
import s3fs
import pandas as pd
import xarray as xr
from shapely.geometry import box, mapping, Point
import rioxarray as riox
import numpy as np
import netCDF4 as nc
import hvplot.xarray
import holoviews as hv
import xvec
import shap
import matplotlib.pyplot as plt
from dask.diagnostics import ProgressBar
import warnings
import rasterio
import hvplot.pandas
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report
import xgboost as xgb

warnings.filterwarnings('ignore')
hvplot.extension('bokeh')
hv.extension('bokeh')

In [56]:
# Header parser: wavelengths, FWHM, BBL, nodata 
def parse_envi_header(hdr_file: str):
    """Return {'wavelengths': np.ndarray[nm], 'bbl_mask': np.ndarray[bool] or None, 'nodata': float|None}."""
    with open(hdr_file, 'r', encoding='utf-8', errors='ignore') as f:
        txt = f.read()

    def _list(key):
        m = re.search(rf'{key}\s*=\s*\{{([^}}]*)\}}', txt, flags=re.IGNORECASE | re.DOTALL)
        if not m:
            return None
        vals = [v for v in re.split(r'[,\s]+', m.group(1).strip()) if v]
        return np.array([float(v) for v in vals], dtype=float)

    wl = _list('wavelength')
    if wl is None:
        raise ValueError("Header has no 'wavelength' entry.")

    # units → nm
    um = re.search(r'wavelength\s+units\s*=\s*([^\n\r]+)', txt, re.IGNORECASE)
    units = um.group(1).strip().strip('{}').lower() if um else 'nanometers'
    if ('micro' in units) or ('µm' in units) or ('microm' in units):
        wl = wl * 1000.0

    # bbl (1=good, 0=bad) → boolean; guard against inverted conventions
    bbl = _list('bbl')
    bbl_mask = None
    if bbl is not None:
        bbl_mask = (bbl.astype(int) == 1)
        if bbl_mask.mean() < 0.1:
            bbl_mask = ~bbl_mask

    # nodata
    nd = re.search(r'data\s+ignore\s+value\s*=\s*([-\d\.Ee+]+)', txt, re.IGNORECASE)
    nodata = float(nd.group(1)) if nd else None

    return {'wavelengths': wl, 'bbl_mask': bbl_mask, 'nodata': nodata}

In [57]:
# 1. Parse the header for wavelengths, nodata, and BBL
hdr = parse_envi_header(hdr_path)
wavelengths = hdr["wavelengths"]
hdr_bbl     = hdr["bbl_mask"]
hdr_nodata  = hdr["nodata"]

In [58]:
# 2. Load and reproject your silvertree points
gdf = gpd.read_file(gpkg_path)
with rasterio.open(cube_path) as src:
    raster_crs = src.crs
gdf = gdf.to_crs(raster_crs)

In [59]:
# 3. Find the flightline footprint
with rasterio.open(cube_path) as src:
    xmin, ymin, xmax, ymax = src.bounds
footprint = box(xmin, ymin, xmax, ymax)

In [60]:
# 4. Filter to points inside the footprint
gdf = gdf[gdf.geometry.within(footprint)]
assert len(gdf) > 0, "No silvertree occurrences in this flightline."

In [61]:
# 5. Generate random background points (same count as positives)
#    We buffer the silvertree points by ~15 m (3 pixels) to avoid overlap
buffered_silvertree = gdf.buffer(15)
background_points = []
rng = np.random.default_rng(seed=42)
while len(background_points) < len(gdf):
    rand_x = rng.uniform(xmin, xmax)
    rand_y = rng.uniform(ymin, ymax)
    pt = Point(rand_x, rand_y)
    if footprint.contains(pt) and not buffered_silvertree.geometry.intersects(pt).any():
        background_points.append(pt)

In [62]:
# 6. Sample spectra at both silvertree and background points
coords_positive  = [(p.x, p.y) for p in gdf.geometry]
coords_negative  = [(p.x, p.y) for p in background_points]
all_coords       = coords_positive + coords_negative
labels           = [1]*len(coords_positive) + [0]*len(coords_negative)

with rasterio.open(cube_path) as src:
    spectra = np.vstack(list(src.sample(all_coords))).astype("float32")
    nodata  = src.nodata if src.nodata is not None else (hdr_nodata if hdr_nodata is not None else np.nan)
    if np.isfinite(nodata):
        spectra[spectra == nodata] = np.nan

In [63]:
# 7. Clean the spectra: remove other sentinels and implausible values
for sentinel in (-32768, -32767, -1e20, 1e20):
    spectra[spectra == sentinel] = np.nan
# mask reflectances < −0.05 or > 1.5
spectra[(spectra < -0.05) | (spectra > 1.5)] = np.nan

In [64]:
# 8. Apply your bad‑band mask (manual + header)
manual_bbl = np.ones_like(wavelengths, dtype=bool)
manual_bbl[197:206] = False
manual_bbl[285:318] = False
good_bands = manual_bbl & (hdr_bbl if hdr_bbl is not None else manual_bbl)
spectra[:, ~good_bands] = np.nan
wavelengths_good = wavelengths[good_bands]

In [65]:
# 9. Drop any rows with too many missing values (or fill with bandwise means)
mask_valid = np.isfinite(spectra).sum(axis=1) >= int(0.9 * len(wavelengths_good))
X = spectra[mask_valid][:, good_bands]
y = np.array(labels)[mask_valid]
# Simple imputation: fill any NaNs with the bandwise mean
band_means = np.nanmean(X, axis=0)
inds       = np.where(np.isnan(X))
X[inds]    = np.take(band_means, inds[1])

In [66]:
# 10. Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

In [67]:
# 11. Hyperparameter tuning with XGBoost
param_grid = {
    "learning_rate": [0.05, 0.1],
    "max_depth": [5, 10],
    "n_estimators": [100, 300],
}
xgb_model  = xgb.XGBClassifier(
    objective="binary:logistic",
    eval_metric="logloss",
    tree_method="hist",
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
)
grid_search = GridSearchCV(
    xgb_model,
    param_grid,
    cv=5,
    scoring="f1",
    n_jobs=-1,
    verbose=0,
)
grid_search.fit(X_train, y_train)
best_model = grid_search.best_estimator_

In [68]:
# 12. Evaluate on the test set
y_pred   = best_model.predict(X_test)
report   = classification_report(y_test, y_pred, target_names=["background","silvertree"])
print(report)

              precision    recall  f1-score   support

  background       0.53      0.50      0.52        50
  silvertree       0.64      0.67      0.66        67

    accuracy                           0.60       117
   macro avg       0.59      0.59      0.59       117
weighted avg       0.60      0.60      0.60       117



In [None]:
# What it basically tells me is that the model i ran can accurately pick up L.argentem 60% of the time from the cube I provieded it. 
# Although it is over 50%, 60% is still not a good enough percentage. What I should rather do is check the sensor directly to see how accurate it is 
# in picking up L.argenteum from the cube. I will do it without building a model.

In [None]:
SENSOR ACCURACY CHECK ----->

In [69]:
import numpy as np
import geopandas as gpd
import rasterio
from shapely.geometry import box
from sklearn.metrics import roc_auc_score, average_precision_score, precision_recall_curve, roc_curve
from scipy.signal import savgol_filter

In [70]:
# --- helpers built for your current objects -----------------------------------

def nearest_band_index(wavelengths, target_nm):
    return int(np.argmin(np.abs(wavelengths - target_nm)))

def sample_window_mean(src, x, y, half=2, good_mask=None, nir_idx=None, red_idx=None, ndvi_min=0.2):
    """Return mean spectrum in a (2*half+1)^2 window around (x,y), masking low-NDVI pixels."""
    row, col = src.index(x, y)
    w = rasterio.windows.Window(col-half, row-half, 2*half+1, 2*half+1)
    # read all bands in the window -> (bands, h, w)
    cube_win = src.read(indexes=None, window=w).astype('float32')
    if cube_win.size == 0:
        return None
    # NDVI mask to de-emphasise soil/shade
    if nir_idx is not None and red_idx is not None:
        nir = cube_win[nir_idx]
        red = cube_win[red_idx]
        ndvi = (nir - red) / (nir + red + 1e-6)
        mask = (ndvi >= ndvi_min) & np.isfinite(ndvi)
    else:
        mask = np.ones_like(cube_win[0], dtype=bool)
    # mean over valid pixels; if none valid, fall back to plain mean
    if mask.sum() == 0:
        spec = np.nanmean(cube_win.reshape(cube_win.shape[0], -1), axis=1)
    else:
        spec = np.nanmean(cube_win[:, mask], axis=1)
    return spec

def brightness_normalise(spec):
    norm = np.linalg.norm(spec[np.isfinite(spec)])
    return spec / (norm + 1e-9)

def first_derivative(spec, window=11, poly=2):
    s = np.copy(spec)
    ok = np.isfinite(s)
    s[~ok] = np.nanmean(s[ok]) if ok.any() else 0.0
    d = savgol_filter(s, window_length=window, polyorder=poly, deriv=1, mode='interp')
    return d

def ace_scores(X, target, C_inv):
    # ACE: ((x^T C^-1 t)^2) / ((t^T C^-1 t)(x^T C^-1 x))
    xt = X @ (C_inv @ target)
    tt = target @ (C_inv @ target)
    xx = np.einsum('ij,ij->i', X @ C_inv, X)
    return (xt**2) / (tt * xx + 1e-12)

In [71]:
# --- build training and test sets of spectra ----------------------------------

# 1) open raster; get nodata; choose NIR/Red band indices for NDVI
with rasterio.open(cube_path) as src:
    rio_nodata = src.nodata
    wavelengths = np.array(cube.coords['band'].values)  # your band coordinate is wavelengths
    nir_idx = nearest_band_index(wavelengths, 840)      # AVIRIS-NG closest to 842 nm
    red_idx = nearest_band_index(wavelengths, 660)

In [72]:
# 2) define good bands: my manual + header BBL
manual_bbl = np.ones_like(wavelengths, dtype=bool)
manual_bbl[197:206] = False
manual_bbl[285:318] = False
good_bands = manual_bbl & (hdr_bbl if hdr_bbl is not None else manual_bbl)
wl_good = wavelengths[good_bands]

In [73]:
# 3) build crown proxies ( Iwill replace this with actual crown measurements when I do field work)
occ = gdf[gdf.geometry.within(box(*rasterio.open(cube_path).bounds))]
occ_buf = occ.buffer(3.0)  # ~5×5 pixels at 4.8 m if half=2 below

In [74]:
# 4) sample positive spectra (window mean + cleaning + features)
pos_specs = []
with rasterio.open(cube_path) as src:
    for geom in occ_buf.centroid:  # centroid sampling; for true polygons, we’d tile
        spec = sample_window_mean(src, geom.x, geom.y, half=2,
                                  nir_idx=nir_idx, red_idx=red_idx, ndvi_min=0.2)
        if spec is None: 
            continue
        # nodata and range cleaning
        if rio_nodata is not None and np.isfinite(rio_nodata):
            spec[spec == rio_nodata] = np.nan
        spec[(spec < -0.05) | (spec > 1.5)] = np.nan
        spec = spec[good_bands]
        if np.isfinite(spec).sum() < 0.9 * spec.size:
            continue
        # modest pre-processing to stabilise detector
        spec = brightness_normalise(spec)
        spec = first_derivative(spec, window=11, poly=2)
        pos_specs.append(spec)
pos_specs = np.array(pos_specs)
assert len(pos_specs) >= 20, "Too few positive spectra; expand buffers or add crowns."

In [75]:
# 5) sample matched background spectra away from positives
from shapely.strtree import STRtree
tree = STRtree(list(occ_buf.geometry))
footprint = box(*rasterio.open(cube_path).bounds)
rng = np.random.default_rng(42)
neg_specs = []
with rasterio.open(cube_path) as src:
    while len(neg_specs) < len(pos_specs):
        x = rng.uniform(footprint.bounds[0], footprint.bounds[2])
        y = rng.uniform(footprint.bounds[1], footprint.bounds[3])
        p = box(x, y, x, y).centroid
        # reject if near positives
        if tree.query(p.buffer(15)).size > 0: 
            continue
        spec = sample_window_mean(src, x, y, half=2, nir_idx=nir_idx, red_idx=red_idx, ndvi_min=0.2)
        if spec is None: 
            continue
        if rio_nodata is not None and np.isfinite(rio_nodata):
            spec[spec == rio_nodata] = np.nan
        spec[(spec < -0.05) | (spec > 1.5)] = np.nan
        spec = spec[good_bands]
        if np.isfinite(spec).sum() < 0.9 * spec.size:
            continue
        spec = brightness_normalise(spec)
        spec = first_derivative(spec, window=11, poly=2)
        neg_specs.append(spec)
neg_specs = np.array(neg_specs)

In [76]:
# --- fit detector and evaluate ------------------------------------------------

# target signature = median positive spectrum; background covariance from negatives
target = np.nanmedian(pos_specs, axis=0)
mu_b   = np.nanmean(neg_specs, axis=0)
X_b    = np.where(np.isfinite(neg_specs), neg_specs, mu_b)
C_b    = np.cov(X_b.T) + 1e-6 * np.eye(X_b.shape[1])  # small ridge for stability
Cinv   = np.linalg.pinv(C_b)

In [77]:
# scores on held-out sets (simple split here; use spatial CV in practice)
n = len(pos_specs)
split = int(0.7*n)
X_pos_train, X_pos_test = pos_specs[:split], pos_specs[split:]
m = len(neg_specs)
split_n = int(0.7*m)
X_neg_train, X_neg_test = neg_specs[:split_n], neg_specs[split_n:]

In [78]:
# recompute target on training only (no peeking)
target = np.nanmedian(X_pos_train, axis=0)
C_b    = np.cov(np.where(np.isfinite(X_neg_train), X_neg_train, mu_b).T) + 1e-6*np.eye(X_b.shape[1])
Cinv   = np.linalg.pinv(C_b)

scores_pos = ace_scores(X_pos_test, target, Cinv)
scores_neg = ace_scores(X_neg_test, target, Cinv)

y_true  = np.r_[np.ones_like(scores_pos), np.zeros_like(scores_neg)]
y_score = np.r_[scores_pos, scores_neg]

roc_auc = roc_auc_score(y_true, y_score)
ap      = average_precision_score(y_true, y_score)
print(f"ACE ROC-AUC: {roc_auc:.3f}   Average Precision: {ap:.3f}")

ACE ROC-AUC: 0.612   Average Precision: 0.626


In [None]:
# It seems as though it is no different at picking up L.argenteum as with the model I had made. But because this is just one cube, I am hoping that 
# adding more cubes will help bring in new examples of silvertree crowns and background types. Bringing in spectra from neighbouring flightlines 
# will give me more variation in illumination, canopy orientation and surrounding vegetation. That additional variation can make the target 
# signature more representative and the clutter model more robust.

In [79]:
# Import modules
import numpy as np
import geopandas as gpd
import rasterio
from shapely.geometry import box, Point
from shapely.strtree import STRtree
from sklearn.metrics import roc_auc_score, average_precision_score
from scipy.signal import savgol_filter
from rasterio.windows import Window
import os

In [81]:
# --- Utility functions --------------------------------------------------------

def parse_envi_header(hdr_file):
    """Parse wavelengths, BBL and nodata from an ENVI header."""
    import re
    with open(hdr_file, 'r', encoding='utf-8', errors='ignore') as f:
        txt = f.read()
    def _list(key):
        m = re.search(rf'{key}\s*=\s*\{{([^}}]*)\}}', txt, flags=re.IGNORECASE|re.DOTALL)
        if not m: return None
        vals = [v for v in re.split(r'[,\s]+', m.group(1).strip()) if v]
        return np.array([float(v) for v in vals], dtype=float)
    wl = _list('wavelength')
    units = re.search(r'wavelength\s+units\s*=\s*([^\n\r]+)', txt, re.IGNORECASE)
    units = units.group(1).strip().strip('{}').lower() if units else 'nanometers'
    if 'micro' in units or 'µm' in units:
        wl = wl * 1000.0
    bbl = _list('bbl')
    bbl_mask = None
    if bbl is not None:
        bbl_mask = (bbl.astype(int) == 1)
        if bbl_mask.mean() < 0.1:
            bbl_mask = ~bbl_mask
    nd = re.search(r'data\s+ignore\s+value\s*=\s*([-\d\.Ee+]+)', txt, re.IGNORECASE)
    nodata = float(nd.group(1)) if nd else None
    return {'wavelengths': wl, 'bbl_mask': bbl_mask, 'nodata': nodata}

def nearest_band_index(wavelengths, target_nm):
    return int(np.argmin(np.abs(wavelengths - target_nm)))

def sample_window_mean(src, x, y, half=2, nir_idx=None, red_idx=None, ndvi_min=0.2):
    """Return mean spectrum in a (2*half+1)^2 window around (x,y), masking low-NDVI pixels."""
    row, col = src.index(x, y)
    w = Window(col - half, row - half, 2*half+1, 2*half+1)
    cube_win = src.read(indexes=None, window=w).astype('float32')  # shape: bands × h × w
    if cube_win.size == 0:
        return None
    h, w_ = cube_win.shape[1], cube_win.shape[2]
    # NDVI mask to downweight soil/shade
    if nir_idx is not None and red_idx is not None:
        nir = cube_win[nir_idx]
        red = cube_win[red_idx]
        ndvi = (nir - red) / (nir + red + 1e-9)
        mask = (ndvi >= ndvi_min) & np.isfinite(ndvi)
    else:
        mask = np.ones_like(cube_win[0], dtype=bool)
    # mean across valid pixels
    if mask.sum() == 0:
        spec = np.nanmean(cube_win.reshape(cube_win.shape[0], -1), axis=1)
    else:
        spec = np.nanmean(cube_win[:, mask], axis=1)
    return spec

def brightness_normalise(spec):
    norm = np.linalg.norm(spec[np.isfinite(spec)])
    return spec / (norm + 1e-9)

def first_derivative(spec, window=11, poly=2):
    s = np.copy(spec)
    ok = np.isfinite(s)
    s[~ok] = np.nanmean(s[ok]) if ok.any() else 0.0
    d = savgol_filter(s, window_length=window, polyorder=poly, deriv=1, mode='interp')
    return d

def ace_scores(X, target, C_inv):
    """Compute ACE scores for each row in X given target and inverse covariance."""
    xt = X @ (C_inv @ target)
    tt = target @ (C_inv @ target)
    xx = np.einsum('ij,ij->i', X @ C_inv, X)
    return (xt**2) / (tt * xx + 1e-12)

In [88]:
# --- Paths to your data -------------------------------------------------------

cube_paths = [
    "data/ALL/ang20231126t084422_000_L2A_OE_main_27577724_RFL_ORT",
    "data/ALL/ang20231126t084422_001_L2A_OE_main_27577724_RFL_ORT",
    "data/ALL/ang20231126t084422_002_L2A_OE_main_27577724_RFL_ORT",
    "data/ALL/ang20231126t084422_003_L2A_OE_main_27577724_RFL_ORT",
    "data/ALL/ang20231126t084422_004_L2A_OE_main_27577724_RFL_ORT",
    "data/ALL/ang20231126t084422_005_L2A_OE_main_27577724_RFL_ORT",
    "data/ALL/ang20231126t084422_006_L2A_OE_main_27577724_RFL_ORT",
    "data/ALL/ang20231126t084422_007_L2A_OE_main_27577724_RFL_ORT",
]
gpkg_path  = "data/silvertree/occurrence.gpkg"
buffer_m   = 3.0  # radius (m) to create small buffers for positive samples
random_seed = 42

In [82]:
# --- Load occurrence points once ---------------------------------------------

gdf_global = gpd.read_file(gpkg_path)

In [83]:
# --- Containers for combined spectra -----------------------------------------

pos_specs_all = []
neg_specs_all = []
global_wavelengths = None
global_bbl = None

In [89]:
# --- Loop over cubes to extract spectra ---------------------------------------

for cube_path in cube_paths:
    hdr = parse_envi_header(cube_path + ".hdr")
    wl  = hdr['wavelengths']
    bbl = hdr['bbl_mask']
    nd  = hdr['nodata']

    # Use each cube’s own BBL to define its "good bands"
    if bbl is None:
        print(f"{cube_path}: No BBL found; skipping")
        continue
    # Determine "good bands" by this cube’s BBL alone
    good_bands_cube = bbl
    # Keep track of global wavelengths and intersection of BBLs
    if global_wavelengths is None:
        global_wavelengths = wl.copy()
        global_bbl = bbl.copy()
    else:
        if not np.allclose(wl, global_wavelengths):
            print(f"{cube_path}: Wavelengths differ from previous cubes; skipping")
            continue
        global_bbl = global_bbl & bbl

    # Open cube and reproject points
    with rasterio.open(cube_path) as src:
        raster_crs = src.crs
        gdf = gdf_global.to_crs(raster_crs)
        xmin, ymin, xmax, ymax = src.bounds
        foot = box(xmin, ymin, xmax, ymax)
        gdf = gdf[gdf.geometry.within(foot)]
        if len(gdf) == 0:
            print(f"{cube_path}: No silvertrees in this cube")
            continue

        # Sample positive spectra
        pos_cube = []
        for geom in gdf.buffer(buffer_m).centroid:
            spec = sample_window_mean(
                src, geom.x, geom.y, half=2,
                nir_idx=nearest_band_index(wl, 840),
                red_idx=nearest_band_index(wl, 660),
                ndvi_min=0.2,
            )
            if spec is None: continue
            # Remove nodata and outliers
            if nd is not None and np.isfinite(nd):
                spec[spec == nd] = np.nan
            spec[(spec < -0.05) | (spec > 1.5)] = np.nan
            spec = spec[good_bands_cube]
            if np.isfinite(spec).sum() < 0.9 * spec.size:
                continue
            # Preprocess
            spec = brightness_normalise(spec)
            spec = first_derivative(spec, window=11, poly=2)
            pos_cube.append(spec)
        pos_specs_all.extend(pos_cube)

        # Build background sampler
        tree = STRtree(list(gdf.buffer(10.0).geometry))
        rng = np.random.default_rng(seed=random_seed)
        neg_cube = []
        while len(neg_cube) < len(pos_cube):
            x = rng.uniform(xmin, xmax)
            y = rng.uniform(ymin, ymax)
            pt = Point(x, y)
            if not foot.contains(pt) or tree.query(pt).size > 0:
                continue
            spec = sample_window_mean(
                src, x, y, half=2,
                nir_idx=nearest_band_index(wl, 840),
                red_idx=nearest_band_index(wl, 660),
                ndvi_min=0.2,
            )
            if spec is None: continue
            if nd is not None and np.isfinite(nd):
                spec[spec == nd] = np.nan
            spec[(spec < -0.05) | (spec > 1.5)] = np.nan
            spec = spec[good_bands_cube]
            if np.isfinite(spec).sum() < 0.9 * spec.size:
                continue
            spec = brightness_normalise(spec)
            spec = first_derivative(spec, window=11, poly=2)
            neg_cube.append(spec)
        neg_specs_all.extend(neg_cube)

print(f"Collected {len(pos_specs_all)} positive spectra and {len(neg_specs_all)} negative spectra")

data/ALL/ang20231126t084422_005_L2A_OE_main_27577724_RFL_ORT: No silvertrees in this cube
data/ALL/ang20231126t084422_006_L2A_OE_main_27577724_RFL_ORT: No silvertrees in this cube
Collected 297 positive spectra and 297 negative spectra


In [90]:
# --- Compute combined detector ------------------------------------------------

pos_specs_all = np.array(pos_specs_all)
neg_specs_all = np.array(neg_specs_all)
if len(pos_specs_all) == 0 or len(neg_specs_all) == 0:
    raise RuntimeError("No spectra collected; adjust buffer or file paths")

# Global good bands: intersection across all cubes
good_bands_global = global_bbl
wl_good           = global_wavelengths[good_bands_global]

# Keep only those bands in all spectra (they already are trimmed to each cube's bbl, but we align them)
# In this simplified approach, we assume the trimming by each cube's BBL gives the same number/order of bands.
# If not, you’d need to resample or pad to match across cubes.
X_pos = pos_specs_all
X_neg = neg_specs_all

# Target signature and background covariance
target = np.nanmedian(X_pos, axis=0)
mu_b   = np.nanmean(X_neg, axis=0)
X_neg  = np.where(np.isfinite(X_neg), X_neg, mu_b)
C_b    = np.cov(X_neg.T) + 1e-6 * np.eye(X_neg.shape[1])
C_inv  = np.linalg.pinv(C_b)

# Evaluate on held-out split
split_pos = int(0.7 * len(X_pos))
split_neg = int(0.7 * len(X_neg))
scores_pos = ace_scores(X_pos[split_pos:], target, C_inv)
scores_neg = ace_scores(X_neg[split_neg:], target, C_inv)
y_true  = np.r_[np.ones_like(scores_pos), np.zeros_like(scores_neg)]
y_score = np.r_[scores_pos, scores_neg]
roc_auc = roc_auc_score(y_true, y_score)
ap      = average_precision_score(y_true, y_score)
print(f"Combined ACE ROC‑AUC: {roc_auc:.3f}   Average Precision: {ap:.3f}")

Combined ACE ROC‑AUC: 0.698   Average Precision: 0.638


In [None]:
# So there was a slight increase. It puts it relatively close to 0.7, which is okay, but still not good enough. At this point I just have to go 
# to the field and take proper measurements to see the actual accuracy. Till then, 64% precision is all I have lol. 