In [8]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import normalize
from scipy.optimize import nnls
import itertools

In [9]:
# --- Read the two CSV inputs ---
ref_df = pd.read_csv("reference_siamese.csv")
query_df = pd.read_csv("query_only_mixed.csv")

# --- Extract Labels ---
# Pull the two label columns out as an array, then rebind query_df to the
# frame without them so only the remaining columns are left for later cells.
label_columns = ["Label 1", "Label 2"]
query_labels = query_df[label_columns].values
query_df = query_df.drop(columns=label_columns)

In [10]:
def baseline_AsLS(y, lam=1e4, p=0.01, niter=10):
    """Estimate a smooth baseline of ``y`` by iteratively reweighted least squares.

    Each iteration solves ``(diag(w) + lam * D @ D.T) b = w * y`` where ``D``
    is the second-order difference of the identity matrix, then recomputes the
    weights: ``p`` where ``y`` lies above the current baseline, ``1 - p`` where
    it lies below, and 0 where the two are exactly equal.

    Parameters
    ----------
    y : array-like, 1-D
        Signal whose baseline is estimated.
    lam : float
        Multiplier of the second-difference penalty.
    p : float
        Weight given to points above the baseline (``1 - p`` for points below).
    niter : int
        Number of reweighting iterations.

    Returns
    -------
    numpy.ndarray
        Baseline from the last iteration, one value per element of ``y``.
    """
    n_points = len(y)
    second_diff = np.diff(np.eye(n_points), 2)
    penalty = lam * second_diff.dot(second_diff.T)
    weights = np.ones(n_points)
    for _ in range(niter):
        system = np.diag(weights) + penalty
        rhs = weights * y
        b = np.linalg.solve(system, rhs)
        # Strict comparisons on both sides: ties receive a weight of zero.
        above = y > b
        below = y < b
        weights = p * above + (1 - p) * below
    return b

def preprocess(arr, lam=1e4, p=0.01, niter=10):
    """Baseline-correct, L2-normalise and rectify every row of ``arr``.

    For each row the baseline returned by ``baseline_AsLS`` is subtracted, the
    result is divided by its Euclidean norm (left unscaled when that norm is
    zero), and the element-wise absolute value is stored.

    Parameters
    ----------
    arr : array-like, 2-D
        One signal per row.
    lam, p, niter
        Forwarded unchanged to ``baseline_AsLS``.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``arr``. Floating-point inputs keep their
        dtype; any other dtype (e.g. integer intensities) yields ``float64``.
    """
    spectra = np.asarray(arr)
    # np.zeros_like(arr) would inherit an integer dtype from integer input, and
    # the normalised values (all within [0, 1]) would then be truncated to 0
    # when written into the output. Always allocate a floating-point buffer.
    if np.issubdtype(spectra.dtype, np.floating):
        out_dtype = spectra.dtype
    else:
        out_dtype = np.float64
    out = np.zeros(spectra.shape, dtype=out_dtype)
    for i, spec in enumerate(spectra):
        bkg = baseline_AsLS(spec, lam=lam, p=p, niter=niter)
        corr = spec - bkg
        nrm = np.linalg.norm(corr)
        # Guard against dividing an all-zero corrected row by zero.
        normed = corr / nrm if nrm else corr
        out[i] = np.abs(normed)
    return out

In [11]:
# Split the reference frame into its label vector and the remaining columns,
# and take the query frame as-is (its label columns were dropped earlier).
ref_labels = ref_df["Label"].values
ref_spectra = ref_df.drop(columns=["Label"]).values
query_spectra = query_df.values

# Run both matrices through preprocess with its default settings.
ref_proc, query_proc = (
    preprocess(matrix) for matrix in (ref_spectra, query_spectra)
)

In [15]:
# Label -> spectrum lookup, kept for any later cell that relies on it.
# NOTE: if a label occurs on several reference rows this dict retains only the
# LAST row for that label, so the search below indexes reference rows directly.
label_to_spec = dict(zip(ref_labels, ref_proc))

# --- Perform linear unmixing via NNLS for each query spectrum ---
# Candidate pairs are pairs of reference ROWS carrying two different labels.
# itertools.combinations works on positions, not values: combining the raw
# label array would emit same-label pairs and repeat identical label pairs
# whenever a label appears more than once. With unique labels this list is
# exactly the set of pairs (in the same order) that combining labels produces.
candidate_pairs = [
    (i, j)
    for i, j in itertools.combinations(range(len(ref_labels)), 2)
    if ref_labels[i] != ref_labels[j]
]

predictions = []
for q_spec in query_proc:
    best_error = np.inf
    best_pair = (None, None)
    best_weights = (0, 0)
    for i, j in candidate_pairs:
        # Two-column design matrix: one reference spectrum per column.
        A = np.vstack([ref_proc[i], ref_proc[j]]).T
        weights, _ = nnls(A, q_spec)
        recon = A @ weights
        error = np.linalg.norm(q_spec - recon)
        # Strict '<' keeps the first pair encountered when errors tie.
        if error < best_error:
            best_error = error
            best_pair = (ref_labels[i], ref_labels[j])
            best_weights = weights
    # Stays (None, None) when there are no candidate pairs at all.
    predictions.append(best_pair)

# --- Evaluate accuracy ---
# A query counts as a hit when its predicted pair shares at least one label
# with the true label pair of that query.
shared_label_flags = [
    bool(set(pred).intersection(true))
    for pred, true in zip(predictions, query_labels)
]
correct_1 = sum(shared_label_flags)

accuracy_top1 = correct_1 / len(query_labels)

accuracy_top1

0.16666666666666666