Goal: Identify the best number of NMF components (k) using reconstruction error and stability metrics.

In [None]:
import os

# Change the working directory to the project-3 root so that relative paths
# resolve from there. The check compares only the last component of the
# current directory; os.path.basename is used instead of splitting on '/'
# so the comparison also works where the path separator is not '/'.
# NOTE(review): '../../../' assumes the notebook sits three levels below the
# project root -- confirm if the notebook is moved.
if os.path.basename(os.getcwd()) != 'project-3':
    os.chdir('../../../')

from src.models.nmf_runner import NMFDecomposer
import numpy as np
import joblib
import matplotlib.pyplot as plt

In [None]:
# Load the saved mutation-matrix bundle and take its 'X' entry as the input
# for the decompositions below.
# NOTE(review): joblib.load unpickles the file; only load files you trust.
matrix = joblib.load(
    "data/processed/mutation_matrix.pkl"
)
X = matrix["X"]

In [None]:
# Candidate component counts: k = 2 .. 14 inclusive.
ks = range(2, 15)
recon_errors, stabilities = [], []

# For every candidate k: fit a decomposition, then record the Frobenius norm
# of the residual X - W @ H and the mean of the values returned by
# get_stability(W).
for k in ks:
    nmf = NMFDecomposer(
        n_components=k,
        objective_function="frobenius",
    )
    W, H = nmf.fit(X)

    residual = X - W @ H
    error = np.linalg.norm(residual, ord="fro")

    # The mean is used as the per-k summary; the standard deviation is a
    # possible alternative.
    stab = np.mean(nmf.get_stability(W))

    recon_errors.append(error)
    stabilities.append(stab)

# Elbow plot: reconstruction error (left y-axis) and stability (right y-axis)
# plotted against k on a shared x-axis.
fig, ax1 = plt.subplots()
(error_line,) = ax1.plot(ks, recon_errors, label="Reconstruction Error")
ax1.set_ylabel("Reconstruction Error")

# The x-label and title are set on ax1 explicitly. After twinx() the current
# axes is the twin, whose x-axis is hidden, so plt.xlabel() would label an
# invisible axis and nothing would be drawn.
ax1.set_xlabel("Number of Components (k)")
ax1.set_title("NMF Component Selection")

ax2 = ax1.twinx()
(stability_line,) = ax2.plot(ks, stabilities, label="Stability", color="orange")
ax2.set_ylabel("Stability")

# A single legend covering both lines; it is attached to ax2 (drawn on top)
# so neither curve is painted over it. Without this the label= values above
# are never shown.
legend_lines = [error_line, stability_line]
ax2.legend(legend_lines, [line.get_label() for line in legend_lines], loc="best")

plt.show()
