Goal: Evaluate the impact of the initialization, normalization method, and objective function on reconstruction error and stability.

In [None]:
import os

# Make the project-3 root the working directory so that the `src.*` import and
# the relative `data/...` paths used below resolve.
# os.path.basename understands the platform's path separator; splitting on '/'
# never matches on Windows, where os.getcwd() returns backslash-separated
# paths, and would climb three levels even when already at the root.
# NOTE(review): assumes the notebook sits three directories below the project
# root -- confirm if the notebook is moved.
if os.path.basename(os.getcwd()) != 'project-3':
    os.chdir('../../../')

from src.models.nmf_runner import NMFDecomposer
import numpy as np
import pandas as pd
import joblib

In [None]:
# Load the preprocessed mutation-matrix bundle and pull out its 'X' entry,
# which is the matrix handed to the NMF fits below.
# NOTE(review): joblib.load unpickles the file, so only load artifacts that
# were produced by this project's own pipeline.
MUTATION_MATRIX_PATH = "data/processed/mutation_matrix.pkl"
matrix = joblib.load(MUTATION_MATRIX_PATH)
X = matrix['X']

In [None]:
# Experiment grid: one result row per (init, norm, objective) combination.
init_methods = ['random', 'nndsvd']
norms = ['row-wise', 'column-wise', 'z-score']
objectives = ['frobenius', 'kullback-leibler']

# Enumerate the grid up front, in the same order the nested loops would visit
# it (init outermost, objective innermost).
experiment_grid = [
    (init, norm, obj)
    for init in init_methods
    for norm in norms
    for obj in objectives
]

results = []
for init, norm, obj in experiment_grid:
    # NOTE(review): `init` and `norm` are only recorded in the result row.
    # Neither is passed to NMFDecomposer nor applied to X in this cell, so rows
    # that differ only in init/norm rerun the same configuration.
    # TODO: wire them in once the NMFDecomposer options are confirmed.
    nmf = NMFDecomposer(n_components=5, objective_function=obj)
    W, H = nmf.fit(X)
    stability = nmf.get_stability(W)

    # The reported error is the Frobenius norm of the residual for every
    # objective, including 'kullback-leibler'.
    residual = X - W @ H
    recon_error = np.linalg.norm(residual, 'fro')

    results.append((init, norm, obj, recon_error, stability))

# Tabulate the runs and show them sorted by reconstruction error.
result_columns = ["init", "norm", "obj", "recon_error", "stability"]
df_results = pd.DataFrame(results, columns=result_columns)
display(df_results.sort_values(by="recon_error"))
