# Imports

In [1]:
from poisson_and_normalize import load_matrix, one_iteration
import pandas as pd
import numpy as np
from sklearn.decomposition import NMF
import os

# Load Files

In [3]:
# Read the input matrix from "mutation_matrix.csv" (path is relative to the
# notebook's working directory) using the project helper `load_matrix`.
# NOTE(review): the return type of `load_matrix` is not visible in this
# notebook; `M` is only ever passed on to `one_iteration` below.
M = load_matrix("mutation_matrix.csv")

# Preprocessing and simulation

In [4]:
# Number of simulated replicates. The NMF cell below indexes `iters[i]` for
# every `i in range(total_iter)`, so the list length must match this constant.
total_iter = 100

# One replicate per seed (seed == replicate index). Driving the range from
# `total_iter` instead of a second hard-coded 100 keeps the two in sync when
# the replicate count is changed.
iters = [one_iteration(M, seed=i) for i in range(total_iter)]

# NMF

In [63]:
# For every candidate rank K in 8..25 and every simulated replicate, factorize
# the column-normalized "gmm" matrix with NMF (V ~= W @ H) and write both
# factors to CSV:
#   Output/Signature/dim_{K}/signature_K{K}_run{i}.csv  <- W (rows = input rows)
#   Output/Exposure/dim_{K}/exposure_K{K}_run{i}.csv    <- H (cols = input cols)
# The same NMF seed is used for every fit; run-to-run variation comes from the
# differing replicate inputs.
random_state_num = 0
for K in range(8, 26):
    # The output folders and component labels depend only on K, so build them
    # once per rank instead of once per replicate.
    signature_dir = f"Output/Signature/dim_{K}"
    exposure_dir = f"Output/Exposure/dim_{K}"
    os.makedirs(signature_dir, exist_ok=True)
    os.makedirs(exposure_dir, exist_ok=True)
    sig_labels = [f"Sig_{j+1}" for j in range(K)]

    for i in range(total_iter):
        input_temp = iters[i]["gmm"]
        V = input_temp.values

        # Scale each column to sum to 1. `where=` skips zero-sum columns, and
        # NumPy leaves skipped positions *uninitialized* unless `out` is given,
        # so supply a zero-filled buffer: zero-sum columns stay all-zero
        # instead of containing arbitrary memory.
        col_sums = V.sum(axis=0, keepdims=True)
        V = np.divide(V, col_sums, out=np.zeros(V.shape, dtype=float), where=col_sums != 0)

        # NMF requires non-negative input; clip any negatives to 0.
        # NOTE(review): clipping happens after normalization, so a column that
        # contained negatives will no longer sum to exactly 1 - confirm intended.
        V[V < 0] = 0

        nmf_model = NMF(n_components=K, init='nndsvdar', solver='mu',
                        beta_loss='kullback-leibler', max_iter=10000, random_state=random_state_num)
        W = nmf_model.fit_transform(V)
        H = nmf_model.components_

        signature_df = pd.DataFrame(W, index=input_temp.index, columns=sig_labels)
        exposure_df = pd.DataFrame(H, columns=input_temp.columns, index=sig_labels)

        # Save to nested folders
        signature_df.to_csv(f"{signature_dir}/signature_K{K}_run{i}.csv")
        exposure_df.to_csv(f"{exposure_dir}/exposure_K{K}_run{i}.csv")

    print(f"Done: K={K}")


Done: K=8
Done: K=9
Done: K=10
Done: K=11
Done: K=12
Done: K=13
Done: K=14
Done: K=15
Done: K=16
Done: K=17
Done: K=18
Done: K=19
Done: K=20
Done: K=21
Done: K=22
Done: K=23
Done: K=24
Done: K=25
