## Matrix Decomposition on PRECISE-1K

## Import Packages and Datasets

In [25]:
import os
import numpy as np
import pandas as pd
from sklearn.decomposition import (DictionaryLearning, FactorAnalysis, FastICA, IncrementalPCA, KernelPCA,
                                   LatentDirichletAllocation, NMF, PCA, SparseCoder, SparsePCA, TruncatedSVD,
                                   dict_learning, dict_learning_online, fastica, non_negative_factorization,
                                   sparse_encode)

In [26]:
# Load the P1K_X table; the first CSV column becomes the row index.
P1K_X = pd.read_csv(filepath_or_buffer='../Data/P1K_X.csv', index_col=0)

## Useful Functions

In [27]:
def save_decomposed_matrices(method_name, M, A, rows=None, columns=None):
    """Write the two factor matrices of a decomposition to CSV files.

    The files ``M_<method_name>.csv`` and ``A_<method_name>.csv`` are written
    into ``<results_folder>/<method_name with spaces replaced by "_">``; the
    folder is created if it does not exist yet. Component columns are named
    ``Component_1`` ... ``Component_k``.

    Parameters
    ----------
    method_name : str
        Label of the decomposition method; used for folder and file names.
    M : 2-D array
        Matrix with one row per entry of ``rows`` and one column per component.
    A : 2-D array
        Matrix with one row per entry of ``columns`` and one column per
        component.
    rows : index-like, optional
        Row labels for ``M``. Defaults to ``P1K_X.index``.
    columns : index-like, optional
        Row labels for ``A``. Defaults to ``P1K_X.columns``.
    """
    # Resolve the defaults at call time rather than at definition time, so the
    # function can be defined before P1K_X is loaded and always uses the
    # current P1K_X labels.
    if rows is None:
        rows = P1K_X.index
    if columns is None:
        columns = P1K_X.columns

    # `results_folder` is a notebook-level setting read when the call happens.
    folder_name = os.path.join(results_folder, method_name.replace(" ", "_"))
    os.makedirs(folder_name, exist_ok=True)

    M_df = pd.DataFrame(M, index=rows, columns=[f'Component_{i+1}' for i in range(M.shape[1])])
    A_df = pd.DataFrame(A, index=columns, columns=[f'Component_{i+1}' for i in range(A.shape[1])])
    M_df.to_csv(os.path.join(folder_name, f'M_{method_name}.csv'))
    A_df.to_csv(os.path.join(folder_name, f'A_{method_name}.csv'))

## Set Parameters

In [28]:
# Number of components requested from every decomposition model in this
# notebook (passed as `n_components=` to each estimator).
n_components = 250

In [29]:
# Random state for reproducibility; passed as `random_state=` to the
# estimators below that are constructed with one.
random_state = 42

In [30]:
# Top-level output directory (relative path), created here if it is missing.
results_folder = 'Results'
os.makedirs(name=results_folder, exist_ok=True)

## Running methods

### NMF

In [31]:
# Load the P1K_log_tpm table (input to NMF); the first CSV column is the index.
P1K_log_tpm = pd.read_csv(filepath_or_buffer='../Data/P1K_log_tpm.csv', index_col=0)

In [32]:
# NMF is run on P1K_log_tpm rather than P1K_X.
# NOTE(review): presumably because NMF needs non-negative input and P1K_X
# contains negative values - confirm.

# Step 1: Initialize the NMF model
nmf = NMF(n_components=n_components, random_state=random_state, max_iter=50000, tol=0.001)

# Step 2: Fit and transform the data to get the first matrix
# (one row per row of P1K_log_tpm, one column per component)
M_nmf = nmf.fit_transform(P1K_log_tpm)

# Step 3: Get the components (second matrix), transposed so that it has
# one row per column of P1K_log_tpm and one column per component
A_nmf = nmf.components_.T

# Step 4: Save the decomposed matrices, labelled with the index/columns of the
# matrix that was actually factorized instead of the helper's P1K_X defaults
save_decomposed_matrices('NMF', M_nmf, A_nmf,
                         rows=P1K_log_tpm.index, columns=P1K_log_tpm.columns)



In [33]:
# Sanity check: rows of P1K_log_tpm x n_components.
M_nmf.shape

(4257, 250)

In [34]:
# Sanity check: columns of P1K_log_tpm x n_components (components_ transposed).
A_nmf.shape

(1035, 250)

### PCA

In [35]:
# PCA: fit_transform yields the per-row scores (M); the transposed
# components_ give one row per column of P1K_X (A).
pca = PCA(random_state=random_state, n_components=n_components)
M_pca = pca.fit_transform(X=P1K_X)
A_pca = np.transpose(pca.components_)
save_decomposed_matrices(method_name='PCA', M=M_pca, A=A_pca)

### Dictionary Learning

In [38]:
# NOTE(review): this cell is commented out, yet the cells below still use
# `dict_learn`, `A_dict_learn` and `M_dict_learn`. In a fresh kernel they will
# raise NameError unless this cell is re-enabled and run first.
# The naming is swapped relative to the other methods: `A_dict_learn` holds the
# fit_transform output and `M_dict_learn` holds `components_`.
# # Dictionary Learning
# dict_learn = DictionaryLearning(n_components=n_components, random_state=random_state)
# A_dict_learn = dict_learn.fit_transform(P1K_X)
# M_dict_learn = dict_learn.components_

In [39]:
# Despite the `A_` prefix, this is the matrix that is saved as M further down.
A_dict_learn.shape

(4257, 250)

In [40]:
# Despite the `M_` prefix, this is transposed and saved as A further down.
M_dict_learn.shape

(250, 1035)

In [41]:
# The variable names are swapped relative to the other methods, so
# `A_dict_learn` is passed as M and the transposed `M_dict_learn` as A.
save_decomposed_matrices(
    method_name='Dictionary_Learning',
    M=A_dict_learn,
    A=np.transpose(M_dict_learn),
)

### SparseCoder

In [42]:
# SparseCoder: encode P1K_X against the dictionary learned by `dict_learn`
# (requires the Dictionary Learning model to exist in the kernel).
sparse_coder = SparseCoder(dictionary=dict_learn.components_)
# Pass a plain ndarray: SparseCoder is never fitted on named features, so
# handing it a DataFrame triggers scikit-learn's "X has feature names, but
# SparseCoder was fitted without feature names" warning. The values are the same.
M_sparse_coder = sparse_coder.transform(P1K_X.to_numpy())
# One row per column of P1K_X, one column per dictionary atom.
A_sparse_coder = dict_learn.components_.T
save_decomposed_matrices('SparseCoder', M_sparse_coder, A_sparse_coder)

  f"X has feature names, but {self.__class__.__name__} was fitted without"


In [43]:
# Sanity check: columns of P1K_X x n_components (dictionary transposed).
A_sparse_coder.shape

(1035, 250)

In [44]:
# Sanity check: rows of P1K_X x n_components (sparse codes).
M_sparse_coder.shape

(4257, 250)

### Factor Analysis

In [45]:
# Factor Analysis: M is the fit_transform output; A keeps components_ in its
# native (component x column) orientation and is transposed when saved.
fa = FactorAnalysis(random_state=random_state, n_components=n_components)
M_fa = fa.fit_transform(X=P1K_X)
A_fa = fa.components_

In [46]:
# Sanity check: n_components x columns of P1K_X (not yet transposed).
A_fa.shape

(250, 1035)

In [47]:
# Sanity check: rows of P1K_X x n_components.
M_fa.shape

(4257, 250)

In [48]:
# A_fa is (component x column), so transpose it to the (column x component)
# layout the helper expects.
save_decomposed_matrices(
    method_name='Factor_Analysis',
    M=M_fa,
    A=np.transpose(A_fa),
)

### FastICA

In [49]:
# FastICA: M is the fit_transform output; A is the estimated mixing matrix,
# which already has one column per component, so no transpose is needed.
fast_ica = FastICA(random_state=random_state, n_components=n_components)
M_fast_ica = fast_ica.fit_transform(X=P1K_X)
A_fast_ica = fast_ica.mixing_
save_decomposed_matrices(method_name='Fast_ICA', M=M_fast_ica, A=A_fast_ica)

In [50]:
# Sanity check: rows of P1K_X x n_components.
M_fast_ica.shape

(4257, 250)

In [51]:
# Sanity check: columns of P1K_X x n_components (mixing matrix).
A_fast_ica.shape

(1035, 250)

### Incremental PCA

In [52]:
# IncrementalPCA: same M/A layout as the PCA cell; the estimator is built
# without a random_state argument.
ipca = IncrementalPCA(n_components=n_components)
M_ipca = ipca.fit_transform(X=P1K_X)
A_ipca = np.transpose(ipca.components_)
save_decomposed_matrices(method_name='IncrementalPCA', M=M_ipca, A=A_ipca)