In [None]:
import h5py
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import NMF

def plot_matrix_and_factors(file_path, n_components=2):
    """Load a matrix from an HDF5 file, print diagnostics, and plot its NMF factors.

    The function reads the first top-level entry of the HDF5 file, prints
    summary statistics, shows the row-wise correlation matrix, factorizes the
    matrix with scikit-learn's NMF, and plots the matrix and the columns of W.

    Args:
        file_path: Path to the HDF5 file to read.
        n_components: Number of NMF components (columns of W). Defaults to 2.

    Returns:
        The W factor returned by ``NMF.fit_transform``; it has one column per
        component.

    Raises:
        IndexError: If the HDF5 file has no top-level entries.
        ValueError: If the matrix contains negative values, which NMF cannot
            factorize. The statistics and the correlation plot are still
            produced before this is raised.
    """
    # Load the matrix from the HDF5 file.
    with h5py.File(file_path, 'r') as hf:
        # Only the first top-level key is read; any other entries are ignored.
        # NOTE(review): assumes that key is a dataset, not a group - confirm.
        dataset_name = list(hf.keys())[0]
        matrix = hf[dataset_name][:]

    # Remember whether negatives exist: NMF below requires non-negative input.
    has_negative = bool(np.any(matrix < 0))
    if has_negative:
        print("Warning: The matrix contains negative values.")
    else:
        print("No negative values found in the matrix.")

    # Calculate and print various statistics of the matrix.
    norm = np.linalg.norm(matrix)
    nonzero_count = np.count_nonzero(matrix)
    min_value = np.min(matrix)
    max_value = np.max(matrix)
    total_entries = matrix.size

    print(f"Norm of the matrix: {norm}")
    print(f"Number of nonzero values: {nonzero_count}")
    print(f"Minimum value in the matrix: {min_value}")
    print(f"Maximum value in the matrix: {max_value}")
    print(f"Total number of entries: {total_entries}")

    # np.corrcoef treats each ROW as a variable by default (rowvar=True), so
    # the result is a rows-by-rows matrix; the axes are labelled accordingly.
    correlation_matrix = np.corrcoef(matrix)

    # Plot the correlation matrix.
    plt.figure(figsize=(10, 8))
    plt.imshow(correlation_matrix, aspect='auto', cmap='viridis')
    plt.colorbar(label='Correlation values')
    plt.title('Correlation Matrix')
    plt.xlabel('Rows')
    plt.ylabel('Rows')
    plt.show()

    # Fail with an explicit message instead of letting the factorization
    # reject the data; the exception type matches what scikit-learn raises.
    if has_negative:
        raise ValueError(
            f"Cannot run NMF: the matrix contains negative values "
            f"(minimum is {min_value}). Shift or clip the data so that it is "
            f"non-negative before factorizing."
        )

    # Factorize matrix ~= W @ H. This is the general NMF with independently
    # fitted W and H, not a symmetric factorization.
    # 'nndsvda' is used rather than 'nndsvd' because the multiplicative-update
    # solver cannot change entries that are initialized to exactly zero.
    model = NMF(
        n_components=n_components,
        init='nndsvda',
        solver='mu',
        beta_loss='frobenius',
        max_iter=1000,
        random_state=42,
    )
    W = model.fit_transform(matrix)
    H = model.components_

    print(W.shape)
    print(H.shape)

    # Plot the matrix.
    plt.figure(figsize=(10, 8))
    plt.imshow(matrix, aspect='auto', cmap='viridis')
    plt.colorbar(label='Matrix values')
    plt.title('Example Matrix Plot')
    plt.xlabel('Columns')
    plt.ylabel('Rows')
    plt.show()

    # Plot each column of W (one curve per component) on the same axes.
    plt.figure(figsize=(10, 8))
    for i in range(W.shape[1]):
        plt.plot(W[:, i], label=f'Component {i+1}')
    plt.title('Columns of W')
    plt.xlabel('Samples')
    plt.ylabel('Values')
    plt.legend()
    plt.show()

    return W

# Example usage: run the analysis on one HDF5 file with a two-component
# factorization. The path is relative, so it is resolved against the current
# working directory. The returned W factor is kept in the module-level name W.
file_path = '../../../projects/single_cell_files/methy_1Mb_outerproduct_dir/chr1/sc24.CGATGT_outer_product.h5'
W = plot_matrix_and_factors(file_path, n_components=2)


Norm of the matrix: 11.859284752811536
Number of nonzero values: 62001
Minimum value in the matrix: -0.19453684858556777
Maximum value in the matrix: 0.24916274535690303
Total number of entries: 62001
