# Libraries

In [1]:
import os

# Move to the parent directory so the project-local imports below
# (`functions`, `models.nmf`) resolve against the working directory.
# The target is resolved to an absolute path the first time only, so
# re-running this cell does not keep climbing one directory per execution.
if "PROJECT_ROOT" not in globals():
    PROJECT_ROOT = os.path.abspath("..")
os.chdir(PROJECT_ROOT)

import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from sklearn_extra.cluster import KMedoids
from functions import cosmic_val
from models.nmf import NMF_mult_tol
from sklearn.decomposition import NMF as nmf_sklearn
import matplotlib.pyplot as plt


# Data and common initial exposure

In [2]:
# Number of samples simulated for each of the three exposure profiles
# (the full data set therefore has 3 * N rows).
N = 10
# Factorisation rank: passed to NMF_mult_tol as `rank` and used as the
# number of columns of the initial exposure matrix E_init.
LATENT_DIM = 2
# Passed to NMF_mult_tol as `tol` (presumably its convergence threshold).
TOLERANCE = 1e-10
# NOTE(review): not referenced by any cell visible in this part of the
# notebook — confirm whether it should be forwarded to NMF_mult_tol.
MAX_ITERATIONS = 100_000_000

The data are Poisson counts drawn from three different mixtures of the two signatures, stacked into a matrix of shape $(30, 6)$ (30 samples, 6 features).

In [3]:
# Two ground-truth signatures over 6 features; each one sums to 1.
signature1 = np.array([2]*2 + [1]*2 + [0]*2) / 6
signature2 = np.array([0]*3 + [2]*3) / 6
signature_mat = np.vstack((signature1, signature2))  # shape (2, 6)

# Ground-truth exposures: N samples for each of three mixing profiles
# (mostly signature 1, an even mix, mostly signature 2).
total_n = 3 * N
exposures = np.array([180, 20] * N + [100, 100] * N + [20, 180] * N).reshape(total_n, 2)

# Seeded generator so that Restart & Run All reproduces the same counts.
data_rng = np.random.default_rng(42)

# The Poisson means are derived from `exposures` rather than re-typed, so the
# simulated data cannot drift away from the recorded ground truth.
expected_counts = exposures @ signature_mat  # shape (3 * N, 6)
XMat = data_rng.poisson(expected_counts)

# Per-profile slices, kept under their original names (N rows each).
XMat1, XMat2, XMat3 = np.split(XMat, 3)

In [4]:
# Samples-by-features view of the simulated counts, used only to report the
# shape. (The original cell called .head() here without displaying it.)
XMat_samples = pd.DataFrame(XMat)

print(XMat_samples.shape)

# XMat_pd is the transposed (features x samples) layout; this is the matrix
# handed to NMF_mult_tol further down.
XMat_pd = XMat_samples.T

(30, 6)


In [5]:
# Common initial exposures: drawn once, uniformly on [0, 1), and reused for
# every NMF run. A dedicated seeded generator keeps the starting point
# reproducible on a fresh kernel.
init_rng = np.random.default_rng(0)
E_init = init_rng.random((XMat.shape[0], LATENT_DIM))

print(E_init.shape)

(30, 2)


# Testing

In [6]:
losses_train = []
iterations = 100

# The input matrix is the same for every run, so convert it once instead of
# calling .to_numpy() inside the loop.
X_train = XMat_pd.to_numpy()

for _ in tqdm(range(iterations)):

    # Fit NMF starting from the shared initial exposures.
    signatures_nmf, exposures_nmf, loss_nmf, _, _, n_iter_nmf = NMF_mult_tol(
        X_train,
        rank=LATENT_DIM,
        tol=TOLERANCE,
        mse=True,
        G_0=E_init.T,
    )

    # Rescale so every signature column sums to 1 and move the scale into the
    # exposures; the product of the two factors is unchanged. Broadcasting
    # over the column sums replaces the dense np.diag matrix products.
    # NOTE: a signature column summing to 0 would produce inf/nan here.
    diagonals_nmf = signatures_nmf.sum(axis=0)
    exposures_nmf = exposures_nmf.T * diagonals_nmf
    signatures_nmf = signatures_nmf / diagonals_nmf

    # Keep only the final loss value of this run.
    losses_train.append(loss_nmf[-1])

# After the loop, signatures_nmf / exposures_nmf hold the last run only.
    



100%|██████████| 100/100 [00:02<00:00, 47.91it/s]


In [7]:
# Average of the final loss values collected across all NMF runs.
mean_train_loss = np.mean(losses_train)
print("Losses train: ", mean_train_loss)

Losses train:  23.705507948396853


In [8]:
# Signatures from the last of the repeated NMF runs only (earlier runs were
# overwritten inside the loop); each column has been rescaled to sum to 1.
print(signatures_nmf)

[[0.00693442 0.34627321]
 [0.01479114 0.32038488]
 [0.01129534 0.14186093]
 [0.33664735 0.16997744]
 [0.30477861 0.01707972]
 [0.32555315 0.00442382]]
