In [18]:
from sklearn.decomposition import NMF, IncrementalPCA
from scipy.stats import ortho_group
from scipy.linalg import eigh
import fbpca
import numpy as np
import matplotlib.pyplot as plt

In [2]:
loading = np.random.normal(loc=0, scale=1, size=(256, 256)) # create loading matrix
components = ortho_group.rvs(dim=256)[:, :16] # create orthonomal matrix

In [3]:
print(components.shape)
print(loading.shape)

(256, 16)
(256, 256)


In [4]:
data = loading @ components
data = data.reshape(16, 16, 4, 4)

In [5]:
class IPCA:

    def __init__(self, K, D, Uhat0=None, sigma2_0=None, tol=1e-7):

        if Uhat0 is not None:
            assert Uhat0.shape == (D, K), "The shape of the initial guess Uhat0 must be (D,K)=(%d,%d)" % (D, K)
            self.Uhat = Uhat0.copy()

        else:
            # random initalization if not provided
            self.Uhat = np.random.normal(loc=0, scale=1 / D, size=(D, K))

        self.t = 1

        if sigma2_0 is not None:
            assert sigma2_0.shape == (K,), "The shape of the initial guess lambda0 must be (K,)=(%d,)" % (K)
            self.sigma2 = sigma2_0.copy()
        else:
            self.sigma2 = np.abs(np.random.normal(0, 1, (K,))) / np.sqrt(K)

        self.K = K
        self.D = D
        self.f = 1.0 / self.t
        self.tol = tol

    def fit_next(self, x):

        assert x.shape == (self.D,)
        self.t += 1
        self.f = 1.0 / self.t

        t, f, sigma2, Uhat, K, tol = self.t, self.f, self.sigma2, self.Uhat, self.K, self.tol

        sigma2 = (1 - f) * sigma2
        x = np.sqrt(f) * x

        # Project X into current estimate and check residual error
        y     = Uhat.T.dot(x)
        x     = x - Uhat.dot(y)
        normx = np.sqrt(x.dot(x))

        if (normx >= tol):
            sigma2 = np.concatenate((sigma2, [0]))
            y = np.concatenate((y, [normx]))
            Uhat = np.concatenate((Uhat, x[:, np.newaxis] / normx), 1)

        M = np.diag(sigma2) + np.outer(y, y.T)
        d, V = eigh(M, overwrite_a=True)

        idx    = np.argsort(d)[::-1]
        sigma2 = d[idx][:K]
        V      = V[:, idx]
        Uhat   = Uhat.dot(V[:, :K])

        self.Uhat   = Uhat
        self.sigma2 = sigma2

    def get_components(self, orthogonalize=True):

        components = np.asarray(self.Uhat)
        if orthogonalize:
            components, _ = np.linalg.qr(components)

        return components


In [6]:
def process_frame(ipca, frame):
    """
    Process frame as it is done in libertem architecture
    """
    frame_flatten = frame.reshape(frame.size,)
    ipca.fit_next(frame_flatten)

In [7]:
def process_partitions(prev, partitions):
    """
    Process partitions as it is done in libertem architecture
    i.e., stack up partitions, which are components vectors
    """
    return np.vstack([prev, partitions])

In [8]:
def perform_pca(component, n_components=10):
    """
    Perform PCA
    """
    U, S, V = fbpca.pca(component, k=n_components)
    
    loading = U @ S
    component = V
    
    return loading, component
    

In [9]:
def partition(data, num_part=4):
    """
    Partition the data as it is done in libertem architecture
    """
    nav_row, nav_col, sig_row, sig_col = data.shape
    
    partitions = []
    
    for i in range(0, nav_row, num_part):
        for j in range(0, nav_col, num_part):
            partitions.append(data[i:i+num_part, j:j+num_part, :, :].copy())

    return partitions

In [10]:
def fetch_frame(partition):
    """
    Take a frame from a partition as it is done in libertem architecture
    """
    frames = []
    
    part_row, part_col, sig_row, sig_col = partition.shape
    
    for i in range(part_row):
        for j in range(part_col):
            frames.append(partition[i, j, :, :].copy())
            
    return frames

In [11]:
def PCA(data):
    """
    Perform PCA as it is done in liberteme architecture under UDF interface
    """
    sig_dim = data.shape[2] * data.shape[3]
    partitions = partition(data)
    i = False
    
    for part in partitions:
        frames = fetch_frame(part)
        
        ipca = IPCA(K=16, D=sig_dim)

        for frame in frames:
            process_frame(ipca, frame)

        part_comp = ipca.get_components()
        
#         print(part_comp.shape)
        if not i:
            components = part_comp
        else:
            components = process_partitions(components, part_comp)
            
        i = True
    
#     print(components.shape)
    loading, component = perform_pca(components, n_components=16)
    
    return loading, component
        
        
            

In [12]:
loading, component = PCA(data)

In [13]:
loading.shape, component.shape, data.shape

((256,), (16, 16), (16, 16, 4, 4))

In [14]:
data = data.reshape((data.shape[0]*data.shape[1], data.shape[2]*data.shape[3]))

In [15]:
projected_data = data @ component

In [16]:
print(projected_data.shape)

(256, 16)


In [17]:
fig, axes = plt.subplots()
axes.imshow(projected_data)

NameError: name 'plt' is not defined