In [63]:
import numpy as np

class FactorMatrix:
    """Incremental principal-factor analysis over batches of data.

    Batches are folded into three running statistics (sum of x x^T, sum of x,
    and the number of datums), so the raw data never has to be kept around.
    `find_factor` turns those statistics into the covariance matrix and
    eigen-decomposes it.

    Attributes set by `find_factor`:
        _mean  : per-feature mean of everything seen so far, shape (f,)
        info   : covariance eigenvalues in descending order, normalised so
                 that they sum to 1
        factor : (f, f) array whose columns are the matching eigenvectors
    """

    def __init__(self, in_feature):
        """Create empty accumulators for data with `in_feature` features."""
        self.gram = np.zeros((in_feature, in_feature))
        self.data_sum = np.zeros((in_feature))
        self.datum_acc = 0

    def reset(self):
        """Clear the accumulated statistics.

        Results of an earlier `find_factor` call (`info`, `factor`, `_mean`)
        are left untouched.
        """
        # fill(0) instead of `*= 0`: multiplying would keep NaN/inf entries
        # as NaN and leave the accumulator permanently poisoned.
        self.gram.fill(0)
        self.data_sum.fill(0)
        self.datum_acc = 0

    def data_input(self, data):
        """Fold one batch into the running statistics.

        data : n*f array (or array-like), n datums with f features
        """
        data = np.asarray(data)
        self.data_sum += data.sum(axis=0)
        self.gram += np.dot(data.T, data)
        self.datum_acc += len(data)

    def find_factor(self):
        """Eigen-decompose the covariance of all data seen so far.

        Stores `_mean`, `info` and `factor` on the instance (see the class
        docstring).

        Raises:
            ValueError: if no data has been accumulated yet.
        """
        if self.datum_acc == 0:
            raise ValueError("input data before analyze")

        mean_gram = self.gram/self.datum_acc
        self._mean = self.data_sum/self.datum_acc

        # cov = E[x x^T] - mean mean^T. The second term must be the (f, f)
        # outer product; an inner product would be the scalar |mean|^2 and
        # would be subtracted from every entry of the matrix.
        covariance = mean_gram - np.outer(self._mean, self._mean)

        # eigh returns eigenvalues in ascending order; flip both outputs so
        # that the strongest factor comes first.
        eigenvalues, eigenvectors = np.linalg.eigh(covariance)
        self.info = eigenvalues[::-1]
        self.info /= self.info.sum()
        self.factor = eigenvectors[:, ::-1]

    def analyze(self, data, pick_factor):
        """Project `data` onto the first `pick_factor` factors.

        data        : n*f array
        pick_factor : number of factor columns to keep

        Returns an n*pick_factor array. The data is projected as given; the
        stored mean is not subtracted here. `find_factor` must have been
        called first.
        """
        return np.dot(data, self.factor[:, :pick_factor])

# test

In [90]:
# Synthetic data set: three independent standard-normal latent factors, each
# scaled (by 3, 2 and 1) and laid along one axis of an orthonormal basis.
factor = np.random.normal(0, 1, (1000000, 3))
data = np.zeros((1000000, 3))

for column, (scale, direction) in enumerate([
    (3, [-0.6, 0.8, 0]),
    (2, [0.8, 0.6, 0]),
    (1, [0, 0, 1]),
]):
    # (n, 1) factor column times (1, 3) direction row -> (n, 3) contribution
    data += scale*np.dot(factor[:, [column]], np.array([direction]))

In [91]:
# Fit the analyser on the synthetic data in a single batch.
test = FactorMatrix(3)
test.data_input(data)
test.find_factor()

# Worst-case gap between the recovered projections and the scaled latent
# factors that generated the data; the cell displays this value.
projected = test.analyze(data, 3)
expected = factor*np.array([3,2,1])
np.abs(projected - expected).max()

0.018028967808040752

In [92]:
# `info` is normalised to sum to 1. The generating factors were scaled by
# 3, 2 and 1, so the total population variance is 3**2 + 2**2 + 1**2 = 14;
# rescaling by it should give values close to 9, 4 and 1.
total_variance = sum(s**2 for s in (3, 2, 1))
test.info*total_variance

array([9.00809129, 3.99389147, 0.99801723])