In [1]:
import numpy as np
from tensorflow import keras
import matplotlib.pyplot as plt
import time
from functools import reduce

In [2]:
# Three all-ones matrices whose shapes chain: (5, 5) @ (5, 6) @ (6, 1).
a, b, c = (np.ones(shape) for shape in [(5, 5), (5, 6), (6, 1)])

In [3]:
# Chain product with the @ operator (evaluated left to right).
a @ b @ c

array([[30.],
       [30.],
       [30.],
       [30.],
       [30.]])

In [4]:
# Same chain product through np.linalg.multi_dot, which takes the whole list at once.
np.linalg.multi_dot([a, b, c])

array([[30.],
       [30.],
       [30.],
       [30.],
       [30.]])

In [5]:
# Same chain product as a left fold of np.dot over the list.
reduce(np.dot, [a, b, c])

array([[30.],
       [30.],
       [30.],
       [30.],
       [30.]])

In [6]:
# Loss functions
def _mean_squared_error(y, pred):
    return 0.5 * np.mean((y - pred) ** 2)

def _mean_abs_error(y, pred):
    return np.mean(np.abs(y, pred))

# Activation functions
def _sigmoid(x):
    return 1. / (1. + np.exp(-x))

def _fourier(x):
    return np.sin(x)

def _identity(x):
    """Identity activation: return ``x`` unchanged (the same object, no copy)."""
    return x

def _hardlimit(x):
    return (x >= 0).astype(int)

# Lookup helpers: resolve an activation or loss function by name
def getActivation(name):
    """Return the activation function registered under ``name``.

    Args:
        name: one of ``'sigmoid'``, ``'fourier'``, ``'hardlimit'`` or
            ``'identity'``.

    Returns:
        The matching callable.

    Raises:
        KeyError: if ``name`` is not a registered activation.
    """
    return {
        'sigmoid': _sigmoid,
        'fourier': _fourier,
        'hardlimit': _hardlimit,
        # _identity is defined next to the other activations but was never
        # reachable by name; register it so 'identity' can be requested.
        'identity': _identity,
    }[name]

def getLoss(name):
    """Return the loss function registered under ``name`` ('mse' or 'mae').

    An unknown name raises KeyError from the lookup.
    """
    registry = {}
    registry['mse'] = _mean_squared_error
    registry['mae'] = _mean_abs_error
    return registry[name]

# Soft-thresholding operator T and extrapolation weight thetan
def T(x,L1,L2,cn):
    """Apply an affine step to ``x`` and soft-threshold the result by ``cn``.

    The step is ``x - L1 @ x + L2``; each entry is then shrunk towards zero by
    ``cn`` (entries whose magnitude is at most ``cn`` become zero) while
    keeping its sign.
    """
    moved = x - np.dot(L1, x) + L2
    magnitude = np.abs(moved) - cn
    # Clamp negative magnitudes to zero in place.
    np.maximum(magnitude, 0, out=magnitude)
    return magnitude * np.sign(moved)

def thetan(x0,x1,n):
    """Return the extrapolation weight for iteration ``n``.

    The weight is 0 when ``x0`` and ``x1`` are element-wise equal, otherwise
    ``1 / (2**n * ||x1 - x0||_F)`` using the Frobenius norm.
    """
    unchanged = (x0==x1).all()
    if unchanged:
        return 0
    gap = np.linalg.norm(x1 - x0, 'fro')
    return 1/(2**n*gap)

In [7]:
def fit(H, Y, itrs, lam, display_time=False):
    """Iteratively compute a weight matrix that maps ``H`` onto ``Y``.

    Each iteration combines the soft-thresholding operator ``T`` applied at an
    extrapolated point ``y`` and at a relaxed point ``z``.
    NOTE(review): this looks like an inertial forward-backward scheme for an
    L1-regularised least-squares problem -- confirm against the reference the
    coefficients (0.9, 2e-6) were taken from.

    Args:
        H: 2-D array; its columns define the rows of the result.
        Y: 2-D target array with the same number of rows as ``H``.
        itrs: number of iterations. Values below 1 return the all-zero
            starting point instead of failing.
        lam: scales the soft-threshold level ``cn`` used at every iteration.
        display_time: when true, print the elapsed wall-clock time.

    Returns:
        Array of shape ``(H.shape[1], Y.shape[1])``.
    """
    if display_time:
        start = time.time()

    # H^T H is needed both for the step size and for L1; compute it once.
    gram = np.dot(H.T, H)
    # Step size: reciprocal of the largest eigenvalue of H^T H.
    L = 1. / np.max(np.linalg.eigvals(gram)).real
    m = H.shape[1]
    n = Y.shape[1]
    x0 = np.zeros((m,n))
    x1 = np.zeros((m,n))
    L1 = 2*L*gram
    L2 = 2*L*np.dot(H.T, Y)

    for i in range(1,itrs+1):
        cn = ((2e-6*i)/(2*i+1))*lam*L
        # beta and alpha follow the same schedule.
        beta = 0.9*i/(i+1)
        alpha = 0.9*i/(i+1)

        # Extrapolate from the last two iterates, and relax x1 towards T(x1).
        y = x1 + thetan(x0,x1,i)*(x1-x0)
        z = (1-beta)*x1 + beta*T(x1,L1,L2,cn)

        Ty = T(y,L1,L2,cn)
        Tz = T(z,L1,L2,cn)
        x = (1-alpha)*Ty + alpha*Tz

        x0, x1 = x1, x

    if display_time:
        stop = time.time()
        print(f'Train time: {stop-start}')

    # x1 always holds the latest iterate, and is still defined when the loop
    # body never ran (itrs < 1), unlike the loop-local ``x``.
    return x1

In [8]:
class ELM_AE:
    """Autoencoder with a fixed random hidden layer and learned output weights.

    The input-to-hidden weights ``_w`` are drawn once at construction and never
    trained; ``fit`` only learns ``_beta``, which maps hidden activations back
    onto the input ``X``. ``_loss`` is resolved from its name but is not used
    by any method of this class.
    """

    def __init__(self, n_hidden, X, activation='sigmoid',loss='mse'):
        """Store ``X`` and draw the random weights.

        Args:
            n_hidden: number of hidden units.
            X: 2-D training array; input and output width are both ``X.shape[1]``.
            activation: name passed to ``getActivation``.
            loss: name passed to ``getLoss``.
        """
        self.X = X

        self._num_input_nodes = X.shape[1]
        self._num_output_units = X.shape[1]
        self._num_hidden_units = n_hidden

        self._activation = getActivation(activation)
        self._loss = getLoss(loss)

        # Drawn in this order (beta first, then w) to keep the random stream
        # consumption unchanged. _beta is replaced by fit().
        self._beta = np.random.uniform(-1., 1., (self._num_hidden_units, self._num_output_units))
        self._w = np.random.uniform(-1., 1., (self._num_input_nodes, self._num_hidden_units))
        self._bias = np.zeros(shape=(self._num_hidden_units,))

    def _hidden(self, X):
        """Return the hidden-layer activations for ``X``."""
        return self._activation(np.dot(X, self._w) + self._bias)

    def fit(self, itrs, lam, display_time=False):
        """Learn ``_beta`` so that the hidden activations reconstruct ``self.X``.

        Args:
            itrs: number of solver iterations.
            lam: scales the solver's soft-threshold level.
            display_time: when true, the solver prints its elapsed time.
        """
        H = self._hidden(self.X)
        # Inside a method the bare name ``fit`` resolves to the module-level
        # solver, not to this method. Delegating replaces a line-for-line copy
        # of that solver with ``Y = self.X``.
        self._beta = fit(H, self.X, itrs, lam, display_time=display_time)

    def __call__(self, X=None):
        """Return the reconstruction ``hidden(X) @ _beta``.

        Args:
            X: optional 2-D array with ``X.shape[1]`` equal to the training
                width. Defaults to the training data passed to ``__init__``.
        """
        if X is None:
            X = self.X
        return np.dot(self._hidden(X), self._beta)

In [9]:
class DELM:
    """Stack of ELM autoencoder layers followed by a final output layer.

    ``fit`` trains one ``ELM_AE`` per entry of ``hidden`` and keeps the
    transpose of each learned ``_beta`` as that layer's weight matrix. The
    data is propagated by plain matrix products, and ``__call__`` applies the
    same chain of products to new data.
    """

    def __init__(self, X, y, hidden):
        """Store the training data and the hidden-layer sizes.

        Args:
            X: 2-D training inputs.
            y: 2-D training targets with the same number of rows as ``X``.
            hidden: iterable of hidden-layer sizes, one autoencoder each.
        """
        self.X = X
        self.y = y
        self.hidden = hidden
        # Defined up front so an unfitted model can be detected in __call__.
        self.w_layers = []
        self.output = None

    def fit(self, itrs, lam, verbose=True):
        """Train every layer in sequence.

        Args:
            itrs: solver iterations used for every layer.
            lam: scales the solver's soft-threshold level.
            verbose: when true (the default, matching the previous behaviour),
                print the shapes of the last hidden representation and of the
                output weights.
        """
        train_layer = self.X
        self.w_layers = []

        for n in self.hidden:
            model_ae = ELM_AE(n, train_layer)
            model_ae.fit(itrs, lam)
            V = model_ae._beta
            self.w_layers.append(V.T)

            # Project the data into this layer's space for the next one.
            train_layer = train_layer @ V.T

        # Output layer: map the last representation onto the targets.
        V = fit(train_layer, self.y, itrs, lam)
        self.w_layers.append(V)
        if verbose:
            print(train_layer.shape)
            print(V.shape)
        self.output = train_layer @ V

    def __call__(self, X):
        """Return ``X`` multiplied through all fitted layer matrices.

        Raises:
            RuntimeError: if called before ``fit``.
        """
        if not self.w_layers:
            raise RuntimeError('DELM must be fitted before it can be called')
        return np.linalg.multi_dot([X] + self.w_layers)

In [10]:
# Load the MNIST train and test arrays through Keras.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

In [11]:
# Flatten every training image into one row of 28 * 28 = 784 values.
x_train = x_train.reshape((x_train.shape[0], 28 * 28))

In [12]:
# One-hot encode the integer labels: one row per sample, one column per label
# value from 0 to y_train.max().
# NOTE: this rebinds `b`, which the earlier demo cell defined as a 5x6 matrix.
b = np.zeros((y_train.size, y_train.max()+1))
b[np.arange(y_train.size), y_train] = 1

In [13]:
# From here on y_train is the one-hot matrix, no longer the integer labels.
y_train = b

In [14]:
# Three stacked autoencoder layers with 256, 128 and 64 hidden units.
model = DELM(x_train, y_train, [256, 128, 64])

In [15]:
# 20 solver iterations per layer; lam scales the soft-threshold level.
model.fit(itrs=20, lam=1e-1)

  # Remove the CWD from sys.path while we load stuff.


(60000, 64)
(64, 10)


In [16]:
# Flatten the test images the same way as x_train and push them through the fitted layers.
predict = model(x_test.reshape((x_test.shape[0], 28 * 28)))

In [17]:
# Predicted label = column index of the largest output in each row.
predict = np.argmax(predict, axis=1)

In [18]:
# Preview of the predicted labels.
predict

array([7, 6, 1, ..., 1, 1, 6], dtype=int64)

In [19]:
# True test labels, for comparison with the predictions.
y_test

array([7, 2, 1, ..., 4, 5, 6], dtype=uint8)

In [20]:
# Accuracy: fraction of test samples whose predicted label equals the true label.
# np.mean reduces the boolean array in one vectorised pass instead of iterating
# element by element with the builtin sum.
np.mean(predict == y_test)

0.5519