In [1]:
# import lib
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn.datasets import make_moons,make_classification,make_regression,make_circles
import sys
from mpl_toolkits.mplot3d import Axes3D 
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
init_notebook_mode(connected=True)
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.decomposition import PCA,KernelPCA
np.random.seed(23)
np.set_printoptions(precision=3,suppress=True)
plt.style.use('seaborn')

import os
import tensorflow as tf
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
tf.logging.set_verbosity(tf.logging.ERROR)
from tensorflow.examples.tutorials.mnist import input_data

def _sym_decorrelation(W):
    """ Symmetric decorrelation
    i.e. W <- (W * W.T) ^{-1/2} * W
    """
    s, u = np.linalg.eigh(np.dot(W, W.T))
    # u (resp. s) contains the eigenvectors (resp. square roots of
    # the eigenvalues) of W * W.T
    return np.dot(np.dot(u * (1./(np.sqrt(s+1e-8))+1e-8), u.T), W)

In [2]:
# import data
mnist = input_data.read_data_sets('../../Dataset/MNIST/', one_hot=True)
train_data,train_label,val_data,val_label,test_data,test_label = mnist.train.images,mnist.train.labels,mnist.validation.images,mnist.validation.labels,mnist.test.images, mnist.test.labels
print(train_data.shape,train_label.shape,train_data.max(),train_data.min())
print(val_data.shape,val_label.shape,val_data.max(),val_data.min())
print(test_data.shape,test_label.shape,test_data.max(),test_data.min())

Extracting ../../Dataset/MNIST/train-images-idx3-ubyte.gz
Extracting ../../Dataset/MNIST/train-labels-idx1-ubyte.gz
Extracting ../../Dataset/MNIST/t10k-images-idx3-ubyte.gz
Extracting ../../Dataset/MNIST/t10k-labels-idx1-ubyte.gz
(55000, 784) (55000, 10) 1.0 0.0
(5000, 784) (5000, 10) 1.0 0.0
(10000, 784) (10000, 10) 1.0 0.0


In [3]:
#  create class
def arctan(x):  return tf.arctan(x)
def tan(x)   :  return tf.tan(x)
def d_arctan(x):return 1/(1+x**2)
def d_tanh(x):  return 1/tf.cos(x)

class FNN():
    
    def __init__(self,input,output,act,d_act):
        self.w = tf.Variable(tf.random.normal())
        self.mw,self.mv = np.zeros_like()
        
        self.a = np.ones(output)
        self.b = np.ones(output)
        self.c = np.zeros(output)
        
        self.act  = act
        self.d_act= d_act

In [8]:
def tf_arctan(x): return tf.atan(x)
def tf_tan(x)   : return tf.tan(x)

def d_tf_arctan(x): return 1/(1+x**2)
def d_tf_tan(x):    return 1/(tf.cos(x)**2)

class PCA_Layer():

    def __init__(self,inc,outc,act=tf_arctan,d_act=d_tf_arctan):
        
        if outc == 1:
            self.w = tf.Variable(self.norm(tf.random_normal([inc,outc],stddev=0.05,seed=2)))
        else:
            self.w = tf.Variable(self.sym_decorrelation(tf.random_normal([inc,outc],stddev=0.05,seed=2)))
        self.a = tf.Variable(tf.ones([outc]))
        self.b = tf.Variable(tf.ones([outc]))
        self.c = tf.Variable(tf.zeros([outc]))
        
        self.mw,self.vw = tf.Variable(tf.zeros_like(self.w)),tf.Variable(tf.zeros_like(self.w))
        self.ma,self.va = tf.Variable(tf.zeros_like(self.a)),tf.Variable(tf.zeros_like(self.a))
        self.mb,self.vb = tf.Variable(tf.zeros_like(self.b)),tf.Variable(tf.zeros_like(self.b))
        self.mc,self.vc = tf.Variable(tf.zeros_like(self.c)),tf.Variable(tf.zeros_like(self.c))
        
        self.act,self.d_act = act,d_act

    def feedforward_linear(self,input=None):
        self.input  = input
        self.layer  = tf.matmul(self.input,self.w) 
        loss = tf.transpose(self.w) @ tf.transpose(self.input) @ self.input @ self.w
        return self.layer,loss
    def backprop_linear(self):
        gradw      = -2* tf.transpose(self.input) @ self.input @ self.w
        grad_pass  = -2*self.input@self.w@tf.transpose(self.w)
        
        update_w = []
        update_w.append(tf.assign( self.mw,self.mw*beta1 + (1-beta1) * (gradw)   ))
        update_w.append(tf.assign( self.vw,self.vw*beta2 + (1-beta2) * (gradw ** 2)   ))
        m_hatw = self.mw / (1-beta1)
        v_hatw = self.vw / (1-beta2)
        adam_midw = m_hatw *  learning_rate/(tf.sqrt(v_hatw) + adam_e)
        update_w.append(tf.assign(self.w,self.sym_decorrelation(tf.subtract(self.w,adam_midw ))))
        
        return grad_pass,update_w

    def feedforward_nonlinear(self,input=None):
        self.input  = input
        self.layer  = tf.matmul(self.input,self.w) 
        self.layerA = self.a*self.act(self.b*self.layer) + self.c
        loss = tf.transpose(self.w) @ tf.transpose(self.input) @ self.input @ self.w
        return self.layerA,loss
    def backprop_nonlinear(self,gradient):
        
        grada = tf.reduce_mean(gradient * self.act(self.b*self.layer),0)
        gradb = tf.reduce_mean(gradient * self.a * self.d_act(self.b*self.layer) * self.layer,0)
        gradc = tf.reduce_mean(gradient,0)
        gradw = tf.transpose(self.input) @ (gradient * self.a * self.d_act(self.b*self.layer) * self.b) - 2 * tf.transpose(self.input) @ self.input @ self.w
        grad_pass = (gradient * self.a * self.d_act(self.b*self.layer) * self.b) @ tf.transpose(self.w) - 2 * self.input@self.w@tf.transpose(self.w)

        update_w = []
        
        update_w.append(tf.assign( self.mw,self.mw*beta1 + (1-beta1) * (gradw)   ))
        update_w.append(tf.assign( self.vw,self.vw*beta2 + (1-beta2) * (gradw ** 2)   ))
        m_hatw    = self.mw / (1-beta1)
        v_hatw    = self.vw / (1-beta2)
        adam_midw = m_hatw *  learning_rate/(tf.sqrt(v_hatw) + adam_e)
        update_w.append(tf.assign(self.w,self.sym_decorrelation(tf.subtract(self.w,adam_midw))))
        
        update_w.append(tf.assign( self.ma,self.ma*beta1 + (1-beta1) * (grada)   ))
        update_w.append(tf.assign( self.va,self.va*beta2 + (1-beta2) * (grada ** 2)   ))
        m_hata    = self.ma / (1-beta1)
        v_hata    = self.va / (1-beta2)
        adam_mida = m_hata *  learning_rate/(tf.sqrt(v_hata) + adam_e)
        update_w.append(tf.assign(self.a,self.stand(tf.subtract(self.a,adam_mida))))
        
        update_w.append(tf.assign( self.mb,self.mb*beta1 + (1-beta1) * (gradb)   ))
        update_w.append(tf.assign( self.vb,self.vb*beta2 + (1-beta2) * (gradb ** 2)   ))
        m_hatb    = self.mb / (1-beta1)
        v_hatb    = self.vb / (1-beta2)
        adam_midb = m_hatb *  learning_rate/(tf.sqrt(v_hatb) + adam_e)
        update_w.append(tf.assign(self.b,self.stand(tf.subtract(self.b,adam_midb))))
        
        update_w.append(tf.assign( self.mc,self.mc*beta1 + (1-beta1) * (gradc)   ))
        update_w.append(tf.assign( self.vc,self.vc*beta2 + (1-beta2) * (gradc ** 2)   ))
        m_hatc    = self.mc / (1-beta1)
        v_hatc    = self.vc / (1-beta2)
        adam_midc = m_hatb *  learning_rate/(tf.sqrt(v_hatc) + adam_e)
        update_w.append(tf.assign(self.c,self.stand(tf.subtract(self.c,adam_midc))))

        return grad_pass,update_w
    
    def stand(self,w):
        mean,variance = tf.nn.moments(w,0)
        return (w-mean)/variance
    def norm(self,W):
        W = W/(tf.sqrt(tf.reduce_sum(W**2)+1e-8))
        return W
    def sym_decorrelation(self,W):
        s, u = tf.linalg.eigh(W@tf.transpose(W))
        result = (u * (1./(tf.sqrt(s+1e-2)+1e-2))) @ tf.transpose(u) @ W
        return result


In [9]:
# set the hyper
learning_rate = 0.000008
beta1,beta2,adam_e = 0.9,0.999,1e-8
mini_batch_size    = 10

In [6]:
# create layers 
l1 = PCA_Layer(784,128)
l2 = PCA_Layer(128,32)
l3 = PCA_Layer(32,16)
l4 = PCA_Layer(16,2)

x = tf.placeholder(tf.float32,[mini_batch_size,784])

layer1,l1l = l1.feedforward_nonlinear(x)
layer2,l2l = l2.feedforward_nonlinear(layer1)
layer3,l3l = l3.feedforward_nonlinear(layer2)
layer4,l4l = l4.feedforward_linear(layer3)

loss = tf.reduce_mean(l1l) + tf.reduce_mean(l2l) + tf.reduce_mean(l3l) + tf.reduce_mean(l4l)

grad4,grad4_update = l4.backprop_linear()
grad3,grad3_update = l3.backprop_nonlinear(grad4)
grad2,grad2_update = l2.backprop_nonlinear(grad3)
grad1,grad1_update = l1.backprop_nonlinear(grad2)
grad_update = grad4_update + grad3_update + grad2_update + grad1_update

In [7]:
# start
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
val_data_copy = np.copy(val_data)

for iter in range(1):
    
    val_data_copy  = shuffle(val_data_copy)
    for current_batch_index in range(0,len(val_data_copy),mini_batch_size):
        
        current_batch  = val_data_copy[current_batch_index:current_batch_index+mini_batch_size]
        sess_result    = sess.run([loss,grad_update],feed_dict={x:current_batch})
        print(sess_result[0])



0.19254354
nan


InvalidArgumentError: Got info = 15 for batch index 0, expected info = 0. Debug_info = heevd
	 [[node SelfAdjointEigV2_4 (defined at <ipython-input-4-065ca0cd9d45>:98)  = SelfAdjointEigV2[T=DT_FLOAT, compute_v=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](matmul_32)]]
	 [[{{node Assign_2/_53}} = _Recv[_start_time=0, client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_1594_Assign_2", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

Caused by op 'SelfAdjointEigV2_4', defined at:
  File "C:\ProgramData\Miniconda3\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\ProgramData\Miniconda3\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\ProgramData\Miniconda3\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\ProgramData\Miniconda3\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\ProgramData\Miniconda3\lib\site-packages\ipykernel\kernelapp.py", line 505, in start
    self.io_loop.start()
  File "C:\ProgramData\Miniconda3\lib\site-packages\tornado\platform\asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "C:\ProgramData\Miniconda3\lib\asyncio\base_events.py", line 422, in run_forever
    self._run_once()
  File "C:\ProgramData\Miniconda3\lib\asyncio\base_events.py", line 1434, in _run_once
    handle._run()
  File "C:\ProgramData\Miniconda3\lib\asyncio\events.py", line 145, in _run
    self._callback(*self._args)
  File "C:\ProgramData\Miniconda3\lib\site-packages\tornado\ioloop.py", line 758, in _run_callback
    ret = callback()
  File "C:\ProgramData\Miniconda3\lib\site-packages\tornado\stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\ProgramData\Miniconda3\lib\site-packages\tornado\gen.py", line 1233, in inner
    self.run()
  File "C:\ProgramData\Miniconda3\lib\site-packages\tornado\gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "C:\ProgramData\Miniconda3\lib\site-packages\ipykernel\kernelbase.py", line 370, in dispatch_queue
    yield self.process_one()
  File "C:\ProgramData\Miniconda3\lib\site-packages\tornado\gen.py", line 346, in wrapper
    runner = Runner(result, future, yielded)
  File "C:\ProgramData\Miniconda3\lib\site-packages\tornado\gen.py", line 1080, in __init__
    self.run()
  File "C:\ProgramData\Miniconda3\lib\site-packages\tornado\gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "C:\ProgramData\Miniconda3\lib\site-packages\ipykernel\kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "C:\ProgramData\Miniconda3\lib\site-packages\tornado\gen.py", line 326, in wrapper
    yielded = next(result)
  File "C:\ProgramData\Miniconda3\lib\site-packages\ipykernel\kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "C:\ProgramData\Miniconda3\lib\site-packages\tornado\gen.py", line 326, in wrapper
    yielded = next(result)
  File "C:\ProgramData\Miniconda3\lib\site-packages\ipykernel\kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "C:\ProgramData\Miniconda3\lib\site-packages\tornado\gen.py", line 326, in wrapper
    yielded = next(result)
  File "C:\ProgramData\Miniconda3\lib\site-packages\ipykernel\ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\ProgramData\Miniconda3\lib\site-packages\ipykernel\zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\ProgramData\Miniconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2819, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "C:\ProgramData\Miniconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2845, in _run_cell
    return runner(coro)
  File "C:\ProgramData\Miniconda3\lib\site-packages\IPython\core\async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "C:\ProgramData\Miniconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3020, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\ProgramData\Miniconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3185, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "C:\ProgramData\Miniconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3267, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-6-1caeaeeb7f65>", line 16, in <module>
    grad4,grad4_update = l4.backprop_linear()
  File "<ipython-input-4-065ca0cd9d45>", line 41, in backprop_linear
    update_w.append(tf.assign(self.w,self.sym_decorrelation(tf.subtract(self.w,adam_midw ))))
  File "<ipython-input-4-065ca0cd9d45>", line 98, in sym_decorrelation
    s, u = tf.linalg.eigh(W@tf.transpose(W))
  File "C:\ProgramData\Miniconda3\lib\site-packages\tensorflow\python\ops\linalg_ops.py", line 327, in self_adjoint_eig
    e, v = gen_linalg_ops.self_adjoint_eig_v2(tensor, compute_v=True, name=name)
  File "C:\ProgramData\Miniconda3\lib\site-packages\tensorflow\python\ops\gen_linalg_ops.py", line 1640, in self_adjoint_eig_v2
    "SelfAdjointEigV2", input=input, compute_v=compute_v, name=name)
  File "C:\ProgramData\Miniconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "C:\ProgramData\Miniconda3\lib\site-packages\tensorflow\python\util\deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "C:\ProgramData\Miniconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3274, in create_op
    op_def=op_def)
  File "C:\ProgramData\Miniconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1770, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): Got info = 15 for batch index 0, expected info = 0. Debug_info = heevd
	 [[node SelfAdjointEigV2_4 (defined at <ipython-input-4-065ca0cd9d45>:98)  = SelfAdjointEigV2[T=DT_FLOAT, compute_v=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](matmul_32)]]
	 [[{{node Assign_2/_53}} = _Recv[_start_time=0, client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_1594_Assign_2", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
