In [240]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize
from sklearn.model_selection import train_test_split
import itertools
import matplotlib.pyplot as plt
import time

df = pd.read_csv('DATA.csv')

train, test = train_test_split(df, test_size=0.255, random_state=1939671)

X = np.array(train[['x1', 'x2']])
y = np.array(train['y'])

X_test = np.array(train[['x1', 'x2']])
y_test = np.array(train['y'])

In [241]:
import copy

def J(f, x, dx=1e-8):
    n = x.shape
    func = f(x)
    jac = np.zeros(n)
    x_plus = x.copy()
    for i in range(n[0]):
        for j in range(n[1]):  # through columns to allow for vector addition
            x_plus[i, j] = x[i, j] + dx
            jac[i, j] = (f(x_plus) - func)/dx
            x_plus = x.copy()
    return jac

In [242]:
def J_(f, x, dx=1e-8):
    n = x.shape
    func = f(x)
    jac = np.zeros(n)
    x_plus = x.copy()
    for i in range(n[0]):
        x_plus[i] = x[i] + dx
        jac[i] = (f(x_plus) - func)/dx
        x_plus = x.copy()
    return jac

In [243]:
N = 10
P = len(y)
rho = 1e-5
sigma = 1

W = np.random.randn(X.shape[1], N)
b = np.random.randn(N)
v = np.random.randn(N)

In [244]:
def loss_v(v, X=X, y=y, sigma=sigma, N=N, rho=rho, b=b, W=W):
    P = len(y)
    x0 = np.concatenate((W, v, b), axis=None)
    norm = np.linalg.norm(x0)
    pred = feedforward(X, W, b, v, sigma)
    res = ((np.sum((pred-y)**2))*P**(-1) + rho*norm)*0.5    
    
    return res

def loss_b(b, X=X, y=y, sigma=sigma, N=N, rho=rho, W=W, v=v):
    P = len(y)
    x0 = np.concatenate((W, v, b), axis=None)
    norm = np.linalg.norm(x0)
    pred = feedforward(X, W, b, v, sigma)
    res = ((np.sum((pred-y)**2))*P**(-1) + rho*norm)*0.5    
    
    return res

def loss_W(W, X=X, y=y, sigma=sigma, N=N, rho=rho, b=b, v=v):
    P = len(y)
    x0 = np.concatenate((W, v, b), axis=None)
    norm = np.linalg.norm(x0)
    pred = feedforward(X, W, b, v, sigma)
    res = ((np.sum((pred-y)**2))*P**(-1) + rho*norm)*0.5    
    
    return res

In [245]:
def J(f, x, dx=1e-8):
    n = x.shape
    func = f(x)
    jac = np.zeros(n)
    x_plus = x.copy()
    for i in range(n[0]):
        for j in range(n[1]):  # through columns to allow for vector addition
            x_plus[i, j] = x[i, j] + dx
            jac[i, j] = (f(x_plus) - func)/dx
            x_plus = x.copy()
    return jac

In [246]:
grads = {}

linear_layer = (np.dot(X, W) + b)
a_2 = tanh(linear_layer, sigma)
dJdf = (1/P)*(np.dot(a_2, v) - y)
dtanh = 1 - tanh(linear_layer, sigma)**2

dW1_1 = np.tensordot(dJdf, np.transpose(v), axes=0)
dW1_2 = dW1_1*dtanh

grads['v'] = np.dot(dJdf, a_2) + rho*v
grads['b'] = np.sum(dW1_2, axis=0) + rho*b
grads['W'] = np.tensordot(np.transpose(X), dW1_2, axes=1) + rho*W

In [248]:
J_(loss_b, b).T

array([ 1.8528743 , -2.01334771, -0.13348647,  0.2653282 ,  0.08347953,
        0.86578567,  0.37603805, -0.05454677,  0.63909962, -0.47925415])

In [249]:
grads['b'].T

array([ 1.85286697, -2.01334346, -0.133469  ,  0.26534062,  0.0834944 ,
        0.86578087,  0.37602891, -0.05454799,  0.63910002, -0.47924422])

In [250]:
J_(loss_v, v).T

array([-2.17146248,  2.09753619, -1.89946476, -0.85523464, -2.46582719,
       -2.92206916,  2.03785362,  0.87218108,  1.95432097, -3.01721457])

In [251]:
grads['v'].T

array([-2.17147831,  2.09755044, -1.89946608, -0.85523861, -2.46583668,
       -2.92208456,  2.0378613 ,  0.87218206,  1.95430903, -3.01722667])

In [252]:
J(loss_W, W).T

array([[ 2.18423981,  1.01448627],
       [-2.29417552, -3.47231506],
       [ 0.20155877,  0.16223964],
       [ 0.49514366,  0.85626191],
       [ 0.3882425 , -0.06685354],
       [ 0.55433347,  2.10735198],
       [-1.26066908, -1.31668436],
       [-0.12335022, -0.20334312],
       [ 0.90082519,  2.73310832],
       [ 0.49209614,  1.04697211]])

In [253]:
grads['W'].T

array([[ 2.18424679,  1.01449237],
       [-2.29418415, -3.47231551],
       [ 0.20156665,  0.1622423 ],
       [ 0.49515617,  0.85625122],
       [ 0.38824641, -0.06683079],
       [ 0.5543509 ,  2.10735299],
       [-1.26067619, -1.31668368],
       [-0.12336169, -0.20333638],
       [ 0.90080865,  2.73311284],
       [ 0.4921099 ,  1.04697404]])