## Author: Farzan Memarian

In [1]:
# Data generation

import numpy as np
from itertools import permutations
import random
import time
from pdb import set_trace

Nexam = 10**5
Ndim = 20
Nperm = 30
x1 = np.random.multivariate_normal(mean= np.ones(Ndim), cov =  np.identity(Ndim),size = Nexam)
y1 = np.ones(Nexam)
x2 = np.random.multivariate_normal(mean= -np.ones(Ndim), cov =  np.identity(Ndim),size = Nexam)
y2 = -np.ones(Nexam)

X = np.concatenate((x1,x2),axis=0)
y = np.concatenate((y1,y2))

from sklearn import model_selection
X_tr_orig, X_test_orig, y_tr_orig, y_test_orig = model_selection.train_test_split(X,y,test_size=0.5)

# reshaping y
y_tr_orig = y_tr_orig.reshape((len(y_tr_orig),1))
y_test_orig = y_test_orig.reshape((len(y_test_orig),1))

perms = [] # array storing different premutatins of X, Y
for _ in range(Nperm):
    inx = np.random.permutation(Nexam)
    X_perm = X_tr_orig[inx]
    y_perm = y_tr_orig[inx]
    perms.append([X_perm,y_perm])

### a) Batch newton algorithm with the Gauss-Newton approximation

In [2]:
# functions
from numpy import outer, matmul, inner
from numpy.linalg import inv, norm
from scipy.sparse import diags
from sklearn.metrics import mean_squared_error

def func(X, theta):
    return 1.71 * np.tanh(0.66 * matmul(X, theta))

def f_prime(X, theta):
    return 1.71 * 0.66 / np.cosh(0.66 * matmul(X, theta))**2

def gradient_loss(f, f_prime, y, X):
    N = len(y)
    g = np.zeros((Ndim,))
    for i in range(N):
        g += 2 * (f[i] - 1.5*y[i]) * f_prime[i] * X[i,:]
    return g.reshape((Ndim,1))

def hessian(f_prime, X):
    h = np.zeros((Ndim,Ndim))
    N,_ = np.shape(X)
    for i in range(N):
        h += f_prime[i]**2 * np.outer(X[i,:],X[i,:])
    return h

def batch_newton_step(X, y, theta):

    f = func(X, theta)
#     print "mse f, y: {}".format(mean_squared_error(f,y))
    f_p = f_prime(X, theta)
    g = gradient_loss(f, f_p, y, X)
    h = hessian(f_p, X)
    h_inv = inv(h)
    d_theta = -matmul(h_inv, g)
    return d_theta

def batch_newton_iter(X, y, theta_init, thresh):
    theta = theta_init
    keep_iter = True
    counter = 0
    while keep_iter:
        counter += 1
        if counter % 10 == 0:
            print "iter:", counter
            print "error", norm(d_theta)
            print "threshold", thresh
        d_theta = batch_newton_step(X, y, theta)

        if norm(d_theta) > thresh:
            theta += d_theta
        else:
            keep_iter = False
    return theta

In [None]:
# find theta^* on test set
        
# N = len(y_test_orig)
N = 10000
thresh = 1.0/N

X = X_test_orig[:N,:]
y = y_test_orig[:N]

theta_init = np.random.uniform(-0.5, 0.5, size=Ndim).reshape((Ndim,1))
# theta_init = np.asarray([[-0.45148032], [-0.24707166],[0.28715202],[-0.00086336],[ 0.15579776],[ 0.40848191],[-0.0577103 ],[ 0.10378568],[ 0.19488391],[ 0.35499952],[ 0.46115049],[-0.48382997],[ 0.29332625],[-0.14669058],[ 0.00110547],[ 0.44736687],[-0.07875841],[ 0.36241191],[-0.15974534]])
start_time = time.time()
theta_star = batch_newton_iter(X, y, theta_init, thresh)
np.save('theta_star', theta_star)
end_time = time.time()
elapsed_time_test = end_time - start_time
print "elapsed time for test set: {}".format(elapsed_time_test)

iter: 10
error 0.412779359706
threshold 0.0001
iter: 20
error 0.461823991794
threshold 0.0001
iter: 30
error 0.47593678616
threshold 0.0001
iter: 40
error 0.481987079585
threshold 0.0001
iter: 50
error 0.485612140864
threshold 0.0001
iter: 60
error 0.487867995667
threshold 0.0001
iter: 70
error 0.489273271119
threshold 0.0001
iter: 80
error 0.490149407713
threshold 0.0001
iter: 90
error 0.490696334688
threshold 0.0001
iter: 100
error 0.491037380129
threshold 0.0001
iter: 110
error 0.49124867376
threshold 0.0001
iter: 120
error 0.491377490451
threshold 0.0001
iter: 130
error 0.491453438754
threshold 0.0001
iter: 140
error 0.491495252229
threshold 0.0001
iter: 150
error 0.491514918801
threshold 0.0001
iter: 160
error 0.491520202652
threshold 0.0001
iter: 170
error 0.491516194064
threshold 0.0001
iter: 180
error 0.49150626952
threshold 0.0001
iter: 190
error 0.491492692237
threshold 0.0001
iter: 200
error 0.491476992022
threshold 0.0001
iter: 210
error 0.491460208577
threshold 0.0001
iter

In [25]:
# training batch newton algorighm

# Nsizes = 5
# n_ex_float = np.floor(np.logspace(3.0, 5.0, num=Nsizes))
# n_ex = [int(item) for item in n_ex_float]
n_ex = [100000,8000]

theta_store_all = []
time_storage = []
for N in n_ex:
    print "RUNNING FOR {} EXAMPLES".format(N)
    start_time = time.time()
    theta_store = []
    thresh = 1.0/N
    perm_counter = 0
    for X_all,y_all in perms:
        X = X_all[:N,:]
        y = y_all[:N]
        perm_counter += 1
        if perm_counter % 10 == 0:
            print "perm counter:", perm_counter 
        
        theta_init = np.random.uniform(-0.5, 0.5, size=Ndim).reshape((Ndim,1))
        start_time = time.time()
        theta = batch_newton_iter(X, y, theta_init, thresh)
        theta_store.append(theta)
    end_time = time.time()
    elapsed_time = end_time - start_time
    time_storage.append(elapsed_time)
    theta_store_all.append(theta_store)

RUNNING FOR 100000 EXAMPLES
mse f, y: 5.48606588122


LinAlgError: Singular matrix

In [None]:
# save thetas and times
import pickle

with open('thetas_all', 'wb') as fp:
    pickle.dump(theta_store2, fp)
    
with open('times', 'wb') as fb:
    pickle.dump(time_storage, fb)
    
    
# with open ('thetas2', 'rb') as fp:
#     itemlist = pickle.load(fp)

In [9]:
# evaluation of newton method on test set

# find mse error on test data
from sklearn.metrics import mean_squared_error
store_mse = []
X_test = X_test_orig
y_test = y_test_orig
for i, N in enumerate(n_ex):
    mse = 0
    for j in range(Nperm):
        theta = theta_store_all[i][j]
        f = func(X_test, theta)
        mse += mean_squared_error(f,y_test)/Nperm
    f_star = func(X_test, theta_star)
    mse_star = mean_squared_error(f_star,y_test)
    store_mse.append([mse,N])
store_mse



[[0.30339653926408167, 400], [0.30468899982494052, 1000]]