In [1]:
%cd "~/moses-incons-pen-xp"
%load_ext autoreload
%autoreload 2

/home/xabush/moses-incons-pen-xp


In [36]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('seaborn-white')
import autograd.numpy as anp   # Thinly-wrapped version of Numpy
from autograd import grad, elementwise_grad as egrad
from sklearn.model_selection import train_test_split
from mpl_toolkits import mplot3d
import scipy
from notebooks.variable_selection.util import *

In [26]:
def Rosenbrock(X, a=1, b=100):
    """Evaluate the Rosenbrock function f(x, y) = (a - x)**2 + b*(y - x**2)**2.

    Parameters
    ----------
    X : indexable
        Point at which to evaluate; ``X[0]`` is x and ``X[1]`` is y.
    a, b : scalar, optional
        Shape parameters of the function (defaults 1 and 100).

    Returns
    -------
    The function value at ``X``. The value is 0 at ``(a, a**2)``.
    """
    x, y = X[0], X[1]
    # The first term is (a - x)**2, not (a + x)**2: this matches the
    # `2*x - 2` term used by Grad_Rosenbrock (derivative of (1 - x)**2).
    return (a - x)**2 + b*(y - x**2)**2

def Grad_Rosenbrock(x, y):
    """Analytic gradient of (1 - x)**2 + 100*(y - x**2)**2 at the point (x, y).

    Returns a length-2 numpy array ``[df/dx, df/dy]``.
    """
    # Partial derivative with respect to x.
    d_dx = -400*x*y + 400*x**3 + 2*x - 2
    # Partial derivative with respect to y.
    d_dy = 200*y - 200*x**2
    gradient = [d_dx, d_dy]
    return np.array(gradient)

def Hessian_Rosenbrock(x,y):
    """Analytic Hessian of (1 - x)**2 + 100*(y - x**2)**2 at the point (x, y).

    Returns a 2x2 numpy array ``[[f_xx, f_xy], [f_yx, f_yy]]``.
    """
    d2_dx2 = -400*y + 1200*x**2 + 2
    # The mixed partials are equal, so the off-diagonal entry is computed once.
    d2_dxdy = -400 * x
    d2_dy2 = 200
    return np.array([[d2_dx2, d2_dxdy], [d2_dxdy, d2_dy2]])

In [17]:

def Gradient_Descent(Grad, x, y, gamma = 0.00125, epsilon=0.0001, max_iter=10000):
    """Fixed-step gradient descent in two variables.

    Parameters
    ----------
    Grad : callable
        Called as ``Grad(x, y)``; its result is scaled by ``gamma`` and
        subtracted from the current point ``[x, y]``.
    x, y : scalar
        Starting coordinates.
    gamma : float, optional
        Step size.
    epsilon : float, optional
        Iteration stops once the norm of the last step is <= ``epsilon``.
    max_iter : int, optional
        Upper bound on the number of iterations.

    Returns
    -------
    X : numpy array
        The final point ``[x, y]``.
    iter_x, iter_y : 1-D float arrays
        The x / y coordinate at the start of each iteration.
    iter_count : 1-D float array
        The iteration numbers ``1, 2, ..., n``.

    The final point is also printed, as before.
    """
    X = np.array([x, y])

    # Plain lists instead of np.append: np.append copies the whole history on
    # every iteration, which makes long runs quadratic in the iteration count.
    xs, ys = [], []

    # Start from an infinite "previous step" so that at least one step is taken
    # whenever max_iter > 0, whatever epsilon is (the old sentinel of 10 skipped
    # the loop entirely for epsilon >= 10).
    step_norm = np.inf
    i = 0

    # Loop while the last step was longer than epsilon and the budget remains.
    while step_norm > epsilon and i < max_iter:
        i += 1
        xs.append(x)
        ys.append(y)

        X_prev = X
        X = X - gamma * Grad(x, y)
        step_norm = np.linalg.norm(X - X_prev)
        x, y = X[0], X[1]

    print(X)

    iter_x = np.array(xs, dtype=float)
    iter_y = np.array(ys, dtype=float)
    iter_count = np.arange(1, i + 1, dtype=float)
    return X, iter_x, iter_y, iter_count


# Run gradient descent on the Rosenbrock gradient from the start point
# (x, y) = (-2, 2) with the default step size, tolerance and iteration cap.
# The printed result below is still some way from (1, 1), so the loop ended on
# the step-size tolerance or the iteration cap rather than at the minimum.
root,iter_x,iter_y, iter_count = Gradient_Descent(Grad_Rosenbrock,-2,2)

[0.91654302 0.83970004]


In [21]:

def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), applied elementwise via autograd's numpy."""
    denominator = 1 + anp.exp(-z)
    return 1 / denominator

def quad_form(b, X, A):
    """Quadratic form f^T A f, where f = sigmoid(X @ b).

    ``b`` is the coefficient vector, ``X`` the design matrix and ``A`` the
    matrix of the quadratic form (objective_log_loss_l1 passes its ``L``
    argument here).
    """
    predictions = sigmoid(X @ b)
    left_product = anp.dot(predictions.T, A)
    return anp.dot(left_product, predictions)

def objective_log_loss_l1(b, X, y, L, l1, l2):
    """Penalised logistic loss used as the optimisation objective.

    Computes

        -(1/m) * sum(y*f - log(1 + exp(f)))
        + l1 * sum(|b|)
        + l2 * quad_form(b, X, L)

    with ``f = X @ b`` and ``m = X.shape[0]``.

    Parameters
    ----------
    b : coefficient vector (the argument autograd differentiates with respect to).
    X : design matrix; its first dimension is the number of samples.
    y : response vector aligned with the rows of X
        (presumably 0/1 labels -- TODO confirm against the data generator).
    L : matrix of the quadratic penalty, forwarded to ``quad_form``.
    l1 : weight of the absolute-value (lasso) penalty on ``b``.
    l2 : weight of the quadratic-form penalty.

    Returns
    -------
    Scalar objective value.
    """
    f = X @ b
    m = X.shape[0]
    # logaddexp(0, f) equals log(1 + exp(f)) but stays finite for large f,
    # where exp(f) would overflow to inf and poison the loss and its gradient.
    log_ll = anp.sum((y*f) - anp.logaddexp(0.0, f)) / m
    return -log_ll + l1 * anp.sum(anp.abs(b)) \
                + l2 * quad_form(b, X, L)

In [76]:
# Build the synthetic logistic-regression problem and the penalty matrix L.
# NOTE(review): get_assoc_mat, generate_log_data, get_emp_covariance and
# get_laplacian_mat are not defined in this notebook (presumably they come from
# the star import of notebooks.variable_selection.util); the meaning of their
# positional arguments (200, 10, 1000) is not visible here -- confirm there.
# NOTE(review): no random seed is set before the data generation; if these
# helpers draw random numbers the outputs below will differ between runs.
assoc_mat = get_assoc_mat(200, 10)
X, beta, y_log = generate_log_data(200, 10, tf_on=4, val_tf=[5, 5, 5, 5], n=100)
# 50/50 train/test split, stratified on the labels, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y_log, test_size=0.5, stratify=y_log, random_state=42)
prec = get_emp_covariance(X_train, assoc_mat)
# NOTE(review): X_train is passed twice here -- confirm that is intended.
L = get_laplacian_mat(X_train, X_train, prec, 1000, norm=True)

In [77]:
# Display the penalty matrix; the output shows a unit diagonal with small
# negative off-diagonal entries.
L

array([[ 1.        , -0.02040816, -0.02040817, ..., -0.02040817,
        -0.02040816, -0.02040817],
       [-0.02040816,  1.        , -0.02040816, ..., -0.02040816,
        -0.02040817, -0.02040817],
       [-0.02040817, -0.02040816,  1.        , ..., -0.02040816,
        -0.02040817, -0.02040817],
       ...,
       [-0.02040817, -0.02040816, -0.02040816, ...,  1.        ,
        -0.02040817, -0.02040815],
       [-0.02040816, -0.02040817, -0.02040817, ..., -0.02040817,
         1.        , -0.02040816],
       [-0.02040817, -0.02040817, -0.02040817, ..., -0.02040815,
        -0.02040816,  1.        ]])

In [78]:
# Random starting point for the optimiser, uniform in [0, 1), one entry per
# coefficient of `beta`.
# NOTE(review): no seed is set, so the start (and everything fitted from it)
# changes on every run. `beta` is also overwritten by the fit further down, so
# this cell only sees the generated coefficients on a fresh top-to-bottom run.
beta_t = np.random.rand(beta.shape[0])

In [79]:
# Gradient of the objective with respect to its first argument (b), built by
# autograd; the remaining arguments are passed through unchanged.
grad_loss = egrad(objective_log_loss_l1)
# Sanity check at the random start with l1=0.01 and l2=0.00001.
grad_loss(beta_t, X_train, y_train, L, 0.01, 0.00001)

array([-0.16451609, -0.14314487, -0.09203157, ...,  0.04248201,
        0.10604212,  0.03493956])

In [80]:
# Minimise the penalised loss with L-BFGS-B from beta_t, using the autograd
# gradient; fmin_l_bfgs_b returns (x, f, info) and [0] keeps the minimiser x.
# NOTE(review): this overwrites the `beta` returned by generate_log_data, so
# the generated coefficients are no longer available after this cell.
beta = scipy.optimize.fmin_l_bfgs_b(objective_log_loss_l1, x0=beta_t, fprime=grad_loss,
                                                            args=(X_train, y_train, L, 0.01, 0.00001), maxiter=1000)[0]

In [81]:
# Evaluate the fitted coefficients on the training split.
# NOTE(review): get_penalty_comp_log is defined outside this notebook; it
# returns a 3-tuple (see output) whose component meanings should be confirmed
# against the helper's source.
get_penalty_comp_log(X_train, y_train, beta, L)

(0.03284373425534282, 1.6901630222692177, 805.0418865327513)

In [82]:
# Same evaluation on the held-out split, reusing the matrix L built from the
# training data. The second component of the output is identical to the
# training run; presumably it depends only on beta -- TODO confirm.
get_penalty_comp_log(X_test, y_test, beta, L)

(0.5251389197741428, 1.6901630222692177, 326.88968562975504)

In [49]:
# Display the coefficient vector.
# NOTE(review): this cell's execution count (49) is lower than that of the fit
# cell (80), so the output shown comes from an earlier run; re-run after a
# kernel restart to see the current fit.
beta

array([ 3.80013347e-02,  2.61468250e-02,  9.54422742e-03, ...,
       -2.28363402e-06, -2.22346455e-07, -1.29970879e-05])