In [5]:
from tools import *
from experiment import *
from screening import *
from sklearn.model_selection import train_test_split
import time

In [6]:
def make_data_classification(n, p, sparsity):
    """Sample a synthetic binary classification problem from a noisy linear model.

    A weight vector of length ``p`` is drawn with ``scipy.sparse.rand`` at
    density ``sparsity``; its non-zero entries ``v`` are mapped to ``2 * v - 1``
    while zero entries stay zero.  Every feature row is drawn from the box
    [-1, 1)^p and labelled by the sign of ``<weights, x> + noise`` where the
    noise is a standard normal draw divided by 10.

    Parameters
    ----------
    n : int
        Number of samples (rows of ``X``).
    p : int
        Number of features.
    sparsity : float
        Density passed to ``scipy.sparse.rand`` (fraction of non-zero weights).

    Returns
    -------
    X : ndarray of shape (n, p)
    y : ndarray of shape (n,), entries are +1 or -1
    true_params : ndarray of shape (p,)
    """
    raw_weights = rand(p, 1, density=sparsity).toarray().ravel()
    # Indicator of the support, so that only non-zero weights get shifted.
    support = (raw_weights != 0).astype(float)
    true_params = 2 * raw_weights - support

    X = np.zeros((n, p))
    y = np.zeros(n)
    for row in range(n):
        # Draw the features first and the noise second for every row, so the
        # sequence of random numbers consumed is the same for a given seed.
        sample = 2 * np.random.rand(p) - np.ones(p)
        X[row, :] = sample
        score = np.dot(true_params, sample)
        score += np.random.randn(1) / 10
        y[row] = 1 if score > 0 else -1

    return X, y, true_params

def update_ellipsoid_low_dim(z, A, g):
    """Perform one central-cut step of the ellipsoid method with a dense shape matrix.

    Parameters
    ----------
    z : ndarray of shape (p,)
        Current centre of the ellipsoid.
    A : ndarray of shape (p, p)
        Current shape matrix of the ellipsoid.
    g : ndarray of shape (p,)
        Cut direction (a subgradient at ``z``).

    Returns
    -------
    z, A : the updated centre and shape matrix.  When ``g' A g`` is exactly
        zero (e.g. ``g`` is the zero vector) no cut can be made, so the inputs
        are returned unchanged instead of being filled with NaN.

    Raises
    ------
    ZeroDivisionError
        If ``p == 1``, because the volume factor ``p**2 / (p**2 - 1)`` is
        undefined in one dimension.
    """
    p = z.size
    A_g = A.dot(g)
    quad = g.dot(A_g)
    if quad == 0:
        # Normalising by sqrt(0) would produce inf/NaN and poison every
        # subsequent iterate; leave the ellipsoid untouched instead.
        return z, A
    # A.dot(g / sqrt(quad)) == A.dot(g) / sqrt(quad): reuse the product.
    A_g = A_g / np.sqrt(quad)
    z = z - (1 / (p + 1)) * A_g
    A = (p ** 2 / (p ** 2 - 1)) * (A - (2 / (p + 1)) * np.outer(A_g, A_g))
    return z, A

def iterate_ellipsoids_low_dim(D, y, z_init, A_init, lmbda, mu, loss, penalty, intercept, n_steps=100):
    """Run ``n_steps`` ellipsoid updates with a dense shape matrix and report the elapsed time.

    Parameters
    ----------
    D : ndarray of shape (n_samples, n_features)
        Design matrix; a column of ones is appended when ``intercept`` is true.
    y : array-like
        Targets, forwarded untouched to ``compute_subgradient``.
    z_init, A_init : ndarray
        Starting centre and shape matrix of the ellipsoid.
    lmbda, mu, loss, penalty :
        Forwarded untouched to ``compute_subgradient``.
    intercept : bool
        Whether to augment ``D`` with a constant column; also forwarded to
        ``compute_subgradient``.
    n_steps : int, default 100
        Number of update steps.

    Returns
    -------
    z, A : centre and shape matrix after the last step (the initial values
        when ``n_steps`` is not positive).
    """
    if intercept:
        # Constant feature that carries the bias term.
        ones_column = np.ones((D.shape[0], 1))
        X = np.concatenate((D, ones_column), axis=1)
    else:
        X = D

    z, A = z_init, A_init
    start = time.time()
    for _ in range(n_steps):
        g = compute_subgradient(z, X, y, lmbda, mu, loss, penalty, intercept)
        z, A = update_ellipsoid_low_dim(z, A, g)
    elapsed = time.time() - start
    print('Time to compute z and A low dim:', elapsed)
    return z, A

In [12]:
# Placeholder problem size used only by the synthetic generators below;
# both nb_points and dim are recomputed from the loaded data at the end of the cell.
nb_points = 1000
sparsity = 1
dim = 10
# Alternative data sources, kept for switching experiments:
#X, y, true_params, noise_norm = make_data(nb_points, dim, sparsity)
#X, y, true_params = make_data_classification(nb_points, dim, sparsity)
#X, y = load_higgs('./datasets/')
X, y = load_mnist('./datasets/')
# Fixed seed: without it the shuffle differs on every run, so the 100-point
# subset below (and every number derived from it) would not be reproducible.
X, X_test, y, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# Keep only the first 100 training points.
X = X[:100]
y = y[:100]
nb_points = X.shape[0]
dim = X.shape[1]
nb_points, dim

(60000, 2304)


(100, 2304)

In [13]:
# Fit a classifier on the current (X, y); find_best_svm returns an
# (estimator, score) pair.  NOTE(review): presumably a hyper-parameter search
# whose score is a validation accuracy -- confirm against tools/experiment.
best_est, score = find_best_svm(X, y)
#best_est, score = find_best_lasso(X, y)
print(score)

0.92


In [14]:
#true_params = np.append(best_svm.coef_, best_svm.intercept_)
# Use the fitted estimator's weights as the reference solution that the
# ellipsoid centre is compared against (2-D array; the intercept is not included).
true_params = best_est.coef_
# Norm of the reference weights (Frobenius norm, since coef_ is 2-D).
print(np.linalg.norm(true_params))
# Hyper-parameters of the selected estimator; 'C' and 'fit_intercept' are read
# from this dict when test() is called.
best_est_params = best_est.get_params()

16.3544365129


In [16]:
def test(dim, n_steps, lmbda, mu, loss, penalty, intercept):
    """Run the accelerated ellipsoid iteration and print diagnostics.

    The data are read from the notebook-level globals ``X`` and ``y``; the
    reference solution printed at the end is the global ``true_params``.

    Parameters
    ----------
    dim : int
        Number of features (without the intercept).
    n_steps : int
        Number of ellipsoid iterations.
    lmbda, mu :
        Forwarded to ``iterate_ellipsoids_accelerated_``.
    loss, penalty : str
        Forwarded to ``iterate_ellipsoids_accelerated_`` (previously ignored in
        favour of hard-coded ``'hinge'`` / ``'l2'``).
    intercept : bool
        Whether the model has a bias term; adds one dimension to the ellipsoid.

    Returns
    -------
    None.  Results are only printed.
    """
    print('N_STEPS', n_steps)
    r_init = np.sqrt(dim)
    # The intercept, when present, is one extra coordinate of the ellipsoid.
    n_params = dim + 1 if intercept else dim
    z_init = np.zeros(n_params)
    z, scaling, L, I_k_vec = iterate_ellipsoids_accelerated_(X, y, z_init, r_init, lmbda=lmbda, mu=mu,
                                                             loss=loss, penalty=penalty, n_steps=n_steps,
                                                             intercept=intercept)
    # Rebuild the dense shape matrix from its scaled-identity-minus-low-rank factors.
    A = scaling * np.identity(n_params) - L.dot(np.dot(np.diag(I_k_vec), L.T))
    # A is symmetric by construction, so eigvalsh applies: it returns real
    # eigenvalues in ascending order (np.linalg.eig returned complex values
    # with a spurious +0j imaginary part).
    radius = np.linalg.eigvalsh(A)
    # NOTE(review): with intercept=True there are dim + 1 eigenvalues, so index
    # dim - 1 is the second largest rather than the largest -- confirm intent.
    sorted_ = radius[dim - 1]
    print('SORTED', sorted_)
    print('Z', z)

    print('TRUE PARAMS', true_params, true_params.shape)

    return
    
# Sweep over iteration budgets; the regularisation strength is the inverse of
# the fitted estimator's C and the intercept setting mirrors that estimator.
for n_steps in (1000, 10000):
    test(
        dim,
        n_steps,
        lmbda=1 / best_est_params['C'],
        mu=1,
        loss='hinge',
        penalty='l2',
        intercept=best_est_params['fit_intercept'],
    )

N_STEPS 1000
Time to compute z and A: 4.458199739456177
SORTED (48.0090352524+0j)
Z [  2.87744348e-03   1.89949534e-03   7.19113264e-03 ...,   1.39094775e-03
  -7.58567836e-06  -1.49404598e-03]
TRUE PARAMS [[ 0.0177622  -0.07159198  0.07208677 ...,  0.2471912   0.06214971
   0.023293  ]] (1, 2304)
N_STEPS 10000
Time to compute z and A: 808.2925255298615
SORTED (48.0904290962+0j)
Z [ 0.04472435 -0.00466247  0.06867744 ...,  0.03858875  0.015068   -0.00019063]
TRUE PARAMS [[ 0.0177622  -0.07159198  0.07208677 ...,  0.2471912   0.06214971
   0.023293  ]] (1, 2304)


# Low dim

In [8]:
# Dense ("low dim") ellipsoid run on the first 1000 training points.
# NOTE(review): X_train / y_train are not defined in any cell visible in this
# notebook, so this cell fails on a fresh kernel -- confirm where they come from.
# NOTE(review): this overwrites the global `dim` that other cells also set and read.
dim = 28
z_init = np.array(np.zeros(dim))
# Initial shape matrix is dim * I here, whereas test() starts from a radius of
# sqrt(dim) -- presumably the same ball expressed as a squared radius; verify.
A_init = dim * np.identity(dim)
z, A = iterate_ellipsoids_low_dim(X_train[:1000], y_train[:1000], z_init, A_init, lmbda=0.1, mu=1, loss='hinge', penalty='l2', intercept=False, n_steps=10000)

Time to compute z and A low dim: 28.719488859176636


In [9]:
# Show the final centre and the eigenvalues of the final shape matrix in ascending order.
print(z, np.sort(np.linalg.eig(A)[0]))

[ 0.10261051 -0.0040001  -0.03394586 -0.2497604   0.08721035  0.14250697
  0.00451449  0.08864186  0.04326214  0.08519501 -0.01096486  0.03194706
  0.01085868  0.06551451  0.06225054 -0.08885447 -0.04542972  0.04044627
 -0.03552061  0.04761069  0.07490089  0.07573072  0.08979203  0.12364314
  0.12746536 -0.20361338  0.0209     -0.06112422] [  1.12612620e-08   5.41617272e-08   7.09837152e-08   9.72097437e-08
   1.51610969e-07   2.62768930e-07   6.72121693e-07   2.73303184e-06
   8.07286997e-04   8.11925098e-04   8.43654662e-04   8.62796045e-04
   9.01505092e-04   9.17969712e-04   9.38461981e-04   9.51477876e-04
   9.56934175e-04   9.61626901e-04   9.93126245e-04   1.00745180e-03
   1.03263915e-03   1.05390093e-03   1.07634493e-03   1.09571179e-03
   1.11841754e-03   1.15144699e-03   1.18906554e-03   1.22721760e-03]


In [81]:
# Scratch check: `==` against None compares by value, so a non-empty list is
# simply "not equal" and this prints False.  Use `is None` to test for a missing value.
print([1, 2] == None)

False


## MNIST

In [17]:
import numpy as np
import matplotlib.pyplot as plt
import sklearn
from scipy.sparse import rand
import random
import time
from sklearn.model_selection import train_test_split
import sys
sys.path.append('/nas/home2/g/gmialon/research/safe_datapoints')
from tools import *
from screening import *
from experiment import *
import os
from sklearn.datasets import load_diabetes, load_boston, fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
import os
import pickle
%matplotlib inline

In [19]:
# Load the MNIST features and labels from the local datasets directory.
# NOTE(review): the shape printed by this cell differs from X.shape in the
# next cell, so load_mnist presumably filters the samples -- confirm in tools.
X, y = load_mnist('./datasets/')


(60000, 2304)


In [20]:
# Sanity check: (n_samples, n_features) of the data actually returned by load_mnist.
X.shape

(11898, 2304)

In [21]:
# Hold out 1% of the samples and keep the remaining 99% for model selection.
# Fixed seed so the retained subset, and hence the reported score, is reproducible.
X, _, y, _ = train_test_split(X, y, test_size=0.01, random_state=0)
print(X.shape, y.shape)
# find_best_svm returns an (estimator, score) pair; print both.
print(find_best_svm(X, y))

(11779, 2304) (11779,)
(LinearSVC(C=100, class_weight=None, dual=True, fit_intercept=False,
     intercept_scaling=1, loss='hinge', max_iter=10000, multi_class='ovr',
     penalty='l2', random_state=None, tol=0.0001, verbose=0), 0.99397232362679344)
