We let the scikit-learn SVM solver compete with our implementations of Platt's original SMO algorithm and of SMO with WSS 1 and WSS 2. As test data, we take different numbers of images from our data set, with labels in $\{-1,1\}$ assigned by the first classifier of the ECOC. As kernels, we test both the standard scalar product and the Gaussian kernel.

In [None]:
import numpy as np
import scipy
from sklearn.svm import SVC
import pickle # to save data
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.metrics.pairwise import pairwise_kernels
from scipy.optimize import minimize
from scipy.spatial.distance import hamming
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the training CSV: the 28**2 columns 'pixel0' ... 'pixel783' hold the
# pixel values and the column 'label' holds the class labels.
train = pd.read_csv('Data/train.csv')
#train = pd.read_csv('Data/train.csv', nrows = 42000)
images = ["pixel%d" % pixel_no for pixel_no in range(28**2)]
# Use the builtin float: the alias np.float was deprecated in NumPy 1.20 and
# removed in NumPy 1.24, where accessing it raises AttributeError.
train_images = np.array(train[images], dtype=float)/100
train_labels = np.array(train['label'])

In [None]:
# ECOC code table: row i is the +/-1 code word of class label i, and column k
# holds the targets of binary classifier k.
# Each string spells one code word, with '+' standing for 1 and '-' for -1.
code_words = np.array([
    [1 if sign == '+' else -1 for sign in word]
    for word in (
        "++----+-+--++-+",
        "--++++-+-++--+-",
        "+--+---++++-+-+",
        "--++-+++----+-+",
        "+++-+-++--+---+",
        "-+--++-+++----+",
        "+-+++----+-+--+",
        "---++++-+-++--+",
        "++-+-++--+---++",
        "-+++----+-+--++",
    )
])

In [None]:
# Index of the ECOC binary classifier (column of code_words) to benchmark.
classifier = 0
# Map every class label to its +/-1 target for that classifier. Indexing with
# the whole label array handles all samples at once and follows the number of
# rows actually loaded instead of assuming exactly 42000.
bench_labels = code_words[train_labels, classifier].astype(int)

In [13]:
# Problem size for the single-run benchmark cells.
no_train = 100

# The first no_train samples with their class labels and +/-1 targets.
train = train_images[:no_train]
train_l = train_labels[:no_train]
bench_l = bench_labels[:no_train]

# Optimal lambdas found via cross validation, one entry per classifier.
lambda_opt = 1./(400*np.array([18,18,18,18,17,18,16,16,16,18,18,20,20,18,18]))
lambd = lambda_opt[classifier]

# Compute optimal C from optimal lambda: C = 1 / (2 * n * lambda).
C_list = 1./(2*len(train)*lambda_opt)
C = C_list[classifier]

# Optimal sigmas found via cross validation: 0.0025 at positions 9 and 13,
# 0.005 everywhere else.
sigma_list = [0.0025 if position in (9, 13) else 0.005 for position in range(15)]
sigma = sigma_list[classifier]

In [14]:
# Tolerance handed to every smo(...) call below as its fifth positional argument.
tol = 1e-3

1.) First the Gaussian kernel.

In [15]:
def kernel(x1, x2):
    """Evaluate the Gaussian kernel exp(-sigma * ||x1 - x2||**2).

    The width parameter is read from the notebook-level variable ``sigma``
    at call time, so ``sigma`` must be assigned before the first call.
    """
    distance = np.linalg.norm(x1 - x2)
    return np.exp(-sigma * distance**2)

1.1) First Platt's SMO.

In [17]:
%%time
# Run Platt's SMO (module smo_platt) on the benchmark problem with the Gaussian
# kernel. Unlike the WSS cells, the data matrix is passed transposed.
from smo_platt import smo

smo(
    np.transpose(train),
    bench_l,
    C,
    kernel,
    tol,
    'no',
    kernel_identifier='gaussian kernel',
)

Wall time: 1.35 s


1.2) Now WSS1.

In [None]:
%%time
# Run the WSS1 variant (module smo_wss1) on the benchmark problem with the
# Gaussian kernel.
from smo_wss1 import smo

smo(
    train,
    bench_l,
    C,
    kernel,
    tol,
    'no',
    kernel_identifier='gaussian kernel',
)

1.3) Now WSS1 without K caching.

In [None]:
%%time
# Run the WSS1 variant without K caching (module smo_wss1_noK) with the
# Gaussian kernel and print whatever the solver returns.
from smo_wss1_noK import smo

print(
    smo(
        train,
        bench_l,
        C,
        kernel,
        tol,
        'no',
        kernel_identifier='gaussian kernel',
    )
)

1.4) Now WSS2.

In [None]:
%%time
# Run the WSS2 variant (module smo_wss2) on the benchmark problem with the
# Gaussian kernel.
from smo_wss2 import smo

smo(
    train,
    bench_l,
    C,
    kernel,
    tol,
    'no',
    kernel_identifier='gaussian kernel',
)

1.5) Finally, the scikit-learn solver.

In [None]:
%%time
# Fit scikit-learn's SVC with the RBF kernel, using sigma as its gamma, on the
# same benchmark problem.
svm = SVC(kernel='rbf', gamma=sigma, C=C)
svm.fit(X=train, y=bench_l)
# Store the dual coefficients of the fitted model.
alphatimeslabeldense = svm.dual_coef_

2.) Now the standard scalar product as kernel.

In [None]:
def kernel(x1, x2):
    """Evaluate the standard scalar product of ``x1`` and ``x2`` via ``np.dot``."""
    inner_product = np.dot(x1, x2)
    return inner_product

2.1) First Platt's SMO.

In [None]:
%%time
# Run Platt's SMO (module smo_platt) with the standard scalar product as
# kernel. Unlike the WSS cells, the data matrix is passed transposed.
from smo_platt import smo

smo(
    np.transpose(train),
    bench_l,
    C,
    kernel,
    tol,
    'no',
    kernel_identifier='standard scalar product',
)

2.2) Now WSS1 with Gramian caching.

In [None]:
%%time
# Run the WSS1 variant (module smo_wss1) with the standard scalar product as
# kernel.
from smo_wss1 import smo

smo(
    train,
    bench_l,
    C,
    kernel,
    tol,
    'no',
    kernel_identifier='standard scalar product',
)

2.3) Now WSS1 without Gramian caching.

In [None]:
%%time
# Run the WSS1 variant without Gramian caching (module smo_wss1_noK) with the
# standard scalar product as kernel.
from smo_wss1_noK import smo

smo(
    train,
    bench_l,
    C,
    kernel,
    tol,
    'no',
    kernel_identifier='standard scalar product',
)

2.4) Now WSS2.

In [None]:
%%time
# Run the WSS2 variant (module smo_wss2) with the standard scalar product as
# kernel.
from smo_wss2 import smo

smo(
    train,
    bench_l,
    C,
    kernel,
    tol,
    'no',
    kernel_identifier='standard scalar product',
)

2.5) Finally, the scikit-learn solver.

In [None]:
%%time
# Fit scikit-learn's SVC with the linear kernel on the same benchmark problem.
svm = SVC(kernel='linear', C=C)
svm.fit(X=train, y=bench_l)
# Store the dual coefficients of the fitted model.
alphatimeslabels = svm.dual_coef_

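## Now we loop the above over several training-set sizes no_train (intended range: 100 to 2000; the sizes actually used are set in no_train_arr below) and save the run times so that they can be plotted.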

In [1]:
import numpy as np
import scipy
from sklearn.svm import SVC
import pickle # to save data
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.metrics.pairwise import pairwise_kernels
from scipy.optimize import minimize
from scipy.spatial.distance import hamming
import matplotlib.pyplot as plt
%matplotlib inline
import timeit



In [2]:
# Load the training CSV: the 28**2 columns 'pixel0' ... 'pixel783' hold the
# pixel values and the column 'label' holds the class labels.
train = pd.read_csv('Data/train.csv')
images = ["pixel%d" % pixel_no for pixel_no in range(28**2)]
# Use the builtin float: the alias np.float was deprecated in NumPy 1.20 and
# removed in NumPy 1.24, where accessing it raises AttributeError.
train_images = np.array(train[images], dtype=float)/100
train_labels = np.array(train['label'])

In [3]:
# ECOC code table: row i is the +/-1 code word of class label i, and column k
# holds the targets of binary classifier k.
# Each string spells one code word, with '+' standing for 1 and '-' for -1.
code_words = np.array([
    [1 if sign == '+' else -1 for sign in word]
    for word in (
        "++----+-+--++-+",
        "--++++-+-++--+-",
        "+--+---++++-+-+",
        "--++-+++----+-+",
        "+++-+-++--+---+",
        "-+--++-+++----+",
        "+-+++----+-+--+",
        "---++++-+-++--+",
        "++-+-++--+---++",
        "-+++----+-+--++",
    )
])

In [4]:
# Index of the ECOC binary classifier (column of code_words) to benchmark.
classifier = 0
# Map every class label to its +/-1 target for that classifier. Indexing with
# the whole label array handles all samples at once and follows the number of
# rows actually loaded instead of assuming exactly 42000.
bench_labels = code_words[train_labels, classifier].astype(int)

In [5]:
# Tolerance handed to every smo(...) call below as its fifth positional argument.
tol = 1e-3

In [22]:
# Training-set sizes to benchmark; a longer sweep would be
# np.arange(100, 1100, 100).
no_train_arr = np.arange(100, 300, 100)

# Timing tables, one per kernel: one row per solver (5 solvers) and one column
# per training-set size. They are left uninitialised and filled by the loops.
plot_array_gauss = np.empty((5, no_train_arr.size))
plot_array_ssp = np.empty((5, no_train_arr.size))


# Gaussian kernel
def kernel(x1, x2):
    """Return exp(-sigma * ||x1 - x2||**2), reading the global ``sigma`` at call time."""
    return np.exp(-sigma*np.linalg.norm(x1-x2)**2)


# The cross-validated hyperparameters are the same for every training-set size,
# so they are set once here rather than on each loop iteration.
lambda_opt = 1./(400*np.array([18,18,18,18,17,18,16,16,16,18,18,20,20,18,18])) #optimal lambdas found via cross validation
lambd = lambda_opt[classifier]
sigma_list = [0.005, 0.005, 0.005, 0.005, 0.005, 0.005, 0.005, 0.005, 0.005, 0.0025, 0.005, 0.005, 0.005, 0.0025, 0.005] #optimal sigmas found via cross validation
sigma = sigma_list[classifier]

# enumerate yields the column of plot_array_gauss that belongs to each size,
# replacing the hand-incremented counter.
for notraincounter, no_train in enumerate(no_train_arr):
    train = train_images[:no_train]
    train_l = train_labels[:no_train]
    bench_l = bench_labels[:no_train]
    # C depends on the number of training samples, so it is recomputed per size.
    C_list = 1./(2*len(train)*lambda_opt)  #compute optimal C from optimal lambda
    C = C_list[classifier]

    # scikit-learn (row 0); the RBF kernel's gamma is set to sigma
    svm = SVC(C=C, kernel='rbf', gamma=sigma)
    start = timeit.default_timer()
    svm.fit(train, bench_l)
    alphatimeslabeldense = svm.dual_coef_
    stop = timeit.default_timer()
    plot_array_gauss[0,notraincounter] = stop - start

    # wss1 (row 1); the import happens before the timer starts
    from smo_wss1 import smo
    start = timeit.default_timer()
    smo(train,bench_l,C,kernel,tol,'no', kernel_identifier = 'gaussian kernel')
    stop = timeit.default_timer()
    plot_array_gauss[1,notraincounter] = stop - start

    # wss1 without K (row 2)
    from smo_wss1_noK import smo
    start = timeit.default_timer()
    smo(train,bench_l,C,kernel,tol,'no', kernel_identifier = 'gaussian kernel')
    stop = timeit.default_timer()
    plot_array_gauss[2,notraincounter] = stop - start

    # wss2 (row 3)
    from smo_wss2 import smo
    start = timeit.default_timer()
    smo(train,bench_l,C,kernel,tol,'no', kernel_identifier = 'gaussian kernel')
    stop = timeit.default_timer()
    plot_array_gauss[3,notraincounter] = stop - start

    # Platt (row 4); this solver is given the transposed data matrix
    from smo_platt import smo
    start = timeit.default_timer()
    smo(np.transpose(train),bench_l,C,kernel,tol,'no', kernel_identifier = 'gaussian kernel')
    stop = timeit.default_timer()
    plot_array_gauss[4,notraincounter] = stop - start

    
# standard scalar product
def kernel(x1, x2):
    """Return the standard scalar product of ``x1`` and ``x2`` via ``np.dot``."""
    return np.dot(x1,x2)


# The cross-validated lambdas are the same for every training-set size, so they
# are set once here rather than on each loop iteration.
lambda_opt = 1./(400*np.array([18,18,18,18,17,18,16,16,16,18,18,20,20,18,18])) #optimal lambdas found via cross validation
lambd = lambda_opt[classifier]

# enumerate yields the column of plot_array_ssp that belongs to each size,
# replacing the hand-incremented counter.
for notraincounter, no_train in enumerate(no_train_arr):
    train = train_images[:no_train]
    train_l = train_labels[:no_train]
    bench_l = bench_labels[:no_train]
    # C depends on the number of training samples, so it is recomputed per size.
    C_list = 1./(2*len(train)*lambda_opt)  #compute optimal C from optimal lambda
    C = C_list[classifier]

    # scikit-learn (row 0)
    svm = SVC(C=C, kernel='linear')
    start = timeit.default_timer()
    svm.fit(train, bench_l)
    alphatimeslabels = svm.dual_coef_
    stop = timeit.default_timer()
    plot_array_ssp[0,notraincounter] = stop - start

    # wss1 (row 1); the import happens before the timer starts
    from smo_wss1 import smo
    start = timeit.default_timer()
    smo(train,bench_l,C,kernel,tol,'no', kernel_identifier = 'standard scalar product')
    stop = timeit.default_timer()
    plot_array_ssp[1,notraincounter] = stop - start

    # wss1 without K (row 2)
    from smo_wss1_noK import smo
    start = timeit.default_timer()
    smo(train,bench_l,C,kernel,tol,'no', kernel_identifier = 'standard scalar product')
    stop = timeit.default_timer()
    plot_array_ssp[2,notraincounter] = stop - start

    # wss2 (row 3)
    from smo_wss2 import smo
    start = timeit.default_timer()
    smo(train,bench_l,C,kernel,tol,'no', kernel_identifier = 'standard scalar product')
    stop = timeit.default_timer()
    plot_array_ssp[3,notraincounter] = stop - start

    # Platt (row 4); this solver is given the transposed data matrix
    from smo_platt import smo
    start = timeit.default_timer()
    smo(np.transpose(train),bench_l,C,kernel,tol,'no', kernel_identifier = 'standard scalar product')
    stop = timeit.default_timer()
    plot_array_ssp[4,notraincounter] = stop - start
    
# Write both timing tables to comma-separated text files in the working directory.
np.savetxt(fname='plot_array_gauss.txt', X=plot_array_gauss, delimiter=',')
np.savetxt(fname='plot_array_ssp.txt', X=plot_array_ssp, delimiter=',')