# SVM Comparison
This notebook compares the training time and classification error of ThunderSVM, LibSVM, Pegasos, ~~liquidSVM~~ and Snacks on three binary classification datasets:
 - a9a
 - SUSY
 - HIGGS

In [7]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [8]:
import utils
import time
import os
import sys
# Restrict CUDA to device 0. This is set before the solver libraries are
# imported in the next cell (presumably for thundersvm's GPU backend -- confirm).
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Make the parent directory importable; the `svm` and `pegasos` modules imported
# in the next cell presumably live there -- confirm.
sys.path.append("../")

In [9]:
# Import all SVM solvers, data embedding functions and time method
from svm import Snacks
from pegasos import PegasosSVMClassifier
from thundersvm import *
from sklearn import svm

In [10]:
def prepare_data(data, num_centers, gamma):
    """Load one of the supported datasets and apply the kernel embedding.

    Parameters
    ----------
    data : str
        Dataset name; must be "a9a", "SUSY" or "HIGGS".
    num_centers
        Forwarded positionally to ``utils.kernel_embedding``.
    gamma
        Forwarded to ``utils.kernel_embedding`` as the ``gamma`` keyword.

    Returns
    -------
    tuple
        ``(Xtr, Ytr, Xts, Yts)`` exactly as returned by
        ``utils.kernel_embedding``.

    Raises
    ------
    ValueError
        If ``data`` is not one of the supported dataset names. Previously the
        function only printed a message and then crashed with an
        ``UnboundLocalError`` on the next line.
    """
    supported = ("a9a", "SUSY", "HIGGS")
    if data not in supported:
        # Fail before touching any data instead of falling through with
        # unbound Xtr/Xts/Ytr/Yts.
        raise ValueError(
            f"You asked for dataset {data} while Snacks only support a9a, SUSY and HIGGS"
        )

    # Every supported dataset lives under ../datasets/<name>.
    # 0.7 is presumably the train split fraction -- TODO confirm in utils.dataloader.
    # Note the ordering: dataloader yields (Xtr, Xts, Ytr, Yts) whereas
    # kernel_embedding takes and returns (Xtr, Ytr, Xts, Yts).
    Xtr, Xts, Ytr, Yts = utils.dataloader(f"../datasets/{data}", 0.7)
    # The positional False is an opaque flag of utils.kernel_embedding -- see its definition.
    Xtr, Ytr, Xts, Yts = utils.kernel_embedding(Xtr, Ytr, Xts, Yts, False, num_centers, gamma = gamma)
    return Xtr, Ytr, Xts, Yts


In [11]:
def run_snacks(data, num_centers, gamma, nb_iterations, lambda_reg, stepsize):
    """Fit Snacks on an embedded dataset and report fit time and test error.

    Parameters
    ----------
    data, num_centers, gamma
        Passed unchanged to ``prepare_data``.
    nb_iterations, lambda_reg, stepsize
        Passed unchanged to the ``Snacks`` constructor.

    Returns
    -------
    tuple
        ``(t_fit, error)``: seconds spent inside ``model.fit`` and
        ``1 - model.score(Xts, Yts)``.
    """
    Xtr, Ytr, Xts, Yts = prepare_data(data, num_centers, gamma)
    model = Snacks(nb_iterations = nb_iterations, lambda_reg = lambda_reg, stepsize = stepsize)

    # Only fit() is timed; data preparation and scoring are excluded.
    # perf_counter() is the monotonic, high-resolution clock intended for
    # measuring durations; time.time() can jump if the system clock is adjusted.
    t_start = time.perf_counter()
    model.fit(Xtr, Ytr)
    t_fit = time.perf_counter() - t_start

    # assumes score() returns accuracy as a fraction in [0, 1] -- TODO confirm
    accuracy = model.score(Xts, Yts)
    print(f"in {t_fit:.2f}s, C-err is {100 - accuracy * 100:.2f}%")
    return t_fit, 1 - accuracy

def run_sklearn(data, num_centers, gamma, lambda_reg):
    """Fit scikit-learn's ``LinearSVC`` (hinge loss) and report fit time and test error.

    Parameters
    ----------
    data, num_centers, gamma
        Passed unchanged to ``prepare_data``.
    lambda_reg
        Regularisation weight, converted to the solver's ``C`` as
        ``1 / (2 * Xtr.shape[0] * lambda_reg)``.

    Returns
    -------
    tuple
        ``(t_fit, error)``: seconds spent inside ``model.fit`` and
        ``1 - model.score(Xts, Yts)``.
    """
    Xtr, Ytr, Xts, Yts = prepare_data(data, num_centers, gamma)
    # Xtr.shape[0] is presumably the number of training samples -- confirm.
    C = 1 / (2 * Xtr.shape[0] * lambda_reg)
    model = svm.LinearSVC(loss = "hinge", C = C)

    # Only fit() is timed; data preparation and scoring are excluded.
    # perf_counter() is the monotonic, high-resolution clock intended for
    # measuring durations; time.time() can jump if the system clock is adjusted.
    t_start = time.perf_counter()
    model.fit(Xtr, Ytr)
    t_fit = time.perf_counter() - t_start

    accuracy = model.score(Xts, Yts)
    print(f"in {t_fit:.2f}s, C-err is {100 - accuracy * 100:.2f}%")
    return t_fit, 1 - accuracy

def run_thundersvm(data, num_centers, gamma, lambda_reg):
    """Fit a linear-kernel ThunderSVM ``SVC`` and report fit time and test error.

    Parameters
    ----------
    data, num_centers, gamma
        Passed unchanged to ``prepare_data``.
    lambda_reg
        Regularisation weight, converted to the solver's ``C`` as
        ``1 / (2 * Xtr.shape[0] * lambda_reg)``.

    Returns
    -------
    tuple
        ``(t_fit, error)``: seconds spent inside ``model.fit`` and
        ``1 - model.score(Xts, Yts)``.
    """
    Xtr, Ytr, Xts, Yts = prepare_data(data, num_centers, gamma)
    # Xtr.shape[0] is presumably the number of training samples -- confirm.
    C = 1 / (2 * Xtr.shape[0] * lambda_reg)
    # SVC is brought in by the notebook's `from thundersvm import *`.
    model = SVC(kernel = "linear", C = C)

    # Only fit() is timed; data preparation and scoring are excluded.
    # perf_counter() is the monotonic, high-resolution clock intended for
    # measuring durations; time.time() can jump if the system clock is adjusted.
    t_start = time.perf_counter()
    model.fit(Xtr, Ytr)
    t_fit = time.perf_counter() - t_start

    # assumes score() returns accuracy as a fraction in [0, 1] -- TODO confirm
    accuracy = model.score(Xts, Yts)
    print(f"in {t_fit:.2f}s, C-err is {100 - accuracy * 100:.2f}%")
    return t_fit, 1 - accuracy

def run_pegasos(data, num_centers, gamma, nb_iterations, lambda_reg):
    """Fit ``PegasosSVMClassifier`` and report fit time and test error.

    Parameters
    ----------
    data, num_centers, gamma
        Passed unchanged to ``prepare_data``.
    nb_iterations
        Passed to the classifier as ``iterations``.
    lambda_reg
        Passed to the classifier as ``lambda_reg``. Pegasos takes the
        regularisation weight directly, so no conversion to ``C`` is needed
        (the previously computed, never-used ``C`` local has been removed).

    Returns
    -------
    tuple
        ``(t_fit, error)``: seconds spent inside ``model.fit`` and
        ``1 - model.score(Xts, Yts)``.
    """
    Xtr, Ytr, Xts, Yts = prepare_data(data, num_centers, gamma)
    model = PegasosSVMClassifier(iterations = nb_iterations, lambda_reg = lambda_reg)

    # Only fit() is timed; data preparation and scoring are excluded.
    # perf_counter() is the monotonic, high-resolution clock intended for
    # measuring durations; time.time() can jump if the system clock is adjusted.
    t_start = time.perf_counter()
    model.fit(Xtr, Ytr)
    t_fit = time.perf_counter() - t_start

    # assumes score() returns accuracy as a fraction in [0, 1] -- TODO confirm
    accuracy = model.score(Xts, Yts)
    print(f"in {t_fit:.2f}s, C-err is {100 - accuracy * 100:.2f}%")
    return t_fit, 1 - accuracy

In [7]:
# a9a benchmark: every solver uses num_centers = 1600, gamma = 1e-1, lambda_reg = 1e-5.
run_snacks("a9a", 1600, 1e-1, 45000, 1e-5, 1.)  # nb_iterations = 45000, stepsize = 1.0
run_sklearn("a9a", 1600, 1e-1, 1e-5)
run_pegasos("a9a", 1600, 1e-1, 45000 * 3, 1e-5)  # Pegasos is given 3x the Snacks iteration count
# The ThunderSVM run is disabled for this dataset (reason not recorded here).
# run_thundersvm("a9a", 1600, 1e-1, 1e-5)

in 0.42s, C-err is 15.18%




in 8.66s, C-err is 15.25%
in 1.52s, C-err is 17.01%


In [12]:
run_snacks("SUSY", 800, 5e-2, 45000, 1e-6, 1.)