forked from scikit-learn/scikit-learn
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bench_svm.py
164 lines (133 loc) · 4.33 KB
/
bench_svm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
"""
To run this, you'll need to have the following installed:
* pymvpa
* libsvm and its python bindings
* scikit-learn (of course)
This script runs two benchmarks.
First, we increase the number of samples that are fitted and then
classified, and plot the time taken as a function of the number of samples.
In the second benchmark, we increase the number of dimensions of the
training set, classify the samples and plot the time taken as a function
of the number of dimensions.
"""
import numpy as np
import pylab as pl
import gc
from datetime import datetime
# Timing accumulators, one list per library under test.  Each bench_*
# function below appends one elapsed time per call to its own list.
scikit_results, svm_results, mvpa_results = [], [], []

# Number of microseconds in one second.  Kept as a float so that dividing
# an integer microsecond count by it yields a fractional number of seconds.
mu_second = float(10 ** 6)
def bench_scikit(X, Y):
    """
    bench with scikit-learn bindings on libsvm

    Fits an ``SVC`` with an RBF kernel on ``(X, Y)``, predicts on the same
    ``X``, and appends the elapsed wall-clock time, in seconds, to the
    module-level ``scikit_results`` list.

    X : samples, handed unchanged to ``SVC.fit`` and ``SVC.predict``
    Y : targets, handed unchanged to ``SVC.fit``

    Returns nothing; the measurement is recorded in ``scikit_results``.
    """
    # Imported here, before the clock starts, so that import time is not
    # part of the measurement.
    from scikits.learn.svm import SVC

    # Run a garbage collection now rather than leaving pending garbage to
    # be collected somewhere inside the timed region.
    gc.collect()

    # start time
    tstart = datetime.now()
    clf = SVC(kernel='rbf')
    clf.fit(X, Y).predict(X)
    # stop time
    delta = datetime.now() - tstart

    # total_seconds() accounts for every field of the timedelta (days,
    # seconds and microseconds), so long runs are not under-reported.
    scikit_results.append(delta.total_seconds())
def bench_svm(X, Y):
    """
    bench with the python wrappers that come with libsvm (``svmutil``)

    Trains a libsvm model on ``(X, Y)``, predicts on the same ``X``, and
    appends the elapsed wall-clock time, in seconds, to the module-level
    ``svm_results`` list.

    X : samples; must provide ``tolist()`` (e.g. a NumPy array)
    Y : targets; must provide ``tolist()`` (e.g. a NumPy array)

    Returns nothing; the measurement is recorded in ``svm_results``.
    """
    import svmutil

    # svmutil is fed plain Python lists; the conversion is done before the
    # clock starts so it is not counted in the measurement.
    samples = X.tolist()
    labels = Y.tolist()

    # Run a garbage collection now rather than leaving pending garbage to
    # be collected somewhere inside the timed region.
    gc.collect()

    # start time
    tstart = datetime.now()
    problem = svmutil.svm_problem(labels, samples)
    param = svmutil.svm_parameter()
    # Numeric libsvm codes: svm_type 0 is C-SVC and kernel_type 2 is the
    # RBF kernel, i.e. the same kind of model as the other two benchmarks.
    param.svm_type = 0
    param.kernel_type = 2
    model = svmutil.svm_train(problem, param)
    # svm_predict takes a list of labels as its first argument; zeros are
    # passed as placeholders because only the prediction time matters here.
    svmutil.svm_predict([0] * len(samples), samples, model)
    # stop time
    delta = datetime.now() - tstart

    # total_seconds() accounts for every field of the timedelta (days,
    # seconds and microseconds), so long runs are not under-reported.
    svm_results.append(delta.total_seconds())
def bench_pymvpa(X, Y):
    """
    bench with pymvpa (by default uses a custom swig-generated wrapper
    around libsvm)

    Wraps ``(X, Y)`` in a pymvpa ``Dataset``, trains an ``RbfCSVMC``
    classifier with ``C=1.`` on it, predicts on the same ``X``, and appends
    the elapsed wall-clock time, in seconds, to the module-level
    ``mvpa_results`` list.

    X : samples, handed to ``Dataset(samples=...)`` and ``clf.predict``
    Y : targets, handed to ``Dataset(labels=...)``

    Returns nothing; the measurement is recorded in ``mvpa_results``.
    """
    from mvpa.datasets import Dataset
    from mvpa.clfs import svm

    # Run a garbage collection now rather than leaving pending garbage to
    # be collected somewhere inside the timed region.
    gc.collect()

    # start time
    tstart = datetime.now()
    # Building the Dataset is deliberately left inside the timed region,
    # as it is a required step of using pymvpa.
    data = Dataset(samples=X, labels=Y)
    clf = svm.RbfCSVMC(C=1.)
    clf.train(data)
    # Predictions are discarded; only the time taken is of interest.
    clf.predict(X)
    # stop time
    delta = datetime.now() - tstart

    # total_seconds() accounts for every field of the timedelta (days,
    # seconds and microseconds), so long runs are not under-reported.
    mvpa_results.append(delta.total_seconds())
if __name__ == '__main__':
    # ------------------------------------------------------------------
    # Benchmark 1: fixed number of dimensions, growing number of samples.
    # ------------------------------------------------------------------
    n = 5            # number of measurements
    step = 100       # samples added before each measurement
    n_samples = 200  # incremented *before* use, so the first run uses 300
    dim = 200

    # Sample counts that were actually benchmarked; used as the x axis so
    # that the plot lines up with the measurements.
    sample_counts = []

    for i in range(n):
        # print(...) with a single argument prints the same text on
        # Python 2 and Python 3.
        print('============================================')
        print('Entering iteration %s of %s' % (i, n))
        print('============================================')
        n_samples += step
        sample_counts.append(n_samples)
        X = np.random.randn(n_samples, dim)
        # NOTE(review): these targets are continuous floats, whereas the
        # second benchmark converts its targets to integers -- confirm
        # that this is intended for a classification benchmark.
        Y = np.random.randn(n_samples)
        bench_scikit(X, Y)
        bench_pymvpa(X, Y)
        bench_svm(X, Y)

    pl.figure(1)
    pl.subplot(211)
    pl.title('SVM with varying number of samples')
    pl.plot(sample_counts, mvpa_results, 'g-', label='pymvpa')
    pl.plot(sample_counts, svm_results, 'r-', label='libsvm (ctypes binding)')
    pl.plot(sample_counts, scikit_results, 'b-', label='scikit-learn')
    pl.legend()
    pl.xlabel('number of samples to classify')
    # The bench_* functions record their timings in seconds.
    pl.ylabel('time (in seconds)')

    # ------------------------------------------------------------------
    # Benchmark 2: the number of points is fixed and the variable is the
    # number of dimensions.
    # ------------------------------------------------------------------
    # This block runs at module scope, so these assignments rebind the
    # module-level lists that the bench_* functions append to.
    scikit_results = []
    svm_results = []
    mvpa_results = []
    n = 10           # number of measurements
    step = 500       # dimensions added before each measurement
    start_dim = 100  # incremented *before* use, so the first run uses 600

    print('============================================')
    print('Warning: this is going to take a looong time')
    print('============================================')

    # Dimensionalities that were actually benchmarked; used as the x axis.
    dims = []
    dim = start_dim
    for i in range(n):
        print('============================================')
        print('Entering iteration %s of %s' % (i, n))
        print('============================================')
        dim += step
        dims.append(dim)
        X, Y = np.random.randn(100, dim), np.random.randn(100)
        # Scale and truncate the random targets to integers.  The builtin
        # int is used as the dtype because the np.int alias no longer
        # exists in recent NumPy releases.
        Y = (10 * Y).astype(int)
        bench_scikit(X, Y)
        bench_svm(X, Y)
        bench_pymvpa(X, Y)

    pl.subplot(212)
    pl.title('Classification in high dimensional spaces')
    pl.plot(dims, mvpa_results, 'g-', label='pymvpa')
    pl.plot(dims, svm_results, 'r-', label='libsvm (ctypes binding)')
    pl.plot(dims, scikit_results, 'b-', label='scikit-learn')
    pl.legend()
    pl.xlabel('number of dimensions')
    pl.ylabel('time (in seconds)')
    pl.axis('tight')
    pl.show()