
Tweak documentation and tests
- adapt README.md to new interface
- add some general documentation
- increase test coverage
- fix Mercer map: enforce that kernel matrix is considered as symmetric
  and thus eigenvalues are real
- add pretty print for models
BigCrunsh committed Jun 29, 2014
1 parent d0c68af commit 76c8bec
Showing 10 changed files with 75 additions and 22 deletions.
31 changes: 19 additions & 12 deletions README.md
@@ -1,3 +1,5 @@
<img src="http://bigcrunsh.github.io/images/logo.png" alt="RegERMs Logo" width="210" height="125">

RegERMs.jl
==========
[![Build Status](https://travis-ci.org/BigCrunsh/RegERMs.jl.svg?branch=master)](https://travis-ci.org/BigCrunsh/RegERMs.jl)
@@ -10,18 +12,23 @@ This package implements several machine learning algorithms in a regularised emp
Some examples:

```julia
# define some toy data
X = [1 1; 2 2; 1 -1]; # (3 examples with 2 features)
y = [-1; -1; 1]; # binary class values for the 3 examples

# choose SVM as learning algorithm (regularization parameter is 0.1)
model = SVM(X, y, 0.1)

# get solution
w = optimize(model)

# make predictions
ybar = sign(X*w)
# define some toy data (XOR - example)
np = 100
nn = 100
X = [randn(int(np/2),1)+1 randn(int(np/2),1)+1; randn(int(np/2-0.5),1)-1 randn(int(np/2-0.5),1)-1;
randn(int(nn/2),1)+1 randn(int(nn/2),1)-1; randn(int(nn/2-0.5),1)-1 randn(int(nn/2-0.5),1)+1] # examples with 2 features
y = vec([ones(np,1); -ones(nn,1)]) # binary class values

# choose SVM as learning algorithm
svm = SVM(X, y; kernel=:rbf)

# get solution (regularization parameter is 0.1)
regParam = 0.1
model = optimize(svm, regParam)

# make predictions and compute accuracy
ybar = classify(model, X)
acc = mean(ybar .== y)

```

2 changes: 1 addition & 1 deletion TODO
@@ -1,6 +1,6 @@
The following issues should be addressed (not ordered by priority):
- propagate kernel parameters
- cross-validation and use it if regularisation parameter is not provided
- dual optimization and kernel functions
- sparse representation of data
- multiclass version
- documentation
6 changes: 5 additions & 1 deletion doc/index.rst
@@ -1,7 +1,7 @@
RegERMs.jl
===================================

Regularized empirical risk minimization (RegERM) is a general concept that defines a family of optimization problems in machine learning, as, e.g., Support Vector Machine, Logistic Regression, and Linear Regression.
Regularized empirical risk minimization (RegERM) is a general concept that defines a family of optimization problems in machine learning, as, e.g., Support Vector Machine, Logistic Regression, and Ridge Regression.

Contents:

@@ -18,6 +18,10 @@ Let :math:`{\bf x}_i` be a vector of features describing an instance i and :math
The loss function :math:`\ell` measures the disagreement between the true label :math:`y` and the model prediction, and the regularizer :math:`\Omega` penalizes the model's complexity.

.. function:: optimize(method::RegERM, λ::Float64, optimizer::Symbol=:l_bfgs)

Perform the optimization of ``method`` for a given regularization parameter ``λ`` and return a prediction model that can be used for classification. Stochastic gradient descent (``:sgd``) and Limited-memory BFGS (``:l_bfgs``) are valid optimizers.
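
A minimal sketch of calling ``optimize`` with an explicit optimizer (assuming an ``SVM`` object ``svm`` constructed as in the README example; the keyword form mirrors the test suite):

```julia
# L-BFGS is the default optimizer
model = optimize(svm, 0.1)

# stochastic gradient descent
model = optimize(svm, 0.1, optimizer=:sgd)
```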

Indices and tables
==================

6 changes: 5 additions & 1 deletion doc/logistic_regression.rst
@@ -3,4 +3,8 @@ Logistic Regression

Logistic regression models the relationship between an input variable :math:`{\bf x}` and a binary output variable :math:`y` by fitting a logistic function.

Implements: ``optimize``, ``objective``
.. function:: LogReg(X::Matrix, y::Vector; kernel::Symbol=:linear)

Initialize a logistic regression object with a data matrix :math:`{\bf X} \in \mathbb{R}^{n\times m}`, a binary label vector :math:`{\bf y} \in \mathbb{R}^{n}` of :math:`n` :math:`m`-dimensional examples, and a kernel function.

Implements: ``optimize``
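
A minimal usage sketch with toy binary data (assuming ``optimize`` and ``classify`` behave as described for the general interface above):

```julia
# four examples with two features, linearly separable classes
X = [1.0 1.0; 2.0 2.0; -1.0 -1.0; -2.0 -2.0]
y = [1; 1; -1; -1]

# linear logistic regression, fitted with regularization parameter 0.1
logreg = LogReg(X, y)
model = optimize(logreg, 0.1)

# predicted class labels for the training data
ybar = classify(model, X)
```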
10 changes: 7 additions & 3 deletions doc/ridge_regression.rst
@@ -1,6 +1,10 @@
Linear Regression
Ridge Regression
===================================

Linear regression models the relationship between an input variable :math:`{\bf x}` and a continuous output variable :math:`y` by fitting a linear function.
Ridge regression models the relationship between an input variable :math:`{\bf x}` and a continuous output variable :math:`y` by fitting a linear function.

Implements: ``optimize``, ``objective``
.. function:: LinReg(X::Matrix, y::Vector; kernel::Symbol=:linear)

Initialize a ridge regression object with a data matrix :math:`{\bf X} \in \mathbb{R}^{n\times m}`, a binary label vector :math:`{\bf y} \in \mathbb{R}^{n}` of :math:`n` :math:`m`-dimensional examples, and a kernel function.

Implements: ``optimize``
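
A minimal fitting sketch with ±1 targets (a prediction interface for real-valued targets is not covered by this commit, so only the fit is shown):

```julia
# four examples with two features and +/-1 targets
X = [1.0 2.0; 2.0 1.0; -1.0 -2.0; -2.0 -1.0]
y = [1; 1; -1; -1]

# ridge regression with a linear kernel, fitted with regularization parameter 0.1
linreg = LinReg(X, y)
model = optimize(linreg, 0.1)
```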
6 changes: 5 additions & 1 deletion doc/svm.rst
@@ -3,4 +3,8 @@ Support Vector Machine

Support vector machines model the relationship between an input variable :math:`{\bf x}` and a binary output variable :math:`y` by finding a hyperplane that separates examples belonging to different classes with maximal margin.

Implements: ``optimize``, ``objective``
.. function:: SVM(X::Matrix, y::Vector; kernel::Symbol=:linear)

Initialize an SVM object with a data matrix :math:`{\bf X} \in \mathbb{R}^{n\times m}`, a binary label vector :math:`{\bf y} \in \mathbb{R}^{n}` of :math:`n` :math:`m`-dimensional examples, and a kernel function.

Implements: ``optimize``
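
A minimal usage sketch with an RBF kernel, mirroring the kernelized test case in ``test/svm.jl``:

```julia
# XOR-like toy data, not linearly separable
X = [1 1; -1 -1; 1 -1; -1 1]
y = [1; 1; -1; -1]

# SVM with RBF kernel, fitted with regularization parameter 0.1
model = optimize(SVM(X, y, kernel=:rbf), 0.1)

# predicted labels; on this toy data they match y (cf. test/svm.jl)
ybar = classify(model, X)
```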
1 change: 1 addition & 0 deletions src/RegERMs.jl
@@ -53,6 +53,7 @@ function Base.show(io::IO, model::RegERM)
println(io, repeat("-", length(methodname(model))))
println(io, "number of examples: $(model.n)")
println(io, "number of features: $(model.m)")
println(io, "kernel function: $(model.kernel)")
end

# include
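
With the added line, the printed summary of a learning problem also reports its kernel; a minimal sketch of triggering it (assuming ``X`` and ``y`` as in the README example):

```julia
svm = SVM(X, y; kernel=:rbf)

# prints the method name, the number of examples and features,
# and (new in this commit) the kernel function
show(STDOUT, svm)
```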
2 changes: 1 addition & 1 deletion src/mercer_map.jl
@@ -15,7 +15,7 @@ function MercerMap(X::Matrix, kernel::Symbol)
kernelfcn = eval(kernel)
K = center(kernelfcn(X, X))

d, V = eig(K)
d, V = eig(Symmetric(K))

# consider dimensions with eigenvalues > 1e-9
i = d .> 1e-9
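
Background for the one-line change: a kernel matrix is symmetric in exact arithmetic, but floating-point round-off can break exact symmetry, and the general eigensolver then no longer guarantees real eigenvalues. Wrapping the matrix in ``Symmetric`` selects the symmetric solver. A small illustrative sketch:

```julia
# symmetric in theory, but with a tiny numerical asymmetry
K = [2.0 1.0; (1.0 + 1e-12) 2.0]

d, V = eig(K)             # general solver: real eigenvalues are not guaranteed
d, V = eig(Symmetric(K))  # symmetric solver (uses the upper triangle):
                          # the eigenvalues d are guaranteed to be real
```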
18 changes: 17 additions & 1 deletion src/model.jl
@@ -30,6 +30,13 @@ end

classify(model::PrimalModel, X::Matrix) = sign(X*model.w)

# Pretty-print
function Base.show(io::IO, model::PrimalModel)
println(io, "Primal Model")
println(io, repeat("-", length("Primal Model")))
println(io, "number of dimensions: $(length(model.w))")
end

## Dual model

# TODO(cs): map should be immutable
@@ -38,4 +45,13 @@ type DualModel
map::MercerMap # dual model is implemented via Mercer map
end

classify(model::DualModel, X::Matrix) = sign(apply(model.map, X)*model.w)

# Pretty-print
function Base.show(io::IO, model::DualModel)
println(io, "Dual Model")
println(io, repeat("-", length("Dual Model")))
println(io, "number of dimensions: $(length(model.w))")
println(io, "number of examples: $(size(model.map.K,1))")
println(io, "kernel function: $(model.map.kernel)")
end
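
A minimal sketch of the new model summaries (assuming ``model`` is the result of ``optimize``):

```julia
# prints "Primal Model" or "Dual Model", the number of dimensions,
# and for dual models also the number of examples and the kernel
show(STDOUT, model)
```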
15 changes: 14 additions & 1 deletion test/svm.jl
@@ -19,8 +19,21 @@ show(IOBuffer(), SVM(X, y))
@test_throws DimensionMismatch SVM(X', y)
@test_throws ArgumentError SVM(X, [3; 3; 2])

# check automatic selection of model
X = [1 1; -1 -1; 1 -1; -1 1]
y = [1; 1; -1; -1]
model = optimize(SVM(X, y), 10.0, optimizer=:sgd)
@test isa(model, PrimalModel)
show(IOBuffer(), model)

X = [1 1 -1 0.1; 2 2 0.5 2; 1 -1 -0.1 0.1]
y = [-1; -1; 1]
model = optimize(SVM(X, y), 10.0, optimizer=:sgd)
@test isa(model, DualModel)
show(IOBuffer(), model)

# check kernelized solution
X = [1 1; -1 -1; 1 -1; -1 1]
y = [1; 1; -1; -1]
model = optimize(SVM(X, y, kernel=:rbf), 0.1, optimizer=:l_bfgs)
@test classify(model, X) == y
