<?xml version="1.0" encoding="UTF-8"?>
<commit>
  <added type="array"/>
  <modified type="array">
    <modified>
      <diff>@@ -35,7 +35,7 @@
 (defn from-reader
    &quot;Returns a lazy sequence of examples parsed from the given Reader&quot;
    [reader]
-   (map parse (-&gt; reader BufferedReader. line-seq) (iterate inc 1)))
+   (pmap parse (-&gt; reader BufferedReader. line-seq) (iterate inc 1)))
 
 ;; --- Small RCV Data Set - 2000 examples ----
 (def rcv-small-file &quot;../data/train2000.dat.gz&quot;)   </diff>
      <filename>online/data.clj</filename>
    </modified>
    <modified>
      <diff>@@ -6,7 +6,7 @@
 (ns loss
    (:use vec (clojure.contrib profile)))
 
-(def *max-deriv* 100000)
+(def *max-deriv* 10)
 
 (defn deriv
    &quot;Returns the (clamped) derivative of the loss l evaluated at y v&quot;
@@ -26,7 +26,7 @@
 
 (defn exp
    ([y v]   (prof :exp (Math/pow Math/E (- (* y v)))))
-   ([y v _] (prof :dexp (- (* y (exp y v))))))
+   ([y v _] (prof :dexp (* (- y) (exp y v)))))
 
 (defn logistic
    ([y v]   (prof :logistic  (Math/log (+ 1 (exp y v)))))</diff>
      <filename>online/loss.clj</filename>
    </modified>
    <modified>
      <diff>@@ -1,3 +1,14 @@
+Using Different Losses
+======================
+
+It seems hinge loss outperforms logistic as a surrogate for 0-1 loss.
+Error rates for full training set and initial 1000 as test set:
+
+* Logistic:	6.1%
+* Hinge:  	3.9%
+
+(Notes: Projection frequency = 1, Max. derivative = 10)
+
 Profiling Notes
 ===============
 </diff>
      <filename>online/notes.md</filename>
    </modified>
    <modified>
      <diff>@@ -22,25 +22,23 @@
 
 ; Notes
 ; -----
-; * Parsing all of the examples in the full 781,000+ data set takes 2:40 on my
-;   2.66GHz 2Gb RAM iMac
 ;
 ; * Memory uses rises quickly to around 160Mb of real memory then stablises.
 ;
 ; To Do
 ; -----
-; * Optimise gradient calculations in SGD via rearranging calls to add &amp; scale.
+; [_] Improve the parsing speed so that lines are parsed char-by-char into maps.
 ;
-; * Improve the parsing speed so that lines are parsed char-by-char into maps.
+; [X] Optimise gradient calculations in SGD via rearranging calls to add &amp; scale.
 ;
-; * Make the model vector in SGD dense and update vector methods to handle
+; [X] Make the model vector in SGD dense and update vector methods to handle
 ;   sparse/dense updates. 
 
 (ns run
    (:use (clojure.contrib profile)))
 
 (ns clojure.contrib.profile)
-(def *enable-profiling* true)
+(def *enable-profiling* false)
 
 (ns run
    (:require data learner sgd loss))
@@ -51,7 +49,7 @@
 
 (def *loss* loss/hinge)
 (def *lambda* 0.0001)
-(def *projection-freq* 10)
+(def *projection-freq* 1)
 
 (time
 ;   (profile
@@ -59,4 +57,4 @@
          (sgd/make-learner *loss* *lambda* *projection-freq*) 
          (take (+ *num-train* *num-test*) (data/stdin))
          *num-test*    
-         *report-freq*)) ;)
+         *report-freq*));)</diff>
      <filename>online/run.clj</filename>
    </modified>
  </modified>
  <removed type="array"/>
  <parents type="array">
    <parent>
      <id>fa89732a9805bbbae2ae05a5d7dc16a02f7e29fa</id>
    </parent>
  </parents>
  <author>
    <name>mreid</name>
    <email>mark@reid.name</email>
  </author>
  <url>http://github.com/mreid/injuce/commit/60a5a78696b195868e2712bc98058b915ddf6017</url>
  <id>60a5a78696b195868e2712bc98058b915ddf6017</id>
  <committed-date>2009-10-09T03:00:14-07:00</committed-date>
  <authored-date>2009-10-09T03:00:14-07:00</authored-date>
  <message>Parsing now uses pmap for a 33% speed boost on my dual core machine.</message>
  <tree>00ca87a071636814e6b909ab7d48838ae02608de</tree>
  <committer>
    <name>mreid</name>
    <email>mark@reid.name</email>
  </committer>
</commit>
