Permalink
Browse files

Add rprop+ and rprop- variants

  • Loading branch information...
1 parent ea60049 commit d0c9ee0d8308959bb7449f9690e4439802d9c144 @Jekub Jekub committed Apr 1, 2011
Showing with 21 additions and 7 deletions.
  1. +3 −1 doc/wapiti.1
  2. +2 −2 src/options.c
  3. +14 −4 src/rprop.c
  4. +2 −0 src/wapiti.c
View
@@ -172,13 +172,15 @@ This the more memory economical algorithm as it only requires to keep the featur
This flexibility has a price: don't use it if your features are not sparse, as it will be very slow in this case.
-.B rprop
+.B rprop (rprop+ / rprop-)
This algorithm uses the gradient only to find a good search direction, not to choose the size of the step to make in that direction. It can be very effective on some datasets.
Compared to quasi-Newton methods, rprop reaches the neighborhood of the optimum more quickly, but the lack of second-order information and the restricted use of first-order information make the fine-tuning slower.
Memory requirements are quite light, as it requires 4 vectors of the size of the feature set, and an additional vector for each thread after the first.
+The rprop- variant is rprop+ without backtracking; its performance compared to rprop+ is task-dependent, and it requires one fewer vector, so for very large models it can be better.
+
.SH DATAFILES
Data files are plain text files containing sequence separated by empty lines. Each sequence is a set of non-empty lines where each of these represents one position in the sequence.
View
@@ -56,7 +56,7 @@ static void opt_help(const char *pname) {
"\n"
"Training mode:\n"
" %1$s train [options] [input data] [model file]\n"
- " | --me force maxent mode\n"
+ "\t | --me force maxent mode\n"
"\t-a | --algo STRING training algorithm to use\n"
"\t-p | --pattern FILE patterns for extracting features\n"
"\t-m | --model FILE model file to preload\n"
@@ -84,7 +84,7 @@ static void opt_help(const char *pname) {
"\n"
"Labelling mode:\n"
" %1$s label [options] [input data] [output data]\n"
- " | --me force maxent mode\n"
+ "\t | --me force maxent mode\n"
"\t-m | --model FILE model file to load\n"
"\t-l | --label output only labels\n"
"\t-c | --check input is already labeled\n"
View
@@ -78,6 +78,7 @@ static void trn_rpropsub(job_t *job, int id, int cnt, rprop_t *st) {
const double stpmax = mdl->opt->rprop.stpmax;
const double stpinc = mdl->opt->rprop.stpinc;
const double stpdec = mdl->opt->rprop.stpdec;
+ const bool wbt = st->dlt != NULL;
const double rho1 = mdl->opt->rho1;
const bool l1 = rho1 != 0.0;
double *x = mdl->theta;
@@ -101,14 +102,19 @@ static void trn_rpropsub(job_t *job, int id, int cnt, rprop_t *st) {
// previous gradient values and update the weight. if
// there is l1 penalty, we have to project back the
// update in the choosen orthant.
- if (gp[f] * pg > 0.0) {
+ if (gp[f] * pg > 0.0)
stp[f] = min(stp[f] * stpinc, stpmax);
+ else if (gp[f] * pg < 0.0)
+ stp[f] = max(stp[f] * stpdec, stpmin);
+
+ if (!wbt) {
+ x[f] -= stp[f] * sign(g[f]);
+ } else if (gp[f] * pg > 0.0) {
dlt[f] = stp[f] * -sign(g[f]);
if (l1 && dlt[f] * pg >= 0.0)
dlt[f] = 0.0;
x[f] += dlt[f];
} else if (gp[f] * pg < 0.0) {
- stp[f] = max(stp[f] * stpdec, stpmin);
x[f] = x[f] - dlt[f];
g[f] = 0.0;
} else {
@@ -125,9 +131,11 @@ void trn_rprop(mdl_t *mdl) {
const size_t F = mdl->nftr;
const int K = mdl->opt->maxiter;
const size_t W = mdl->opt->nthread;
+ const bool wbt = strcmp(mdl->opt->algo, "rprop-");
// Allocate state memory and initialize it
double *g = xvm_new(F), *gp = xvm_new(F);
- double *stp = xvm_new(F), *dlt = xvm_new(F);
+ double *stp = xvm_new(F);
+ double *dlt = wbt ? xvm_new(F) : NULL;
for (unsigned f = 0; f < F; f++) {
gp[f] = 0.0;
stp[f] = 0.1;
@@ -158,7 +166,9 @@ void trn_rprop(mdl_t *mdl) {
}
// Free all allocated memory
xvm_free(g); xvm_free(gp);
- xvm_free(stp); xvm_free(dlt);
+ xvm_free(stp);
+ if (wbt)
+ xvm_free(dlt);
for (size_t w = 1; w < W; w++)
xvm_free(grds[w]->g);
for (size_t w = 0; w < W; w++)
View
@@ -60,6 +60,8 @@ static const struct {
{"sgd-l1", trn_sgdl1},
{"bcd", trn_bcd },
{"rprop", trn_rprop},
+ {"rprop+", trn_rprop},
+ {"rprop-", trn_rprop},
{"auto", trn_auto }
};
static const int trn_cnt = sizeof(trn_lst) / sizeof(trn_lst[0]);

0 comments on commit d0c9ee0

Please sign in to comment.