From 10bdba4f140ae134e512058e0b9e310ba9dbe6a5 Mon Sep 17 00:00:00 2001 From: "Thomas L. Redman" Date: Fri, 25 Aug 2017 09:43:15 -0500 Subject: [PATCH 1/5] This includes the code to invoke feature pruning as well as the implementation. --- .gitignore | 1 + lbjava-examples/pom.xml | 6 +- lbjava-mvn-plugin/pom.xml | 4 +- lbjava/pom.xml | 2 +- .../edu/illinois/cs/cogcomp/lbjava/Train.java | 92 ++++----- .../cs/cogcomp/lbjava/learn/Learner.java | 33 +++- .../cs/cogcomp/lbjava/learn/Lexicon.java | 43 ++++- .../lbjava/learn/LinearThresholdUnit.java | 87 ++++++++- .../learn/SparseAveragedPerceptron.java | 50 +++++ .../lbjava/learn/SparseNetworkLearner.java | 44 ++--- .../lbjava/learn/SparseWeightVector.java | 59 +++++- .../lbjava/learn/SupportVectorMachine.java | 133 ++++++++++++- .../featurepruning/LexiconOptimizer.java | 181 ++++++++++++++++++ .../LinearThresholdUnitOptimizer.java | 144 ++++++++++++++ .../SparseNetworkOptimizer.java | 156 +++++++++++++++ .../SupportVectorMachineOptimizer.java | 171 +++++++++++++++++ .../learn/featurepruning/package-info.java | 19 ++ .../cs/cogcomp/lbjava/util/FVector.java | 27 +++ .../SparseNetworkLearningPruneTest.java | 20 ++ pom.xml | 2 +- 20 files changed, 1167 insertions(+), 107 deletions(-) create mode 100644 lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LexiconOptimizer.java create mode 100644 lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LinearThresholdUnitOptimizer.java create mode 100644 lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SparseNetworkOptimizer.java create mode 100644 lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SupportVectorMachineOptimizer.java create mode 100644 lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/package-info.java create mode 100644 lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/SparseNetworkLearningPruneTest.java diff --git a/.gitignore b/.gitignore index 
e60ae439..c12f7871 100644 --- a/.gitignore +++ b/.gitignore @@ -63,3 +63,4 @@ lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/spam/SpamC lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/regression/MyFeatures.java lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/regression/MyLabel.java lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/regression/SGDClassifier.java +/.metadata/ diff --git a/lbjava-examples/pom.xml b/lbjava-examples/pom.xml index e236e380..cdef5169 100755 --- a/lbjava-examples/pom.xml +++ b/lbjava-examples/pom.xml @@ -3,7 +3,7 @@ lbjava-project edu.illinois.cs.cogcomp - 1.2.26 + 1.3.0 4.0.0 @@ -27,12 +27,12 @@ edu.illinois.cs.cogcomp LBJava - 1.2.26 + 1.3.0 edu.illinois.cs.cogcomp lbjava-maven-plugin - 1.2.26 + 1.3.0 diff --git a/lbjava-mvn-plugin/pom.xml b/lbjava-mvn-plugin/pom.xml index 52302723..151ad16e 100644 --- a/lbjava-mvn-plugin/pom.xml +++ b/lbjava-mvn-plugin/pom.xml @@ -5,7 +5,7 @@ lbjava-project edu.illinois.cs.cogcomp - 1.2.26 + 1.3.0 lbjava-maven-plugin @@ -76,7 +76,7 @@ edu.illinois.cs.cogcomp LBJava - 1.2.26 + 1.3.0 jar compile diff --git a/lbjava/pom.xml b/lbjava/pom.xml index 062fe6cf..9a311eaa 100644 --- a/lbjava/pom.xml +++ b/lbjava/pom.xml @@ -3,7 +3,7 @@ lbjava-project edu.illinois.cs.cogcomp - 1.2.26 + 1.3.0 4.0.0 diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Train.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Train.java index ab0dd37c..15a28665 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Train.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Train.java @@ -880,53 +880,57 @@ public void run() { if (!lce.onlyCodeGeneration) { // If there's a "from" clause, train. 
try { - if (lce.parser != null) { - System.out.println("Training " + getName()); - if (preExtract) { - preExtractAndPrune(); - System.gc(); - } else - learner.saveLexicon(); - int trainingRounds = 1; - - if (tuningParameters) { - String parametersPath = getName(); - if (Main.classDirectory != null) - parametersPath = - Main.classDirectory + File.separator + parametersPath; - parametersPath += ".p"; - - Learner.Parameters bestParameters = tune(); - trainingRounds = bestParameters.rounds; - Learner.writeParameters(bestParameters, parametersPath); - System.out.println(" " + getName() - + ": Training on entire training set"); - } else { - if (lce.rounds != null) - trainingRounds = Integer.parseInt(((Constant) lce.rounds).value); - - if (lce.K != null) { - int[] rounds = {trainingRounds}; - int k = Integer.parseInt(lce.K.value); - double alpha = Double.parseDouble(lce.alpha.value); - trainer.crossValidation(rounds, k, lce.splitPolicy, alpha, - testingMetric, true); + learner.beginTraining(); + try { + if (lce.parser != null) { + System.out.println("Training " + getName()); + if (preExtract) { + preExtractAndPrune(); + System.gc(); + } else + learner.saveLexicon(); + int trainingRounds = 1; + + if (tuningParameters) { + String parametersPath = getName(); + if (Main.classDirectory != null) + parametersPath = + Main.classDirectory + File.separator + parametersPath; + parametersPath += ".p"; + + Learner.Parameters bestParameters = tune(); + trainingRounds = bestParameters.rounds; + Learner.writeParameters(bestParameters, parametersPath); System.out.println(" " + getName() + ": Training on entire training set"); + } else { + if (lce.rounds != null) + trainingRounds = Integer.parseInt(((Constant) lce.rounds).value); + + if (lce.K != null) { + int[] rounds = {trainingRounds}; + int k = Integer.parseInt(lce.K.value); + double alpha = Double.parseDouble(lce.alpha.value); + trainer.crossValidation(rounds, k, lce.splitPolicy, alpha, + testingMetric, true); + System.out.println(" " 
+ getName() + + ": Training on entire training set"); + } } - } - - trainer.train(lce.startingRound, trainingRounds); - - if (testParser != null) { - System.out.println("Testing " + getName()); - new Accuracy(true).test(learner, learner.getLabeler(), testParser); - } - - System.out.println("Writing " + getName()); - } else - learner.saveLexicon(); // Writes .lex even if lexicon is empty. - + trainer.train(lce.startingRound, trainingRounds); + } else + learner.saveLexicon(); // Writes .lex even if lexicon is empty. + } finally { + learner.doneTraining(); + } + + if (lce.parser != null && testParser != null) { + System.out.println("Testing " + getName()); + new Accuracy(true).test(learner, learner.getLabeler(), testParser); + } + + // save the final model. + System.out.println("Writing " + getName()); learner.save(); // Doesn't write .lex if lexicon is empty. } catch (Exception e) { System.err.println("LBJava ERROR: Exception while training " + getName() + ":"); diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java index 5705301a..1728143b 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java @@ -66,6 +66,9 @@ public abstract class Learner extends Classifier { /** The number of candidate examples when a global object is passed here. */ protected int candidates = 1; + + /** this is set while training. */ + protected boolean intraining = false; /** * This constructor is used by the LBJava compiler; it should never be called by a programmer. @@ -259,7 +262,6 @@ public URL getModelLocation() { return lcFilePath; } - /** * Sets the location of the lexicon as a regular file on this file system. * @@ -289,7 +291,6 @@ public URL getLexiconLocation() { return lexFilePath; } - /** * Establishes a new feature counting policy for this learner's lexicon. 
* @@ -304,7 +305,6 @@ public void countFeatures(Lexicon.CountPolicy policy) { lexicon.countFeatures(policy); } - /** * Returns this learner's feature lexicon after discarding any feature counts it may have been * storing. This method is likely only useful when the lexicon and its counts are currently @@ -320,7 +320,6 @@ public Lexicon getLexiconDiscardCounts() { return lexicon; } - /** * Returns a new, emtpy learner into which all of the parameters that control the behavior of * the algorithm have been copied. Here, "emtpy" means no learning has taken place. @@ -331,7 +330,6 @@ public Learner emptyClone() { return clone; } - /** * Trains the learning algorithm given an object as an example. By default, this simply converts * the example object into arrays and passes it to {@link #learn(int[],double[],int[],double[])} @@ -345,7 +343,6 @@ public void learn(Object example) { (double[]) exampleArray[3]); } - /** * Trains the learning algorithm given a feature vector as an example. This simply converts the * example object into arrays and passes it to {@link #learn(int[],double[],int[],double[])}. @@ -633,6 +630,15 @@ public double realValue(int[] f, double[] v) { + getClass().getName() + "'."); } + /** + * Start training, this might involve training many models, for cross validation, + * parameter tuning and so on. + **/ + public void beginTraining() { + intraining = true; + } + + /** * Overridden by subclasses to perform any required post-processing computations after all @@ -642,6 +648,21 @@ public double realValue(int[] f, double[] v) { public void doneLearning() {} + /** + * Overridden by subclasses to perform any required post-training computations and optimizations, + * in particular, feature subset reduction. This default method only clears the in-training flag, and throws if {@link #beginTraining()} was not called first. + */ + public void doneTraining() { + if (intraining) { + intraining = false; + } else { + throw new RuntimeException("calling doneLearning without previously calling beginTraining" + + " violates the lifecycle contract. 
Or perhaps the subclass does not call the superclass " + + "method. Contact the developer."); + } + } + + /** * This method is sometimes called before training begins, although it is not guaranteed to be * called at all. It allows the number of examples and number of features to be passed to the diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Lexicon.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Lexicon.java index 1941b64f..bb55b6a7 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Lexicon.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Lexicon.java @@ -9,12 +9,15 @@ import java.io.Serializable; import java.net.URL; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.Map; import edu.illinois.cs.cogcomp.core.datastructures.vectors.*; +import edu.illinois.cs.cogcomp.lbjava.classify.DiscreteConjunctiveFeature; import edu.illinois.cs.cogcomp.lbjava.classify.Feature; +import edu.illinois.cs.cogcomp.lbjava.classify.RealConjunctiveFeature; import edu.illinois.cs.cogcomp.lbjava.util.ByteString; import edu.illinois.cs.cogcomp.lbjava.util.ClassUtils; import edu.illinois.cs.cogcomp.lbjava.util.FVector; @@ -305,7 +308,7 @@ public boolean contains(Feature f) { * * @param f The feature to look up. * @return The integer key that the feature maps to. - **/ + **/ public int lookup(Feature f) { return lookup(f, false, -1); } @@ -661,6 +664,36 @@ public void discardPrunedFeatures() { pruneCutoff = -1; } + /** + * Discard features at the provided indices. This operation is performed + * last to first so we can do it in place. This method will sort the input + * array. + * @param dumpthese the indexes of the features to dump. 
+ */ + public void discardPrunedFeatures(int [] dumpthese) { + Arrays.sort(dumpthese); + lexiconInv.remove(dumpthese); + + // this compresses the FVector + lexiconInv = new FVector(lexiconInv); + if (lexicon != null) { + + // reconstitute the lexicon. + lexicon.clear(); + for (int i = 0; i < lexiconInv.size();i++) { + lexicon.put(lexiconInv.get(i), new Integer(i)); + } + + // sanity check, make sure the indices in the lexicon map matches the index in the feature vector + for (int i = 0; i < lexiconInv.size();i++) { + if (i != ((Integer)lexicon.get(lexiconInv.get(i))).intValue()) { + throw new RuntimeException("After optimization pruning, the index in the lexicon did " + + "not match the inverted index."); + } + } + } + } + /** * Returns a deep clone of this lexicon implemented as a HashMap. @@ -742,10 +775,9 @@ public int compare(int i1, int i2) { ByteString previousBSIdentifier = null; out.writeInt(indexes.length); out.writeInt(pruneCutoff); - for (int i = 0; i < indexes.length; ++i) { Feature f = inverse.get(indexes[i]); - previousClassName = + previousClassName = f.lexWrite(out, this, previousClassName, previousPackage, previousClassifier, previousSIdentifier, previousBSIdentifier); previousPackage = f.getPackage(); @@ -757,7 +789,6 @@ else if (f.hasByteStringIdentifier()) out.writeInt(indexes[i]); } - if (featureCounts == null) out.writeInt(0); else @@ -801,14 +832,12 @@ public void read(ExceptionlessInputStream in, boolean readCounts) { pruneCutoff = in.readInt(); lexicon = null; lexiconInv = new FVector(N); - for (int i = 0; i < N; ++i) { Feature f = Feature.lexReadFeature(in, this, previousClass, previousPackage, previousClassifier, previousSIdentifier, previousBSIdentifier); int index = in.readInt(); lexiconInv.set(index, f); - previousClass = f.getClass(); previousPackage = f.getPackage(); previousClassifier = f.getGeneratingClassifier(); @@ -817,7 +846,7 @@ public void read(ExceptionlessInputStream in, boolean readCounts) { else if 
(f.hasByteStringIdentifier()) previousBSIdentifier = f.getByteStringIdentifier(); } - + if (readCounts) { featureCounts = new IVector(); featureCounts.read(in); diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/LinearThresholdUnit.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/LinearThresholdUnit.java index 106bb475..e0abd3ae 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/LinearThresholdUnit.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/LinearThresholdUnit.java @@ -16,6 +16,7 @@ import edu.illinois.cs.cogcomp.lbjava.classify.Feature; import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector; import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet; +import edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LinearThresholdUnitOptimizer; import edu.illinois.cs.cogcomp.lbjava.util.FVector; @@ -57,6 +58,7 @@ * * @author Nick Rizzolo **/ +@SuppressWarnings("serial") public abstract class LinearThresholdUnit extends Learner { /** Default for {@link #initialWeight}. */ public static final double defaultInitialWeight = 0; @@ -68,6 +70,8 @@ public abstract class LinearThresholdUnit extends Learner { public static final double defaultLearningRate = 0.1; /** Default for {@link #weightVector}. */ public static final SparseWeightVector defaultWeightVector = new SparseWeightVector(); + /** Any weight less than this is considered irrelevant. This is for pruning. */ + public static final double defaultFeaturePruningThreshold = 0.000001; /** * The rate at which weights are updated; default {@link #defaultLearningRate}. @@ -100,6 +104,8 @@ public abstract class LinearThresholdUnit extends Learner { protected double negativeThickness; /** The label producing classifier's allowable values. */ protected String[] allowableValues; + /** feature pruning threshold caps magnitude of useful features. */ + public double featurePruningThreshold; /** * Default constructor. 
The learning rate and threshold take default values, while the name of @@ -159,6 +165,21 @@ public LinearThresholdUnit(double r, double t, double pt, double nt) { this("", r, t, pt, nt); } + /** + * Use this constructor to fit a thick separator, where the positive and negative sides of the + * hyperplane will be given the specified separate thicknesses, while the name of the classifier + * gets the empty string. + * + * @param r The desired learning rate value. + * @param t The desired threshold value. + * @param pt The desired positive thickness. + * @param nt The desired negative thickness. + * @param fpt The feature pruning threshold. + */ + public LinearThresholdUnit(double r, double t, double pt, double nt, double fpt) { + this("", r, t, pt, nt, fpt); + } + /** * Initializing constructor. Sets the threshold, positive thickness, and negative thickness to * their default values. @@ -217,7 +238,21 @@ protected LinearThresholdUnit(String n, double r, double t, double pt) { * @param nt The desired negative thickness. **/ protected LinearThresholdUnit(String n, double r, double t, double pt, double nt) { - this(n, r, t, pt, nt, (SparseWeightVector) defaultWeightVector.clone()); + this(n, r, t, pt, nt, defaultFeaturePruningThreshold); + } + + /** + * Takes the rate, threshold, positive thickness, and negative thickness and vector. + * + * @param n The name of the classifier. + * @param r The desired learning rate. + * @param t The desired value for the threshold. + * @param pt The desired positive thickness. + * @param nt The desired negative thickness. + * @param v An initial weight vector. + **/ + protected LinearThresholdUnit(String n, double r, double t, double pt, double nt, SparseWeightVector v) { + this(n, r, t, pt, nt, defaultFeaturePruningThreshold, v); } /** @@ -229,9 +264,25 @@ protected LinearThresholdUnit(String n, double r, double t, double pt, double nt * @param t The desired value for the threshold. * @param pt The desired positive thickness. 
* @param nt The desired negative thickness. + * @param fpt The feature pruning threshold. + **/ + protected LinearThresholdUnit(String n, double r, double t, double pt, double nt, double fpt) { + this(n, r, t, pt, nt, fpt, (SparseWeightVector) defaultWeightVector.clone()); + } + + /** + * Initializing constructor. Sets the threshold, positive thickness, and negative thickness to + * the specified values. + * + * @param n The name of the classifier. + * @param r The desired learning rate. + * @param t The desired value for the threshold. + * @param pt The desired positive thickness. + * @param nt The desired negative thickness. + * @param fpt The feature pruning threshold. * @param v An initial weight vector. **/ - protected LinearThresholdUnit(String n, double r, double t, double pt, double nt, + protected LinearThresholdUnit(String n, double r, double t, double pt, double nt, double fpt, SparseWeightVector v) { super(n); Parameters p = new Parameters(); @@ -240,6 +291,7 @@ protected LinearThresholdUnit(String n, double r, double t, double pt, double nt p.learningRate = r; p.positiveThickness = pt; p.negativeThickness = nt; + p.featurePruningThreshold = fpt; setParameters(p); } @@ -291,6 +343,7 @@ public void setParameters(Parameters p) { bias = p.initialWeight; positiveThickness = p.thickness + p.positiveThickness; negativeThickness = p.thickness + p.negativeThickness; + featurePruningThreshold = p.featurePruningThreshold; } /** @@ -307,6 +360,7 @@ public Learner.Parameters getParameters() { p.threshold = threshold; p.positiveThickness = positiveThickness; p.negativeThickness = negativeThickness; + p.featurePruningThreshold = featurePruningThreshold; return p; } @@ -531,6 +585,15 @@ public void initialize(int numExamples, int numFeatures) { } + /** + * When training is complete, optimize the feature set by discarding low value + * weights. 
+ */ + public void doneTraining() { + super.doneTraining(); + LinearThresholdUnitOptimizer ltuo = new LinearThresholdUnitOptimizer(this); + ltuo.optimize(); + } /** * An LTU returns two scores; one for the negative classification and one for the positive * classification. By default, the score for the positive classification is the result of @@ -751,7 +814,8 @@ public static class Parameters extends Learner.Parameters { public double positiveThickness; /** The thickness of the hyperplane on the negative side; default 0. */ public double negativeThickness; - + /** feature pruning threshold caps magnitude of useful features. */ + public double featurePruningThreshold; /** Sets all the default values. */ public Parameters() { @@ -760,6 +824,7 @@ public Parameters() { initialWeight = defaultInitialWeight; threshold = defaultThreshold; thickness = defaultThickness; + featurePruningThreshold = defaultFeaturePruningThreshold; } @@ -781,6 +846,7 @@ public Parameters(Parameters p) { thickness = p.thickness; positiveThickness = p.positiveThickness; negativeThickness = p.negativeThickness; + featurePruningThreshold = p.featurePruningThreshold; } @@ -801,7 +867,6 @@ public void setParameters(Learner l) { **/ public String nonDefaultString() { String result = super.nonDefaultString(); - if (learningRate != LinearThresholdUnit.defaultLearningRate) result += ", learningRate = " + learningRate; if (initialWeight != LinearThresholdUnit.defaultInitialWeight) @@ -814,10 +879,22 @@ public String nonDefaultString() { result += ", positiveThickness = " + positiveThickness; if (negativeThickness != 0) result += ", negativeThickness = " + negativeThickness; - + if (featurePruningThreshold != LinearThresholdUnit.defaultFeaturePruningThreshold) + result += ", featurePruningThreshold = " + featurePruningThreshold; if (result.startsWith(", ")) result = result.substring(2); return result; } } + + + /** + * Given the index of the weights to prune, discard them, then shrink the weight vector 
down + * to save memory. + * @param uselessfeatures the features being pruned. + * @param numberFeatures the total number of features before pruning. + */ + public void pruneWeights(int[] uselessfeatures, int numberFeatures) { + this.getWeightVector().pruneWeights(uselessfeatures, numberFeatures); + } } diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseAveragedPerceptron.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseAveragedPerceptron.java index c9b74899..4e76223a 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseAveragedPerceptron.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseAveragedPerceptron.java @@ -61,6 +61,15 @@ public class SparseAveragedPerceptron extends SparsePerceptron { * {@link SparseAveragedPerceptron.AveragedWeightVector}. **/ protected AveragedWeightVector awv; + + /** + * @return the awv the averaged weight vector + */ + public AveragedWeightVector getAveragedWeightVector() { + return awv; + } + + /** Keeps the extra information necessary to compute the averaged bias. */ protected double averagedBias; @@ -729,5 +738,46 @@ public Object clone() { public SparseWeightVector emptyClone() { return new AveragedWeightVector(); } + + /** + * If we prune worthless weights, we must also prune useless averages. + * @param uselessfeatures useless features. + * @param numfeatures since this weight vec does not know how many features there are, it must be passed in + */ + public void pruneWeights(int[] uselessfeatures, int numfeatures) { + if (uselessfeatures.length == 0) + return; + super.pruneWeights(uselessfeatures, numfeatures); + + // create a new smaller weight vector for the pruned weights. 
+ int oldsize = this.averagedWeights.size(); + if (oldsize > numfeatures) { + throw new RuntimeException("There was an averaged weight vector with more weights("+oldsize+ + ") than the number of features("+numfeatures+")!"); + } + int newsize = numfeatures - uselessfeatures.length; + double [] newvec = new double[newsize]; + + // copy the weights from the old vector. + int uselessindex = 0; + int newvecindex = 0; + for (int oldindex = 0; oldindex < oldsize; oldindex++) { + if (uselessindex < uselessfeatures.length && uselessfeatures[uselessindex] == oldindex) { + // this is a useless feature, we will skip it. + uselessindex++; + } else { + newvec[newvecindex] = averagedWeights.get(oldindex); + newvecindex++; + } + } + + // compress the array. + if (newvecindex != newsize) { + double[] tmp = new double[newvecindex]; + System.arraycopy(newvec, 0, tmp, 0, newvecindex);; + newvec = tmp; + } + this.averagedWeights = new DVector(newvec); + } } } diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseNetworkLearner.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseNetworkLearner.java index b63d0b0f..822fc1fd 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseNetworkLearner.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseNetworkLearner.java @@ -10,6 +10,7 @@ import java.io.PrintStream; import java.util.Collection; import java.util.Iterator; +import java.util.Map.Entry; import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; @@ -19,6 +20,7 @@ import edu.illinois.cs.cogcomp.lbjava.classify.Feature; import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector; import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet; +import edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.SparseNetworkOptimizer; /** * A SparseNetworkLearner uses multiple {@link LinearThresholdUnit}s 
to make a @@ -44,7 +46,6 @@ public class SparseNetworkLearner extends Learner { private static final long serialVersionUID = 1L; - /** Default for {@link #baseLTU}. */ public static final LinearThresholdUnit defaultBaseLTU = new SparseAveragedPerceptron(); @@ -70,7 +71,6 @@ public class SparseNetworkLearner extends Learner { /** Whether or not this learner's labeler produces conjunctive features. */ protected boolean conjunctiveLabels; - /** * Instantiates this multi-class learner with the default learning algorithm: * {@link #defaultBaseLTU}. @@ -185,7 +185,6 @@ public void setParameters(Parameters p) { setLTU(p.baseLTU); } - /** * Retrieves the parameters that are set in this learner. * @@ -198,7 +197,6 @@ public Learner.Parameters getParameters() { return p; } - /** * Sets the baseLTU variable. This method will not have any effect on the * LTUs that already exist in the network. However, new LTUs created after this method is @@ -211,7 +209,6 @@ public void setLTU(LinearThresholdUnit ltu) { baseLTU.name = name + "$baseLTU"; } - /** * Sets the labeler. * @@ -229,7 +226,6 @@ public void setLabeler(Classifier l) { super.setLabeler(l); } - /** * Sets the extractor. * @@ -255,7 +251,6 @@ public void setNetworkLabel(int label) { network.set(label, ltu); } - /** * Each example is treated as a positive example for the linear threshold unit associated with * the label's value that is active for the example and as a negative example for all other @@ -290,7 +285,17 @@ public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLa ltu.learn(exampleFeatures, exampleValues, l, labelValues); } } - + + /** + * When we complete learning, we will do an optimization. + */ + public void doneTraining() { + super.doneTraining(); + + // do the optimization + SparseNetworkOptimizer optimizer = new SparseNetworkOptimizer(this); + optimizer.optimize(); + } /** Simply calls doneLearning() on every LTU in the network. 
*/ public void doneLearning() { @@ -304,14 +309,12 @@ public void doneLearning() { } } - /** Sets the number of examples and features. */ public void initialize(int ne, int nf) { numExamples = ne; numFeatures = nf; } - /** * Simply calls {@link LinearThresholdUnit#doneWithRound()} on every LTU in the network. */ @@ -326,14 +329,12 @@ public void doneWithRound() { } } - /** Clears the network. */ public void forget() { super.forget(); network = new OVector(); } - /** * Returns scores for only those labels in the given collection. If the given collection is * empty, scores for all labels will be returned. If there is no {@link LinearThresholdUnit} @@ -352,7 +353,6 @@ public ScoreSet scores(Object example, Collection candidates) { return scores((int[]) exampleArray[0], (double[]) exampleArray[1], candidates); } - /** * Returns scores for only those labels in the given collection. If the given collection is * empty, scores for all labels will be returned. If there is no {@link LinearThresholdUnit} @@ -404,7 +404,6 @@ public ScoreSet scores(int[] exampleFeatures, double[] exampleValues, Collection return result; } - /** * This method is a surrogate for {@link #scores(int[],double[],Collection)} when the labeler is * known to produce conjunctive features. It is necessary because when given a string label from @@ -438,7 +437,6 @@ protected ScoreSet conjunctiveScores(int[] exampleFeatures, double[] exampleValu return result; } - /** * Produces a set of scores indicating the degree to which each possible discrete classification * value is associated with the given example object. These scores are just the scores of each @@ -465,7 +463,6 @@ public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) { return result; } - /** * Returns the classification of the given example as a single feature instead of a * {@link FeatureVector}. @@ -494,7 +491,6 @@ public Feature featureValue(int[] f, double[] v) { return bestValue == -1 ? 
null : predictions.get(bestValue); } - /** * This implementation uses a winner-take-all comparison of the outputs from the individual * linear threshold units' score methods. @@ -507,7 +503,6 @@ public String discreteValue(int[] exampleFeatures, double[] exampleValues) { return featureValue(exampleFeatures, exampleValues).getStringValue(); } - /** * This implementation uses a winner-take-all comparison of the outputs from the individual * linear threshold units' score methods. @@ -520,7 +515,6 @@ public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) { return new FeatureVector(featureValue(exampleFeatures, exampleValues)); } - /** * Using this method, the winner-take-all competition is narrowed to involve only those labels * contained in the specified list. The list must contain only Strings. @@ -535,7 +529,6 @@ public Feature valueOf(Object example, Collection candidates) { return valueOf((int[]) exampleArray[0], (double[]) exampleArray[1], candidates); } - /** * Using this method, the winner-take-all competition is narrowed to involve only those labels * contained in the specified list. The list must contain only Strings. @@ -596,7 +589,6 @@ public Feature valueOf(int[] exampleFeatures, double[] exampleValues, Collection return predictions.get(bestValue); } - /** * This method is a surrogate for {@link #valueOf(int[],double[],Collection)} when the labeler * is known to produce conjunctive features. It is necessary because when given a string label @@ -634,7 +626,6 @@ protected Feature conjunctiveValueOf(int[] exampleFeatures, double[] exampleValu return predictions.get(bestValue); } - /** * Writes the algorithm's internal representation as text. * @@ -659,7 +650,6 @@ public void write(PrintStream out) { out.close(); } - /** * Writes the learned function's internal representation in binary form. 
* @@ -682,7 +672,6 @@ public void write(ExceptionlessOutputStream out) { out.close(); } - /** * Reads the binary representation of a learner with this object's run-time type, overwriting * any and all learned or manually specified parameters as well as the label lexicon but without @@ -700,7 +689,6 @@ public void read(ExceptionlessInputStream in) { network.add(Learner.readLearner(in)); } - /** Returns a deep clone of this learning algorithm. */ public Object clone() { SparseNetworkLearner clone = null; @@ -727,7 +715,6 @@ public Object clone() { return clone; } - /** * Simply a container for all of {@link SparseNetworkLearner}'s configurable parameters. Using * instances of this class should make code more readable and constructors less complicated. @@ -743,13 +730,11 @@ public static class Parameters extends Learner.Parameters { **/ public LinearThresholdUnit baseLTU; - /** Sets all the default values. */ public Parameters() { baseLTU = (LinearThresholdUnit) defaultBaseLTU.clone(); } - /** * Sets the parameters from the parent's parameters object, giving defaults to all * parameters declared in this object. @@ -759,14 +744,12 @@ public Parameters(Learner.Parameters p) { baseLTU = (LinearThresholdUnit) defaultBaseLTU.clone(); } - /** Copy constructor. */ public Parameters(Parameters p) { super(p); baseLTU = p.baseLTU; } - /** * Calls the appropriate Learner.setParameters(Parameters) method for this * Parameters object. @@ -777,7 +760,6 @@ public void setParameters(Learner l) { ((SparseNetworkLearner) l).setParameters(this); } - /** * Creates a string representation of these parameters in which only those parameters that * differ from their default values are mentioned. 
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseWeightVector.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseWeightVector.java index defe1001..0353daba 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseWeightVector.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseWeightVector.java @@ -29,7 +29,9 @@ * @author Nick Rizzolo **/ public class SparseWeightVector implements Cloneable, Serializable { - /** + /** default. */ + private static final long serialVersionUID = 1L; + /** * When a feature appears in an example but not in this vector, it is assumed to have this * weight. **/ @@ -40,7 +42,7 @@ public class SparseWeightVector implements Cloneable, Serializable { /** The weights in the vector indexed by their {@link Lexicon} key. */ protected DVector weights; - /** Simply instantiates {@link #weights}. */ + /** Simply instantiates {@link #weights}. */ public SparseWeightVector() { this(new DVector(defaultCapacity)); } @@ -106,6 +108,13 @@ public void setWeight(int featureIndex, double w, double defaultW) { weights.set(featureIndex, w, defaultW); } + /** + * For those cases where we need the raw weights (during model optimization). + * @return the unmolested weights. + */ + public DVector getRawWeights() { + return weights; + } /** * Takes the dot product of this SparseWeightVector with the argument vector, using @@ -317,7 +326,8 @@ public void toStringJustWeights(PrintStream out) { * @param min Sets the minimum width for the textual representation of all features. * @param lex The feature lexicon. 
**/ - public void toStringJustWeights(PrintStream out, int min, Lexicon lex) { + @SuppressWarnings({ "rawtypes", "unchecked" }) + public void toStringJustWeights(PrintStream out, int min, Lexicon lex) { Map map = lex.getMap(); Map.Entry[] entries = (Map.Entry[]) map.entrySet().toArray(new Map.Entry[map.size()]); Arrays.sort(entries, new Comparator() { @@ -420,7 +430,8 @@ public static SparseWeightVector readWeightVector(ExceptionlessInputStream in) { String name = in.readString(); if (name == null) return null; - Class c = ClassUtils.getClass(name); + @SuppressWarnings("rawtypes") + Class c = ClassUtils.getClass(name); SparseWeightVector result = null; try { @@ -482,4 +493,44 @@ public Object clone() { public SparseWeightVector emptyClone() { return new SparseWeightVector(); } + + /** + * delete all irrelevant feature weights. + * @param uselessfeatures useless features. + * @param numfeatures since this weight vec does not know how many features there are, it must be passed in + */ + public void pruneWeights(int[] uselessfeatures, int numfeatures) { + if (uselessfeatures.length == 0) + return; + + // create a new smaller weight vector for the pruned weights. + int oldsize = weights.size(); + if (oldsize > numfeatures) { + throw new RuntimeException("There was a weight vector with more weights("+oldsize+ + ") than the number of features("+numfeatures+")!"); + } + int newsize = numfeatures - uselessfeatures.length; + double [] newvec = new double[newsize]; + + // copy the weights from the old vector. + int uselessindex = 0; + int newvecindex = 0; + for (int oldindex = 0; oldindex < oldsize; oldindex++) { + if (uselessindex < uselessfeatures.length && uselessfeatures[uselessindex] == oldindex) { + // this is a useless feature, we will skip it. + uselessindex++; + } else { + newvec[newvecindex] = weights.get(oldindex); + newvecindex++; + } + } + + // compress the array. 
+ if (newvecindex != newsize) { + double[] tmp = new double[newvecindex]; + System.arraycopy(newvec, 0, tmp, 0, newvecindex);; + newvec = tmp; + } + this.weights = new DVector(newvec); + } } diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SupportVectorMachine.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SupportVectorMachine.java index aec40db5..2c930b57 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SupportVectorMachine.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SupportVectorMachine.java @@ -24,6 +24,7 @@ import edu.illinois.cs.cogcomp.lbjava.classify.Feature; import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector; import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet; +import edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.SupportVectorMachineOptimizer; import edu.illinois.cs.cogcomp.lbjava.util.ByteString; import edu.illinois.cs.cogcomp.lbjava.util.FVector; @@ -64,6 +65,10 @@ * @author Michael Paul **/ public class SupportVectorMachine extends Learner { + /** + * + */ + private static final long serialVersionUID = 1L; /** Default for {@link #solverType}. */ public static final String defaultSolverType = "L2LOSS_SVM"; /** Default for {@link #C}. */ @@ -72,6 +77,11 @@ public class SupportVectorMachine extends Learner { public static final double defaultEpsilon = 0.1; /** Default for {@link #bias}. */ public static final double defaultBias = 1.0; + /** any weight less than this is considered irrelevant. This is for prunning. */ + public static final double defaultFeaturePruningThreshold = 0.000001; + + /** feature pruning threshold caps magnitude of useful features. */ + public double featurePruningThreshold; /** * Keeps track of whether the doneLearning() warning message has been printed. @@ -114,6 +124,14 @@ public class SupportVectorMachine extends Learner { /** The number of bias features; there are either 0 or 1 of them. 
*/ protected int biasFeatures; + /** + * @return the biasFeatures + */ + public int getBiasFeatures() { + return biasFeatures; + } + + /** Controls if liblinear-related messages are output */ protected boolean displayLL = false; @@ -210,6 +228,20 @@ public SupportVectorMachine(double c, double e, double b, String s, boolean d) { this("", c, e, b, s, d); } + /** + * Initializing constructor. The name of the classifier gets the empty string. + * + * @param c The desired C value. + * @param e The desired epsilon value. + * @param b The desired bias. + * @param s The solver type. + * @param d Toggles if the liblinear-related output should be displayed. + * @param fpt the feature pruning threshold. + **/ + public SupportVectorMachine(double c, double e, double b, String s, boolean d, double fpt) { + this("", c, e, b, s, d, fpt); + } + /** * Initializing constructor. C, epsilon, the bias, and the solver type take the default values. * @@ -276,6 +308,20 @@ public SupportVectorMachine(String n, double c, double e, double b, String s) { * @param d Toggles if the liblinear-related output should be displayed. **/ public SupportVectorMachine(String n, double c, double e, double b, String s, boolean d) { + this(n, c, e, b, s, d, SupportVectorMachine.defaultFeaturePruningThreshold); + } + + /** + * Initializing constructor. + * + * @param n The name of the classifier. + * @param c The desired C value. + * @param e The desired epsilon value. + * @param b The desired bias. + * @param s The solver type. + * @param d Toggles if the liblinear-related output should be displayed. 
+ **/ + public SupportVectorMachine(String n, double c, double e, double b, String s, boolean d, double fpt) { super(n); newLabelLexicon = labelLexicon; Parameters p = new Parameters(); @@ -284,10 +330,12 @@ public SupportVectorMachine(String n, double c, double e, double b, String s, bo p.bias = b; p.solverType = s; p.displayLL = d; + p.featurePruningThreshold = fpt; allowableValues = new String[0]; setParameters(p); } + /** * Initializing constructor. Sets all member variables to their associated settings in the * {@link SupportVectorMachine.Parameters} object. The name of the classifier gets the empty @@ -317,9 +365,23 @@ public double[] getWeights() { return weights; } + /** + * @return the numFeatures + */ + public int getNumFeatures() { + return numFeatures; + } + public int getNumClasses() { return numClasses; } + + /** + * @return the solverType + */ + public String getSolverType() { + return solverType; + } /** * Sets the values of parameters that control the behavior of this learning algorithm. @@ -333,6 +395,7 @@ public void setParameters(Parameters p) { biasFeatures = (bias >= 0) ? 1 : 0; solverType = p.solverType; displayLL = p.displayLL; + featurePruningThreshold = p.featurePruningThreshold; } @@ -349,6 +412,7 @@ public Learner.Parameters getParameters() { p.bias = bias; p.solverType = solverType; p.displayLL = displayLL; + p.featurePruningThreshold = this.featurePruningThreshold; return p; } @@ -403,6 +467,7 @@ public void initialize(int ne, int nf) { * @param exampleLabels The example's array of label indices. * @param labelValues The example's array of label values. **/ + @SuppressWarnings({ "unchecked", "rawtypes" }) public void learn(final int[] exampleFeatures, double[] exampleValues, int[] exampleLabels, double[] labelValues) { // Expand the size of the example arrays if they are full. 
@@ -600,12 +665,22 @@ else if (newLabelLexicon.size() > labelLexicon.size()) { weights = trainedModel.getFeatureWeights(); allExamples = null; allLabels = null; - if (displayLL) System.out.println(" Finished training at " + new Date()); } + /** + * Optimize the model by doing feature pruning, drop the low value weights. + */ + public void doneTraining() { + super.doneTraining(); + + // optimize the resulting model by discarding low weight features. + SupportVectorMachineOptimizer svmo = new SupportVectorMachineOptimizer(this); + svmo.optimize(); + } + /** * Writes the algorithm's internal representation as text. In the first line of output, the name * of the classifier is printed, followed by {@link #C}, {@link #epsilon}, {@link #bias}, and @@ -895,7 +970,7 @@ public double score(int[] exampleFeatures, double[] exampleValues, int label) { numClasses = 1; label = 0; } - + for (int i = 0; i < exampleFeatures.length; i++) { int f = exampleFeatures[i]; @@ -929,6 +1004,43 @@ public Feature valueOf(Object example, Collection candidates) { } + /** + * Given the index of the weights to prune, discard them, then shrink the weight vector down + * to save memory. + * @param uselessfeatures the indices of the features being pruned. + * @param numberFeatures the total number of features before pruning. 
+ */ + public void pruneWeights(int[] uselessfeatures, int numberFeatures) { + int sz = numberFeatures - uselessfeatures.length; + double[] newweights = new double[sz+biasFeatures]; + int nextToPrune = 0; + int newweightindex = 0; + for (int i = 0; i < weights.length; i++) { + if (nextToPrune < uselessfeatures.length && i == uselessfeatures[nextToPrune]) { + if (Math.abs(weights[i]) > this.featurePruningThreshold) + throw new IllegalArgumentException("Pruning a high value weight : "+weights[i]+" at "+i); + nextToPrune++; + } else { + if (newweightindex >= newweights.length) + throw new IllegalArgumentException("Attempted to overpopulate the new weight : indx=" + +i+" features="+numberFeatures+" useless="+uselessfeatures.length); + newweights[newweightindex] = weights[i]; + newweightindex++; + } + } + + // do some sanity checks. + if (newweightindex != newweights.length) + throw new IllegalArgumentException("The new pruned weight vector was not fully populated!"); + if (nextToPrune != uselessfeatures.length) + throw new IllegalArgumentException("Not all the prunable features were pruned!"); + + // all good, do the replacement. + System.out.println("SVM.pruneWeights: "+sz+" features, "+newweights.length+" weights size"); + numFeatures = sz; + weights = newweights; + } + /** * Using this method, the winner-take-all competition is narrowed to involve only those labels * contained in the specified list. The list must contain only Strings. @@ -1062,6 +1174,14 @@ public static class Parameters extends Learner.Parameters { * **/ public String solverType; + + /** + * @return the solverType + */ + public String getSolverType() { + return solverType; + } + /** * The cost parameter C; default {@link SupportVectorMachine#defaultC} **/ @@ -1081,7 +1201,9 @@ public static class Parameters extends Learner.Parameters { * false **/ public boolean displayLL; - + + /** feature pruning threshold caps magnitude of useful features. 
*/ + public double featurePruningThreshold; /** Sets all the default values. */ public Parameters() { @@ -1090,6 +1212,7 @@ public Parameters() { epsilon = defaultEpsilon; bias = defaultBias; displayLL = false; + featurePruningThreshold = defaultFeaturePruningThreshold; } @@ -1104,6 +1227,7 @@ public Parameters(Learner.Parameters p) { epsilon = defaultEpsilon; bias = defaultBias; displayLL = false; + featurePruningThreshold = defaultFeaturePruningThreshold; } @@ -1115,6 +1239,7 @@ public Parameters(Parameters p) { epsilon = p.epsilon; bias = p.bias; displayLL = p.displayLL; + featurePruningThreshold = p.featurePruningThreshold; } @@ -1168,6 +1293,8 @@ public String nonDefaultString() { result += ", epsilon = " + epsilon; if (bias != SupportVectorMachine.defaultBias) result += ", bias = " + bias; + if (featurePruningThreshold != defaultFeaturePruningThreshold) + result += ", feature pruning threshold = " + featurePruningThreshold; if (result.startsWith(", ")) result = result.substring(2); diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LexiconOptimizer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LexiconOptimizer.java new file mode 100644 index 00000000..f6a68d15 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LexiconOptimizer.java @@ -0,0 +1,181 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.learn.featurepruning; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Map.Entry; + +import edu.illinois.cs.cogcomp.lbjava.classify.DiscreteConjunctiveFeature; +import edu.illinois.cs.cogcomp.lbjava.classify.Feature; +import edu.illinois.cs.cogcomp.lbjava.classify.RealConjunctiveFeature; +import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; + +/** + * This class defines the life cycle methods for pruning useless features + * from a lexicon. Features for example that carry zero weights with them are + * not useful to the model, so can be eliminated saving space and execution time, without + * affecting accuracy (much). + * @author redman + */ +abstract public class LexiconOptimizer { + + /** any weight less than this is considered irrelevant. This is for prunning. */ + private static final double PRUNING_THRESHOLD = 0.000001; + + /** lexicon contains the features we will operate on. */ + protected Lexicon lexicon; + + /** this also for testing, save feature names we will delete, check the names when we do. */ + final protected ArrayList uselessFeatureNames = new ArrayList(); + + /** this is the threshold we use to discard useless features. */ + protected double threshold = PRUNING_THRESHOLD; + + /** + * We must have a lexicon to perform this operation. + * @param lexicon the lexicon object. + * @param threshold the feature pruning threshold. + */ + protected LexiconOptimizer(Lexicon lexicon, double threshold) { + this.lexicon = lexicon; + this.threshold = threshold; + } + + /** + * Determine if the provided feature has sum of weights greater than a threshold value, + * and discard the feature if it falls below. + * @param lex the lexicon. + * @param f the feature. 
+ * @return true if the feature has any value, there is a + */ + abstract protected boolean hasWeight(Lexicon lex, Feature f); + + /** + * This method returns the number of features. This implementation assumes the + * lexicon is populated, but that's not always the case (with SVM for example appears + * to not always have a populated lexicon). In these cases, this method may be overriden. + * @return the number of featues. + */ + protected int getNumberFeatures() { + return lexicon.size(); + } + + /** + * do the optimization + */ + public void optimize () { + + int originalNumFeatures = this.getNumberFeatures(); + int [] uselessfeatures = identifyUselessFeatures(); + pruneWeights(uselessfeatures, originalNumFeatures); + pruneLexicon(uselessfeatures); + + System.out.println("LexiconOptimizer optimization complete, pruned " + +uselessfeatures.length+" features of "+originalNumFeatures+", leaving "+(originalNumFeatures - uselessfeatures.length)+ + " at threshold of "+threshold); + } + + /** + * @param f the feature. + * @return true if the feature is conjunctive. + */ + static private boolean isConjunctive(Feature f) { + return (f instanceof DiscreteConjunctiveFeature || f instanceof RealConjunctiveFeature); + } + + /** + * If this conjunctive feature has weight, add it and all it's children to the white list. + * @param lex the lexicon maps feature to index. + * @param whitelist the white list we will add to. + * @param f the conjunctive feature. + */ + private void traverseConjunctiveTree(HashSet whitelist, Feature f) { + + // add the conjunctive feature. + whitelist.add(f); + + if (f instanceof DiscreteConjunctiveFeature) { + + // add it's direct children + DiscreteConjunctiveFeature dcf = (DiscreteConjunctiveFeature) f; + whitelist.add(dcf.getLeft()); + whitelist.add(dcf.getRight()); + + // possible add any children of children. 
+ if (isConjunctive(dcf.getLeft())) + traverseConjunctiveTree(whitelist, dcf.getLeft()); + if (isConjunctive(dcf.getRight())) + traverseConjunctiveTree(whitelist, dcf.getRight()); + } else { + + // add it's direct children + RealConjunctiveFeature rcf = (RealConjunctiveFeature) f; + whitelist.add(rcf.getLeft()); + whitelist.add(rcf.getRight()); + + // possible add any children of children. + if (isConjunctive(rcf.getLeft())) + traverseConjunctiveTree(whitelist, rcf.getLeft()); + if (isConjunctive(rcf.getRight())) + traverseConjunctiveTree(whitelist, rcf.getRight()); + } + } + + /** + * Find all features we must whitelist. For each conjunctive feature that has weight, we must keep + * all it's children, regardless of weight, and the rest of the tree from there on down. + * @param lex the lexicon. + * @return the conjunctive features. + */ + protected HashSet compileWhitelist(Lexicon lex) { + HashSet whitelist = new HashSet(); + for (Object e : lex.getMap().entrySet()) { + @SuppressWarnings("unchecked") + Entry entry = (Entry) e; + Feature f = entry.getKey(); + if (isConjunctive(f) && this.hasWeight(lex, f)) { + + // add this conjunctive feature and all it's kids to the whitelist. + traverseConjunctiveTree(whitelist, f); + } + } + return whitelist; + } + + + /** + * Given a list of useless features, prune the entries from the lexicon. + * @param uselessfeatures + */ + protected void pruneLexicon(int[] uselessfeatures) { + lexicon.discardPrunedFeatures(uselessfeatures); + for (Feature f : this.uselessFeatureNames) { + if (lexicon.contains(f)) { + throw new RuntimeException("The features were not correctly removed from the lexicon : " + f.getStringIdentifier()); + } + } + } + + /** + * This method selects the features to be pruned. If weights + * are needed, they must be passed to the constructor and stored in fields of + * the implementing class. In this way, we make no assumptions about the + * structure of the weight classes. 
+ * @return + */ + abstract protected int[] identifyUselessFeatures(); + + /** + * Once we have identified the useless entries, we need to optimize the + * model components. + * @param uselessfeatures the indices of those features with no significant weights. + * @param originalNumFeatures the number of features in the original lexicon. + */ + abstract public void pruneWeights(int[] uselessfeatures, int originalNumFeatures); +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LinearThresholdUnitOptimizer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LinearThresholdUnitOptimizer.java new file mode 100644 index 00000000..a3b058f4 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LinearThresholdUnitOptimizer.java @@ -0,0 +1,144 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.learn.featurepruning; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Map.Entry; + +import edu.illinois.cs.cogcomp.lbjava.classify.Feature; +import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; +import edu.illinois.cs.cogcomp.lbjava.learn.LinearThresholdUnit; +import edu.illinois.cs.cogcomp.lbjava.learn.SparseAveragedPerceptron; +import gnu.trove.set.hash.TIntHashSet; + +/** + * This class will optimize any working LinearThresholdUnit or subclass by pruning + * low value features. + * @author redman + */ +public class LinearThresholdUnitOptimizer extends LexiconOptimizer { + + /** the LTU learner we want to optimize. 
*/ + private LinearThresholdUnit ltuLearner; + + /** this also for testing, save feature names we will delete, check the names when we do. */ + final ArrayList uselessFeatureNames = new ArrayList(); + + /** + * Given the LTU learner to optimize. + * @param snl the LTU learner. + */ + public LinearThresholdUnitOptimizer(LinearThresholdUnit ltu) { + super(ltu.demandLexicon(), ltu.featurePruningThreshold); + ltuLearner = ltu; + } + + /** + * Determine if the provided feature has sum of weights greater than a threshold value, + * and discard the feature if it falls below. + * @param lex the lexicon. + * @param f the feature. + * @return true if the feature has any value, there is a + */ + protected boolean hasWeight(Lexicon lex, Feature f) { + int featureindex = lex.lookup(f); + + // we assume each element of the network is of the same type, if that type is sparse averaged + // perceptron, we check both the averaged and current weight + double sum; + if (this.ltuLearner instanceof SparseAveragedPerceptron) { + SparseAveragedPerceptron sap = (SparseAveragedPerceptron) this.ltuLearner; + double wt = sap.getWeightVector().getRawWeights().get(featureindex); + double avg = sap.getAveragedWeightVector().getRawWeights().get(featureindex); + sum = Math.abs(wt); + sum += Math.abs(avg); + } else { + double wt = this.ltuLearner.getWeightVector().getRawWeights().get(featureindex); + sum = Math.abs(wt); + } + + // if the value is sufficiently large, then we have a good weight and should keep. + if (sum > this.threshold) + return true; + else + return false; + } + + /** + * In this case, we must check, for each feature, the associated set of weight in each weight + * vector, if they are all very small, it is useless. The array returned is sorted ascending. + * @return the set of useless features. 
+ * @see edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LexiconOptimizer#identifyUselessFeatures() + */ + @SuppressWarnings("unchecked") + @Override + protected int[] identifyUselessFeatures() { + Lexicon lex = this.ltuLearner.demandLexicon(); + if (lex != null) { + HashSet whitelist = compileWhitelist(lex); + + // we have the conjunctive features, if left, right, or the parent itself has a non zero weight, + // consider non of the features (parent, left or right) useless, whitelist them. + int count = 0; + int numberfeatures = lex.size(); + int[] all = new int[numberfeatures]; + TIntHashSet defunct = new TIntHashSet(); + for (Object e : lex.getMap().entrySet()) { + Entry entry = (Entry) e; + int fi = entry.getValue(); + if (!whitelist.contains(entry.getKey())) { + double wt = Math.abs(this.ltuLearner.getWeightVector().getRawWeights().get(fi)); + + // if the value is sufficiently large, then we have a good weight and should keep. + if (wt < this.threshold) { + + // This is a useless feature + all[count] = fi; + if (defunct.contains(fi)) { + System.err.println("There was a feature discarded twice during feature pruning!"); + } else { + defunct.add(fi); + } + this.uselessFeatureNames.add(entry.getKey()); + count++; + } + } + } + + int[] useless = new int[count]; + System.arraycopy(all, 0, useless, 0, count); + Arrays.sort(useless); + return useless; + } else + return new int[0]; + } + + /** + * Check it out when done, make sure it worked. + */ + protected void pruneLexicon(int[] uselessfeatures) { + super.pruneLexicon(uselessfeatures); + for (Feature f : this.uselessFeatureNames) { + if (lexicon.contains(f)) { + throw new RuntimeException("The features were not correctly removed from the lexicon : " + f.getStringIdentifier()); + } + } + } + + /** + * Not we remove the useless weights from ALL weight vectors. There must be the same number + * of entries in each weight vector as there is in the lexicon. 
+ * @see edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LexiconOptimizer#pruneWeights(int[]) + */ + @Override + public void pruneWeights(int[] uselessfeatures, int origNumFeatures) { + this.ltuLearner.pruneWeights(uselessfeatures, origNumFeatures); + } +} \ No newline at end of file diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SparseNetworkOptimizer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SparseNetworkOptimizer.java new file mode 100644 index 00000000..f7fe2690 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SparseNetworkOptimizer.java @@ -0,0 +1,156 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.learn.featurepruning; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Map.Entry; + +import edu.illinois.cs.cogcomp.core.datastructures.vectors.OVector; +import edu.illinois.cs.cogcomp.lbjava.classify.Feature; +import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; +import edu.illinois.cs.cogcomp.lbjava.learn.LinearThresholdUnit; +import edu.illinois.cs.cogcomp.lbjava.learn.SparseAveragedPerceptron; +import edu.illinois.cs.cogcomp.lbjava.learn.SparseNetworkLearner; +import gnu.trove.set.hash.TIntHashSet; + +/** + * This class will optimize the SparseNetworkLearner by discarding all features + * associated with no sufficiently high weight values. + * @author redman + */ +public class SparseNetworkOptimizer extends LexiconOptimizer { + + /** the network learner we want to optimize. */ + private SparseNetworkLearner networkLearner; + + /** + * Given the sparse net learner to optimize. 
+ * @param snl the sparse net learner. + */ + public SparseNetworkOptimizer(SparseNetworkLearner snl) { + super(snl.demandLexicon(), snl.getBaseLTU().featurePruningThreshold); + networkLearner = snl; + } + + /** + * Determine if the provided feature has sum of weights greater than a threshold value, + * and discard the feature if it falls below. + * @param lex the lexicon. + * @param f the feature. + * @return true if the feature has any value, there is a + */ + protected boolean hasWeight(Lexicon lex, Feature f) { + OVector net = networkLearner.getNetwork(); + if (net.size() == 0) + return false; + int numberclasses = net.size(); + int i = 0; + double sum = 0; + int featureindex = lex.lookup(f); + + // we assume each element of the network is of the same type, if that type is sparse averaged + // perceptron, we check both the averaged and current weight + if (net.get(0) instanceof SparseAveragedPerceptron) { + for (; i < numberclasses; ++i) { + SparseAveragedPerceptron sap = (SparseAveragedPerceptron) net.get(i); + double wt = sap.getWeightVector().getRawWeights().get(featureindex); + double avg = sap.getAveragedWeightVector().getRawWeights().get(featureindex); + sum += Math.abs(wt); + sum += Math.abs(avg); + + // if the value is sufficiently large, then we have a good weight and should keep. + if (sum > this.threshold) + return true; + } + } else { + for (; i < numberclasses; ++i) { + LinearThresholdUnit ltu = (LinearThresholdUnit) net.get(i); + double wt = ltu.getWeightVector().getRawWeights().get(featureindex); + sum += Math.abs(wt); + + // if the value is sufficiently large, then we have a good weight and should keep. + if (sum > this.threshold) + return true; + } + } + return false; + } + + /** + * In this case, we must check, for each feature, the associated set of weight in each weight + * vector, if they are all very small, it is useless. The array returned is sorted ascending. + * @return the set of useless features. 
+ * @see edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LexiconOptimizer#identifyUselessFeatures() + */ + @SuppressWarnings("unchecked") + @Override + protected int[] identifyUselessFeatures() { + Lexicon lex = networkLearner.demandLexicon(); + if (lex != null) { + HashSet whitelist = compileWhitelist(lex); + + // we have the conjunctive features, if left, right, or the parent itself has a non zero weight, + // consider non of the features (parent, left or right) useless, whitelist them. + int count = 0; + int numberfeatures = lex.size(); + int numberclasses = networkLearner.getNetwork().size(); + int[] all = new int[numberfeatures]; + TIntHashSet defunct = new TIntHashSet(); + for (Object e : lex.getMap().entrySet()) { + Entry entry = (Entry) e; + int fi = entry.getValue(); + if (!whitelist.contains(entry.getKey())) { + int i = 0; + for (; i < numberclasses; ++i) { + LinearThresholdUnit ltu = (LinearThresholdUnit) networkLearner.getNetwork().get(i); + if (ltu == null) { + System.out.println("THERE WAS NO LTU AT " + i); + continue; + } + double wt = ltu.getWeightVector().getRawWeights().get(fi); + + // if the value is sufficiently large, then we have a good weight and should keep. + if ((wt > 0 && wt > this.threshold) || (wt < 0 && wt < -this.threshold)) + break; + } + if (i == numberclasses) { + all[count] = fi; + if (defunct.contains(fi)) { + System.err.println("There was a feature discarded twice during feature pruning!"); + } else { + defunct.add(fi); + } + this.uselessFeatureNames.add(entry.getKey()); + count++; + } + } + } + + int[] useless = new int[count]; + System.arraycopy(all, 0, useless, 0, count); + Arrays.sort(useless); + return useless; + } else + return new int[0]; + } + + /** + * Not we remove the useless weights from ALL weight vectors. There must be the same number + * of entries in each weight vector as there is in the lexicon. 
+ * @see edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LexiconOptimizer#pruneWeights(int[]) + */ + @Override + public void pruneWeights(int[] uselessfeatures, int origNumFeatures) { + OVector ltus = networkLearner.getNetwork(); + for (int i = 0; i < ltus.size(); i++) { + LinearThresholdUnit ltu = (LinearThresholdUnit) ltus.get(i); + ltu.pruneWeights(uselessfeatures, origNumFeatures); + } + } +} \ No newline at end of file diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SupportVectorMachineOptimizer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SupportVectorMachineOptimizer.java new file mode 100644 index 00000000..9642a89c --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SupportVectorMachineOptimizer.java @@ -0,0 +1,171 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.learn.featurepruning; + +import java.util.*; +import java.util.Map.Entry; + +import edu.illinois.cs.cogcomp.lbjava.classify.Feature; +import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; +import edu.illinois.cs.cogcomp.lbjava.learn.SupportVectorMachine; + +/** + * Optimized a support vector machine by discarding any sufficiently low weights. + * @author redman + */ +public class SupportVectorMachineOptimizer extends LexiconOptimizer { + + /** the model we are going to optimize. */ + private SupportVectorMachine svm = null; + + /** the number of classes, if the numclasses is two, consider it binary and change to one. */ + public int numberclasses = -1; + + /** turn this on to produce TONS of diagnostics, lists what feature are pruned and what remain. 
*/ + final private boolean debug = false; + + /** the biasfeatures are 0 for no added bias features, or 1 if bias is added. */ + public int biasfeatures = 0; + + /** + * Take lex and model, and optimize the model by pruning the weights. Any zero weights get pruned. + * @param lexicon the lexicon with the feature map. + * @param s the support vector machine. + */ + public SupportVectorMachineOptimizer(SupportVectorMachine s) { + super(s.demandLexicon(), s.featurePruningThreshold); + this.svm = s; + + // the numClasses field gets change in the write method to allow for the binary case + // which is actually two classes to behave as one class (binary). + if (!s.getSolverType().equals("MCSVM_CS") && s.getNumClasses() <= 2) + numberclasses = 1; + else + numberclasses = s.getNumClasses(); + + // we need to figure out if we have a bias feature introduced + this.biasfeatures = svm.getBiasFeatures(); + } + + /** + * When done, check the results to make sure none of the feature weights have changed. + */ + public void optimize () { + class FeatureFeatures { + Feature feature; + double processedweight; + double realweight; + FeatureFeatures (Feature feature, double pw, double rw, int i) { + this.feature = feature; + this.processedweight = pw; + this.realweight = rw; + } + public String toString() { + return this.feature.toStringNoPackage()+":"+processedweight+":"+realweight; + } + } + /* the feature weights are used to validate the result at the end. */ + ArrayList featureweights = new ArrayList<>(); + + // Get all the feature weights so we can make sure they line up when done. 
+ for (int i = 0; i < lexicon.size();i++) + featureweights.add(new FeatureFeatures(lexicon.lookupKey(i),getWeight(i),svm.getWeights()[i], i)); + super.optimize(); + + // get each feature, if it's gone, make sure it sucks, if it's not, ensure it doesn't + int kept = 0; + int discarded = 0; + for (FeatureFeatures entry : featureweights) { + if (lexicon.contains(entry.feature)) { + int newindex = lexicon.lookup(entry.feature); + kept++; + if (debug) + System.out.println("Kept "+entry+" lexicon feature:"+lexicon.lookupKey(newindex).toStringNoPackage()+":"+svm.getWeights()[newindex]); + } else { + discarded++; + if (debug) + System.out.println("Discarded "+entry); + } + } + System.out.println("SVM optimization @ t="+this.threshold+" resulted in "+discarded+" discarded features of "+(discarded+kept)+" total features."); + } + + /** + * Determine if the provided feature has sum of weights greater than a threshold value, + * and discard the feature if it falls below. + * @param lex the lexicon. + * @param f the feature. + * @return true if the feature has any value, there is a + */ + protected boolean hasWeight(Lexicon lex, Feature f) { + int index = lex.lookup(f); + return getWeight(index) > this.threshold; + } + + /** + * Compute the single weight at the index as the sum of all weights for all classes. + * @param index the index of the feature + * @return the sum of the absolute value of all weights for the feature. 
+ */ + private double getWeight(int index) { + double sum = 0; + for (int i = 0; i < this.numberclasses; i++) { + sum += Math.abs(svm.getWeights()[index]); + index += (this.lexicon.size() + biasfeatures); + } + return sum; + } + + /** + * @see edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LexiconOptimizer#identifyUselessFeatures() + */ + @Override + protected int[] identifyUselessFeatures() { + + // compile the whitelist + HashSet whitelist = compileWhitelist(lexicon); + + // look at each feature in the lexicon, any with zero weights can be safely discarded. + int [] all = new int [this.lexicon.size()]; + int count = 0; + for (Object e : lexicon.getMap().entrySet()) { + @SuppressWarnings("unchecked") + Entry entry = (Entry) e; + if (!whitelist.contains(entry.getKey())) { + int fi = entry.getValue(); + double wt = getWeight(fi); + if (wt < this.threshold) { + all[count] = fi; + count++; + } + } + } + int[] useless = new int[count]; + System.arraycopy(all, 0, useless, 0, count); + Arrays.sort(useless); + return useless; + } + + /** + * This method returns the number of features. This implementation assumes the + * lexicon is populated, but that's not always the case (with SVM for example appears + * to not always have a populated lexicon). In these cases, this method may be overriden. + * @return the number of featues. 
+ */ + protected int getNumberFeatures() { + return this.svm.getNumFeatures(); + } + + /** + * @see edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LexiconOptimizer#pruneWeights(int[], int) + */ + @Override + public void pruneWeights(int[] uselessfeatures, int originalNumFeatures) { + this.svm.pruneWeights(uselessfeatures, originalNumFeatures); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/package-info.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/package-info.java new file mode 100644 index 00000000..36369b7b --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/package-info.java @@ -0,0 +1,19 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +/** + *

+ * For sparse learners, it is often the case that the array of features you learn against + * contains only a subset of useful features. When we leave these features in the lexicon, + * we end up with bloated lexicons and weight vectors. This leads to larger than necessary + * models.

+ * This package contains an interface that defines the lifecycle for the pruning + * process, as well as some implementations, one that takes multiple weight vectors (for + * multi-class network learners), and one that takes only one weight vector. + * @author redman + */ +package edu.illinois.cs.cogcomp.lbjava.learn.featurepruning; \ No newline at end of file diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/FVector.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/FVector.java index 67545bf3..b530eb9a 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/FVector.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/FVector.java @@ -7,6 +7,7 @@ */ package edu.illinois.cs.cogcomp.lbjava.util; +import java.lang.reflect.Array; import java.util.Arrays; import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; @@ -190,6 +191,32 @@ public Feature remove(int i) { } + /** + * Remove all the features specfied by the indices. This is MUCH faster + * than removing them one at a time. + * + * @param indexes The indexes of the elements to remove. + **/ + public void remove(int[] indexes) { + Arrays.sort(indexes); + int sourceindex = 0; + int discardindex = 0; + for (int targetindex = 0; targetindex < size; targetindex++) { + if (discardindex < indexes.length && targetindex == indexes[discardindex]) { + // skip this one (by simply not coping it and not inc the sourceindex), inc discardindex + discardindex++; + } else { + vector[sourceindex] = vector[targetindex]; + sourceindex++; + } + } + if (discardindex != indexes.length) + // this should nver happen. + throw new RuntimeException("There was a problem removing some of the indexes!"); + size -= indexes.length; + } + + /** Returns the value of {@link #size}. 
*/ public int size() { return size; diff --git a/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/SparseNetworkLearningPruneTest.java b/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/SparseNetworkLearningPruneTest.java new file mode 100644 index 00000000..e5202d51 --- /dev/null +++ b/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/SparseNetworkLearningPruneTest.java @@ -0,0 +1,20 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava; + +import static org.junit.Assert.*; + +import org.junit.Test; + +public class SparseNetworkLearningPruneTest { + + @Test + public void test() { + } + +} diff --git a/pom.xml b/pom.xml index 19dea5ef..9ae2f769 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,7 @@ edu.illinois.cs.cogcomp lbjava-project pom - 1.2.26 + 1.3.0 lbjava From 156fd5ff275109b8453764d6eb623fba83fdb01d Mon Sep 17 00:00:00 2001 From: "Thomas L. Redman" Date: Fri, 25 Aug 2017 10:43:56 -0500 Subject: [PATCH 2/5] Fixed documentation, did some cleanup. 
--- .../LinearThresholdUnitOptimizer.java | 2 +- .../SparseNetworkOptimizer.java | 4 +- .../SupportVectorMachineOptimizer.java | 46 ------------------- .../learn/featurepruning/package-info.java | 19 ++++++-- 4 files changed, 20 insertions(+), 51 deletions(-) diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LinearThresholdUnitOptimizer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LinearThresholdUnitOptimizer.java index a3b058f4..44101b33 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LinearThresholdUnitOptimizer.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LinearThresholdUnitOptimizer.java @@ -19,7 +19,7 @@ import gnu.trove.set.hash.TIntHashSet; /** - * This class will optimize any working LinearThresholdUnit or subclass by pruning + * This class will optimize any working LinearThresholdUnit subclass by pruning * low value features. * @author redman */ diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SparseNetworkOptimizer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SparseNetworkOptimizer.java index f7fe2690..9584b8a3 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SparseNetworkOptimizer.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SparseNetworkOptimizer.java @@ -21,7 +21,9 @@ /** * This class will optimize the SparseNetworkLearner by discarding all features - * associated with no sufficiently high weight values. + * associated with no sufficiently high weight values. For the network learner, we + * much check the weights across all the binary learners to determin the value + * of a particular feature. 
* @author redman */ public class SparseNetworkOptimizer extends LexiconOptimizer { diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SupportVectorMachineOptimizer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SupportVectorMachineOptimizer.java index 9642a89c..5b1fa976 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SupportVectorMachineOptimizer.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SupportVectorMachineOptimizer.java @@ -26,9 +26,6 @@ public class SupportVectorMachineOptimizer extends LexiconOptimizer { /** the number of classes, if the numclasses is two, consider it binary and change to one. */ public int numberclasses = -1; - /** turn this on to produce TONS of diagnostics, lists what feature are pruned and what remain. */ - final private boolean debug = false; - /** the biasfeatures are 0 for no added bias features, or 1 if bias is added. */ public int biasfeatures = 0; @@ -51,50 +48,7 @@ public SupportVectorMachineOptimizer(SupportVectorMachine s) { // we need to figure out if we have a bias feature introduced this.biasfeatures = svm.getBiasFeatures(); } - - /** - * When done, check the results to make sure none of the feature weights have changed. - */ - public void optimize () { - class FeatureFeatures { - Feature feature; - double processedweight; - double realweight; - FeatureFeatures (Feature feature, double pw, double rw, int i) { - this.feature = feature; - this.processedweight = pw; - this.realweight = rw; - } - public String toString() { - return this.feature.toStringNoPackage()+":"+processedweight+":"+realweight; - } - } - /* the feature weights are used to validate the result at the end. */ - ArrayList featureweights = new ArrayList<>(); - // Get all the feature weights so we can make sure they line up when done. 
- for (int i = 0; i < lexicon.size();i++) - featureweights.add(new FeatureFeatures(lexicon.lookupKey(i),getWeight(i),svm.getWeights()[i], i)); - super.optimize(); - - // get each feature, if it's gone, make sure it sucks, if it's not, ensure it doesn't - int kept = 0; - int discarded = 0; - for (FeatureFeatures entry : featureweights) { - if (lexicon.contains(entry.feature)) { - int newindex = lexicon.lookup(entry.feature); - kept++; - if (debug) - System.out.println("Kept "+entry+" lexicon feature:"+lexicon.lookupKey(newindex).toStringNoPackage()+":"+svm.getWeights()[newindex]); - } else { - discarded++; - if (debug) - System.out.println("Discarded "+entry); - } - } - System.out.println("SVM optimization @ t="+this.threshold+" resulted in "+discarded+" discarded features of "+(discarded+kept)+" total features."); - } - /** * Determine if the provided feature has sum of weights greater than a threshold value, * and discard the feature if it falls below. diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/package-info.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/package-info.java index 36369b7b..00d9952a 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/package-info.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/package-info.java @@ -7,13 +7,26 @@ */ /** *

- * For sparse learners, it is often the case that the array of features you learn against + * For sparse learners, it is often the case that the array of features you learn * contains only a subset of useful features. When we leave these features in the lexicon, * we end up with bloated lexicons and weight vectors. This leads to larger than necessary * models.

- * This package contains an interface that defines the lifecycle for the pruning + * + * This package contains an interface that defines the life cycle for the pruning * process, as well as some implementations, one that takes multiple weight vectors (for - * multi-class network learners), and one that takes only one weight vector. + * multi-class network learners), and some that takes only one weight vector.

+ * + * All optimizers should subclass @see LexiconOptimizer which implements most of the + * optimization. Subclass will need to provide methods to compute the weight value to compare + * against the threshold, a method to identify the useless features, and a method to prune + * those features.

+ * + * The optimizers are invoked by the doneTraining method of the Learner class when all learning + * is complete. For those who have build their own training procedure, they are required to invoke + * the doneTraining and startTraining method during their training process.

+ * + * The pruning threshold value is provided by the specific learner, and should be, in one way or + * another, parameterized.

* @author redman */ package edu.illinois.cs.cogcomp.lbjava.learn.featurepruning; \ No newline at end of file From b0993ecc73fb76f4985d158525ebac778885bdc9 Mon Sep 17 00:00:00 2001 From: "Thomas L. Redman" Date: Fri, 25 Aug 2017 10:57:00 -0500 Subject: [PATCH 3/5] Hopefully fix an issue with the mvn plugin. --- lbjava-examples/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lbjava-examples/pom.xml b/lbjava-examples/pom.xml index cdef5169..cebfb148 100755 --- a/lbjava-examples/pom.xml +++ b/lbjava-examples/pom.xml @@ -63,7 +63,7 @@ edu.illinois.cs.cogcomp lbjava-maven-plugin - 1.2.26 + 1.3.0 ${project.basedir}/src/main/java ${project.basedir}/target/classes From e7a2aeacc319b07b38939040381d1f9714ba0e0c Mon Sep 17 00:00:00 2001 From: "Thomas L. Redman" Date: Thu, 7 Sep 2017 11:41:22 -0500 Subject: [PATCH 4/5] Sparse net optimizer had a bit when used with SparseAveragedPerceptron, and other LTUs. --- .../SparseNetworkOptimizer.java | 24 ++++++------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SparseNetworkOptimizer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SparseNetworkOptimizer.java index 9584b8a3..2ed26cb2 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SparseNetworkOptimizer.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SparseNetworkOptimizer.java @@ -95,33 +95,23 @@ protected boolean hasWeight(Lexicon lex, Feature f) { protected int[] identifyUselessFeatures() { Lexicon lex = networkLearner.demandLexicon(); if (lex != null) { - HashSet whitelist = compileWhitelist(lex); // we have the conjunctive features, if left, right, or the parent itself has a non zero weight, // consider non of the features (parent, left or right) useless, whitelist them. 
+ HashSet whitelist = compileWhitelist(lex); int count = 0; int numberfeatures = lex.size(); - int numberclasses = networkLearner.getNetwork().size(); int[] all = new int[numberfeatures]; TIntHashSet defunct = new TIntHashSet(); + + // For each feature, determin it's value. We will interate over a map with features as key + // and the integer index of the feature. If the feature is whitelisted, we keep, otherwise + // check for uselessness and if so, add to the list. for (Object e : lex.getMap().entrySet()) { Entry entry = (Entry) e; - int fi = entry.getValue(); if (!whitelist.contains(entry.getKey())) { - int i = 0; - for (; i < numberclasses; ++i) { - LinearThresholdUnit ltu = (LinearThresholdUnit) networkLearner.getNetwork().get(i); - if (ltu == null) { - System.out.println("THERE WAS NO LTU AT " + i); - continue; - } - double wt = ltu.getWeightVector().getRawWeights().get(fi); - - // if the value is sufficiently large, then we have a good weight and should keep. - if ((wt > 0 && wt > this.threshold) || (wt < 0 && wt < -this.threshold)) - break; - } - if (i == numberclasses) { + int fi = entry.getValue(); + if (!hasWeight(lexicon, entry.getKey())) { all[count] = fi; if (defunct.contains(fi)) { System.err.println("There was a feature discarded twice during feature pruning!"); From 51ae957cb43d75e44afd173a098cb747e03b72ca Mon Sep 17 00:00:00 2001 From: "Thomas L. Redman" Date: Fri, 8 Sep 2017 09:24:18 -0500 Subject: [PATCH 5/5] Fixed up the docs a bit, and changes fixed an issue in the optimizer. 
--- .../SparseNetworkOptimizer.java | 5 ++-- .../learn/featurepruning/package-info.java | 30 +++++++++++-------- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SparseNetworkOptimizer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SparseNetworkOptimizer.java index 2ed26cb2..45cfe812 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SparseNetworkOptimizer.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SparseNetworkOptimizer.java @@ -67,7 +67,7 @@ protected boolean hasWeight(Lexicon lex, Feature f) { sum += Math.abs(avg); // if the value is sufficiently large, then we have a good weight and should keep. - if (sum > this.threshold) + if (sum >= this.threshold) return true; } } else { @@ -77,7 +77,7 @@ protected boolean hasWeight(Lexicon lex, Feature f) { sum += Math.abs(wt); // if the value is sufficiently large, then we have a good weight and should keep. - if (sum > this.threshold) + if (sum >= this.threshold) return true; } } @@ -118,6 +118,7 @@ protected int[] identifyUselessFeatures() { } else { defunct.add(fi); } + this.uselessFeatureNames.add(entry.getKey()); count++; } diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/package-info.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/package-info.java index 00d9952a..c9b185bc 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/package-info.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/package-info.java @@ -6,27 +6,33 @@ * http://cogcomp.cs.illinois.edu/ */ /** - *

- * For sparse learners, it is often the case that the array of features you learn + *

For sparse learners, it is often the case that the array of features you learn * contains only a subset of useful features. When we leave these features in the lexicon, * we end up with bloated lexicons and weight vectors. This leads to larger than necessary - * models.

+ * models.

* - * This package contains an interface that defines the life cycle for the pruning + *

This package contains an interface that defines the life cycle for the feature pruning * process, as well as some implementations, one that takes multiple weight vectors (for - * multi-class network learners), and some that takes only one weight vector.

+ * multi-class network learners), and some that take only one weight vector.

* - * All optimizers should subclass @see LexiconOptimizer which implements most of the + *

All optimizers should subclass {@link LexiconOptimizer}, which implements most of the * optimization. Subclass will need to provide methods to compute the weight value to compare * against the threshold, a method to identify the useless features, and a method to prune - * those features.

+ * those features.

* - * The optimizers are invoked by the doneTraining method of the Learner class when all learning - * is complete. For those who have build their own training procedure, they are required to invoke - * the doneTraining and startTraining method during their training process.

+ *

The optimizers are invoked by the {@link edu.illinois.cs.cogcomp.lbjava.learn.Learner#doneTraining} + * method of the Learner class when all learning is complete. For those learners that include a feature + * pruning implementation, they must override this method to invoke the optimizer. In this way, during the + * normal LBJava compile and model build cycle, the optimization is performed automatically. For those + * who have built their own training procedure, they are required to invoke the doneTraining and + * {@link edu.illinois.cs.cogcomp.lbjava.learn.Learner#startTraining} methods at appropriate points during + * their training process.

* - * The pruning threshold value is provided by the specific learner, and should be, in one way or - * another, parameterized.

+ *

The learner classes typically have a parameter that can be set to change the default feature + * pruning threshold to any value the user might choose, or it can be set to 0.0 to disable pruning.

+ * + *

The pruning threshold value is provided by the specific learner, and should be, in one way or + * another, parameterized.

* @author redman */ package edu.illinois.cs.cogcomp.lbjava.learn.featurepruning; \ No newline at end of file