HashMap.
@@ -742,10 +775,9 @@ public int compare(int i1, int i2) {
ByteString previousBSIdentifier = null;
out.writeInt(indexes.length);
out.writeInt(pruneCutoff);
-
for (int i = 0; i < indexes.length; ++i) {
Feature f = inverse.get(indexes[i]);
- previousClassName =
+ previousClassName =
f.lexWrite(out, this, previousClassName, previousPackage, previousClassifier,
previousSIdentifier, previousBSIdentifier);
previousPackage = f.getPackage();
@@ -757,7 +789,6 @@ else if (f.hasByteStringIdentifier())
out.writeInt(indexes[i]);
}
-
if (featureCounts == null)
out.writeInt(0);
else
@@ -801,14 +832,12 @@ public void read(ExceptionlessInputStream in, boolean readCounts) {
pruneCutoff = in.readInt();
lexicon = null;
lexiconInv = new FVector(N);
-
for (int i = 0; i < N; ++i) {
Feature f =
Feature.lexReadFeature(in, this, previousClass, previousPackage,
previousClassifier, previousSIdentifier, previousBSIdentifier);
int index = in.readInt();
lexiconInv.set(index, f);
-
previousClass = f.getClass();
previousPackage = f.getPackage();
previousClassifier = f.getGeneratingClassifier();
@@ -817,7 +846,7 @@ public void read(ExceptionlessInputStream in, boolean readCounts) {
else if (f.hasByteStringIdentifier())
previousBSIdentifier = f.getByteStringIdentifier();
}
-
+
if (readCounts) {
featureCounts = new IVector();
featureCounts.read(in);
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/LinearThresholdUnit.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/LinearThresholdUnit.java
index 106bb475..e0abd3ae 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/LinearThresholdUnit.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/LinearThresholdUnit.java
@@ -16,6 +16,7 @@
import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector;
import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet;
+import edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LinearThresholdUnitOptimizer;
import edu.illinois.cs.cogcomp.lbjava.util.FVector;
@@ -57,6 +58,7 @@
*
* @author Nick Rizzolo
**/
+@SuppressWarnings("serial")
public abstract class LinearThresholdUnit extends Learner {
/** Default for {@link #initialWeight}. */
public static final double defaultInitialWeight = 0;
@@ -68,6 +70,8 @@ public abstract class LinearThresholdUnit extends Learner {
public static final double defaultLearningRate = 0.1;
/** Default for {@link #weightVector}. */
public static final SparseWeightVector defaultWeightVector = new SparseWeightVector();
+ /** Any weight less than this is considered irrelevant. This is for pruning. */
+ public static final double defaultFeaturePruningThreshold = 0.000001;
/**
* The rate at which weights are updated; default {@link #defaultLearningRate}.
@@ -100,6 +104,8 @@ public abstract class LinearThresholdUnit extends Learner {
protected double negativeThickness;
/** The label producing classifier's allowable values. */
protected String[] allowableValues;
+ /** Weights below this threshold are considered irrelevant; default {@link #defaultFeaturePruningThreshold}. */
+ public double featurePruningThreshold;
/**
* Default constructor. The learning rate and threshold take default values, while the name of
@@ -159,6 +165,21 @@ public LinearThresholdUnit(double r, double t, double pt, double nt) {
this("", r, t, pt, nt);
}
+ /**
+ * Use this constructor to fit a thick separator, where the positive and negative sides of the
+ * hyperplane will be given the specified separate thicknesses, and to set the feature pruning
+ * threshold explicitly. The name of the classifier gets the empty string.
+ *
+ * @param r The desired learning rate value.
+ * @param t The desired threshold value.
+ * @param pt The desired positive thickness.
+ * @param nt The desired negative thickness.
+ * @param fpt The feature pruning threshold.
+ */
+ public LinearThresholdUnit(double r, double t, double pt, double nt, double fpt) {
+ this("", r, t, pt, nt, fpt);
+ }
+
/**
* Initializing constructor. Sets the threshold, positive thickness, and negative thickness to
* their default values.
@@ -217,7 +238,21 @@ protected LinearThresholdUnit(String n, double r, double t, double pt) {
* @param nt The desired negative thickness.
**/
protected LinearThresholdUnit(String n, double r, double t, double pt, double nt) {
- this(n, r, t, pt, nt, (SparseWeightVector) defaultWeightVector.clone());
+ this(n, r, t, pt, nt, defaultFeaturePruningThreshold);
+ }
+
+ /**
+ * Initializing constructor that takes an initial weight vector; the feature pruning threshold takes its default value.
+ *
+ * @param n The name of the classifier.
+ * @param r The desired learning rate.
+ * @param t The desired value for the threshold.
+ * @param pt The desired positive thickness.
+ * @param nt The desired negative thickness.
+ * @param v An initial weight vector.
+ **/
+ protected LinearThresholdUnit(String n, double r, double t, double pt, double nt, SparseWeightVector v) {
+ this(n, r, t, pt, nt, defaultFeaturePruningThreshold, v);
+ }
/**
@@ -229,9 +264,25 @@ protected LinearThresholdUnit(String n, double r, double t, double pt, double nt
* @param t The desired value for the threshold.
* @param pt The desired positive thickness.
* @param nt The desired negative thickness.
+ * @param fpt The feature pruning threshold.
+ **/
+ protected LinearThresholdUnit(String n, double r, double t, double pt, double nt, double fpt) {
+ this(n, r, t, pt, nt, fpt, (SparseWeightVector) defaultWeightVector.clone());
+ }
+
+ /**
+ * Initializing constructor. Sets the threshold, positive thickness, and negative thickness to
+ * the specified values.
+ *
+ * @param n The name of the classifier.
+ * @param r The desired learning rate.
+ * @param t The desired value for the threshold.
+ * @param pt The desired positive thickness.
+ * @param nt The desired negative thickness.
+ * @param fpt The feature pruning threshold.
* @param v An initial weight vector.
**/
- protected LinearThresholdUnit(String n, double r, double t, double pt, double nt,
+ protected LinearThresholdUnit(String n, double r, double t, double pt, double nt, double fpt,
SparseWeightVector v) {
super(n);
Parameters p = new Parameters();
@@ -240,6 +291,7 @@ protected LinearThresholdUnit(String n, double r, double t, double pt, double nt
p.learningRate = r;
p.positiveThickness = pt;
p.negativeThickness = nt;
+ p.featurePruningThreshold = fpt;
setParameters(p);
}
@@ -291,6 +343,7 @@ public void setParameters(Parameters p) {
bias = p.initialWeight;
positiveThickness = p.thickness + p.positiveThickness;
negativeThickness = p.thickness + p.negativeThickness;
+ featurePruningThreshold = p.featurePruningThreshold;
}
/**
@@ -307,6 +360,7 @@ public Learner.Parameters getParameters() {
p.threshold = threshold;
p.positiveThickness = positiveThickness;
p.negativeThickness = negativeThickness;
+ p.featurePruningThreshold = featurePruningThreshold;
return p;
}
@@ -531,6 +585,15 @@ public void initialize(int numExamples, int numFeatures) {
}
+ /**
+ * Runs the superclass's end-of-training step, then hands this learner to a
+ * {@link LinearThresholdUnitOptimizer}, which is intended to discard low value weights.
+ */
+ public void doneTraining() {
+     super.doneTraining();
+     // the optimizer runs last, once the superclass has finished its own work.
+     new LinearThresholdUnitOptimizer(this).optimize();
+ }
/**
* An LTU returns two scores; one for the negative classification and one for the positive
* classification. By default, the score for the positive classification is the result of
@@ -751,7 +814,8 @@ public static class Parameters extends Learner.Parameters {
public double positiveThickness;
/** The thickness of the hyperplane on the negative side; default 0. */
public double negativeThickness;
-
+ /** Weights below this threshold are considered irrelevant; default {@link LinearThresholdUnit#defaultFeaturePruningThreshold}. */
+ public double featurePruningThreshold;
/** Sets all the default values. */
public Parameters() {
@@ -760,6 +824,7 @@ public Parameters() {
initialWeight = defaultInitialWeight;
threshold = defaultThreshold;
thickness = defaultThickness;
+ featurePruningThreshold = defaultFeaturePruningThreshold;
}
@@ -781,6 +846,7 @@ public Parameters(Parameters p) {
thickness = p.thickness;
positiveThickness = p.positiveThickness;
negativeThickness = p.negativeThickness;
+ featurePruningThreshold = p.featurePruningThreshold;
}
@@ -801,7 +867,6 @@ public void setParameters(Learner l) {
**/
public String nonDefaultString() {
String result = super.nonDefaultString();
-
if (learningRate != LinearThresholdUnit.defaultLearningRate)
result += ", learningRate = " + learningRate;
if (initialWeight != LinearThresholdUnit.defaultInitialWeight)
@@ -814,10 +879,22 @@ public String nonDefaultString() {
result += ", positiveThickness = " + positiveThickness;
if (negativeThickness != 0)
result += ", negativeThickness = " + negativeThickness;
-
+ if (featurePruningThreshold != LinearThresholdUnit.defaultFeaturePruningThreshold)
+ result += ", featurePruningThreshold = " + featurePruningThreshold;
if (result.startsWith(", "))
result = result.substring(2);
return result;
}
}
+
+
+ /**
+ * Prunes the weights at the given feature indices by delegating to this learner's weight
+ * vector, as returned by <code>getWeightVector()</code>.
+ * @param uselessfeatures the features being pruned.
+ * @param numberFeatures the total number of features before pruning.
+ */
+ public void pruneWeights(int[] uselessfeatures, int numberFeatures) {
+     getWeightVector().pruneWeights(uselessfeatures, numberFeatures);
+ }
}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseAveragedPerceptron.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseAveragedPerceptron.java
index c9b74899..4e76223a 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseAveragedPerceptron.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseAveragedPerceptron.java
@@ -61,6 +61,15 @@ public class SparseAveragedPerceptron extends SparsePerceptron {
* {@link SparseAveragedPerceptron.AveragedWeightVector}.
**/
protected AveragedWeightVector awv;
+
+ /**
+ * @return this learner's averaged weight vector, {@link #awv} (the same object, not a copy).
+ */
+ public AveragedWeightVector getAveragedWeightVector() {
+     return this.awv;
+ }
+
+
/** Keeps the extra information necessary to compute the averaged bias. */
protected double averagedBias;
@@ -729,5 +738,46 @@ public Object clone() {
public SparseWeightVector emptyClone() {
return new AveragedWeightVector();
}
+
+ /**
+ * Prunes the same features from the regular weights (through the superclass) and from the
+ * averaged weights. The averaged weights are validated before anything is modified.
+ * @param uselessfeatures indices of the useless features; they are matched in a single forward pass, so they must be ascending.
+ * @param numfeatures since this weight vec does not know how many features there are, it must be passed in
+ * @throws RuntimeException if there are more averaged weights than <code>numfeatures</code>.
+ */
+ @Override
+ public void pruneWeights(int[] uselessfeatures, int numfeatures) {
+     if (uselessfeatures.length == 0)
+         return;
+
+     // validate before touching anything, so a failure here cannot leave the regular
+     // weights pruned while the averaged weights are still at their old size.
+     int oldsize = this.averagedWeights.size();
+     if (oldsize > numfeatures) {
+         throw new RuntimeException("There was an averaged weight vector with more weights("+oldsize+
+             ") than the number of features("+numfeatures+")!");
+     }
+     super.pruneWeights(uselessfeatures, numfeatures);
+
+     // at most oldsize averages can survive; sizing the scratch buffer by what is stored
+     // (not by numfeatures) means it can never be negative or too small.
+     double[] kept = new double[oldsize];
+     int uselessindex = 0;
+     int keptcount = 0;
+     for (int oldindex = 0; oldindex < oldsize; oldindex++) {
+         if (uselessindex < uselessfeatures.length && uselessfeatures[uselessindex] == oldindex) {
+             // this is a useless feature, we will skip it.
+             uselessindex++;
+         } else {
+             kept[keptcount++] = averagedWeights.get(oldindex);
+         }
+     }
+
+     // shrink to exactly the number of surviving averages.
+     double[] newvec = new double[keptcount];
+     System.arraycopy(kept, 0, newvec, 0, keptcount);
+     this.averagedWeights = new DVector(newvec);
+ }
}
}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseNetworkLearner.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseNetworkLearner.java
index b63d0b0f..822fc1fd 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseNetworkLearner.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseNetworkLearner.java
@@ -10,6 +10,7 @@
import java.io.PrintStream;
import java.util.Collection;
import java.util.Iterator;
+import java.util.Map.Entry;
import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream;
import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream;
@@ -19,6 +20,7 @@
import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector;
import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet;
+import edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.SparseNetworkOptimizer;
/**
* A SparseNetworkLearner uses multiple {@link LinearThresholdUnit}s to make a
@@ -44,7 +46,6 @@
public class SparseNetworkLearner extends Learner {
private static final long serialVersionUID = 1L;
-
/** Default for {@link #baseLTU}. */
public static final LinearThresholdUnit defaultBaseLTU = new SparseAveragedPerceptron();
@@ -70,7 +71,6 @@ public class SparseNetworkLearner extends Learner {
/** Whether or not this learner's labeler produces conjunctive features. */
protected boolean conjunctiveLabels;
-
/**
* Instantiates this multi-class learner with the default learning algorithm:
* {@link #defaultBaseLTU}.
@@ -185,7 +185,6 @@ public void setParameters(Parameters p) {
setLTU(p.baseLTU);
}
-
/**
* Retrieves the parameters that are set in this learner.
*
@@ -198,7 +197,6 @@ public Learner.Parameters getParameters() {
return p;
}
-
/**
* Sets the baseLTU variable. This method will not have any effect on the
* LTUs that already exist in the network. However, new LTUs created after this method is
@@ -211,7 +209,6 @@ public void setLTU(LinearThresholdUnit ltu) {
baseLTU.name = name + "$baseLTU";
}
-
/**
* Sets the labeler.
*
@@ -229,7 +226,6 @@ public void setLabeler(Classifier l) {
super.setLabeler(l);
}
-
/**
* Sets the extractor.
*
@@ -255,7 +251,6 @@ public void setNetworkLabel(int label) {
network.set(label, ltu);
}
-
/**
* Each example is treated as a positive example for the linear threshold unit associated with
* the label's value that is active for the example and as a negative example for all other
@@ -290,7 +285,17 @@ public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLa
ltu.learn(exampleFeatures, exampleValues, l, labelValues);
}
}
-
+
+ /**
+ * Runs the superclass's end-of-training step and then passes this learner to a
+ * {@link SparseNetworkOptimizer} for optimization.
+ */
+ public void doneTraining() {
+     super.doneTraining();
+
+     // optimization is the last step, after the superclass has finished.
+     new SparseNetworkOptimizer(this).optimize();
+ }
/** Simply calls doneLearning() on every LTU in the network. */
public void doneLearning() {
@@ -304,14 +309,12 @@ public void doneLearning() {
}
}
-
/** Sets the number of examples and features. */
public void initialize(int ne, int nf) {
numExamples = ne;
numFeatures = nf;
}
-
/**
* Simply calls {@link LinearThresholdUnit#doneWithRound()} on every LTU in the network.
*/
@@ -326,14 +329,12 @@ public void doneWithRound() {
}
}
-
/** Clears the network. */
public void forget() {
super.forget();
network = new OVector();
}
-
/**
* Returns scores for only those labels in the given collection. If the given collection is
* empty, scores for all labels will be returned. If there is no {@link LinearThresholdUnit}
@@ -352,7 +353,6 @@ public ScoreSet scores(Object example, Collection candidates) {
return scores((int[]) exampleArray[0], (double[]) exampleArray[1], candidates);
}
-
/**
* Returns scores for only those labels in the given collection. If the given collection is
* empty, scores for all labels will be returned. If there is no {@link LinearThresholdUnit}
@@ -404,7 +404,6 @@ public ScoreSet scores(int[] exampleFeatures, double[] exampleValues, Collection
return result;
}
-
/**
* This method is a surrogate for {@link #scores(int[],double[],Collection)} when the labeler is
* known to produce conjunctive features. It is necessary because when given a string label from
@@ -438,7 +437,6 @@ protected ScoreSet conjunctiveScores(int[] exampleFeatures, double[] exampleValu
return result;
}
-
/**
* Produces a set of scores indicating the degree to which each possible discrete classification
* value is associated with the given example object. These scores are just the scores of each
@@ -465,7 +463,6 @@ public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) {
return result;
}
-
/**
* Returns the classification of the given example as a single feature instead of a
* {@link FeatureVector}.
@@ -494,7 +491,6 @@ public Feature featureValue(int[] f, double[] v) {
return bestValue == -1 ? null : predictions.get(bestValue);
}
-
/**
* This implementation uses a winner-take-all comparison of the outputs from the individual
* linear threshold units' score methods.
@@ -507,7 +503,6 @@ public String discreteValue(int[] exampleFeatures, double[] exampleValues) {
return featureValue(exampleFeatures, exampleValues).getStringValue();
}
-
/**
* This implementation uses a winner-take-all comparison of the outputs from the individual
* linear threshold units' score methods.
@@ -520,7 +515,6 @@ public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) {
return new FeatureVector(featureValue(exampleFeatures, exampleValues));
}
-
/**
* Using this method, the winner-take-all competition is narrowed to involve only those labels
* contained in the specified list. The list must contain only Strings.
@@ -535,7 +529,6 @@ public Feature valueOf(Object example, Collection candidates) {
return valueOf((int[]) exampleArray[0], (double[]) exampleArray[1], candidates);
}
-
/**
* Using this method, the winner-take-all competition is narrowed to involve only those labels
* contained in the specified list. The list must contain only Strings.
@@ -596,7 +589,6 @@ public Feature valueOf(int[] exampleFeatures, double[] exampleValues, Collection
return predictions.get(bestValue);
}
-
/**
* This method is a surrogate for {@link #valueOf(int[],double[],Collection)} when the labeler
* is known to produce conjunctive features. It is necessary because when given a string label
@@ -634,7 +626,6 @@ protected Feature conjunctiveValueOf(int[] exampleFeatures, double[] exampleValu
return predictions.get(bestValue);
}
-
/**
* Writes the algorithm's internal representation as text.
*
@@ -659,7 +650,6 @@ public void write(PrintStream out) {
out.close();
}
-
/**
* Writes the learned function's internal representation in binary form.
*
@@ -682,7 +672,6 @@ public void write(ExceptionlessOutputStream out) {
out.close();
}
-
/**
* Reads the binary representation of a learner with this object's run-time type, overwriting
* any and all learned or manually specified parameters as well as the label lexicon but without
@@ -700,7 +689,6 @@ public void read(ExceptionlessInputStream in) {
network.add(Learner.readLearner(in));
}
-
/** Returns a deep clone of this learning algorithm. */
public Object clone() {
SparseNetworkLearner clone = null;
@@ -727,7 +715,6 @@ public Object clone() {
return clone;
}
-
/**
* Simply a container for all of {@link SparseNetworkLearner}'s configurable parameters. Using
* instances of this class should make code more readable and constructors less complicated.
@@ -743,13 +730,11 @@ public static class Parameters extends Learner.Parameters {
**/
public LinearThresholdUnit baseLTU;
-
/** Sets all the default values. */
public Parameters() {
baseLTU = (LinearThresholdUnit) defaultBaseLTU.clone();
}
-
/**
* Sets the parameters from the parent's parameters object, giving defaults to all
* parameters declared in this object.
@@ -759,14 +744,12 @@ public Parameters(Learner.Parameters p) {
baseLTU = (LinearThresholdUnit) defaultBaseLTU.clone();
}
-
/** Copy constructor. */
public Parameters(Parameters p) {
super(p);
baseLTU = p.baseLTU;
}
-
/**
* Calls the appropriate Learner.setParameters(Parameters) method for this
* Parameters object.
@@ -777,7 +760,6 @@ public void setParameters(Learner l) {
((SparseNetworkLearner) l).setParameters(this);
}
-
/**
* Creates a string representation of these parameters in which only those parameters that
* differ from their default values are mentioned.
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseWeightVector.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseWeightVector.java
index defe1001..0353daba 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseWeightVector.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseWeightVector.java
@@ -29,7 +29,9 @@
* @author Nick Rizzolo
**/
public class SparseWeightVector implements Cloneable, Serializable {
- /**
+ /** default. */
+ private static final long serialVersionUID = 1L;
+ /**
* When a feature appears in an example but not in this vector, it is assumed to have this
* weight.
**/
@@ -40,7 +42,7 @@ public class SparseWeightVector implements Cloneable, Serializable {
/** The weights in the vector indexed by their {@link Lexicon} key. */
protected DVector weights;
- /** Simply instantiates {@link #weights}. */
+ /** Simply instantiates {@link #weights}. */
public SparseWeightVector() {
this(new DVector(defaultCapacity));
}
@@ -106,6 +108,13 @@ public void setWeight(int featureIndex, double w, double defaultW) {
weights.set(featureIndex, w, defaultW);
}
+ /**
+ * Exposes the backing {@link #weights} vector itself (not a copy); needed during model optimization.
+ * @return the internal weights; changes made through the returned vector affect this object.
+ */
+ public DVector getRawWeights() {
+     return this.weights;
+ }
/**
* Takes the dot product of this SparseWeightVector with the argument vector, using
@@ -317,7 +326,8 @@ public void toStringJustWeights(PrintStream out) {
* @param min Sets the minimum width for the textual representation of all features.
* @param lex The feature lexicon.
**/
- public void toStringJustWeights(PrintStream out, int min, Lexicon lex) {
+ @SuppressWarnings({ "rawtypes", "unchecked" })
+ public void toStringJustWeights(PrintStream out, int min, Lexicon lex) {
Map map = lex.getMap();
Map.Entry[] entries = (Map.Entry[]) map.entrySet().toArray(new Map.Entry[map.size()]);
Arrays.sort(entries, new Comparator() {
@@ -420,7 +430,8 @@ public static SparseWeightVector readWeightVector(ExceptionlessInputStream in) {
String name = in.readString();
if (name == null)
return null;
- Class c = ClassUtils.getClass(name);
+ @SuppressWarnings("rawtypes")
+ Class c = ClassUtils.getClass(name);
SparseWeightVector result = null;
try {
@@ -482,4 +493,44 @@ public Object clone() {
public SparseWeightVector emptyClone() {
return new SparseWeightVector();
}
+
+ /**
+ * Deletes the weights of the given features and compacts the surviving weights, in their
+ * original order, into a new vector holding exactly the weights that were kept.
+ * @param uselessfeatures indices of the useless features; matched in one forward pass, so they must be ascending (indices past the stored weights are ignored).
+ * @param numfeatures since this weight vec does not know how many features there are, it must be passed in
+ * @throws IllegalArgumentException if more features are to be pruned than <code>numfeatures</code>.
+ * @throws RuntimeException if this vector holds more weights than <code>numfeatures</code>.
+ */
+ public void pruneWeights(int[] uselessfeatures, int numfeatures) {
+     if (uselessfeatures.length == 0)
+         return;
+
+     int oldsize = weights.size();
+     if (oldsize > numfeatures) {
+         throw new RuntimeException("There was a weight vector with more weights("+oldsize+
+             ") than the number of features("+numfeatures+")!");
+     }
+     if (uselessfeatures.length > numfeatures) {
+         throw new IllegalArgumentException("Cannot prune "+uselessfeatures.length+
+             " features when there are only "+numfeatures+" features!");
+     }
+
+     // at most oldsize weights survive; size the scratch buffer by what is actually stored.
+     double[] kept = new double[oldsize];
+     int uselessindex = 0;
+     int keptcount = 0;
+     for (int oldindex = 0; oldindex < oldsize; oldindex++) {
+         if (uselessindex < uselessfeatures.length && uselessfeatures[uselessindex] == oldindex) {
+             uselessindex++;
+         } else {
+             kept[keptcount++] = weights.get(oldindex);
+         }
+     }
+
+     // shrink to exactly the number of surviving weights.
+     double[] newvec = new double[keptcount];
+     System.arraycopy(kept, 0, newvec, 0, keptcount);
+     this.weights = new DVector(newvec);
+ }
}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SupportVectorMachine.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SupportVectorMachine.java
index aec40db5..2c930b57 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SupportVectorMachine.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SupportVectorMachine.java
@@ -24,6 +24,7 @@
import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector;
import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet;
+import edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.SupportVectorMachineOptimizer;
import edu.illinois.cs.cogcomp.lbjava.util.ByteString;
import edu.illinois.cs.cogcomp.lbjava.util.FVector;
@@ -64,6 +65,10 @@
* @author Michael Paul
**/
public class SupportVectorMachine extends Learner {
+ /**
+ *
+ */
+ private static final long serialVersionUID = 1L;
/** Default for {@link #solverType}. */
public static final String defaultSolverType = "L2LOSS_SVM";
/** Default for {@link #C}. */
@@ -72,6 +77,11 @@ public class SupportVectorMachine extends Learner {
public static final double defaultEpsilon = 0.1;
/** Default for {@link #bias}. */
public static final double defaultBias = 1.0;
+ /** Any weight less than this is considered irrelevant. This is for pruning. */
+ public static final double defaultFeaturePruningThreshold = 0.000001;
+
+ /** Weights below this threshold are considered irrelevant; default {@link #defaultFeaturePruningThreshold}. */
+ public double featurePruningThreshold;
/**
* Keeps track of whether the doneLearning() warning message has been printed.
@@ -114,6 +124,14 @@ public class SupportVectorMachine extends Learner {
/** The number of bias features; there are either 0 or 1 of them. */
protected int biasFeatures;
+ /**
+ * @return the number of bias features, {@link #biasFeatures} (either 0 or 1).
+ */
+ public int getBiasFeatures() {
+     return this.biasFeatures;
+ }
+
+
/** Controls if liblinear-related messages are output */
protected boolean displayLL = false;
@@ -210,6 +228,20 @@ public SupportVectorMachine(double c, double e, double b, String s, boolean d) {
this("", c, e, b, s, d);
}
+ /**
+ * Initializing constructor that also sets the feature pruning threshold. The name of the classifier gets the empty string.
+ *
+ * @param c The desired C value.
+ * @param e The desired epsilon value.
+ * @param b The desired bias.
+ * @param s The solver type.
+ * @param d Toggles if the liblinear-related output should be displayed.
+ * @param fpt The feature pruning threshold.
+ **/
+ public SupportVectorMachine(double c, double e, double b, String s, boolean d, double fpt) {
+ this("", c, e, b, s, d, fpt);
+ }
+
/**
* Initializing constructor. C, epsilon, the bias, and the solver type take the default values.
*
@@ -276,6 +308,20 @@ public SupportVectorMachine(String n, double c, double e, double b, String s) {
* @param d Toggles if the liblinear-related output should be displayed.
**/
public SupportVectorMachine(String n, double c, double e, double b, String s, boolean d) {
+ this(n, c, e, b, s, d, SupportVectorMachine.defaultFeaturePruningThreshold);
+ }
+
+ /**
+ * Initializing constructor.
+ *
+ * @param n The name of the classifier.
+ * @param c The desired C value.
+ * @param e The desired epsilon value.
+ * @param b The desired bias.
+ * @param s The solver type.
+ * @param d Toggles if the liblinear-related output should be displayed.
+ * @param fpt The feature pruning threshold.
+ **/
+ public SupportVectorMachine(String n, double c, double e, double b, String s, boolean d, double fpt) {
super(n);
newLabelLexicon = labelLexicon;
Parameters p = new Parameters();
@@ -284,10 +330,12 @@ public SupportVectorMachine(String n, double c, double e, double b, String s, bo
p.bias = b;
p.solverType = s;
p.displayLL = d;
+ p.featurePruningThreshold = fpt;
allowableValues = new String[0];
setParameters(p);
}
+
/**
* Initializing constructor. Sets all member variables to their associated settings in the
* {@link SupportVectorMachine.Parameters} object. The name of the classifier gets the empty
@@ -317,9 +365,23 @@ public double[] getWeights() {
return weights;
}
+ /**
+ * @return the current value of {@link #numFeatures}.
+ */
+ public int getNumFeatures() {
+     return this.numFeatures;
+ }
+
public int getNumClasses() {
return numClasses;
}
+
+ /**
+ * @return the current value of {@link #solverType}.
+ */
+ public String getSolverType() {
+     return this.solverType;
+ }
/**
* Sets the values of parameters that control the behavior of this learning algorithm.
@@ -333,6 +395,7 @@ public void setParameters(Parameters p) {
biasFeatures = (bias >= 0) ? 1 : 0;
solverType = p.solverType;
displayLL = p.displayLL;
+ featurePruningThreshold = p.featurePruningThreshold;
}
@@ -349,6 +412,7 @@ public Learner.Parameters getParameters() {
p.bias = bias;
p.solverType = solverType;
p.displayLL = displayLL;
+ p.featurePruningThreshold = this.featurePruningThreshold;
return p;
}
@@ -403,6 +467,7 @@ public void initialize(int ne, int nf) {
* @param exampleLabels The example's array of label indices.
* @param labelValues The example's array of label values.
**/
+ @SuppressWarnings({ "unchecked", "rawtypes" })
public void learn(final int[] exampleFeatures, double[] exampleValues, int[] exampleLabels,
double[] labelValues) {
// Expand the size of the example arrays if they are full.
@@ -600,12 +665,22 @@ else if (newLabelLexicon.size() > labelLexicon.size()) {
weights = trainedModel.getFeatureWeights();
allExamples = null;
allLabels = null;
-
if (displayLL)
System.out.println(" Finished training at " + new Date());
}
+ /**
+ * Finishes training via the superclass, then runs a {@link SupportVectorMachineOptimizer} on
+ * this learner; the optimizer is intended to prune features by dropping low value weights.
+ */
+ public void doneTraining() {
+     super.doneTraining();
+
+     // with training complete, let the optimizer discard low weight features.
+     new SupportVectorMachineOptimizer(this).optimize();
+ }
+
/**
* Writes the algorithm's internal representation as text. In the first line of output, the name
* of the classifier is printed, followed by {@link #C}, {@link #epsilon}, {@link #bias}, and
@@ -895,7 +970,7 @@ public double score(int[] exampleFeatures, double[] exampleValues, int label) {
numClasses = 1;
label = 0;
}
-
+
for (int i = 0; i < exampleFeatures.length; i++) {
int f = exampleFeatures[i];
@@ -929,6 +1004,43 @@ public Feature valueOf(Object example, Collection candidates) {
}
+ /**
+ * Given the index of the weights to prune, discard them, then shrink the weight vector down
+ * to save memory. Every check runs before {@link #weights} and {@link #numFeatures} are
+ * replaced, so a rejected call leaves the model untouched.
+ * @param uselessfeatures the indices of the features being pruned, in ascending order.
+ * @param numberFeatures the total number of features before pruning.
+ * @throws IllegalArgumentException if the arguments are inconsistent with the current weights,
+ *         or if a weight selected for pruning has a magnitude above the pruning threshold.
+ */
+ public void pruneWeights(int[] uselessfeatures, int numberFeatures) {
+     int sz = numberFeatures - uselessfeatures.length;
+     if (sz < 0)
+         throw new IllegalArgumentException("Cannot prune more features than exist : features="
+             +numberFeatures+" useless="+uselessfeatures.length);
+
+     // NOTE(review): this walks weights as one entry per feature followed by the bias
+     // entries; confirm that layout also holds for multi-class models.
+     double[] newweights = new double[sz+biasFeatures];
+     int nextToPrune = 0;
+     int newweightindex = 0;
+     for (int i = 0; i < weights.length; i++) {
+         if (nextToPrune < uselessfeatures.length && i == uselessfeatures[nextToPrune]) {
+             if (Math.abs(weights[i]) > this.featurePruningThreshold)
+                 throw new IllegalArgumentException("Pruning a high value weight : "+weights[i]+" at "+i);
+             nextToPrune++;
+         } else {
+             if (newweightindex >= newweights.length)
+                 throw new IllegalArgumentException("Attempted to overpopulate the new weight : indx="
+                     +i+" features="+numberFeatures+" useless="+uselessfeatures.length);
+             newweights[newweightindex] = weights[i];
+             newweightindex++;
+         }
+     }
+
+     // do some sanity checks.
+     if (newweightindex != newweights.length)
+         throw new IllegalArgumentException("The new pruned weight vector was not fully populated!");
+     if (nextToPrune != uselessfeatures.length)
+         throw new IllegalArgumentException("Not all the prunable features were pruned!");
+
+     // all good, do the replacement; report it only when output was requested.
+     if (displayLL)
+         System.out.println("SVM.pruneWeights: "+sz+" features, "+newweights.length+" weights size");
+     numFeatures = sz;
+     weights = newweights;
+ }
+
/**
* Using this method, the winner-take-all competition is narrowed to involve only those labels
* contained in the specified list. The list must contain only Strings.
@@ -1062,6 +1174,14 @@ public static class Parameters extends Learner.Parameters {
*
**/
public String solverType;
+
+ /**
+ * @return the current value of the public {@link #solverType} field.
+ */
+ public String getSolverType() {
+ return solverType;
+ }
+
/**
* The cost parameter C; default {@link SupportVectorMachine#defaultC}
**/
@@ -1081,7 +1201,9 @@ public static class Parameters extends Learner.Parameters {
* false
**/
public boolean displayLL;
-
+
+ /** Feature pruning threshold: the largest weight magnitude a feature may have and still be pruned. */
+ public double featurePruningThreshold;
/** Sets all the default values. */
public Parameters() {
@@ -1090,6 +1212,7 @@ public Parameters() {
epsilon = defaultEpsilon;
bias = defaultBias;
displayLL = false;
+ featurePruningThreshold = defaultFeaturePruningThreshold;
}
@@ -1104,6 +1227,7 @@ public Parameters(Learner.Parameters p) {
epsilon = defaultEpsilon;
bias = defaultBias;
displayLL = false;
+ featurePruningThreshold = defaultFeaturePruningThreshold;
}
@@ -1115,6 +1239,7 @@ public Parameters(Parameters p) {
epsilon = p.epsilon;
bias = p.bias;
displayLL = p.displayLL;
+ featurePruningThreshold = p.featurePruningThreshold;
}
@@ -1168,6 +1293,8 @@ public String nonDefaultString() {
result += ", epsilon = " + epsilon;
if (bias != SupportVectorMachine.defaultBias)
result += ", bias = " + bias;
+ if (featurePruningThreshold != defaultFeaturePruningThreshold)
+ result += ", feature pruning threshold = " + featurePruningThreshold;
if (result.startsWith(", "))
result = result.substring(2);
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LexiconOptimizer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LexiconOptimizer.java
new file mode 100644
index 00000000..f6a68d15
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LexiconOptimizer.java
@@ -0,0 +1,181 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.learn.featurepruning;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Map.Entry;
+
+import edu.illinois.cs.cogcomp.lbjava.classify.DiscreteConjunctiveFeature;
+import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
+import edu.illinois.cs.cogcomp.lbjava.classify.RealConjunctiveFeature;
+import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon;
+
+/**
+ * This class defines the life cycle methods for pruning useless features
+ * from a lexicon. Features that carry zero weights, for example, are
+ * not useful to the model, so they can be eliminated, saving space and execution time
+ * without affecting accuracy (much).
+ * @author redman
+ */
+abstract public class LexiconOptimizer {
+
+ /** any weight less than this is considered irrelevant. This is for pruning. */
+ private static final double PRUNING_THRESHOLD = 0.000001;
+
+ /** lexicon contains the features we will operate on. */
+ protected Lexicon lexicon;
+
+ /** This is also for testing: save the names of the features we will delete, and check the names when we do. */
+ final protected ArrayListFor sparse learners, it is often the case that the array of features you learn + * contains only a subset of useful features. When we leave these features in the lexicon, + * we end up with bloated lexicons and weight vectors. This leads to larger than necessary + * models.
+ * + *This package contains an interface that defines the life cycle for the feature pruning + * process, as well as some implementations, one that takes multiple weight vectors (for + * multi-class network learners), and some that take only one weight vector.
+ * + *All optimizers should subclass {@link LexiconOptimizer}, which implements most of the + * optimization. Subclasses will need to provide a method to compute the weight value to compare + * against the threshold, a method to identify the useless features, and a method to prune + * those features.
+ * + *The optimizers are invoked by the {@link edu.illinois.cs.cogcomp.lbjava.learn.Learner#doneTraining} + * method of the Learner class when all learning is complete. Learners that include a feature + * pruning implementation must override this method to invoke the optimizer. In this way, during the + * normal LBJava compile and model build cycle, the optimization is performed automatically. Those + * who have built their own training procedure are required to invoke the doneTraining and + * {@link edu.illinois.cs.cogcomp.lbjava.learn.Learner#startTraining} methods at appropriate points during + * their training process.
+ * + *The learner classes typically have a parameter that can be set to change the default feature + * pruning threshold to any value the user might choose, or it can be set to 0.0 to disable pruning.
+ * + *The pruning threshold value is provided by the specific learner, and should be, in one way or + * another, parameterized.
+ * @author redman + */ +package edu.illinois.cs.cogcomp.lbjava.learn.featurepruning; \ No newline at end of file diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/FVector.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/FVector.java index 67545bf3..b530eb9a 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/FVector.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/FVector.java @@ -7,6 +7,7 @@ */ package edu.illinois.cs.cogcomp.lbjava.util; +import java.lang.reflect.Array; import java.util.Arrays; import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; @@ -190,6 +191,32 @@ public Feature remove(int i) { } + /** + * Remove all the features specfied by the indices. This is MUCH faster + * than removing them one at a time. + * + * @param indexes The indexes of the elements to remove. + **/ + public void remove(int[] indexes) { + Arrays.sort(indexes); + int sourceindex = 0; + int discardindex = 0; + for (int targetindex = 0; targetindex < size; targetindex++) { + if (discardindex < indexes.length && targetindex == indexes[discardindex]) { + // skip this one (by simply not coping it and not inc the sourceindex), inc discardindex + discardindex++; + } else { + vector[sourceindex] = vector[targetindex]; + sourceindex++; + } + } + if (discardindex != indexes.length) + // this should nver happen. + throw new RuntimeException("There was a problem removing some of the indexes!"); + size -= indexes.length; + } + + /** Returns the value of {@link #size}. 
*/ public int size() { return size; diff --git a/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/SparseNetworkLearningPruneTest.java b/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/SparseNetworkLearningPruneTest.java new file mode 100644 index 00000000..e5202d51 --- /dev/null +++ b/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/SparseNetworkLearningPruneTest.java @@ -0,0 +1,20 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava; + +import static org.junit.Assert.*; + +import org.junit.Test; + +public class SparseNetworkLearningPruneTest { + + @Test + public void test() { + } + +} diff --git a/pom.xml b/pom.xml index 19dea5ef..9ae2f769 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,7 @@