Commit: Use CMA-ES for polishing

MaxHalford committed Jun 16, 2018
1 parent 3b2d4fb commit 3045caf
Showing 9 changed files with 188 additions and 176 deletions.
38 changes: 19 additions & 19 deletions cmd/xgp/cmd/fit.go
@@ -20,6 +20,7 @@ var (
fitLossMetricName string
fitEvalMetricName string
fitParsimonyCoeff float64
fitPolishBest bool

// Function parameters
fitFuncs string
@@ -32,15 +33,14 @@ var (
fitMaxHeight uint

// Genetic algorithm parameters
fitNPopulations uint
fitNIndividuals uint
fitNGenerations uint
fitNPolishGenerations uint
fitPHoistMutation float64
fitPSubtreeMutation float64
fitPPointMutation float64
fitPointMutationRate float64
fitPSubtreeCrossover float64
fitNPopulations uint
fitNIndividuals uint
fitNGenerations uint
fitPHoistMutation float64
fitPSubtreeMutation float64
fitPPointMutation float64
fitPointMutationRate float64
fitPSubtreeCrossover float64

// Other
fitSeed int64
@@ -67,6 +67,7 @@ func init() {
fitCmd.Flags().StringVarP(&fitLossMetricName, "loss", "", "mae", "metric used for scoring programs; determines the task to perform")
fitCmd.Flags().StringVarP(&fitEvalMetricName, "eval", "", "", "metric used for monitoring progress; defaults to loss_metric if not provided")
fitCmd.Flags().Float64VarP(&fitParsimonyCoeff, "parsimony", "", 0.00001, "parsimony coefficient by which a program's height is multiplied to decrease its fitness")
fitCmd.Flags().BoolVarP(&fitPolishBest, "polish", "", true, "whether or not to polish the best program")

fitCmd.Flags().StringVarP(&fitFuncs, "funcs", "", "add,sub,mul,div", "comma-separated set of authorised functions")
fitCmd.Flags().Float64VarP(&fitConstMin, "const_min", "", -5, "lower bound used for generating random constants")
@@ -80,7 +81,6 @@ func init() {
fitCmd.Flags().UintVarP(&fitNPopulations, "pops", "", 1, "number of populations used in the GA")
fitCmd.Flags().UintVarP(&fitNIndividuals, "indis", "", 100, "number of individuals used for each population in the GA")
fitCmd.Flags().UintVarP(&fitNGenerations, "gens", "", 30, "number of generations used in the GA")
fitCmd.Flags().UintVarP(&fitNPolishGenerations, "polish_gens", "", 0, "number of generations used to polish the best program")
fitCmd.Flags().Float64VarP(&fitPHoistMutation, "p_hoist_mut", "", 0.1, "probability of applying hoist mutation")
fitCmd.Flags().Float64VarP(&fitPSubtreeMutation, "p_sub_mut", "", 0.1, "probability of applying subtree mutation")
fitCmd.Flags().Float64VarP(&fitPPointMutation, "p_point_mut", "", 0.1, "probability of applying point mutation")
@@ -138,6 +138,7 @@ var fitCmd = &cobra.Command{
LossMetric: lossMetric,
EvalMetric: evalMetric,
ParsimonyCoeff: fitParsimonyCoeff,
PolishBest: fitPolishBest,

Funcs: fitFuncs,
ConstMin: fitConstMin,
@@ -148,15 +149,14 @@ var fitCmd = &cobra.Command{
MinHeight: fitMinHeight,
MaxHeight: fitMaxHeight,

NPopulations: fitNPopulations,
NIndividuals: fitNIndividuals,
NGenerations: fitNGenerations,
NPolishGenerations: fitNPolishGenerations,
PHoistMutation: fitPHoistMutation,
PSubtreeMutation: fitPSubtreeMutation,
PPointMutation: fitPPointMutation,
PointMutationRate: fitPointMutationRate,
PSubtreeCrossover: fitPSubtreeCrossover,
NPopulations: fitNPopulations,
NIndividuals: fitNIndividuals,
NGenerations: fitNGenerations,
PHoistMutation: fitPHoistMutation,
PSubtreeMutation: fitPSubtreeMutation,
PPointMutation: fitPPointMutation,
PointMutationRate: fitPointMutationRate,
PSubtreeCrossover: fitPSubtreeCrossover,

RNG: rng,
}
45 changes: 22 additions & 23 deletions config.go
@@ -19,6 +19,7 @@ type Config struct {
LossMetric metrics.Metric
EvalMetric metrics.Metric
ParsimonyCoeff float64
PolishBest bool
// Function parameters
Funcs string
ConstMin float64
@@ -29,15 +30,14 @@ type Config struct {
MinHeight uint
MaxHeight uint
// Genetic algorithm parameters
NPopulations uint
NIndividuals uint
NGenerations uint
NPolishGenerations uint
PHoistMutation float64
PSubtreeMutation float64
PPointMutation float64
PointMutationRate float64
PSubtreeCrossover float64
NPopulations uint
NIndividuals uint
NGenerations uint
PHoistMutation float64
PSubtreeMutation float64
PPointMutation float64
PointMutationRate float64
PSubtreeCrossover float64
// Other
RNG *rand.Rand
}
@@ -51,6 +51,7 @@ func (c Config) String() string {
[]string{"Loss metric", c.LossMetric.String()},
[]string{"Evaluation metric", c.EvalMetric.String()},
[]string{"Parsimony coefficient", strconv.FormatFloat(c.ParsimonyCoeff, 'g', -1, 64)},
[]string{"Polish the best program", strconv.FormatBool(c.PolishBest)},

[]string{"Functions", c.Funcs},
[]string{"Constant minimum", strconv.FormatFloat(c.ConstMin, 'g', -1, 64)},
@@ -64,7 +65,6 @@ func (c Config) String() string {
[]string{"Number of populations", strconv.Itoa(int(c.NPopulations))},
[]string{"Number of individuals per population", strconv.Itoa(int(c.NIndividuals))},
[]string{"Number of generations", strconv.Itoa(int(c.NGenerations))},
[]string{"Number of tuning generations", strconv.Itoa(int(c.NPolishGenerations))},
[]string{"Hoist mutation probability", strconv.FormatFloat(c.PHoistMutation, 'g', -1, 64)},
[]string{"Subtree mutation probability", strconv.FormatFloat(c.PSubtreeMutation, 'g', -1, 64)},
[]string{"Point mutation probability", strconv.FormatFloat(c.PPointMutation, 'g', -1, 64)},
@@ -192,8 +192,10 @@ func (c Config) NewEstimator() (*Estimator, error) {
// NewConfigWithDefaults returns a Config with default values.
func NewConfigWithDefaults() Config {
return Config{
LossMetric: metrics.MeanSquaredError{},
EvalMetric: metrics.MeanSquaredError{},
LossMetric: metrics.MeanSquaredError{},
EvalMetric: metrics.MeanSquaredError{},
ParsimonyCoeff: 0,
PolishBest: true,

Funcs: "add,sub,mul,div",
ConstMin: -5,
@@ -204,16 +206,13 @@ func NewConfigWithDefaults() Config {
PFull: 0.5,
PLeaf: 0.3,

NPopulations: 1,
NIndividuals: 100,
NGenerations: 30,
NPolishGenerations: 0,
PHoistMutation: 0.1,
PPointMutation: 0.1,
PSubtreeMutation: 0.1,
PointMutationRate: 0.3,
PSubtreeCrossover: 0.5,

ParsimonyCoeff: 0,
NPopulations: 1,
NIndividuals: 100,
NGenerations: 30,
PHoistMutation: 0.1,
PPointMutation: 0.1,
PSubtreeMutation: 0.1,
PointMutationRate: 0.3,
PSubtreeCrossover: 0.5,
}
}
7 changes: 4 additions & 3 deletions docs/how-it-works.md
@@ -81,12 +81,13 @@ type Program struct {
}
```

The `Estimator` gives the `Program` context about what it has to learn. The `Estimator` contains a `LossMetric` field which determines how to score each `Program` and whether the task is classification or regression. The `Estimator` is also the global structure that organizes the programs and handles the learning process. If you want to use XGP with Go then you'll be working with the `Estimator` struct.
The `Estimator` gives the `Program` context about what it has to learn. The `Estimator` contains a `LossMetric` field which determines how to score each `Program` and whether the task is classification or regression. The `Estimator` is also the global structure that organizes the programs and handles the learning process. If you want to use XGP with Go then you'll be working with the `Estimator` struct. However, you shouldn't instantiate an `Estimator` directly; instead, specify the training parameters on a `Config` struct and then call its `NewEstimator` method.
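
To make that flow concrete, here is a minimal sketch based on the `NewConfigWithDefaults` and `NewEstimator` functions visible in `config.go` above. The full `Fit` signature isn't shown in this diff, so the training call is only indicated in a comment:

```go
package main

import (
	"log"

	"github.com/MaxHalford/xgp"
)

func main() {
	// Start from the defaults defined in NewConfigWithDefaults (config.go).
	config := xgp.NewConfigWithDefaults()
	config.NGenerations = 50
	config.PolishBest = true // polish the best program's constants with CMA-ES

	// NewEstimator builds the Estimator from the configuration.
	estimator, err := config.NewEstimator()
	if err != nil {
		log.Fatal(err)
	}

	// estimator.Fit(...) would then run the genetic algorithm; its full
	// argument list isn't part of this diff, so it is omitted here.
	_ = estimator
}
```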

The [`metrics` package](https://github.com/MaxHalford/xgp/tree/master/metrics) is a completely independent package that contains implementations of machine learning metrics (such as accuracy and logarithmic loss). In theory it could be traded for another package if something standardized comes up.
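
Since the `metrics` package is standalone, it can be used on its own. The sketch below assumes the `Apply(yTrue, yPred, weights)` signature that the estimator code in this commit uses (e.g. `est.EvalMetric.Apply(est.YTrain, yTrainPred, nil)`):

```go
package main

import (
	"fmt"

	"github.com/MaxHalford/xgp/metrics"
)

func main() {
	// MeanSquaredError implements the metrics.Metric interface, as seen
	// in NewConfigWithDefaults.
	var metric metrics.Metric = metrics.MeanSquaredError{}

	yTrue := []float64{1, 2, 3}
	yPred := []float64{1.1, 1.9, 3.2}

	// The third argument carries optional observation weights; nil means
	// every observation counts equally.
	score, err := metric.Apply(yTrue, yPred, nil)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%s: %.5f\n", metric.String(), score)
}
```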

XGP does a few fancy tricks to make it competitive:
XGP does a few fancy tricks to be competitive:

- Tree simplification: because programs are randomly modified, it often happens that parts of a program can be simplified. For example the formula `sum(mul(2, 3), 4)` can simply be replaced by `10`. In practice catching these simplifications and avoiding unnecessary computations helps a lot.
- Tree simplification: because programs are randomly modified, it often happens that parts of a program can be simplified. For example the formula `add(mul(2, 3), 4)` can simply be replaced by `10`. In practice catching these simplifications and avoiding unnecessary computations helps a lot.
- Regularization: [bloat](http://dces.essex.ac.uk/staff/poli/gp-field-guide/113Bloat.html) is an unavoidable problem in genetic programming. As the generations go on, programs tend to grow in complexity. This increases the running time, and the resulting complex programs tend to overfit. By default XGP uses a **parsimony coefficient** to penalize programs based on the number of operators they possess.
- Constant optimisation: the constants of the best program are "polished" using [CMA-ES](https://www.wikiwand.com/en/CMA-ES), as sketched after this list. This usually takes a negligible amount of time and helps a lot in practice.
- More coming!
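
The `polishProgram` function called in `estimator.go` isn't part of this diff, so the following is only a rough sketch of the idea: collect the program's constants, let CMA-ES search for values that lower the fitness, and write the best values back. `Constants`, `SetConstants`, `Clone` and `cmaes.Minimize` are hypothetical helpers standing in for the real implementation; only `Evaluate` appears in this commit (in `polishBest`):

```go
// Rough sketch of constant polishing. Constants, SetConstants, Clone and
// cmaes.Minimize are hypothetical; only Evaluate appears in this commit.
func polishSketch(prog Program) (Program, error) {
	consts := prog.Tree.Constants() // hypothetical: collect the constants

	// CMA-ES searches the constant space for values that minimize the
	// program's fitness (lower is better, as in polishBest above).
	best, err := cmaes.Minimize(func(x []float64) float64 {
		candidate := prog.Clone()      // hypothetical deep copy
		candidate.Tree.SetConstants(x) // hypothetical setter
		fitness, err := candidate.Evaluate()
		if err != nil {
			return math.Inf(1) // treat evaluation failures as terrible fitness
		}
		return fitness
	}, consts)
	if err != nil {
		return prog, err
	}
	prog.Tree.SetConstants(best)
	return prog, nil
}
```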
2 changes: 1 addition & 1 deletion docs/training-parameters.md
@@ -11,6 +11,7 @@ The following table gives an overview of all the parameters that can be used fo
| Loss metric; used to determine if the task is classification or regression | `loss` | `LossMetricName` | `loss_metric` | mae (for Python `XGPClassifier` defaults to logloss) |
| Evaluation metric | `eval` | `EvalMetricName` | `eval_metric` (in `fit`) | Same as loss metric |
| Parsimony coefficient | `parsimony` | `ParsimonyCoefficient` | `parsimony_coeff` | 0.00001 |
| Polish the best program | `polish` | `PolishBest` | `polish_best` | true |

Because XGP doesn't require the loss metric to be differentiable, you can use any of the available loss metrics. If you don't specify an evaluation metric, it defaults to the loss metric.

@@ -36,7 +37,6 @@ These parameters are used to generate the initial set of programs. They will als
| Number of populations | `pops` | `NPopulations` | `n_populations` | 1 |
| Number of individuals per population | `indis` | `NIndividuals` | `n_individuals` | 100 |
| Number of generations | `gens` | `NGenerations` | `n_generations` | 30 |
| Number of polish generations | `polish_gens` | `NPolishGenerations` | `n_polish_generations` | 0 |
| Hoist mutation probability | `p_hoist_mut` | `PHoistMutation` | `p_hoist_mutation` | 0.1 |
| Subtree mutation probability | `p_sub_mut` | `PSubtreeMutation` | `p_sub_tree_mutation` | 0.1 |
| Point mutation probability | `p_point_mut` | `PPointMutation` | `p_point_mutation` | 0.1 |
136 changes: 83 additions & 53 deletions estimator.go
@@ -24,7 +24,6 @@ type Estimator struct {
Functions []op.Operator
Initializer Initializer
GA *gago.GA
PolishGA *gago.GA
PointMutation PointMutation
SubtreeMutation SubtreeMutation
HoistMutation HoistMutation
@@ -50,6 +49,56 @@ func (est Estimator) BestProgram() Program {
return *est.GA.HallOfFame[0].Genome.(*Program)
}

func (est Estimator) progress(start time.Time) string {
// Add time spent
var message = fmtDuration(time.Since(start))
// Add training error
var (
best = est.BestProgram()
yTrainPred, err = best.Predict(est.XTrain, est.EvalMetric.NeedsProbabilities())
)
if err != nil {
return ""
}
trainScore, err := est.EvalMetric.Apply(est.YTrain, yTrainPred, nil)
if err != nil {
return ""
}
message += fmt.Sprintf(", train %s: %.5f", est.EvalMetric.String(), trainScore)
// Add validation error
if est.XVal != nil && est.YVal != nil {
yEvalPred, err := best.Predict(est.XVal, est.EvalMetric.NeedsProbabilities())
if err != nil {
return ""
}
evalScore, err := est.EvalMetric.Apply(est.YVal, yEvalPred, est.WVal)
if err != nil {
return ""
}
message += fmt.Sprintf(", val %s: %.5f", est.EvalMetric.String(), evalScore)
}
return message
}

// polishBest takes the best Program and polishes it.
func (est *Estimator) polishBest() error {
var (
best = *est.GA.HallOfFame[0].Genome.(*Program)
polished, err = polishProgram(best)
)
if err != nil {
return err
}
fitness, err := polished.Evaluate()
if err != nil {
return err
}
if fitness < est.GA.HallOfFame[0].Fitness {
est.GA.HallOfFame[0].Genome = &polished
}
return nil
}

// Fit an Estimator to a dataset.
func (est *Estimator) Fit(
// Required arguments
@@ -97,76 +146,48 @@ func (est *Estimator) Fit(
var start = time.Now()
progress = uiprogress.New()
progress.Start()
bar = progress.AddBar(int(est.NGenerations))
var steps = int(est.NGenerations)
if est.PolishBest {
steps++
}
bar = progress.AddBar(steps)
bar.PrependCompleted()
bar.AppendFunc(func(b *uiprogress.Bar) string {
// Add time spent
var message = fmtDuration(time.Since(start))
// Add training error
var (
best = est.BestProgram()
yTrainPred, err = best.Predict(est.XTrain, est.EvalMetric.NeedsProbabilities())
)
if err != nil {
return ""
}
trainScore, err := est.EvalMetric.Apply(est.YTrain, yTrainPred, nil)
if err != nil {
return ""
}
message += fmt.Sprintf(", train %s: %.5f", est.EvalMetric.String(), trainScore)
// Add validation error
if est.XVal != nil && est.YVal != nil {
yEvalPred, err := best.Predict(est.XVal, est.EvalMetric.NeedsProbabilities())
if err != nil {
return ""
}
evalScore, err := est.EvalMetric.Apply(est.YVal, yEvalPred, est.WVal)
if err != nil {
return ""
}
message += fmt.Sprintf(", val %s: %.5f", est.EvalMetric.String(), evalScore)
}
return message
return est.progress(start)
})
}

// Make sure the progress bar will stop
if verbose {
defer func() { progress.Stop() }()
}

for i := uint(0); i < est.NGenerations; i++ {
// Update progress
if verbose {
bar.Incr()
}

// Make sure each tree has at least a height of 2
/*for j, pop := range est.GA.Populations {
for k, indi := range pop.Individuals {
var prog = indi.Genome.(*Program)
if prog.Tree.Height() < 2 { // MAGIC
est.SubtreeMutation.Apply(&prog.Tree, pop.RNG)
est.GA.Populations[j].Individuals[k].Evaluate()
}
}
}*/

// Evolve a new generation
err = est.GA.Evolve()
if err != nil {
return prog, err
return
}
}

// Close the progress bar
if verbose {
progress.Stop()
// Polish the best Program
if est.PolishBest {
err = est.polishBest()
if err != nil {
return
}
if verbose {
bar.Incr()
}
}

// Extract the best Program
var best = est.BestProgram()

// Polish the best Program
best, err = polishProgram(best)
if err != nil {
return best, err
}

return best, nil
}

@@ -185,7 +206,11 @@ func (est Estimator) newFunction(rng *rand.Rand) op.Operator {
}

func (est Estimator) newFunctionOfArity(arity uint, rng *rand.Rand) op.Operator {
return est.fm[arity][rng.Intn(len(est.fm[arity]))]
n := len(est.fm[arity])
if n == 0 {
return nil
}
return est.fm[arity][rng.Intn(n)]
}

func (est Estimator) newOperator(rng *rand.Rand) op.Operator {
@@ -220,6 +245,11 @@ func (est Estimator) mutateOperator(operator op.Operator, rng *rand.Rand) op.Ope
return est.newVar(rng)
default:
newOp := est.newFunctionOfArity(operator.Arity(), rng)
// newFunctionOfArity might return nil if there are no available
// operators of the given arity
if newOp == nil {
return operator
}
// Don't forget to set the new Operator's operands
for i := uint(0); i < operator.Arity(); i++ {
newOp = newOp.SetOperand(i, operator.Operand(i))