In [1]:

    // Load every EclairJS module used by this notebook up front.
    var SparkConf = require('eclairjs/SparkConf');
    var SparkContext = require('eclairjs/SparkContext');
    var MLUtils = require('eclairjs/mllib/MLUtils');
    var RandomForest = require('eclairjs/mllib/tree/RandomForest');
    var Tuple = require('eclairjs/Tuple');

    // Configure the application and create the Spark context shared by all later cells.
    var sparkConf = new SparkConf()
        .setAppName("Random Forest Regression Example");
    var sc = new SparkContext(sparkConf);

    // Location of the sample data set in LIBSVM format, loaded by the next cell.
    var datapath = "../../data/mllib/sample_libsvm_data.txt";


In [2]:
    // Read the LIBSVM-format file at `datapath` through the Spark context `sc`.
    // The resulting data set is split into training and test parts by the next cell.
    var data = MLUtils.loadLibSVMFile(sc, datapath);


 Split the data randomly into training and test sets (roughly 30% held out for testing).


In [3]:
    // Randomly divide `data` using the weights 0.7 and 0.3.
    // NOTE(review): no seed is passed here, so the split presumably differs between runs — confirm.
    var splits = data.randomSplit([0.7, 0.3]);
    // The first part (weight 0.7) is used to fit the model, the second (weight 0.3) to evaluate it.
    var trainingData = splits[0];
    var testData = splits[1];


 Set parameters.
 Empty categoricalFeaturesInfo indicates all features are continuous.


In [4]:
    // Hyperparameters handed, in this order, to RandomForest.trainRegressor by the training cell.
    var categoricalFeaturesInfo = {}; // Empty: every feature is treated as continuous.
    var numTrees = 3; // Use more in practice.
    var featureSubsetStrategy = "auto"; // Let the algorithm choose.
    var impurity = "variance"; // Name of the impurity measure passed to the regressor.
    var maxDepth = 4; // Per the Spark MLlib docs: maximum depth of each tree.
    var maxBins = 32; // Per the Spark MLlib docs: maximum number of bins used when splitting features.
    var seed = 12345; // Fixed random seed passed to the trainer.


 Train a RandomForest model.


In [5]:
    // Fit the regression forest on the training split with the parameters set in the previous cell.
    var model = RandomForest.trainRegressor(
        trainingData, categoricalFeaturesInfo,   // input data and feature typing
        numTrees, featureSubsetStrategy,         // forest size and feature sampling
        impurity, maxDepth, maxBins,             // split criterion and per-tree size settings
        seed                                     // random seed
    );


 Evaluate the model on the test instances and compute the test mean squared error (MSE).


In [6]:
    // Pair each test point's predicted value with its true label.
    // `model` reaches the function as its second parameter via the [model] array given to mapToPair.
    var predictionAndLabel = testData.mapToPair(function (point, model) {
        var predicted = model.predict(point.getFeatures());
        return new Tuple(predicted, point.getLabel());
    }, [model]);

    // Mean squared error: the sum of squared (prediction - label) differences divided by the
    // number of test instances. The intermediate values stay local to the function expression
    // so that no extra notebook-level names are introduced.
    var testMSE = (function () {
        var squaredErrors = predictionAndLabel.map(function (pair) {
            var delta = pair[0] - pair[1];
            return delta * delta;
        });
        var total = squaredErrors.reduce(function (lhs, rhs) {
            return lhs + rhs;
        });
        return total / testData.count();
    }());

    // Bundle the metric with the trained model, then report both.
    var ret = {
        testMSE: testMSE,
        model: model
    };
    var result = ret;
    print("Test Mean Squared Error: " + result.testMSE);
    print("Learned regression forest model:\n" + result.model.toDebugString());


 Save the trained model, then load it back.


In [7]:
    // Loader class needed to read the persisted model back in.
    var RandomForestModel = require('eclairjs/mllib/tree/model/RandomForestModel');

    // Persist the trained forest, then restore it from the same location.
    result.model.save(sc, "target/tmp/myRandomForestRegressionModel");
    var sameModel = RandomForestModel.load(sc, "target/tmp/myRandomForestRegressionModel");

    // Final step of the notebook: shut the Spark context down.
    sc.stop();
