In [1]:
    // Spark configuration and context for this example.
    var SparkConf = require('eclairjs/SparkConf');
    var SparkContext = require('eclairjs/SparkContext');
    var appName = "JavaScript Decision Tree Regression Example";
    var sparkConf = new SparkConf().setAppName(appName);
    var sc = new SparkContext(sparkConf);

    // SQL and ML classes used by the rest of the example.
    var SQLContext = require('eclairjs/sql/SQLContext');
    var VectorIndexer = require('eclairjs/ml/feature/VectorIndexer');
    var DecisionTreeRegressor = require('eclairjs/ml/regression/DecisionTreeRegressor');
    var RegressionEvaluator = require('eclairjs/ml/evaluation/RegressionEvaluator');
    var Pipeline = require('eclairjs/ml/Pipeline');

    // SQLContext layered on the SparkContext created above.
    var sqlContext = new SQLContext(sc);


 Load the data stored in LIBSVM format as a DataFrame.


In [2]:
    // Reader configured for the LIBSVM format, then pointed at the sample file.
    var libsvmReader = sqlContext.read().format("libsvm");
    var data = libsvmReader.load("examples/data/mllib/sample_libsvm_data.txt");


 Automatically identify categorical features, and index them.
 Set maxCategories so features with > 4 distinct values are treated as continuous.


In [3]:
    // Configure the indexer: read "features", write "indexedFeatures",
    // with maxCategories set to 4.
    var indexerEstimator = new VectorIndexer()
        .setInputCol("features")
        .setOutputCol("indexedFeatures")
        .setMaxCategories(4);
    // Fit on the full data set (before it is split into training and test sets).
    var featureIndexer = indexerEstimator.fit(data);


 Split the data into training and test sets (30% held out for testing)


In [4]:
    // Split weights: first entry for training, second for testing.
    var splitWeights = [0.7, 0.3];
    var splits = data.randomSplit(splitWeights);
    var trainingData = splits[0];
    var testData = splits[1];


 Configure a DecisionTree regressor that reads the indexed features. It is trained later, when the Pipeline is fit.


In [5]:
    // Decision tree regressor that reads its features from the "indexedFeatures" column.
    // Nothing is trained here; this only configures the estimator.
    var dt = new DecisionTreeRegressor()
        .setFeaturesCol("indexedFeatures");


 Chain indexer and tree in a Pipeline


In [6]:
    // Stage order: the feature indexer runs first, then the decision tree.
    var pipelineStages = [featureIndexer, dt];
    var pipeline = new Pipeline().setStages(pipelineStages);


 Train model.  This also runs the indexer.


In [7]:
    // Fit the pipeline on the training split only; the result is the fitted pipeline model.
    var model = pipeline.fit(trainingData);


 Make predictions.


In [8]:
    // Run the fitted pipeline on the held-out test set and start collecting
    // the example's results in `ret`.
    var predictions = model.transform(testData);
    var ret = { predictions: predictions };


 Select (prediction, true label) and compute the test error (RMSE).


In [9]:
    // Evaluator comparing the "prediction" column against "label" using the "rmse" metric.
    var evaluator = new RegressionEvaluator()
        .setLabelCol("label")
        .setPredictionCol("prediction")
        .setMetricName("rmse");
    var rmse = evaluator.evaluate(predictions);
    var fittedStages = model.stages();
    ret.rmse = rmse;
    // Index 1 is the tree stage; index 0 is the feature indexer.
    ret.treeModel = fittedStages[1];

var result = ret;


 Select example rows to display.


In [10]:
    // Preview the first 5 scored rows. "prediction" is selected alongside the
    // true label and the features so the preview actually shows model output.
    result.predictions.select("prediction", "label", "features").show(5);
    print("Root Mean Squared Error (RMSE) on test data = " + result.rmse);
    print("Learned regression tree model:\n" + result.treeModel.toDebugString());
    // Stop the Spark context now that the example has finished.
    sc.stop();
