In [1]:
    // Spark bootstrap: build a configuration carrying only the application name,
    // then create the SparkContext used by every later cell.
    var SparkConf = require('eclairjs/SparkConf');
    var SparkContext = require('eclairjs/SparkContext');
    var sparkConf = new SparkConf().setAppName("JavaScript Gradient Boosted Tree Regressor Example");
    var sc = new SparkContext(sparkConf);

    // Classes used by the cells below: SQL entry point, feature indexing,
    // the GBT regressor, pipeline assembly and regression scoring.
    var SQLContext = require('eclairjs/sql/SQLContext');
    var VectorIndexer = require('eclairjs/ml/feature/VectorIndexer');
    var GBTRegressor = require('eclairjs/ml/regression/GBTRegressor');
    var Pipeline = require('eclairjs/ml/Pipeline');
    var RegressionEvaluator = require('eclairjs/ml/evaluation/RegressionEvaluator');

    // `result` collects the outputs assigned by later cells
    // (predictionDF, rmse, gbtModel).
    var result = {};
    // SQL entry point bound to the SparkContext created above.
    var sqlContext = new SQLContext(sc);


 Load and parse the data file, converting it to a DataFrame.


In [2]:
    // Build a reader for the "libsvm" format first, then load the sample file
    // through it; `data` is the input for the indexer and the train/test split.
    var libsvmReader = sqlContext.read().format("libsvm");
    var data = libsvmReader.load("examples/data/mllib/sample_libsvm_data.txt");


 Automatically identify categorical features, and index them.
 Set maxCategories to 4 so that features with more than 4 distinct values are treated as continuous.


In [3]:
    // Configure the indexer one setting at a time (each setter's return value is
    // carried forward), then fit it on the full dataset.
    var indexerSpec = new VectorIndexer();
    indexerSpec = indexerSpec.setInputCol("features");
    indexerSpec = indexerSpec.setOutputCol("indexedFeatures");
    // Features with more than 4 distinct values are treated as continuous.
    indexerSpec = indexerSpec.setMaxCategories(4);
    var featureIndexer = indexerSpec.fit(data);


 Split the data into training and test sets (30% held out for testing)


In [4]:
    // Weights passed to randomSplit: the first part is used for training,
    // the second is held out for testing.
    var splitWeights = [0.7, 0.3];
    var splits = data.randomSplit(splitWeights);
    var trainingData = splits[0],
        testData = splits[1];


 Configure the GBT regressor. It is trained later, when the pipeline is fitted.


In [5]:
    // Regressor settings applied step by step; each setter's return value is
    // kept so `gbt` always refers to the most recently returned object.
    var gbt = new GBTRegressor();
    gbt = gbt.setLabelCol("label");
    // Consume the column produced by the feature indexer.
    gbt = gbt.setFeaturesCol("indexedFeatures");
    gbt = gbt.setMaxIter(10);


 Chain indexer and GBT in a Pipeline


In [6]:
    // Stage order matters: the feature indexer runs before the regressor.
    var pipelineStages = [featureIndexer, gbt];
    var pipeline = new Pipeline().setStages(pipelineStages);


 Train model.  This also runs the indexer.


In [7]:
    // Fit the whole pipeline on the training split; `model` is reused below for
    // prediction and for extracting the fitted GBT stage.
    var model = pipeline.fit(trainingData);


 Make predictions.


In [8]:
    // Apply the fitted pipeline model to the held-out test split.
    var predictions = model.transform(testData);


 Select example rows to display.


In [9]:
    // Keep only the columns of interest; stored on `result` so a later cell can
    // display the first rows.
    result.predictionDF = predictions.select("prediction", "label", "features");


 Select (prediction, true label) and compute the test error (RMSE).


In [10]:
    // Score the predictions, report the results, and always shut Spark down.
    // The work is wrapped in try/finally so the SparkContext is stopped even if
    // evaluation, show() or print() throws; otherwise the context would be left
    // running after a failure.
    try {
        // Compare the "prediction" column against "label" using the "rmse" metric.
        var evaluator = new RegressionEvaluator()
            .setLabelCol("label")
            .setPredictionCol("prediction")
            .setMetricName("rmse");
        result.rmse = evaluator.evaluate(predictions);
        // Index 1 is the GBT stage: the pipeline was built as [featureIndexer, gbt].
        result.gbtModel = model.stages()[1];

        result.predictionDF.show(5);
        print("Root Mean Squared Error (RMSE) on test data = " + result.rmse);
        print("Learned regression GBT model:\n" + result.gbtModel.toDebugString());
    } finally {
        sc.stop();
    }
