In [1]:

    // Name the Spark application and create the SparkContext (`sc`) that the
    // later cells pass to the data loader and to model save/load.
    var sparkConf = new SparkConf().setAppName("DecisionTreeClassificationExample");
    var sc = new SparkContext(sparkConf);


 Load and parse the data file.


In [2]:
    // Start from the bundled sample data set; a second entry in `args`, when
    // one is present, overrides it.
    var datapath = "examples/data/mllib/sample_libsvm_data.txt";
    if (typeof args !== "undefined" && args.length > 1) {
        datapath = args[1];
    }

    // Read the LIBSVM-formatted file through the shared SparkContext.
    var data = MLUtils.loadLibSVMFile(sc, datapath);


 Split the data into training and test sets (30% held out for testing)


In [3]:
    // randomSplit is given the weights [0.7, 0.3]; the first resulting split
    // is used for training and the second is held out for testing.
    var splits = data.randomSplit([0.7, 0.3]);
    var trainingData = splits[0];
    var testData = splits[1];


 Set parameters.
  Empty categoricalFeaturesInfo indicates all features are continuous.


In [4]:
    // Limits handed to the trainer: bin count and maximum tree depth.
    var maxBins = 32;
    var maxDepth = 5;

    // Impurity measure handed to the trainer.
    var impurity = "gini";

    // Left empty so that every feature is treated as continuous.
    var categoricalFeaturesInfo = {};

    // Number of label classes.
    var numClasses = 2;


 Train a DecisionTree model for classification.


In [5]:
    // Fit the classifier on the training split, passing the parameters in the
    // order: numClasses, categoricalFeaturesInfo, impurity, maxDepth, maxBins.
    var model = DecisionTree.trainClassifier(trainingData, numClasses,
        categoricalFeaturesInfo, impurity, maxDepth, maxBins);


 Evaluate the model on the test instances and compute the test error.


In [6]:
    // Pair each test point's predicted value with its actual label. `model`
    // is supplied to the callback through the bound-arguments array.
    var predictionAndLabel = testData.mapToPair(function (point, model) {
        var predicted = model.predict(point.getFeatures());
        var actual = point.getLabel();
        return new Tuple(predicted, actual);
    }, [model]);


In [7]:
    // Keep only the (prediction, label) pairs where the two values differ.
    // Loose inequality is retained from the original because the element
    // types are not visible here -- TODO confirm before tightening to !==.
    var misclassified = predictionAndLabel.filter(function (tuple2) {
        return (tuple2[0] != tuple2[1]);
    });

    // Test error = misclassified count / total number of test instances.
    var testErr = misclassified.count() / testData.count();

    // Bundle the error rate with the trained model. `result` is declared
    // exactly once (it previously also named the filtered RDD above) and is
    // what the following cell reads.
    var ret = {};
    ret.testErr = testErr;
    ret.model = model;
    var result = ret;

    print("Test Error: " + result.testErr);
    print("Learned classification tree model:\n" + result.model.toDebugString());


 Save and load model


In [8]:
    // Single source of truth for the location used by both save and load.
    var modelPath = "target/tmp/myDecisionTreeClassificationModel";
    try {
        // Persist the trained model, then read it back from the same path.
        result.model.save(sc, modelPath);
        var sameModel = DecisionTreeModel.load(sc, modelPath);
    } finally {
        // Stop the SparkContext even when save/load throws, so a failure
        // here does not leave the context running.
        sc.stop();
    }
