In [1]:
    // Load every EclairJS module this example needs before anything is constructed.
    var SparkConf = require('eclairjs/SparkConf');
    var SparkContext = require('eclairjs/SparkContext');
    var SQLContext = require('eclairjs/sql/SQLContext');
    var LinearRegression = require("eclairjs/ml/regression/LinearRegression");
    var ParamGridBuilder = require("eclairjs/ml/tuning/ParamGridBuilder");
    var TrainValidationSplit = require("eclairjs/ml/tuning/TrainValidationSplit");
    var RegressionEvaluator = require("eclairjs/ml/evaluation/RegressionEvaluator");

    // Create the Spark context for the "Example" application and wrap it in a SQLContext.
    var sparkConf = new SparkConf().setAppName("Example");
    var sc = new SparkContext(sparkConf);
    var sqlContext = new SQLContext(sc);

    // Read the sample regression data set using the "libsvm" format.
    var dataPath = "examples/data/mllib/sample_linear_regression_data.txt";
    var data = sqlContext.read().format("libsvm").load(dataPath);


 Prepare training and test data.


In [2]:
    // Split the data with weights 0.9 / 0.1: the first part is used for training,
    // the second for testing. 12345 is passed as the second argument
    // (presumably the random seed, as in Spark's randomSplit -- confirm).
    var splitWeights = [0.9, 0.1];
    var splits = data.randomSplit(splitWeights, 12345);
    var training = splits[0],
        test = splits[1];

    // The estimator whose parameters are tuned in the following cells.
    var lr = new LinearRegression();


 We use a ParamGridBuilder to construct a grid of parameters to search over.
 TrainValidationSplit will try all combinations of values and determine the best model using
 the evaluator.


In [3]:
    // Build the search grid one parameter at a time; each step continues from
    // the builder returned by the previous addGrid call.
    var gridBuilder = new ParamGridBuilder();
    gridBuilder = gridBuilder.addGrid(lr.regParam(), [0.1, 0.01]);
    // No explicit values are given for fitIntercept (presumably the builder
    // then tries both true and false -- confirm against the EclairJS API).
    gridBuilder = gridBuilder.addGrid(lr.fitIntercept());
    gridBuilder = gridBuilder.addGrid(lr.elasticNetParam(), [0.0, 0.5, 1.0]);

    var paramGrid = gridBuilder.build();


 In this case the estimator is simply the linear regression.
 A TrainValidationSplit requires an Estimator, a set of Estimator ParamMaps, and an Evaluator.


In [4]:
    // Configure the tuner step by step: the estimator to fit, the evaluator
    // that scores each candidate, and the parameter grid to search.
    var trainValidationSplit = new TrainValidationSplit();
    trainValidationSplit = trainValidationSplit.setEstimator(lr);
    trainValidationSplit = trainValidationSplit.setEvaluator(new RegressionEvaluator());
    trainValidationSplit = trainValidationSplit.setEstimatorParamMaps(paramGrid);
    // 80% of the data is used for training and the remaining 20% for validation.
    trainValidationSplit = trainValidationSplit.setTrainRatio(0.8);


 Run train validation split, and choose the best set of parameters.


In [5]:
    // Fit the configured train-validation split on the training portion of the
    // data; the returned model is used below to make predictions on the test set.
    var model = trainValidationSplit.fit(training);


 Make predictions on test data. model is the model with the combination of parameters
 that performed best.


In [6]:
    // Apply the fitted model to the held-out test data and keep only the
    // columns of interest. The selection is assigned to `result` (instead of
    // being returned) so that the statements below are reachable and `result`
    // is actually defined when it is shown.
    var result = model.transform(test)
        .select("features", "label", "prediction");
    result.show();

    // Stop the SparkContext now that the example has finished.
    sc.stop();
