In [1]:

    var SparkConf = require('eclairjs/SparkConf');
    var SparkContext = require('eclairjs/SparkContext');
    // Name the application in the Spark configuration, then create the
    // SparkContext (`sc`) from that configuration.
    var sparkConf = new SparkConf().setAppName("JavaScript TrainValidationSplitExample");
    var sc = new SparkContext(sparkConf);


In [2]:
    var SQLContext = require('eclairjs/sql/SQLContext');
    var LinearRegression = require('eclairjs/ml/regression/LinearRegression');
    var ParamGridBuilder = require('eclairjs/ml/tuning/ParamGridBuilder');
    var TrainValidationSplit = require('eclairjs/ml/tuning/TrainValidationSplit');
    var RegressionEvaluator = require('eclairjs/ml/evaluation/RegressionEvaluator');


In [3]:
    var RowFactory = require('eclairjs/sql/RowFactory');
    var Vectors = require('eclairjs/mllib/linalg/Vectors');
    var StructField = require('eclairjs/sql/types/StructField');
    var DataTypes = require('eclairjs/sql/types').DataTypes;
    var VectorUDT = require('eclairjs/mllib/linalg/VectorUDT');
    var VectorAssembler = require('eclairjs/ml/feature/VectorAssembler');

    // SQL entry point built on top of the existing SparkContext.
    var sqlContext = new SQLContext(sc);

    // Read the sample data set through the "libsvm" reader format.
    var data = sqlContext
      .read()
      .format("libsvm")
      .load("examples/data/mllib/sample_libsvm_data.txt");


 Prepare training and test data.


In [4]:
    // Estimator whose parameters are tuned in the following cells.
    var lr = new LinearRegression();

    // Split the data with weights 0.9 / 0.1: the first part is used for
    // training, the second for testing. 12345 is presumably the random seed
    // (as in Spark's randomSplit) — confirm against the EclairJS API.
    var splits = data.randomSplit([0.9, 0.1], 12345);
    var training = splits[0];
    var test = splits[1];


 We use a ParamGridBuilder to construct a grid of parameters to search over.
 TrainValidationSplit will try all combinations of values and determine the best model using
 the evaluator.


In [5]:
    // Parameter grid for the linear regression: two regParam values, the
    // fitIntercept param (added without an explicit value list), and three
    // elasticNetParam values.
    // NOTE(review): in Spark, addGrid on a boolean param tries both true and
    // false — confirm EclairJS behaves the same way.
    var paramGrid = new ParamGridBuilder()
      .addGrid(lr.regParam(), [0.1, 0.01])
      .addGrid(lr.fitIntercept())
      .addGrid(lr.elasticNetParam(), [0.0, 0.5, 1.0])
      .build();


 In this case the estimator is simply the linear regression.
 A TrainValidationSplit requires an Estimator, a set of Estimator ParamMaps, and an Evaluator.


In [6]:
    // Wire the three pieces together: the linear regression as the estimator,
    // a RegressionEvaluator to score candidates, and the parameter grid built
    // above as the set of combinations to try.
    var trainValidationSplit = new TrainValidationSplit()
      .setEstimator(lr)
      .setEvaluator(new RegressionEvaluator())
      .setEstimatorParamMaps(paramGrid);


 80% of the data will be used for training and the remaining 20% for validation.


In [7]:
    // A train ratio of 0.8 means 80% of the input is used for training each
    // candidate and the remaining 20% for validating it.
    trainValidationSplit.setTrainRatio(0.8);

    // Debug output: log the string form of the training DataFrame before fitting.
    print("TRAINING=" + training);


 Run train validation split, and choose the best set of parameters.


In [8]:
    // Fit the train/validation split on the training data; the fitted result
    // is kept in `model` and used for prediction below.
    var model = trainValidationSplit.fit(training);


 Make predictions on the test data. `model` is the model with the combination of parameters
 that performed best.


In [9]:
    // Apply the fitted model to the test split and keep only the columns
    // needed to compare each label with its prediction.
    var output = model
      .transform(test)
      .select("features", "label", "prediction");


In [10]:
// `result` is an alias for the predictions in `output`; show() displays them.
var result = output;

    result.show();


 $example off$


In [11]:
    // Shut down the SparkContext created at the start of the notebook.
    sc.stop();
