In [1]:

    var SparkConf = require('eclairjs/SparkConf');
    var SparkContext = require('eclairjs/SparkContext');

    // Build a Spark configuration named "Example" and start the SparkContext
    // from it; `sc` is the context the later cells build on and finally stop.
    var sparkConf = new SparkConf().setAppName("Example");
    var sc = new SparkContext(sparkConf);
    var SQLContext = require('eclairjs/sql/SQLContext');
    var functions = require('eclairjs/sql/functions');
    var LogisticRegression = require("eclairjs/ml/classification/LogisticRegression");


var Vectors = require("eclairjs/mllib/linalg/Vectors");


In [2]:

    // Create the SQLContext on top of the SparkContext `sc`; it is the entry
    // point used below to read the training data.
    var sqlContext = new SQLContext(sc);


 Load the training data and configure the logistic regression estimator


In [3]:
    // Read the sample dataset using the "libsvm" data source format.
    var training = sqlContext
        .read()
        .format("libsvm")
        .load("examples/data/mllib/sample_libsvm_data.txt");

    // Configure the estimator one parameter at a time. Each setter's return
    // value is kept, so this matches the chained form regardless of whether
    // the setters return the same wrapper or a new one.
    var lr = new LogisticRegression();
    lr = lr.setMaxIter(10);
    lr = lr.setRegParam(0.3);
    lr = lr.setElasticNetParam(0.8);


 Fit the model


In [4]:
    // Fit the configured estimator `lr` on the `training` data; the fitted
    // model is kept in `lrModel` for the summary and threshold steps below.
    var lrModel = lr.fit(training);


 Extract the summary from the LogisticRegressionModel instance fitted in the previous cell


In [5]:
    // Summary object exposed by the fitted model; the following cells query
    // it for the objective history, ROC data and F-Measure table.
    var binarySummary = lrModel.summary();

    // Plain object that collects the metrics gathered in the following cells.
    var ret = {};


 Obtain the loss per iteration.


In [6]:
    // Store the summary's objective history on `ret`; it is iterated with
    // forEach and printed entry by entry in the final cell.
    ret.objectiveHistory = binarySummary.objectiveHistory();


 Obtain the receiver-operating characteristic as a dataframe and areaUnderROC.


In [7]:
    // Keep the ROC result (later displayed with show() and select("FPR"))
    // and the area-under-ROC value on `ret` for the final report.
    ret.roc = binarySummary.roc();
    ret.areaUnderROC = binarySummary.areaUnderROC();


 Get the threshold corresponding to the maximum F-Measure and set it as the threshold of the
 fitted model, then print the collected metrics and stop the Spark context.


In [8]:
    // F-Measure table from the summary; queried below through its
    // "F-Measure" and "threshold" columns.
    var fMeasure = binarySummary.fMeasureByThreshold();

    // Largest F-Measure value in the table.
    var maxFMeasure = fMeasure
        .select(functions.max("F-Measure"))
        .head()
        .getDouble(0);

    // Threshold of the first row whose F-Measure equals that maximum.
    var bestThreshold = fMeasure
        .where(fMeasure.col("F-Measure").equalTo(maxFMeasure))
        .select("threshold")
        .head()
        .getDouble(0);

    // Apply the selected threshold to the fitted model.
    lrModel.setThreshold(bestThreshold);

    // Report the collected metrics: each objective-history entry, the ROC
    // data (full, then only the "FPR" column), and the area under the curve.
    var result = ret;
    result.objectiveHistory.forEach(function printLoss(loss) {
        print(loss);
    });
    result.roc.show();
    result.roc.select("FPR").show();
    print(result.areaUnderROC);

    // Shut down the Spark context now that the example is finished.
    sc.stop();
