Skip to content
Permalink
Browse files

re-enabling windows tests for lightgbm

  • Loading branch information...
imatiach-msft authored and mhamilton723 committed Aug 31, 2019
1 parent 8361ead commit 671b68892ace5967e60c7a064effd42dd5a21ec7
@@ -9,14 +9,17 @@ import com.microsoft.ml.spark.core.schema.ImageSchemaUtils
import com.microsoft.ml.spark.core.test.base.TestBase
import com.microsoft.ml.spark.io.binary.FileReaderUtils
import com.microsoft.ml.spark.io.image.ImageUtils
import com.microsoft.ml.spark.lightgbm.split1.OsUtils
import org.apache.commons.codec.binary.Base64
import org.apache.commons.io.IOUtils
import org.apache.spark.ml.image.ImageSchema
import org.apache.spark.ml.source.image.PatchedImageFileFormat
import org.apache.spark.sql.functions.{col, to_json, udf}
import org.apache.spark.sql.types.StringType

/** Mixin exposing whether the current JVM is running on a Windows host. */
trait OsUtils {
  /** True when the `os.name` system property identifies a Windows operating system.
    *
    * The name is lower-cased with `Locale.ROOT` so the result does not depend on the JVM's
    * default locale, and it is matched as a prefix rather than a substring so that names that
    * merely contain "win" (for example "Darwin") are not reported as Windows. A missing
    * property is treated as "not Windows".
    */
  val isWindows: Boolean =
    Option(System.getProperty("os.name"))
      .exists(_.toLowerCase(java.util.Locale.ROOT).startsWith("windows"))
}

class ImageReaderSuite extends TestBase with FileReaderUtils with OsUtils {

val imageFormat: String = classOf[PatchedImageFileFormat].getName
@@ -26,10 +26,6 @@ import org.apache.spark.sql.functions._
import org.scalactic.Equality
import org.scalatest.Assertion

/** Mixin exposing whether the current JVM is running on a Windows host. */
trait OsUtils {
  /** True when the `os.name` system property identifies a Windows operating system.
    *
    * The name is lower-cased with `Locale.ROOT` so the result does not depend on the JVM's
    * default locale, and it is matched as a prefix rather than a substring so that names that
    * merely contain "win" (for example "Darwin") are not reported as Windows. A missing
    * property is treated as "not Windows".
    */
  val isWindows: Boolean =
    Option(System.getProperty("os.name"))
      .exists(_.toLowerCase(java.util.Locale.ROOT).startsWith("windows"))
}

// scalastyle:off magic.number
trait LightGBMTestUtils extends TestBase {

@@ -121,7 +117,7 @@ trait LightGBMTestUtils extends TestBase {
// scalastyle:off magic.number
/** Tests to validate the functionality of the LightGBM classifier module. */
class VerifyLightGBMClassifier extends Benchmarks with EstimatorFuzzing[LightGBMClassifier]
with OsUtils with LightGBMTestUtils {
with LightGBMTestUtils {

lazy val pimaDF: DataFrame = loadBinary("PimaIndian.csv", "Diabetes mellitus").cache()
lazy val taskDF: DataFrame = loadBinary("task.train.csv", "TaskFailed10").cache()
@@ -174,17 +170,14 @@ class VerifyLightGBMClassifier extends Benchmarks with EstimatorFuzzing[LightGBM
colsToVerify = Array("Diabetes pedigree function", "Age (years)"))

// Registers a test tagged TestBase.Extended that calls verifyBenchmarks().
// The assume(!isWindows) guard lets the body proceed only when isWindows is false.
// NOTE(review): assume is presumably ScalaTest's Assertions.assume, which cancels the test
// rather than failing it — confirm that TestBase mixes it in.
test("Compare benchmark results file to generated file", TestBase.Extended) {
assume(!isWindows)
verifyBenchmarks()
}

/** Runs the inherited `testExperiments()` only when `isWindows` is false.
  *
  * NOTE(review): `assume` is presumably ScalaTest's, which cancels the test instead of
  * failing it when the condition does not hold — confirm against the base suite.
  */
override def testExperiments(): Unit = {
assume(!isWindows)
super.testExperiments()
}

/** Runs the inherited `testSerialization()` only when `isWindows` is false.
  *
  * NOTE(review): `assume` is presumably ScalaTest's, which cancels the test instead of
  * failing it when the condition does not hold — confirm against the base suite.
  */
override def testSerialization(): Unit = {
assume(!isWindows)
super.testSerialization()
}

@@ -200,7 +193,6 @@ class VerifyLightGBMClassifier extends Benchmarks with EstimatorFuzzing[LightGBM
}

test("Verify LightGBM Classifier can be run with TrainValidationSplit") {
assume(!isWindows)
val model = baseModel.setUseBarrierExecutionMode(true)

val paramGrid = new ParamGridBuilder()
@@ -228,7 +220,6 @@ class VerifyLightGBMClassifier extends Benchmarks with EstimatorFuzzing[LightGBM
}

// Registered with ignore(...) rather than test(...). For each batch-count setting (0, 2 and 10)
// the body sets that count on baseModel and passes the model and pimaDF to
// assertFitWithoutErrors. The body is guarded by assume(!isWindows).
ignore("Verify LightGBM Classifier with batch training") {
  assume(!isWindows)
  for (batchCount <- Array(0, 2, 10)) {
    assertFitWithoutErrors(baseModel.setNumBatches(batchCount), pimaDF)
  }
}
@@ -244,7 +235,6 @@ class VerifyLightGBMClassifier extends Benchmarks with EstimatorFuzzing[LightGBM
}

test("Verify LightGBM Classifier continued training with initial score") {
assume(!isWindows)
val convertUDF = udf((vector: DenseVector) => vector(1))
val scoredDF1 = baseModel.fit(pimaDF).transform(pimaDF)
val df2 = scoredDF1.withColumn(initScoreCol, convertUDF(col(rawPredCol)))
@@ -255,7 +245,6 @@ class VerifyLightGBMClassifier extends Benchmarks with EstimatorFuzzing[LightGBM
}

test("Verify LightGBM Classifier with weight column") {
assume(!isWindows)
val model = baseModel.setWeightCol(weightCol)

val df = pimaDF.withColumn(weightCol, lit(1.0))
@@ -270,7 +259,6 @@ class VerifyLightGBMClassifier extends Benchmarks with EstimatorFuzzing[LightGBM
}

test("Verify LightGBM Classifier with unbalanced dataset") {
assume(!isWindows)
val Array(train, test) = taskDF.randomSplit(Array(0.8, 0.2), seed)
assertBinaryImprovement(
baseModel, train, test,
@@ -279,7 +267,6 @@ class VerifyLightGBMClassifier extends Benchmarks with EstimatorFuzzing[LightGBM
}

test("Verify LightGBM Classifier with validation dataset") {
assume(!isWindows)
val df = taskDF.orderBy(rand()).withColumn(validationCol, lit(false))

val Array(train, validIntermediate, test) = df.randomSplit(Array(0.6, 0.2, 0.2), seed)
@@ -294,7 +281,7 @@ class VerifyLightGBMClassifier extends Benchmarks with EstimatorFuzzing[LightGBM
.setNumLeaves(100)
.setIsUnbalance(true)
.setValidationIndicatorCol(validationCol)
.setEarlyStoppingRound(2)
.setEarlyStoppingRound(5)

Array("auc", "binary_error", "binary_logloss").foreach { metric =>
assertBinaryImprovement(
@@ -305,8 +292,6 @@ class VerifyLightGBMClassifier extends Benchmarks with EstimatorFuzzing[LightGBM
}

test("Verify LightGBM Classifier categorical parameter") {
assume(!isWindows)

val Array(train, test) = bankTrainDF.randomSplit(Array(0.8, 0.2), seed)

val model = baseModel
@@ -320,7 +305,6 @@ class VerifyLightGBMClassifier extends Benchmarks with EstimatorFuzzing[LightGBM
}

test("Verify LightGBM Classifier won't get stuck on empty partitions") {
assume(!isWindows)
val baseDF = pimaDF.select(labelCol, featuresCol)
val df = baseDF.mapPartitions { rows =>
// Create an empty partition
@@ -335,7 +319,6 @@ class VerifyLightGBMClassifier extends Benchmarks with EstimatorFuzzing[LightGBM
}

ignore("Verify LightGBM Classifier won't get stuck on unbalanced classes in multiclass classification") {
assume(!isWindows)
val baseDF = breastTissueDF.select(labelCol, featuresCol)
val df = baseDF.mapPartitions({ rows =>
// Remove all instances of some classes
@@ -388,7 +371,6 @@ class VerifyLightGBMClassifier extends Benchmarks with EstimatorFuzzing[LightGBM
boostingTypes.foreach { boostingType =>
test("Verify LightGBMClassifier can be trained " +
s"and scored on $fileName with boosting type $boostingType", TestBase.Extended) {
assume(!isWindows)
val df = loadBinary(fileName, labelColumnName)
val model = baseModel
.setBoostingType(boostingType)
@@ -423,8 +405,6 @@ class VerifyLightGBMClassifier extends Benchmarks with EstimatorFuzzing[LightGBM
boostingTypes.foreach { boostingType =>
test(s"Verify LightGBMClassifier can be trained and scored " +
s"on multiclass $fileName with boosting type $boostingType", TestBase.Extended) {
assume(!isWindows)

val model = baseModel
.setObjective(multiclassObject)
.setBoostingType(boostingType)
@@ -456,8 +436,6 @@ class VerifyLightGBMClassifier extends Benchmarks with EstimatorFuzzing[LightGBM
labelColumnName: String,
colsToVerify: Array[String]): Unit = {
test("Verify LightGBMClassifier save booster to " + fileName) {
assume(!isWindows)

val model = baseModel
val df = loadBinary(fileName, labelColumnName)
val fitModel = model.fit(df)
@@ -5,7 +5,7 @@ package com.microsoft.ml.spark.lightgbm.split2

import com.microsoft.ml.spark.core.test.benchmarks.{Benchmarks, DatasetUtils}
import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject}
import com.microsoft.ml.spark.lightgbm.split1.{LightGBMTestUtils, OsUtils}
import com.microsoft.ml.spark.lightgbm.split1.LightGBMTestUtils
import com.microsoft.ml.spark.lightgbm.{LightGBMRanker, LightGBMRankerModel, LightGBMUtils}
import org.apache.spark.SparkException
import org.apache.spark.ml.feature.VectorAssembler
@@ -17,7 +17,7 @@ import org.apache.spark.sql.types.StringType
//scalastyle:off magic.number
/** Tests to validate the functionality of the LightGBM Ranker module. */
class VerifyLightGBMRanker extends Benchmarks with EstimatorFuzzing[LightGBMRanker]
with OsUtils with LightGBMTestUtils {
with LightGBMTestUtils {

import session.implicits._

@@ -60,23 +60,18 @@ class VerifyLightGBMRanker extends Benchmarks with EstimatorFuzzing[LightGBMRank
}

/** Runs the inherited `testExperiments()` only when `isWindows` is false.
  *
  * NOTE(review): `assume` is presumably ScalaTest's, which cancels the test instead of
  * failing it when the condition does not hold — confirm against the base suite.
  */
override def testExperiments(): Unit = {
assume(!isWindows)
super.testExperiments()
}

/** Runs the inherited `testSerialization()` only when `isWindows` is false.
  *
  * NOTE(review): `assume` is presumably ScalaTest's, which cancels the test instead of
  * failing it when the condition does not hold — confirm against the base suite.
  */
override def testSerialization(): Unit = {
assume(!isWindows)
super.testSerialization()
}

// Registers a test that passes baseModel and rankingDF to assertFitWithoutErrors.
// The assume(!isWindows) guard lets the body proceed only when isWindows is false.
// NOTE(review): assume is presumably ScalaTest's Assertions.assume, which cancels the test
// rather than failing it — confirm that the base suite mixes it in.
test("Verify LightGBM Ranker on ranking dataset") {
assume(!isWindows)
assertFitWithoutErrors(baseModel, rankingDF)
}

test("Throws error when group column is not long or int") {
assume(!isWindows)

val df = rankingDF.withColumn(queryCol, col(queryCol).cast(StringType))

// Throws SparkException instead of IllegalArgumentException because the type
@@ -6,7 +6,7 @@ package com.microsoft.ml.spark.lightgbm.split2
import com.microsoft.ml.spark.core.test.base.TestBase
import com.microsoft.ml.spark.core.test.benchmarks.{Benchmarks, DatasetUtils}
import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject}
import com.microsoft.ml.spark.lightgbm.split1.{LightGBMTestUtils, OsUtils}
import com.microsoft.ml.spark.lightgbm.split1.LightGBMTestUtils
import com.microsoft.ml.spark.lightgbm.{LightGBMRegressionModel, LightGBMRegressor, LightGBMUtils}
import com.microsoft.ml.spark.stages.MultiColumnAdapter
import org.apache.spark.ml.evaluation.RegressionEvaluator
@@ -20,7 +20,7 @@ import org.apache.spark.sql.functions.{avg, col, lit, when}
/** Tests to validate the functionality of the LightGBM regressor module.
*/
class VerifyLightGBMRegressor extends Benchmarks
with EstimatorFuzzing[LightGBMRegressor] with OsUtils with LightGBMTestUtils {
with EstimatorFuzzing[LightGBMRegressor] with LightGBMTestUtils {
override val startingPortIndex = 30

verifyLearnerOnRegressionCsvFile("energyefficiency2012_data.train.csv", "Y1", 0,
@@ -33,17 +33,14 @@ class VerifyLightGBMRegressor extends Benchmarks
verifyLearnerOnRegressionCsvFile("Concrete_Data.train.csv", "Concrete compressive strength(MPa, megapascals)", 0)

/** Runs the inherited `testExperiments()` only when `isWindows` is false.
  *
  * NOTE(review): `assume` is presumably ScalaTest's, which cancels the test instead of
  * failing it when the condition does not hold — confirm against the base suite.
  */
override def testExperiments(): Unit = {
assume(!isWindows)
super.testExperiments()
}

/** Runs the inherited `testSerialization()` only when `isWindows` is false.
  *
  * NOTE(review): `assume` is presumably ScalaTest's, which cancels the test instead of
  * failing it when the condition does not hold — confirm against the base suite.
  */
override def testSerialization(): Unit = {
assume(!isWindows)
super.testSerialization()
}

// Registers a test tagged TestBase.Extended that calls verifyBenchmarks().
// The assume(!isWindows) guard lets the body proceed only when isWindows is false.
// NOTE(review): assume is presumably ScalaTest's Assertions.assume, which cancels the test
// rather than failing it — confirm that TestBase mixes it in.
test("Compare benchmark results file to generated file", TestBase.Extended) {
assume(!isWindows)
verifyBenchmarks()
}

@@ -60,7 +57,6 @@ class VerifyLightGBMRegressor extends Benchmarks
}

test("Verify LightGBM Regressor can be run with TrainValidationSplit") {
assume(!isWindows)
val model = baseModel

val paramGrid = new ParamGridBuilder()
@@ -88,8 +84,6 @@ class VerifyLightGBMRegressor extends Benchmarks
}

test("Verify LightGBM Regressor with weight column") {
assume(!isWindows)

val df = airfoilDF.withColumn(weightCol, lit(1.0))

val model = baseModel.setWeightCol(weightCol)
@@ -126,7 +120,6 @@ class VerifyLightGBMRegressor extends Benchmarks
}

test("Verify LightGBM Regressor categorical parameter") {
assume(!isWindows)
val Array(train, test) = flareDF.randomSplit(Array(0.8, 0.2), seed.toLong)
val model = baseModel.setCategoricalSlotNames(flareDF.columns.filter(_.startsWith("c_")))
val metric = regressionEvaluator.evaluate(model.fit(train).transform(test))
@@ -136,7 +129,6 @@ class VerifyLightGBMRegressor extends Benchmarks
}

test("Verify LightGBM Regressor with tweedie distribution") {
assume(!isWindows)
val model = baseModel.setObjective("tweedie").setTweedieVariancePower(1.5)

val paramGrid = new ParamGridBuilder()
@@ -162,7 +154,6 @@ class VerifyLightGBMRegressor extends Benchmarks
boostingTypes.foreach { boostingType =>
test(s"Verify LightGBMRegressor can be trained " +
s"and scored on $fileName with boosting type $boostingType") {
assume(!isWindows)
val model = baseModel.setBoostingType(boostingType)

if (boostingType == "rf") {

0 comments on commit 671b688

Please sign in to comment.
You can’t perform that action at this time.