Skip to content
Permalink
Browse files

Improve noisy neighbor problems for e2e tests

  • Loading branch information...
mhamilton723 committed Jul 11, 2019
1 parent 6ab8916 commit 8e7626332f5da8757a12d2614ffb27b87ff3746f
Showing with 32 additions and 32 deletions.
  1. +32 −32 src/it/scala/com/microsoft/ml/nbtest/NotebookTests.scala
@@ -16,44 +16,44 @@ import scala.language.existentials
/** Tests that submit the notebook files as jobs to a cluster and check that none of the runs fail. */
class NotebookTests extends TestBase {

/** Suite set-up hook: after the parent set-up, waits until the cluster reports no active jobs.
  *
  * The emptiness assertion is wrapped in `tryWithRetries`, which is handed an array of 500
  * entries each equal to 10000 (presumably per-attempt delays in milliseconds, i.e. roughly
  * 83 minutes in total -- TODO confirm against the `tryWithRetries` definition).
  */
override def beforeAll(): Unit = {
super.beforeAll()
// Keep re-checking until no job is active on the cluster; the assertion message explains
// why: libraries cannot be changed safely while other jobs are still running.
tryWithRetries(Array.fill(500)(10000)) {() =>
assert(listActiveJobs(clusterId).isEmpty,
"Cluster already has running jobs cannot change libraries safely")
}
// Explicit unit value so the method result does not depend on what tryWithRetries returns.
()
}

// Test case "Install libraries", registered through `ignore` (presumably the test framework's
// "register but do not run" form -- confirm against TestBase).
// Body: requires that the cluster has no libraries installed, then installs them.
ignore("Install libraries"){
// Guard: fail instead of installing on top of libraries that are already present.
assert(listInstalledLibraries(clusterId).isEmpty, "Cluster already has libraries installed")
println("Installing libraries")
installLibraries(clusterId)
}

// Test case "Databricks Notebooks": installs libraries on the cluster, creates the workspace
// folder, submits one job per entry of `notebookFiles`, monitors the runs and asserts that
// none of them failed. Libraries are uninstalled and the cluster restarted in `finally`.
//
// NOTE(review): this body looks like the "before" and "after" sides of a diff rendered
// together. The install/submit/monitor sequence appears twice, and the second copy sits
// after `throw t` inside the catch case, where it can never execute and where it redeclares
// `jobIds`. Confirm against the real file before relying on any of the notes below.
test("Databricks Notebooks") {
// Guard: refuse to run if the cluster already has libraries installed.
assert(listInstalledLibraries(clusterId).isEmpty, "Cluster already has libraries installed")
println("Installing libraries")
installLibraries(clusterId)
println(s"Creating folder $folder")
workspaceMkDir(folder)
println(s"Submitting jobs")
// One id per notebook file; the ids are used as run ids for monitoring and cancellation below.
val jobIds = notebookFiles.map(uploadAndSubmitNotebook)
println(s"Submitted ${jobIds.length} for execution: ${jobIds.toList}")
// NOTE(review): this waits for the cluster to have no active jobs right after jobs were
// submitted to it; likely a leftover from the other side of the diff -- confirm.
tryWithRetries(Array.fill(500)(10000)) {() =>
assert(listActiveJobs(clusterId).isEmpty,
"Cluster already has running jobs cannot change libraries safely")
}
try {
// Start one monitor per submitted run; each monitor is awaited below.
val monitors = jobIds.map((runId: Int) => monitorJob(runId, timeoutInMillis, logLevel = 2))
println(s"Monitoring Jobs...")
// Await.ready blocks until each monitor completes (or the timeout elapses, which throws);
// `.value.get` then yields the completed result so failed ones can be filtered out.
val failures = monitors
.map(Await.ready(_, Duration(timeoutInMillis.toLong, TimeUnit.MILLISECONDS)).value.get)
.filter(_.isFailure)
assert(failures.isEmpty)
} catch {
// On any error (assertion failure, timeout, ...) cancel every submitted run, then rethrow.
case t: Throwable =>
jobIds.foreach { jid =>
println(s"Cancelling job $jid")
cancelRun(jid)
}
throw t
// NOTE(review): everything from here to the end of this catch case follows `throw t`
// and is unreachable as written; it duplicates the sequence at the top of the test.
assert(listInstalledLibraries(clusterId).isEmpty, "Cluster already has libraries installed")
println("Installing libraries")
installLibraries(clusterId)
println(s"Creating folder $folder")
workspaceMkDir(folder)
println(s"Submitting jobs")
val jobIds = notebookFiles.map(uploadAndSubmitNotebook)
println(s"Submitted ${jobIds.length} for execution: ${jobIds.toList}")
try {
val monitors = jobIds.map((runId: Int) => monitorJob(runId, timeoutInMillis, logLevel = 2))
println(s"Monitoring Jobs...")
val failures = monitors
.map(Await.ready(_, Duration(timeoutInMillis.toLong, TimeUnit.MILLISECONDS)).value.get)
.filter(_.isFailure)
assert(failures.isEmpty)
} catch {
case t: Throwable =>
jobIds.foreach { jid =>
println(s"Cancelling job $jid")
cancelRun(jid)
}
throw t
}
} finally {
// Always runs, whether the test passed or threw: remove the libraries and restart the cluster.
uninstallAllLibraries(clusterId)
restartCluster(clusterId)
}
}

@@ -70,10 +70,10 @@ class NotebookTests extends TestBase {
restartCluster(clusterId)
}

override def afterAll(): Unit = {
ignore("Refresh cluster") {
restartCluster(clusterId)
uninstallAllLibraries(clusterId)
restartCluster(clusterId)
super.afterAll()
}

}

0 comments on commit 8e76263

Please sign in to comment.
You can’t perform that action at this time.