Skip to content
Permalink
Browse files

Simplify LIME parallelization

  • Loading branch information...
mhamilton723 committed Mar 12, 2019
1 parent f73110b commit d21e23877e50e162f9c6188769d3d5ad5a103cc9
@@ -263,7 +263,6 @@
" .setLabelCol(\"leopard_prob\")\\\n",
" .setOutputCol(\"weights\")\\\n",
" .setInputCol(\"image\")\\\n",
" .setModelPartitions(50)\\\n",
" .setCellSize(100.0)\\\n",
" .setModifier(50.0)\\\n",
" .setNSamples(300)\n",
@@ -312,26 +311,26 @@
]
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python3"
},
"nbformat": 4,
"nbformat_minor": 2
}
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@@ -0,0 +1,118 @@
// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark

import breeze.generic.UFunc
import breeze.linalg.{DenseMatrix, DenseVector}
import breeze.stats.regression.{LassoResult, LeastSquaresRegressionResult, leastSquaresDestructive}
import spire.implicits.cfor

/*
This is a copy of the LassoCalculator class in Breeze.
The only difference is that one of the requirement checks has been removed.
*/
/** Fits L1-penalised linear coefficients one column at a time.
  *
  * Each pass visits every column of `data`, fits a single-column least-squares
  * regression against the residual left by the other columns' current coefficients,
  * and soft-thresholds the fitted coefficient by `lambda`. Passes repeat until no
  * coefficient changes or `MAX_ITER` passes have run.
  *
  * @param data              design matrix; one row per observation, one column per feature
  * @param outputs           target values; must have exactly `data.rows` entries
  * @param lambda            soft-thresholding amount applied to every fitted coefficient
  * @param workArray         scratch buffer handed to `leastSquaresDestructive`; must hold at
  *                          least `2 * data.rows * data.cols` doubles
  * @param MAX_ITER          upper bound on the number of full passes over the columns
  * @param IMPROVE_THRESHOLD not read anywhere in this class; kept so the constructor
  *                          signature stays unchanged
  */
private case class LassoCalculator2(data: DenseMatrix[Double],
                                    outputs: DenseVector[Double],
                                    lambda: Double,
                                    workArray: Array[Double],
                                    MAX_ITER: Int = 100,
                                    IMPROVE_THRESHOLD: Double = 1e-8) {
  /*
   * The main purpose of this complicated calculator object is to recycle all the assorted work arrays.
   * If we didn't write it this way, we'd have to manually thread all the work arrays
   * throughout a slew of functions.
   */
  // The row/output size check used to be stated twice; once is enough.
  require(data.rows == outputs.size,
    s"data has ${data.rows} rows but outputs has ${outputs.size} entries")
  require(workArray.size >= 2 * data.rows * data.cols,
    s"workArray has ${workArray.size} entries but at least ${2 * data.rows * data.cols} are required")

  // Scratch buffers reused by every call to estimateOneColumn.
  private val outputCopy = DenseVector.zeros[Double](outputs.size)
  private val singleColumnMatrix = new DenseMatrix[Double](data.rows, 1)
  // Current coefficient estimates; starts at all zeros and is updated in place.
  private val resultVec = DenseVector.zeros[Double](data.cols)

  /** The fitted result, computed on first access.
    *
    * The second field of the returned `LassoResult` is whatever `computeRsquared`
    * yields, which is the sum of squared residuals of the final coefficients.
    */
  lazy val result: LassoResult = {

    var improvedResult = true
    var iter = 0

    while (improvedResult && (iter < MAX_ITER)) {
      iter += 1
      improvedResult = false
      cfor(0)(i => i < data.cols, i => i + 1)(i => {
        val eoc = estimateOneColumn(i)
        val oldCoefficient = resultVec(i)
        resultVec(i) = shrink(eoc.coefficients(0))
        // Any change at all, however small, triggers another full pass.
        if (oldCoefficient != resultVec(i)) {
          improvedResult = true
        }
      })
    }

    LassoResult(resultVec, computeRsquared, lambda)
  }

  /** Soft thresholding: moves `x` towards zero by `lambda`, returning 0.0 when `|x| <= lambda`. */
  private def shrink(x: Double): Double = {
    val sb = math.signum(x)
    val ab = sb * x // absolute value of x
    if (ab > lambda) {
      sb * (ab - lambda)
    } else {
      0.0
    }
  }

  /** Refreshes the scratch buffers for a single-column fit.
    *
    * After running this routine, `outputCopy` holds the residuals after multiplying
    * `data` against `resultVec`, excluding the specified column, and
    * `singleColumnMatrix` holds the data from that column.
    */
  private def copyColumn(column: Int): Unit = {
    require(column < data.cols, s"column $column is out of range for ${data.cols} columns")
    require(column >= 0, s"column must be non-negative but was $column")
    cfor(0)(i => i < outputs.size, i => i + 1)(i => {
      singleColumnMatrix(i, 0) = data(i, column)

      var o = outputs(i)
      cfor(0)(j => j < data.cols, j => j + 1)(j => {
        if (j != column) {
          o -= data(i, j) * resultVec(j)
        }
      })
      outputCopy(i) = o
    })
  }

  /** Sum of squared residuals of `outputs` against `data * resultVec`.
    *
    * Despite the name, no normalisation by the variance of `outputs` is performed.
    */
  private def computeRsquared = {
    var r2 = 0.0
    cfor(0)(i => i < outputs.size, i => i + 1)(i => {
      var o = outputs(i)
      cfor(0)(j => j < data.cols, j => j + 1)(j => {
        o -= data(i, j) * resultVec(j)
      })
      r2 += o * o
    })
    r2
  }

  /** Uses the specified column to explain as much of the residual as possible,
    * after using the already specified values in other columns.
    */
  private def estimateOneColumn(column: Int): LeastSquaresRegressionResult = {
    // The buffers are refilled before every fit; presumably leastSquaresDestructive
    // overwrites its inputs, as its name suggests -- TODO confirm against Breeze.
    copyColumn(column)
    leastSquaresDestructive(singleColumnMatrix, outputCopy, workArray)
  }
}

/** Entry point for running the local lasso calculator. */
object LassoUtils {

  /** Fits a lasso regression of `outputs` on the columns of `data`.
    *
    * Copies of `data` and `outputs` are handed to the calculator rather than the
    * caller's own instances, and a fresh work array is allocated for each call.
    *
    * @param data    design matrix; one row per observation
    * @param outputs target values, one per row of `data`
    * @param lambda  penalty passed through to the calculator
    * @return the calculator's `LassoResult`
    */
  def lasso(data: DenseMatrix[Double], outputs: DenseVector[Double], lambda: Double): LassoResult = {
    val dataCopy = data.copy
    val outputsCopy = outputs.copy
    // Always allocate at least one slot, even for an empty matrix.
    val scratchSize = math.max(1, data.rows * data.cols * 2)
    val scratch = new Array[Double](scratchSize)
    val calculator = LassoCalculator2(dataCopy, outputsCopy, lambda, scratch)
    calculator.result
  }
}
Oops, something went wrong.

0 comments on commit d21e238

Please sign in to comment.
You can’t perform that action at this time.