Skip to content

Commit

Permalink
Fixed isolation score calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewdalpino committed Oct 14, 2019
1 parent 05ee231 commit bc72001
Show file tree
Hide file tree
Showing 7 changed files with 24 additions and 13 deletions.
3 changes: 2 additions & 1 deletion docs/anomaly-detectors/isolation-forest.md
Expand Up @@ -28,4 +28,5 @@ $estimator = new IsolationForest(100); // Default sample size and threshold

### References
>- F. T. Liu et al. (2008). Isolation Forest.
>- F. T. Liu et al. (2011). Isolation-based Anomaly Detection.
>- F. T. Liu et al. (2011). Isolation-based Anomaly Detection.
>- M. Garchery et al. (2018). On the influence of categorical features in ranking anomalies using mixed data.
8 changes: 5 additions & 3 deletions src/AnomalyDetectors/IsolationForest.php
Expand Up @@ -27,6 +27,8 @@
* References:
* [1] F. T. Liu et al. (2008). Isolation Forest.
* [2] F. T. Liu et al. (2011). Isolation-based Anomaly Detection.
* [3] M. Garchery et al. (2018). On the influence of categorical features in
* ranking anomalies using mixed data.
*
* @category Machine Learning
* @package Rubix/ML
Expand Down Expand Up @@ -168,7 +170,7 @@ public function train(Dataset $dataset) : void
for ($i = 0; $i < $this->estimators; $i++) {
$tree = new ITree($maxDepth);

$subset = $dataset->randomize()->head($k);
$subset = $dataset->randomSubset($k);

$tree->grow($subset);

Expand Down Expand Up @@ -236,9 +238,9 @@ protected function isolationScore(array $sample) : float
$depth += $node ? $node->depth() : EPSILON;
}

$depth /= $this->estimators;
$depth /= $this->estimators * $this->delta;

return 2. ** -($depth / $this->delta);
return 2. ** -$depth;
}

/**
Expand Down
4 changes: 2 additions & 2 deletions src/Graph/Nodes/Isolator.php
Expand Up @@ -58,8 +58,8 @@ public static function split(Dataset $dataset) : self
$values = $dataset->column($column);

if ($dataset->columnType($column) === DataType::CONTINUOUS) {
$min = (int) round(min($values) * PHI);
$max = (int) round(max($values) * PHI);
$min = (int) floor(min($values) * PHI);
$max = (int) ceil(max($values) * PHI);

$value = rand($min, $max) / PHI;
} else {
Expand Down
4 changes: 2 additions & 2 deletions src/Graph/Trees/ExtraTree.php
Expand Up @@ -31,8 +31,8 @@ protected function split(Labeled $dataset) : Comparison
$values = $dataset->column($column);

if ($dataset->columnType($column) === DataType::CONTINUOUS) {
$min = (int) round(min($values) * PHI);
$max = (int) round(max($values) * PHI);
$min = (int) floor(min($values) * PHI);
$max = (int) ceil(max($values) * PHI);

$value = rand($min, $max) / PHI;
} else {
Expand Down
13 changes: 9 additions & 4 deletions src/Graph/Trees/ITree.php
Expand Up @@ -46,11 +46,16 @@ class ITree implements BST
*/
public static function c(int $n) : float
{
if ($n <= 1) {
return 1.;
switch (true) {
case $n > 2:
return 2. * (log($n - 1.) + M_EULER) - 2. * ($n - 1.) / $n;

case $n === 2:
return 1.;

default:
return 0.;
}

return 2. * (log($n - 1) + M_EULER) - 2. * ($n - 1) / $n;
}

/**
Expand Down
2 changes: 2 additions & 0 deletions tests/AnomalyDetectors/IsolationForestTest.php
Expand Up @@ -70,6 +70,8 @@ public function test_train_predict()

$this->assertTrue($this->estimator->trained());

var_dump($this->estimator->rank($testing));

$predictions = $this->estimator->predict($testing);

$score = $this->metric->score($predictions, $testing->labels());
Expand Down
3 changes: 2 additions & 1 deletion tests/Graph/Nodes/CellTest.php
Expand Up @@ -14,10 +14,11 @@ class CellTest extends TestCase
protected const SAMPLES = [
[5., 2., -3],
[6., 4., -5],
[-0.01, 0.1, -7],
];

protected const DEPTH = 8;
protected const C = 7.1544313298030655;
protected const C = 8.207392357589622;

public function test_build_node()
{
Expand Down

0 comments on commit bc72001

Please sign in to comment.