-
-
Notifications
You must be signed in to change notification settings - Fork 187
/
Copy pathLeastSquares.php
107 lines (91 loc) · 2.69 KB
/
LeastSquares.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
<?php
namespace Rubix\Engine;
use MathPHP\LinearAlgebra\Vector;
use MathPHP\LinearAlgebra\MatrixFactory;
use Rubix\Engine\Persisters\Persistable;
use InvalidArgumentException;
/**
 * Linear regression estimator fitted with ordinary least squares.
 *
 * Training solves the normal equation for the intercept and one coefficient
 * per feature column; prediction evaluates the resulting linear equation.
 */
class LeastSquares implements Regression, Persistable
{
    /**
     * The computed y intercept. Null until the estimator has been trained.
     *
     * @var float|null
     */
    protected $intercept;

    /**
     * The computed coefficients of the training data, one per feature column
     * and keyed by the 0-based column offset.
     *
     * @var array
     */
    protected $coefficients = [
        //
    ];

    /**
     * Return the y intercept learned during training, or null if the
     * estimator has not been trained yet.
     *
     * @return float|null
     */
    public function intercept() : ?float
    {
        return $this->intercept;
    }

    /**
     * Return the learned feature coefficients (empty until trained).
     *
     * @return array
     */
    public function coefficients() : array
    {
        return $this->coefficients;
    }

    /**
     * Learn the coefficients of the training data. i.e. compute the line that best
     * fits the training data.
     *
     * @param  \Rubix\Engine\Dataset  $data
     * @throws \InvalidArgumentException if the dataset is not supervised or
     *                                   contains a categorical column.
     * @return void
     */
    public function train(Dataset $data) : void
    {
        if (!$data instanceof SupervisedDataset) {
            throw new InvalidArgumentException('This estimator requires a supervised dataset.');
        }

        if (in_array(self::CATEGORICAL, $data->columnTypes())) {
            throw new InvalidArgumentException('This estimator only works with continuous samples.');
        }

        $coefficients = $this->computeCoefficients($data->samples(), $data->outcomes());

        // The first solved value belongs to the constant bias column and is
        // therefore the intercept. array_shift() also re-indexes the remaining
        // values from 0 so that they line up with the sample columns.
        $this->intercept = array_shift($coefficients);
        $this->coefficients = $coefficients;
    }

    /**
     * Make a prediction of a given sample by evaluating
     * intercept + sum(coefficient[i] * sample[i]).
     *
     * The sample must provide a value at every offset present in the learned
     * coefficients. If the estimator has not been trained, the intercept is
     * null and there are no coefficients, so the prediction wraps null.
     *
     * @param  array  $sample
     * @return \Rubix\Engine\Prediction
     */
    public function predict(array $sample) : Prediction
    {
        $outcome = $this->intercept;

        foreach ($this->coefficients as $i => $coefficient) {
            $outcome += $coefficient * $sample[$i];
        }

        return new Prediction($outcome);
    }

    /**
     * Compute the coefficients of the training data by solving the normal
     * equation b = (X^T X)^-1 X^T y, where X is the sample matrix with a
     * leading column of ones and y is the column of outcomes. The resulting
     * linear equation minimizes the sum of the squares of the errors.
     *
     * The first element of the returned array corresponds to the bias column
     * (the intercept); the rest correspond to the sample columns in order.
     *
     * NOTE(review): the inversion presumably fails inside MathPHP when X^T X
     * is singular (e.g. perfectly collinear columns) - confirm how that
     * surfaces to callers.
     *
     * @param  array  $samples
     * @param  array  $outcomes
     * @return array
     */
    protected function computeCoefficients(array $samples, array $outcomes) : array
    {
        // Prepend the constant bias term to every sample row.
        foreach ($samples as &$sample) {
            array_unshift($sample, 1);
        }

        // Break the reference so $sample no longer aliases the last row.
        unset($sample);

        $design = MatrixFactory::create($samples);
        $targets = MatrixFactory::create([new Vector($outcomes)]);

        // The transpose is needed on both sides of the equation; compute it once.
        $designT = $design->transpose();

        return $designT->multiply($design)->inverse()
            ->multiply($designT->multiply($targets))
            ->getColumn(0);
    }
}