forked from statsmodels/statsmodels
/
transform_model.py
98 lines (72 loc) · 2.69 KB
/
transform_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# -*- coding: utf-8 -*-
"""
Created on Tue May 27 13:23:24 2014
Author: Josef Perktold
License: BSD-3
"""
import numpy as np
class StandardizeTransform(object):
    """Reparameterize a model for a standardized design matrix (exog).

    The column means and standard deviations of ``data`` are computed once
    and stored; they are then used to standardize data (``transform`` or
    calling the instance) and to map parameters estimated on the
    standardized data back to the original parameterization
    (``transform_params``).

    Parameters
    ----------
    data : array_like
        Data that is standardized along axis=0.
    ddof : None or int
        Degrees of freedom correction used for the standard deviation.
        Default is 1, in contrast to numpy.std. ``None`` is treated as the
        default, 1.
    const_idx : None or int
        If None, then the presence of a constant is detected if the standard
        deviation of a column is **equal** to zero. A constant column is
        not transformed. If this is an integer, then the corresponding column
        will not be transformed.
    demean : bool, default is True
        If demean is True, then the data will be demeaned, otherwise it will
        only be rescaled. Only the literal ``False`` disables demeaning.

    Attributes
    ----------
    mean : ndarray or None
        Column means used for centering, with 0 in the constant position.
        None if ``demean`` is False.
    scale : ndarray
        Column standard deviations, with 1 in the constant position.
    const_idx : int, ndarray or str
        Index of the column that is left untransformed, or the string
        ``'nc'`` if there is no such column.

    Notes
    -----
    Warning: Not all options are tested and it is written for one use case.
    API changes are expected.

    This can be used to transform only the design matrix, exog, in a model,
    which is required in some discrete models when the endog cannot be
    rescaled or demeaned.

    The transformation is full rank and does not drop the constant.
    """

    def __init__(self, data, ddof=1, const_idx=None, demean=True):
        data = np.asarray(data)
        if ddof is None:
            # documented as allowed; fall back to the documented default
            ddof = 1

        self.mean = data.mean(0)
        # honor the requested ddof instead of a hard-coded value
        self.scale = data.std(0, ddof=ddof)

        # do not transform a constant
        if const_idx is None:
            const_idx = np.nonzero(self.scale == 0)[0]
            if len(const_idx) == 0:
                const_idx = 'nc'

        self.const_idx = const_idx
        if self._has_const():
            # identity transformation for the constant column
            self.mean[const_idx] = 0
            self.scale[const_idx] = 1

        if demean is False:
            self.mean = None

    def _has_const(self):
        """Return True if a column index is stored in ``const_idx``."""
        # ``const_idx`` may be an index array; comparing an array with the
        # 'nc' marker via ``!=`` is elementwise and has no unambiguous truth
        # value, so the marker is detected by type instead.
        return not isinstance(self.const_idx, str)

    def transform(self, data):
        """Standardize the data using the stored transformation.

        Parameters
        ----------
        data : array_like
            Data with the same column layout as the data used to create
            the instance.

        Returns
        -------
        ndarray
            ``(data - mean) / scale``, or ``data / scale`` if the instance
            was created with ``demean=False``.
        """
        # could use scipy.stats.zscore instead
        data = np.asarray(data)
        if self.mean is None:
            return data / self.scale
        return (data - self.mean) / self.scale

    def transform_params(self, params):
        """Transform parameters of the standardized model to the original model

        Parameters
        ----------
        params : ndarray
            parameters estimated with the standardized model

        Returns
        -------
        params_new : ndarray
            parameters transformed to the parameterization of the original
            model
        """
        params_new = params / self.scale
        # The shift of the constant is only needed if the data was demeaned;
        # with demean=False ``self.mean`` is None and rescaling is sufficient.
        if self._has_const() and self.mean is not None:
            params_new[self.const_idx] -= (params_new * self.mean).sum()
        return params_new

    __call__ = transform