/
stacking.py
180 lines (144 loc) · 6.67 KB
/
stacking.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from sklearn.base import clone
def transformer(y, func=None):
    """Apply ``func`` to ``y``; return ``y`` unchanged when no function is given.

    Used to (optionally) transform the target variable before fitting and to
    back-transform predictions afterwards.
    """
    return y if func is None else func(y)
def stacking_regression(models, meta_model, X_train, y_train, X_test,
                        transform_target=None, transform_pred=None,
                        metric=None, n_folds=3, average_fold=True,
                        shuffle=False, random_state=0, verbose=1):
    '''
    Function 'stacking' takes train data, test data, list of 1-st level
    models, meta_model for the 2-nd level and returns stacking predictions.
    Parameters
    ----------
    models : list
        List of 1-st level models. You can use any models that follow sklearn
        convention i.e. accept numpy arrays and have methods 'fit' and 'predict'.
    meta_model: model
        2-nd level model. You can use any model that follow sklearn convention
    X_train : numpy array or sparse matrix of shape [n_train_samples, n_features]
        Training data
    y_train : numpy 1d array
        Target values
    X_test : numpy array or sparse matrix of shape [n_test_samples, n_features]
        Test data
    transform_target : callable, default None
        Function to transform target variable.
        If None - transformation is not used.
        For example, for regression task (if target variable is skewed)
        you can use transformation like numpy.log.
        Set transform_target = numpy.log
        Usually you want to use respective backward transformation
        for prediction like numpy.exp.
        Set transform_pred = numpy.exp
        Caution! Some transformations may give inapplicable results.
        For example, if target variable contains zeros, numpy.log
        gives you -inf. In such case you can use appropriate
        transformation like numpy.log1p and respective
        backward transformation like numpy.expm1
    transform_pred : callable, default None
        Function to transform prediction.
        If None - transformation is not used.
        If you use transformation for target variable (transform_target)
        like numpy.log, then using transform_pred you can specify
        respective backward transformation like numpy.exp.
        Look at description of parameter transform_target
    metric : callable, default None
        Evaluation metric (score function) which is used to calculate
        results of cross-validation.
        If None, then by default:
            sklearn.metrics.mean_squared_error - for regression
    n_folds : int, default 3
        Number of folds in cross-validation
    average_fold: boolean, default True
        Whether to take the average of the predictions on test set from each fold.
        Refit the model using the whole training set and predict test set if False
    shuffle : boolean, default False
        Whether to perform a shuffle before cross-validation split
    random_state : int, default 0
        Random seed for shuffle (only used when shuffle is True)
    verbose : int, default 1
        Level of verbosity.
        0 - show no messages
        1 - for each model show single mean score
        2 - for each model show score for each fold and mean score
        Caution. To calculate MEAN score across all folds
        full train set prediction and full true target are used.
        So for some metrics (e.g. rmse) this value may not be equal
        to mean of score values calculated for each fold.
    Returns
    -------
    stacking_prediction : numpy array of shape n_test_samples
        Stacking prediction
    '''
    # Specify default metric for cross-validation
    if metric is None:
        metric = mean_squared_error
    # Print metric
    if verbose > 0:
        print('metric: [%s]\n' % metric.__name__)
    # Split indices to get folds. scikit-learn raises a ValueError when
    # random_state is set while shuffle is False, so only forward the seed
    # when it is actually used.
    kf = KFold(n_splits=n_folds, shuffle=shuffle,
               random_state=random_state if shuffle else None)
    # Convert pandas inputs to plain numpy arrays so positional fold indexing
    # (X_train[tr_index]) works regardless of the DataFrame/Series index.
    # DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() replaces it.
    if isinstance(X_train, (pd.DataFrame, pd.Series)):
        X_train = X_train.to_numpy()
    if isinstance(X_test, (pd.DataFrame, pd.Series)):
        X_test = X_test.to_numpy()
    if isinstance(y_train, (pd.DataFrame, pd.Series)):
        # Ravel so a 1-column DataFrame target becomes the documented 1d array.
        y_train = y_train.to_numpy().ravel()
    # Create empty numpy arrays for stacking features
    S_train = np.zeros((X_train.shape[0], len(models)))
    S_test = np.zeros((X_test.shape[0], len(models)))
    # Loop across models
    for model_counter, model in enumerate(models):
        if verbose > 0:
            print('model %d: [%s]' % (model_counter, model.__class__.__name__))
        # Create empty numpy array, which will contain temporary predictions for test set made in each fold
        S_test_temp = np.zeros((X_test.shape[0], n_folds))
        # Loop across folds
        for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
            X_tr = X_train[tr_index]
            y_tr = y_train[tr_index]
            X_te = X_train[te_index]
            y_te = y_train[te_index]
            # Clone the model because fit will mutate the model.
            instance = clone(model)
            # Fit 1-st level model
            instance.fit(X_tr, transformer(y_tr, func=transform_target))
            # Predict out-of-fold part of train set
            S_train[te_index, model_counter] = transformer(instance.predict(X_te), func=transform_pred)
            # Predict full test set
            S_test_temp[:, fold_counter] = transformer(instance.predict(X_test), func=transform_pred)
            # Delete temporary model
            del instance
            if verbose > 1:
                print('    fold %d: [%.8f]' % (fold_counter, metric(y_te, S_train[te_index, model_counter])))
        # Compute mean of fold predictions for test set, or refit on the
        # full training set and predict once.
        if average_fold:
            S_test[:, model_counter] = np.mean(S_test_temp, axis=1)
        else:
            # Clone here too, so the caller's estimator object is never
            # mutated by the refit.
            instance = clone(model)
            instance.fit(X_train, transformer(y_train, func=transform_target))
            S_test[:, model_counter] = transformer(instance.predict(X_test), func=transform_pred)
            del instance
        if verbose > 0:
            print('    ----')
            print('    MEAN: [%.8f]\n' % (metric(y_train, S_train[:, model_counter])))
    # Fit our second layer meta model on the out-of-fold first-level predictions
    meta_model.fit(S_train, transformer(y_train, func=transform_target))
    # Make our final prediction
    stacking_prediction = transformer(meta_model.predict(S_test), func=transform_pred)
    return stacking_prediction