forked from nathanntg/lin-train
-
Notifications
You must be signed in to change notification settings - Fork 0
/
train.py
61 lines (44 loc) · 1.86 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import numpy as np
class Train:
    """Score a set of input columns with k-fold cross-validation.

    The solver must provide ``calculate_parameters(x, y)`` and
    ``apply_parameters(x, fit)``; the scorer must provide
    ``score(validation)``.

    NOTE: nothing in this class populates ``self.folds``. It starts out empty
    and must be filled with one collection of validation row indices per fold
    before ``_score`` is called; otherwise ``_get_fold`` raises ``IndexError``.
    """

    def __init__(self, x, y, solver, scorer, number_of_folds=5):
        """Store the data set, the solver/scorer pair and the fold count.

        :param x: input data, indexed as ``x[rows, columns]``.
        :param y: target data, indexed by row.
        :param solver: object used to fit and to apply parameters.
        :param scorer: object used to score (actual, predicted) pairs.
        :param number_of_folds: number of folds iterated over by ``_score``.
        """
        self.x = x
        self.y = y
        self.solver = solver
        self.scorer = scorer
        self.number_of_folds = number_of_folds
        # validation row indices for each fold; filled in outside this class
        self.folds = []

    def _get_fold(self, fold):
        """Split the row indices into training and validation rows for a fold.

        :param fold: position of the fold in ``self.folds``.
        :return: tuple ``(training_rows, validation_rows)``; the training rows
            are an ascending list of every row index not held out by the fold,
            the validation rows are ``self.folds[fold]`` unchanged.
        """
        # get number rows
        number_of_rows = np.shape(self.x)[0]

        # get indices
        row_indices_for_validation = self.folds[fold]

        # build the lookup set once so each membership test is O(1) instead of
        # scanning the validation indices for every row
        held_out = set(row_indices_for_validation)

        # `range` (rather than `xrange`) works on both Python 2 and Python 3
        row_indices_for_training = [
            row for row in range(number_of_rows) if row not in held_out
        ]

        return row_indices_for_training, row_indices_for_validation

    def _select(self, col_indices_for_inputs, row_indices):
        """Return the x sub-matrix and matching y rows for the given indices."""
        # np.ix_ builds an open mesh so rows and columns are selected together
        x = self.x[np.ix_(row_indices, col_indices_for_inputs)]
        y = self.y[row_indices]
        return x, y

    def _train(self, col_indices_for_inputs, row_indices):
        """Fit the solver on the selected rows and input columns.

        :return: whatever ``solver.calculate_parameters`` returns.
        """
        # get x and y
        x, y = self._select(col_indices_for_inputs, row_indices)

        # run the solver (presumably a linear regression -- depends on solver)
        return self.solver.calculate_parameters(x, y)

    def _validate(self, col_indices_for_inputs, row_indices, fit):
        """Score the predictions of a fit on the selected rows.

        :param fit: parameters previously returned by ``_train``.
        :return: whatever ``scorer.score`` returns for the stacked
            (actual, predicted) columns.
        """
        # get x and y
        x, y = self._select(col_indices_for_inputs, row_indices)

        # generate predictions
        predicted_y = self.solver.apply_parameters(x, fit)

        # pair actual with predicted values; joining along axis 1 requires
        # both arrays to be 2-D with the same number of rows
        validation = np.concatenate((y, predicted_y), 1)

        return self.scorer.score(validation)

    def _score(self, col_indices_for_inputs):
        """Return the scorer's result averaged over all folds.

        :param col_indices_for_inputs: columns of ``x`` to use as inputs.
        """
        score = 0.

        # for each fold in k-fold-cross-validation
        for fold in range(self.number_of_folds):
            # get indices
            training_rows, validation_rows = self._get_fold(fold)

            # train and get fit
            fit = self._train(col_indices_for_inputs, training_rows)

            # validation score
            score += self._validate(col_indices_for_inputs, validation_rows, fit)

        # average score across folds (float() keeps true division on Python 2)
        return score / float(self.number_of_folds)