-
Notifications
You must be signed in to change notification settings - Fork 0
/
tools.py
154 lines (121 loc) · 3.49 KB
/
tools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import numpy as np
import itertools
def tanh(x):
    """Return the hyperbolic tangent of ``x``, element-wise (via ``np.tanh``)."""
    activated = np.tanh(x)
    return activated
def tanh_deriv(x):
    """Return the derivative of tanh at ``x``: ``1 - tanh(x)**2``, element-wise."""
    t = np.tanh(x)
    return 1.0 - t ** 2
def logistic(x):
    """Return the logistic (sigmoid) function ``1 / (1 + exp(-x))``, element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator
def logistic_derivative(x):
    """Return the derivative of the logistic function at ``x``.

    Uses the identity ``s'(x) = s(x) * (1 - s(x))`` where ``s`` is ``logistic``.
    """
    s = logistic(x)
    return s * (1 - s)
def add_ones(samples):
    """Return a float copy of 2-D ``samples`` with a trailing column of ones.

    ``samples`` is converted with ``np.array``; the result has one more
    column than the input and the last column is filled with 1.0.
    """
    data = np.array(samples)
    n_rows = data.shape[0]
    n_cols = data.shape[1]
    # Start from all ones, then overwrite everything except the last column.
    widened = np.ones([n_rows, n_cols + 1])
    widened[:, :-1] = data
    return widened
def add_zeros(samples):
    """Return a float copy of 2-D ``samples`` with a trailing column of zeros.

    ``samples`` is converted with ``np.array``; the result has one more
    column than the input and the last column is filled with 0.0.
    """
    data = np.array(samples)
    n_rows = data.shape[0]
    n_cols = data.shape[1]
    # Start from all zeros, then overwrite everything except the last column.
    widened = np.zeros([n_rows, n_cols + 1])
    widened[:, :-1] = data
    return widened
def add_column(samples, col=1):
    """Return a float copy of 2-D ``samples`` with a trailing constant column.

    The appended column is filled with the scalar ``col`` (1 by default).
    """
    data = np.array(samples)
    target_shape = [data.shape[0], data.shape[1] + 1]
    widened = np.empty(target_shape)
    # Fill everything with the constant first; the data then overwrites all
    # but the last column.
    widened.fill(col)
    widened[:, :-1] = data
    return widened
def _nan_mask(values):
    """Return a boolean array marking the NaN entries of ``values``."""
    kind = values.dtype.kind
    if kind in "fc":
        return np.isnan(values)
    if kind == "O":
        # Object arrays may mix strings with float NaNs; np.isnan cannot be
        # applied to them, so test each element (NaN is the only float
        # that is not equal to itself).
        flags = [isinstance(v, (float, np.floating)) and bool(v != v)
                 for v in values.ravel()]
        return np.array(flags, dtype=bool).reshape(values.shape)
    # Integer, boolean, string, ... dtypes cannot hold NaN.
    return np.zeros(values.shape, dtype=bool)


def categorize(features, ignore_nans=True):
    """Replace every distinct feature value by an integer category label.

    Labels are the positions of the values in the sorted list of unique
    non-NaN values, so the smallest value gets label 0.

    Parameters
    ----------
    features : array-like
        Values to label (numbers or strings).
    ignore_nans : bool
        If True, NaN entries are passed through as NaN (the result is then
        a float array).  If False, NaN is treated as one extra category
        whose label follows all regular labels.

    Returns
    -------
    numpy.ndarray
        Array of labels with the same shape as ``features``; integer dtype
        unless NaNs were passed through.
    """
    values = np.asarray(features)
    missing = _nan_mask(values)
    present = ~missing

    # Boolean indexing flattens, so ``codes`` is 1-D on every NumPy version.
    uniq, codes = np.unique(values[present], return_inverse=True)

    if present.all():
        return codes.reshape(values.shape)

    if ignore_nans:
        labels = np.full(values.shape, np.nan)
        labels[present] = codes
        return labels

    labels = np.empty(values.shape, dtype=codes.dtype)
    labels[present] = codes
    labels[missing] = len(uniq)
    return labels
def rescale(samples, low=-1, high=1):
    """Linearly rescale each column of ``samples`` to the range [low, high].

    The column minimum maps to ``low`` and the column maximum to ``high``.

    Parameters
    ----------
    samples : array-like
        Data to rescale; minima and maxima are taken along axis 0.
    low, high : scalar
        Bounds of the target range (default -1 and 1).

    Returns
    -------
    numpy.ndarray
        Floating-point array of rescaled values.  A column whose values are
        all equal has zero range, so its entries come out as NaN (0 / 0).
    """
    values = np.asarray(samples)
    # Equivalent of the removed ``np.asfarray`` (gone in NumPy 2.0): keep
    # existing floating dtypes, promote everything else to float64.
    if not np.issubdtype(values.dtype, np.inexact):
        values = values.astype(np.float64)

    mins = np.min(values, axis=0)
    maxs = np.max(values, axis=0)
    rng = maxs - mins
    return high - (((high - low) * (maxs - values)) / rng)
def scale_back(scaled, original, low=-1, high=1):
    """Map values from the [low, high] range back to the range of ``original``.

    The per-column minimum and maximum of ``original`` (axis 0) define the
    target range; ``high`` maps to the column maximum and ``low`` to the
    column minimum.
    """
    upper = np.max(original, axis=0)
    lower = np.min(original, axis=0)
    span = upper - lower
    # Distance below the column maximum, expressed in original units.
    shortfall = (high - scaled) * span / (high - low)
    return upper - shortfall
def standardize(samples):
    """Centre each column of ``samples`` on its mean and divide by its std.

    Mean and standard deviation are computed along axis 0.
    """
    centre = np.mean(samples, axis=0)
    spread = np.std(samples, axis=0)
    centred = samples - centre
    return centred / spread
def stand_back(samples, original):
    """Undo standardization using the column statistics of ``original``.

    Multiplies ``samples`` by the per-column standard deviation of
    ``original`` and adds its per-column mean (both along axis 0).
    """
    centre = np.mean(original, axis=0)
    spread = np.std(original, axis=0)
    stretched = samples * spread
    return stretched + centre
def getstats(samples):
    """Collect per-column statistics of ``samples`` in a dict.

    ``'min'`` and ``'max'`` are taken from the raw data, while ``'mean'``
    and ``'std'`` are computed on the output of ``rescale(samples)`` called
    with its default arguments.  All statistics are along axis 0.
    """
    lowest = np.min(samples, axis=0)
    highest = np.max(samples, axis=0)
    rescaled = rescale(samples)
    return {
        'min': lowest,
        'max': highest,
        'mean': np.mean(rescaled, axis=0),
        'std': np.std(rescaled, axis=0),
    }
def delnans_any(s, ax=1):
    """Drop the rows or columns of 2-D array ``s`` that contain any NaN.

    Parameters
    ----------
    s : numpy.ndarray
        2-D floating-point array.
    ax : int
        1 (default) removes every row holding at least one NaN;
        0 removes every column holding at least one NaN.

    Returns
    -------
    numpy.ndarray or None
        The filtered array, or None when ``ax`` is neither 0 nor 1.
    """
    # Compare by value: ``ax is 1`` relied on CPython's small-int caching and
    # silently failed for e.g. ``np.int64(1)``.
    if ax == 1:
        return s[~np.isnan(s).any(axis=1)]
    if ax == 0:
        return s[:, ~np.isnan(s).any(axis=0)]
    return None
def delnans_all(s, ax=1):
    """Drop the rows or columns of 2-D array ``s`` that consist only of NaNs.

    Parameters
    ----------
    s : numpy.ndarray
        2-D floating-point array.
    ax : int
        1 (default) removes every row in which all entries are NaN;
        0 removes every column in which all entries are NaN.

    Returns
    -------
    numpy.ndarray or None
        The filtered array, or None when ``ax`` is neither 0 nor 1.
    """
    # Compare by value: ``ax is 1`` relied on CPython's small-int caching and
    # silently failed for e.g. ``np.int64(1)``.
    if ax == 1:
        return s[~np.isnan(s).all(axis=1)]
    if ax == 0:
        return s[:, ~np.isnan(s).all(axis=0)]
    return None
def raise_order(samples, n):
    """Extend a sample matrix with polynomial feature columns up to order ``n``.

    For every degree ``d`` in ``2..n`` and every combination (with
    replacement) of ``d`` column indices, the element-wise product of those
    columns is appended as a new column.  The original columns come first.

    Parameters
    ----------
    samples : array-like
        2-D sample matrix (rows are samples, columns are features).
    n : int
        Highest polynomial order to generate; must be >= 1.

    Returns
    -------
    numpy.ndarray
        ``samples`` itself (unchanged, not copied) when ``n == 1``;
        otherwise a new array with the product columns appended.

    Raises
    ------
    AttributeError
        If ``n`` is smaller than 1 (exception type kept for compatibility
        with existing callers).
    """
    if n == 1:
        return samples
    if n < 1:
        raise AttributeError('n must be greater or equal to 1')

    data = np.asarray(samples)
    column_ids = range(data.shape[1])

    # Collect the new columns and concatenate once at the end instead of
    # re-copying the growing matrix for every added column.
    new_columns = []
    for degree in range(2, n + 1):
        for combo in itertools.combinations_with_replacement(column_ids, degree):
            product = data[:, combo[0]].copy()
            for idx in combo[1:]:
                product *= data[:, idx]
            new_columns.append(product[:, np.newaxis])

    return np.concatenate([data] + new_columns, axis=1)
def array_to_float(data, tonans=True):
    """Convert a 2-D array of arbitrary cells to a float array.

    Parameters
    ----------
    data : array-like
        2-D array whose cells are passed to ``float()`` one by one.
    tonans : bool
        If True (default), every cell that ``float()`` rejects with a
        ValueError becomes NaN.  If False, only empty strings become NaN.

    Returns
    -------
    numpy.ndarray
        float64 array with the same two leading dimensions as ``data``.

    Raises
    ------
    ValueError
        If ``tonans`` is False and a non-empty cell cannot be converted.
    """
    data = np.asarray(data)
    n_rows, n_cols = data.shape[0], data.shape[1]
    result = np.zeros((n_rows, n_cols))
    for i in range(n_rows):
        for j in range(n_cols):
            cell = data[i, j]
            try:
                result[i, j] = float(cell)
            except ValueError:
                # Test the *input* cell for emptiness; the original compared
                # the float output to '' by identity, which is never true.
                if tonans or cell == '':
                    result[i, j] = np.nan
                else:
                    # A float array cannot hold the raw value; surface the
                    # conversion error instead of failing on the assignment.
                    raise
    return result