-
Notifications
You must be signed in to change notification settings - Fork 0
/
Patterns.py
236 lines (183 loc) · 6.31 KB
/
Patterns.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function
from __future__ import division
import numpy as np
from sklearn.utils import check_X_y
from sklearn.utils import check_array
from ReplicatedFocusingBeliefPropagation.lib.Patterns import _Patterns
from ReplicatedFocusingBeliefPropagation.rfbp.misc import _check_string
from ReplicatedFocusingBeliefPropagation.rfbp.misc import get_int_size
# Public API of this module: `Pattern` is the only name exported by a star-import.
__all__ = ['Pattern']
# Module metadata: author names and their contact e-mail addresses (listed in the same order).
__author__ = ['Nico Curti', "Daniele Dall'Olio"]
__email__ = ['nico.curti2@unibo.it', 'daniele.dallolio@studio.unibo.it']
class Pattern (object):
    '''
    Pattern object for C++ compatibility.

    The Pattern object is just a simple wrap of a data (matrix) + labels (vector).
    This object type provides compatibility with the rFBP functions in C++ and it
    also provides a series of checks for the input validity.

    Parameters
    ----------
    X : None or 2D array-like
        Input matrix of variables as (Nsample, Nfeatures).
        To read the matrix from a file use the `load` method instead.

    y : None or 1D array-like
        Input labels, one per sample.

    Notes
    -----
    If either `X` or `y` is None the object is created empty: its shape is (0, 0)
    and both `data` and `labels` are None until `random` or `load` is called.

    Raises
    ------
    ValueError
        If the data or the labels contain values different from +1 and -1.

    Example
    -------
    >>> import numpy as np
    >>> from ReplicatedFocusingBeliefPropagation import Pattern
    >>>
    >>> n_sample, n_feature = (20, 101) # n_feature must be odd
    >>> data = np.random.choice(a=(-1, 1), p=(.5, .5), size=(n_sample, n_feature))
    >>> labels = np.random.choice(a=(-1, 1), p=(.5, .5), size=(n_sample, ))
    >>>
    >>> pt = Pattern(X=data, y=labels)
    >>> # dimensions
    >>> assert pt.shape == (n_sample, n_feature)
    >>> # data
    >>> np.testing.assert_allclose(pt.data, data)
    >>> # labels
    >>> np.testing.assert_allclose(pt.labels, labels)
    '''

    def __init__ (self, X=None, y=None):
        if X is not None and y is not None:
            # check array
            X, y = check_X_y(X, y)
            # rows (samples) and columns (features) of the validated matrix
            N, M = X.shape
            X = check_array(X)
            # the wrapped object receives the matrix as a flat, contiguous buffer
            X = X.ravel()
            X = np.ascontiguousarray(X)
            y = np.ascontiguousarray(y)
            X = X.astype('float64')
            # NOTE(review): get_int_size presumably returns the integer dtype
            # expected by the compiled side - confirm against rfbp.misc
            y = y.astype(get_int_size())
            self._pattern = _Patterns(X=X, y=y, M=M, N=N)
            self._check_binary()
        else:
            # no (or partial) input: start as an empty pattern
            self._pattern = None

    def random (self, shape):
        '''
        Generate Random pattern.

        The pattern is generated using a Bernoulli distribution and thus it creates a data (matrix) + labels (vector)
        of binary values. The values are converted into the range (-1, 1) for the compatibility with the rFBP algorithm.

        Parameters
        ----------
        shape : tuple
            a 2-D tuple with (M, N) where M is the number of samples and N the number of probes

        Returns
        -------
        self : Pattern
            The object itself, wrapping the newly generated pattern.

        Raises
        ------
        ValueError
            If `shape` cannot be unpacked into two integers or if one of
            the two dimensions is not strictly positive.

        Example
        -------
        >>> from ReplicatedFocusingBeliefPropagation import Pattern
        >>>
        >>> n_sample = 10
        >>> n_feature = 20
        >>> data = Pattern().random(shape=(n_sample, n_feature))
        >>> assert data.shape == (n_sample, n_feature)
        >>> data
          Pattern[shapes=(10, 20)]
        '''
        try:
            M, N = map(int, shape)
        except ValueError:
            raise ValueError('Incorrect dimensions. Shapes must be a 2-D tuple with (M, N)')

        if M <= 0 or N <= 0:
            raise ValueError('Incorrect dimensions. M and N must be greater than 0. Given ({0:d}, {1:d})'.format(M, N))

        self._pattern = _Patterns(M=M, N=N)
        # We do not need to check the variables since they are correctly generated into the C++ code!
        return self

    def load (self, filename, binary=False, delimiter='\t'):
        '''
        Load pattern from file.

        This is the main utility of the Pattern object. You can use this function to load data from csv-like files OR from a binary file.

        Parameters
        ----------
        filename : str
            Filename/Path to the Pattern file

        binary : bool
            True if the filename is in binary fmt; False for ASCII fmt

        delimiter : str
            Separator of input file (valid if binary is False)

        Returns
        -------
        self : Pattern
            The object itself, wrapping the loaded pattern.

        Raises
        ------
        ValueError
            If `filename` is not a string or if the loaded data or labels
            contain values different from +1 and -1.

        Example
        -------
        >>> from ReplicatedFocusingBeliefPropagation import Pattern
        >>>
        >>> data = Pattern().load(filename='path/to/datafile.csv', delimiter=',', binary=False)
        >>> data
          Pattern[shapes=(10, 20)]
        '''
        if not isinstance(filename, str):
            raise ValueError('Invalid filename found. Filename must be a string. Given : {0}'.format(filename))

        filename = _check_string(filename, exist=True)
        delimiter = _check_string(delimiter, exist=False)

        self._pattern = _Patterns(filename=filename, binary=binary, delimiter=delimiter)
        self._check_binary()
        return self

    @property
    def shape (self):
        '''
        Return the shape of the data matrix

        Returns
        -------
        shape: tuple
            The tuple related to the data dimensions (n_sample, n_features);
            (0, 0) when no pattern is loaded.
        '''
        try:
            return (self._pattern.Nrow, self._pattern.Ncol)
        except AttributeError:
            # self._pattern is None: empty object
            return (0, 0)

    @property
    def labels (self):
        '''
        Return the label array

        Returns
        -------
        labels: array-like
            The labels vector as (n_sample, ) casted to integers,
            or None when no pattern is loaded.
        '''
        try:
            return np.asarray(self._pattern.labels, dtype=int)
        except AttributeError:
            # self._pattern is None: empty object
            return None

    @property
    def data (self):
        '''
        Return the data matrix

        Returns
        -------
        data: array-like
            The data matrix as (n_sample, n_features) casted to integers,
            or None when no pattern is loaded.
        '''
        try:
            return np.asarray(self._pattern.data, dtype=int)
        except AttributeError:
            # self._pattern is None: empty object
            return None

    @property
    def pattern (self):
        '''
        Return the pattern Cython object

        Returns
        -------
        pattern: Cython object
            The cython object wrapped by the Pattern class

        Notes
        -----
        .. warning::
            We discourage the use of this property if you do not know exactly what you are doing!
        '''
        return self._pattern

    def _check_binary (self):
        '''
        Check if the input data and labels satisfy the binary
        requirements

        Both the data matrix AND the label vector must contain only
        +1 and -1 values. If either of them does not, the wrapped
        pattern is discarded and an error is raised.

        Raises
        ------
        ValueError
            If the data or the labels contain a value different from +1 and -1.
        '''
        # read each property once: every access converts the wrapped buffer to a new array
        data = self.data
        labels = self.labels

        data_is_binary = ((-1 == data) | (1 == data)).all()
        labels_are_binary = ((-1 == labels) | (1 == labels)).all()

        # both checks must hold: a single invalid member makes the pattern unusable
        if not (data_is_binary and labels_are_binary):
            self._pattern = None # remove the loaded object
            raise ValueError('Invalid input parameters! Input variables must be +1 or -1')

    def __repr__ (self):
        '''
        Object representation
        '''
        class_name = self.__class__.__qualname__
        if self._pattern is not None:
            return '{0}[shapes=({1:d}, {2:d})]'.format(class_name, self._pattern.Nrow, self._pattern.Ncol)
        else:
            return '{0}[shapes=(0, 0)]'.format(class_name)