forked from biocore/qiime
-
Notifications
You must be signed in to change notification settings - Fork 1
/
test_fizzy.py
162 lines (137 loc) · 6.28 KB
/
test_fizzy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#!/usr/bin/env python
from __future__ import division
import qiime.fizzy as fizzy
from unittest import TestCase, main
import StringIO
import numpy as np
# Module authorship and release metadata.
# The second author's surname is spelled "Morrison", matching __credits__.
__author__ = ["Gregory Ditzler", "Calvin Morrison"]
__copyright__ = "Copyright 2011, The QIIME project"
__credits__ = ["Gregory Ditzler", "Calvin Morrison", "Gail Rosen"]
__license__ = "GPL"
__version__ = "1.8.0-dev"
__maintainer__ = "Gregory Ditzler"
__email__ = "gregory.ditzler@gmail.com"
__status__ = "Development"
class FizzyTests(TestCase):
    """Unit tests for the parsing and feature selection helpers in fizzy.

    The BIOM and mapping inputs come from the module-level fixture strings
    ``biom_file_string`` and ``map_file_string``; synthetic data for the
    ``run_pyfeast`` tests is produced by :meth:`uniform_data`.
    """

    def setUp(self):
        """Wrap the BIOM and map fixture strings in fresh file-like handles."""
        self.biom_file_handle = StringIO.StringIO(biom_file_string)
        self.map_file_handle = StringIO.StringIO(map_file_string)

    def uniform_data(self, n_observations, n_features, n_select):
        """Generate random data whose labels depend on the leading features.

        Every entry is a whole number drawn with ``np.random.randint`` from
        0 to 10 inclusive and stored as a float.  An observation is labelled
        1 when the sum of its first ``n_select`` features exceeds
        ``n_select * (xmax - xmin) / 2.0`` and 2 otherwise, so only those
        leading features carry information about the label.

        Parameters
        ----------
        n_observations : int
            Number of observations (rows of the returned data).
        n_features : int
            Number of features (columns of the returned data).
        n_select : int
            Number of leading features that determine the label.

        Returns
        -------
        data : numpy.ndarray
            Float array of shape ``(n_observations, n_features)``.
        labels : numpy.ndarray
            Float array of length ``n_observations`` holding 1.0 or 2.0.

        Raises
        ------
        IndexError
            If ``n_select`` is larger than ``n_features``.
        """
        xmax = 10
        xmin = 1
        data = 1.0 * np.random.randint(
            xmax + 1, size=(n_features, n_observations))
        delta = n_select * (xmax - xmin) / 2.0

        # Index with an explicit integer array rather than a slice so that an
        # out-of-range n_select still raises IndexError instead of being
        # silently truncated to the available features.
        relevant_sum = data[np.arange(n_select)].sum(axis=0)
        labels = np.where(relevant_sum > delta, 1.0, 2.0)

        return data.transpose(), labels

    def test_get_fs_methods(self):
        """get_fs_methods returns exactly the predefined list of methods."""
        self.assertEqual(fizzy.get_fs_methods(),
                         information_theoretic_methods)

    def test_parse_biom(self):
        """parse_biom yields the expected matrix, OTU ids and sample ids."""
        correct_biom = (
            np.array([[1., 0., 1., 6.],
                      [0., 5., 1., 6.],
                      [0., 7., 1., 6.]]),
            [u'OTU0', u'OTU1', u'OTU2', u'OTU3'],
            [u'ID0', u'ID1', u'ID2'],
        )
        parsed_biom = fizzy.parse_biom(self.biom_file_handle)

        # zip() stops at the shorter input, so check the lengths explicitly
        # to catch a result with missing or extra elements.
        self.assertEqual(len(parsed_biom), len(correct_biom))

        pairs = zip(parsed_biom, correct_biom)
        for position, (parsed_val, correct_val) in enumerate(pairs):
            if position == 0:
                # The first element is the abundance matrix; compare it as
                # nested lists because == on arrays is element-wise.
                self.assertEqual(parsed_val.tolist(), correct_val.tolist())
            else:
                self.assertEqual(parsed_val, correct_val)

    def test_parse_map_file(self):
        """parse_map_file turns the Class column into numeric labels."""
        correct_map = [0., 1., 1.]
        parsed_map = fizzy.parse_map_file(self.map_file_handle, "Class",
                                          [u'ID0', u'ID1', u'ID2'])
        self.assertEqual(parsed_map.tolist(), correct_map)

    def test_parse_map_invalid_column_name(self):
        """parse_map_file raises ValueError for a column not in the map."""
        self.assertRaises(ValueError, fizzy.parse_map_file,
                          self.map_file_handle, "Unknown",
                          [u'ID0', u'ID1', u'ID2'])

    def test_parse_map_invalid_sample_name(self):
        """parse_map_file raises ValueError for a sample id not in the map."""
        self.assertRaises(ValueError, fizzy.parse_map_file,
                          self.map_file_handle, "Class",
                          [u'ID99', u'ID1', u'ID2'])

    def test_parse_map_invalid_equal_num_sample_classes(self):
        """parse_map_file raises ValueError when given too few sample ids."""
        self.assertRaises(ValueError, fizzy.parse_map_file,
                          self.map_file_handle, "Class",
                          [u'ID0', u'ID1'])

    def test_run_pyfeast(self):
        """MIM selects only the features that actually drive the labels."""
        n_select = 5
        data, labels = self.uniform_data(1000, 50, n_select)
        # One name per feature (column of data), not one per observation.
        variable_names = list(range(data.shape[1]))
        selected_features = fizzy.run_pyfeast(data, labels, variable_names,
                                              "MIM", n_select)
        informative = range(n_select)
        for k in selected_features:
            self.assertIn(k, informative)

    def test_run_pyfeast_invalid(self):
        """run_pyfeast raises AttributeError for an unknown algorithm name."""
        self.assertRaises(AttributeError, fizzy.run_pyfeast, None, None,
                          None, "An Invalid Name", None)

    def test_run_feature_selection(self):
        """run_feature_selection picks OTU0 and OTU1 on the toy fixtures."""
        column_name = "Class"
        selected_features = fizzy.run_feature_selection(
            self.biom_file_handle, self.map_file_handle, column_name,
            n_select=2)
        self.assertEqual(selected_features, ['OTU0', "OTU1"])

    def test_zero_selection(self):
        """run_pyfeast raises ValueError when zero features are requested."""
        data, labels = self.uniform_data(500, 50, 10)
        # One name per feature (column of data), not one per observation.
        variable_names = list(range(data.shape[1]))
        self.assertRaises(ValueError, fizzy.run_pyfeast, data, labels,
                          variable_names, "MIM", 0)

    def test_negative_selection(self):
        """run_pyfeast raises ValueError for a negative feature count."""
        data, labels = self.uniform_data(500, 50, 10)
        # One name per feature (column of data), not one per observation.
        variable_names = list(range(data.shape[1]))
        self.assertRaises(ValueError, fizzy.run_pyfeast, data, labels,
                          variable_names, "MIM", -1)

    def test_too_large_selection(self):
        """run_pyfeast raises ValueError when asked for too many features."""
        data, labels = self.uniform_data(500, 50, 10)
        # One name per feature (column of data), not one per observation.
        variable_names = list(range(data.shape[1]))
        self.assertRaises(ValueError, fizzy.run_pyfeast, data, labels,
                          variable_names, "MIM", 10000)
# Names that fizzy.get_fs_methods() is compared against in the tests.
information_theoretic_methods = [
    'CIFE',
    'CMIM',
    'CondMI',
    'Condred',
    'ICAP',
    'JMI',
    'MIM',
    'MIFS',
    'mRMR',
]

# Tab-separated mapping file fixture: a header row followed by three samples
# whose Class values are S1, S2 and S2.
map_file_string = (
    "#SampleID\tClass\n"
    "ID0\tS1\n"
    "ID1\tS2\n"
    "ID2\tS2\n"
)

# Sparse BIOM (JSON) fixture with four OTU rows and three sample columns.
# The pieces below concatenate to a single line of JSON.
biom_file_string = (
    '{"id": "None",'
    '"format": "Biological Observation Matrix 1.0.0",'
    '"format_url": "http://biom-format.org",'
    '"type": "OTU table",'
    '"generated_by": "BIOM-Format 1.1.2",'
    '"date":"2013-03-27T13:59:38.949014",'
    '"matrix_type": "sparse",'
    '"matrix_element_type":"float",'
    '"shape": [4, 3], '
    '"data": [[0,0,1.0],[1,1,5.0],[1,2,7.0],'
    '[2,0,1.0],[2,1,1.0],[2,2,1.0],'
    '[3,0,6.0],[3,1,6.0],[3,2,6.0]],'
    '"rows":[{"id": "OTU0", "metadata": null},'
    '{"id": "OTU1", "metadata": null},'
    '{"id": "OTU2", "metadata": null},'
    '{"id": "OTU3", "metadata": null}],'
    '"columns": [{"id": "ID0", "metadata": null},'
    '{"id": "ID1", "metadata": null},'
    '{"id": "ID2", "metadata": null}]}'
)
# Run the unittest runner only when this file is executed as a script.
if __name__ == "__main__":
    main()