-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_evaluation.py
225 lines (185 loc) · 7.28 KB
/
test_evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
"""Unit tests for the evaluation module."""
from unittest.mock import patch
import numpy as np
import pytest
from clusx.evaluation import ClusterEvaluator
@pytest.fixture
def embeddings_fx():
"""Return sample embeddings for testing."""
return np.array(
[
[1.0, 0.0, 0.0], # Cluster 0
[0.9, 0.1, 0.0], # Cluster 0
[0.0, 1.0, 0.0], # Cluster 1
[0.1, 0.9, 0.0], # Cluster 1
[0.0, 0.0, 1.0], # Cluster 2 (singleton)
]
)
@pytest.fixture
def texts_fx():
"""Return sample texts for testing."""
return [
"Text in cluster 0",
"Another text in cluster 0",
"Text in cluster 1",
"Another text in cluster 1",
"Singleton text in cluster 2",
]
@pytest.fixture
def cluster_assignments_fx():
"""Return sample cluster assignments for testing."""
return [0, 0, 1, 1, 2]
@pytest.fixture
def evaluator_fx(texts_fx, embeddings_fx, cluster_assignments_fx):
"""Return a ClusterEvaluator instance with sample data."""
return ClusterEvaluator(
texts=texts_fx,
embeddings=embeddings_fx,
cluster_assignments=cluster_assignments_fx,
model_name="TestModel",
alpha=1.0,
sigma=0.0,
kappa=0.0,
random_state=42,
)
def test_silhouette_score_with_valid_clusters(evaluator_fx):
"""Test silhouette score calculation with valid clusters."""
with patch(
"clusx.evaluation.silhouette_score", return_value=0.75
) as silhouette_mock:
score = evaluator_fx.calculate_silhouette_score()
assert score == 0.75
# Verify silhouette_score was called with filtered data
# (excluding singleton cluster)
args, kwargs = silhouette_mock.call_args
filtered_embeddings, filtered_assignments = args
# Should only include clusters 0 and 1 (4 samples total)
assert len(filtered_embeddings) == 4
assert len(filtered_assignments) == 4
assert set(filtered_assignments) == {0, 1}
assert kwargs["metric"] == "cosine"
def test_silhouette_score_with_only_singletons():
"""Test silhouette score calculation when all clusters are singletons."""
texts = ["Text 1", "Text 2", "Text 3"]
embeddings = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
cluster_assignments = [0, 1, 2] # Each text in its own cluster
evaluator = ClusterEvaluator(
texts=texts,
embeddings=embeddings,
cluster_assignments=cluster_assignments,
model_name="SingletonModel",
alpha=1.0,
sigma=0.0,
kappa=0.0,
)
# Should return 0.0 as there are no valid clusters with ≥2 samples
with patch("clusx.evaluation.logger.warning") as warning_mock:
score = evaluator.calculate_silhouette_score()
assert score == 0.0
warning_mock.assert_called_once()
assert "fewer than 2 valid clusters found" in warning_mock.call_args[0][0]
def test_silhouette_score_with_fewer_than_two_clusters():
"""Test silhouette score calculation with fewer than two clusters."""
texts = ["Text 1", "Text 2", "Text 3"]
embeddings = np.array([[1.0, 0.0, 0.0], [0.9, 0.1, 0.0], [0.8, 0.2, 0.0]])
cluster_assignments = [0, 0, 0] # All texts in the same cluster
evaluator = ClusterEvaluator(
texts=texts,
embeddings=embeddings,
cluster_assignments=cluster_assignments,
model_name="SingleClusterModel",
alpha=1.0,
sigma=0.0,
kappa=0.0,
)
# Should return 0.0 as there's only one cluster
with patch("clusx.evaluation.logger.warning") as warning_mock:
score = evaluator.calculate_silhouette_score()
assert score == 0.0
warning_mock.assert_called_once()
assert "fewer than 2 valid clusters found" in warning_mock.call_args[0][0]
def test_silhouette_score_with_exception():
"""Test silhouette score calculation when an exception occurs."""
texts = ["Text 1", "Text 2", "Text 3", "Text 4"]
embeddings = np.array([[1.0, 0.0], [0.9, 0.1], [0.1, 0.9], [0.0, 1.0]])
cluster_assignments = [0, 0, 1, 1]
evaluator = ClusterEvaluator(
texts=texts,
embeddings=embeddings,
cluster_assignments=cluster_assignments,
model_name="ExceptionModel",
alpha=1.0,
sigma=0.0,
kappa=0.0,
)
with patch(
"clusx.evaluation.silhouette_score", side_effect=ValueError("Test error")
):
with patch("clusx.evaluation.logger.error") as error_mock:
score = evaluator.calculate_silhouette_score()
assert score == 0.0
error_mock.assert_called_once()
assert "Error calculating silhouette score" in error_mock.call_args[0][0]
def test_silhouette_score_with_mixed_valid_and_singleton():
"""Test silhouette score with a mix of valid clusters and singletons."""
texts = ["Text 1", "Text 2", "Text 3", "Text 4", "Text 5", "Text 6"]
embeddings = np.array(
[
[1.0, 0.0, 0.0], # Cluster 0
[0.9, 0.1, 0.0], # Cluster 0
[0.0, 1.0, 0.0], # Cluster 1
[0.1, 0.9, 0.0], # Cluster 1
[0.0, 0.0, 1.0], # Cluster 2 (singleton)
[0.5, 0.5, 0.0], # Cluster 3 (singleton)
]
)
cluster_assignments = [0, 0, 1, 1, 2, 3]
evaluator = ClusterEvaluator(
texts=texts,
embeddings=embeddings,
cluster_assignments=cluster_assignments,
model_name="MixedModel",
alpha=1.0,
sigma=0.0,
kappa=0.0,
)
with patch(
"clusx.evaluation.silhouette_score", return_value=0.8
) as silhouette_mock:
score = evaluator.calculate_silhouette_score()
# Verify the score matches our mock
assert score == 0.8
# Verify silhouette_score was called with filtered data
# (excluding singleton clusters)
args, _ = silhouette_mock.call_args
filtered_embeddings, filtered_assignments = args
# Should only include clusters 0 and 1 (4 samples total)
assert len(filtered_embeddings) == 4
assert len(filtered_assignments) == 4
assert set(filtered_assignments) == {0, 1}
def test_silhouette_score_logging(evaluator_fx):
"""Test that silhouette score calculation logs appropriate information."""
with patch("clusx.evaluation.silhouette_score", return_value=0.75):
with patch("clusx.evaluation.logger.info") as info_mock:
evaluator_fx.calculate_silhouette_score()
info_mock.assert_called_once()
expected = (
"Silhouette score for %s: %.4f (using %d/%d samples in %d/%d clusters)"
)
actual, *args = info_mock.call_args[0]
assert actual == expected
# Check the arguments passed to the logger
(
model_name,
score,
valid_samples,
total_samples,
valid_clusters,
total_clusters,
) = args
assert model_name == "TestModel"
assert score == 0.75
assert valid_samples == 4 # 4 samples in valid clusters
assert total_samples == 5 # 5 total samples
assert valid_clusters == 2 # 2 valid clusters
assert total_clusters == 3 # 3 total clusters