-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathtest_datautils.py
124 lines (95 loc) · 3.7 KB
/
test_datautils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# pylint: disable=import-error, wrong-import-position, wrong-import-order, invalid-name
"""Data utils test suite"""
from common import *
from pytest import approx
import random
from trustyai.utils import DataUtils
from trustyai.model import FeatureFactory
from java.util import Random
# Shared java.util.Random instance, constructed without an explicit seed;
# it is passed to the DataUtils calls below that take a random source.
jrandom = Random()
def test_get_mean():
    """DataUtils.getMean of [2, 4, 3, 5, 1] should be approximately 3."""
    values = [2, 4, 3, 5, 1]
    mean = DataUtils.getMean(values)
    assert mean == approx(3, 1e-6)
def test_get_std_dev():
    """DataUtils.getStdDev of [2, 4, 3, 5, 1] should be approximately 1.41.

    The second argument (3) is presumably the precomputed mean of the
    sample -- confirm against the DataUtils API.
    """
    sample = [2, 4, 3, 5, 1]
    std_dev = DataUtils.getStdDev(sample, 3)
    assert std_dev == approx(1.41, 1e-2)
def test_gaussian_kernel():
    """DataUtils.gaussianKernel(x, 0, 1) should match known values.

    Each case pairs an input point with the expected kernel value, compared
    with a relative tolerance of 1e-2.
    """
    cases = (
        (0.0, 0.398),
        (0.218, 0.389),
    )
    for point, expected in cases:
        kernel_value = DataUtils.gaussianKernel(point, 0, 1)
        assert kernel_value == approx(expected, 1e-2)
def test_euclidean_distance():
    """DataUtils.euclideanDistance between [1, 1] and [2, 3] should be ~2.236.

    The expected value is sqrt((2 - 1)**2 + (3 - 1)**2) = sqrt(5).
    """
    x = [1, 1]
    y = [2, 3]
    distance = DataUtils.euclideanDistance(x, y)
    # Wrap the *expected* constant in approx (as the other tests here do) so
    # the relative tolerance is derived from the reference value rather than
    # from the value under test.
    assert distance == approx(2.236, 1e-3)
def test_hamming_distance_double():
    """DataUtils.hammingDistance of two numeric lists differing in one slot.

    [2, 1] and [2, 3] differ only at index 1, so the expected distance is 1.
    """
    left = [2, 1]
    right = [2, 3]
    hamming = DataUtils.hammingDistance(left, right)
    assert hamming == approx(1, 1e-1)
def test_hamming_distance_string():
    """DataUtils.hammingDistance of two strings differing in one character.

    "test1" and "test2" differ only in their last character, so the expected
    distance is 1.
    """
    left = "test1"
    right = "test2"
    hamming = DataUtils.hammingDistance(left, right)
    assert hamming == approx(1, 1e-1)
def test_doubles_to_features():
    """DataUtils.doublesToFeatures should yield one populated feature per input.

    Ten alternating 1/0 inputs are converted; the result must contain ten
    non-null features, each with a non-null name and value.
    """
    # 1 at even indices, 0 at odd indices: [1, 0, 1, 0, ...]
    alternating = [(index + 1) % 2 for index in range(10)]
    converted = DataUtils.doublesToFeatures(alternating)
    assert converted is not None
    assert len(converted) == 10
    for feature in converted:
        assert feature is not None
        assert feature.getName() is not None
        assert feature.getValue() is not None
def test_exponential_smoothing_kernel():
    """DataUtils.exponentialSmoothingKernel(0.218, 2) should be ~0.994."""
    point = 0.218
    kernel_value = DataUtils.exponentialSmoothingKernel(point, 2)
    assert kernel_value == approx(0.994, 1e-3)
# def test_perturb_features_empty():
# """Test perturb empty features"""
# features = []
# perturbationContext = PerturbationContext(jrandom, 0)
# newFeatures = DataUtils.perturbFeatures(features, perturbationContext)
# assert newFeatures is not None
# assert len(features) == newFeatures.size()
def test_random_distribution_generation():
    """DataUtils.generateRandomDataDistribution should return non-null parts.

    The generated distribution, its feature-distribution collection, and
    every element of that collection must all be non-null.
    """
    distribution = DataUtils.generateRandomDataDistribution(10, 10, jrandom)
    assert distribution is not None
    assert distribution.asFeatureDistributions() is not None
    for feature_distribution in distribution.asFeatureDistributions():
        assert feature_distribution is not None
def test_linearized_numeric_features():
    """Linearizing a single numerical feature should keep the feature count."""
    numeric_features = [FeatureFactory.newNumericalFeature("f-num", 1.0)]
    linearized = DataUtils.getLinearizedFeatures(numeric_features)
    assert linearized.size() == len(numeric_features)
def test_sample_with_replacement():
    """DataUtils.sampleWithReplacement should return the requested number of
    samples, all drawn from the source values.

    Three scenarios are covered:
      * an empty source yields an empty (but non-null) result;
      * a sample smaller than the source has the requested size;
      * a sample larger than the source has the requested size.
    In the two non-empty scenarios one randomly chosen sample is checked for
    membership in the source values.
    """
    # Empty source: nothing to draw from, so the result must be empty.
    emptyValues = []
    emptySamples = DataUtils.sampleWithReplacement(emptyValues, 1, jrandom)
    assert emptySamples is not None
    assert emptySamples.size() == 0

    values = DataUtils.generateData(0, 1, 100, jrandom)

    # Sample smaller than the source.
    sampleSize = 10
    samples = DataUtils.sampleWithReplacement(values, sampleSize, jrandom)
    assert samples is not None
    assert samples.size() == sampleSize
    assert samples[random.randint(0, sampleSize - 1)] in values

    # Sample larger than the source.
    largerSampleSize = 300
    largerSamples = DataUtils.sampleWithReplacement(values, largerSampleSize, jrandom)
    assert largerSamples is not None
    assert largerSampleSize == largerSamples.size()
    # Membership must be checked against the source ``values``; checking an
    # element of ``largerSamples`` against ``largerSamples`` itself is
    # trivially true and verifies nothing.
    assert largerSamples[random.randint(0, largerSampleSize - 1)] in values