-
-
Notifications
You must be signed in to change notification settings - Fork 75
/
metrics.py
121 lines (100 loc) · 4.07 KB
/
metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# Copyright 2022 OpenMined.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Dataclasses with Utility Analysis result metrics."""
import pipeline_dp
from dataclasses import dataclass
from typing import List, Optional
import math
@dataclass
class CountMetrics:
    """Per-partition utility analysis results for the count aggregation.

    Attributes:
        count: the actual count of contributions per partition.
        per_partition_error: the error introduced by per-partition
          contribution bounding.
        expected_cross_partition_error: the expected error introduced by
          cross-partition contribution bounding.
        std_cross_partition_error: the standard deviation of the error
          introduced by cross-partition contribution bounding.
        std_noise: the standard deviation of the noise.
        noise_kind: the kind of noise that is used.
    """
    # Field order defines the positional order of the generated __init__;
    # keep it stable for callers that construct instances positionally.
    count: int
    per_partition_error: int
    expected_cross_partition_error: float
    std_cross_partition_error: float
    std_noise: float
    noise_kind: pipeline_dp.NoiseKind
@dataclass
class SumMetrics:
    """Per-partition utility analysis results for the sum aggregation.

    Attributes:
        sum: the actual sum of contributions per partition.
        per_partition_error_min: the error introduced by clipping
          contributions at the minimum bound.
        per_partition_error_max: the error introduced by clipping
          contributions at the maximum bound.
        expected_cross_partition_error: the expected error introduced by
          cross-partition contribution bounding.
        std_cross_partition_error: the standard deviation of the error
          introduced by cross-partition contribution bounding.
        std_noise: the standard deviation of the noise.
        noise_kind: the kind of noise that is used.
    """
    # Field order defines the positional order of the generated __init__;
    # keep it stable for callers that construct instances positionally.
    sum: float
    per_partition_error_min: float
    per_partition_error_max: float
    expected_cross_partition_error: float
    std_cross_partition_error: float
    std_noise: float
    noise_kind: pipeline_dp.NoiseKind
@dataclass
class PrivacyIdCountMetrics:
    """Per-partition utility analysis results for the privacy ID count.

    Attributes:
        privacy_id_count: the actual number of privacy ids in a partition.
        expected_cross_partition_error: the estimated error across
          partitions.
        std_cross_partition_error: the standard deviation of the
          contribution bounding error.
        std_noise: the standard deviation of the noise for the DP count.
        noise_kind: the kind of noise that is used.
    """
    # Field order defines the positional order of the generated __init__;
    # keep it stable for callers that construct instances positionally.
    privacy_id_count: int
    expected_cross_partition_error: float
    std_cross_partition_error: float
    std_noise: float
    noise_kind: pipeline_dp.NoiseKind
@dataclass
class AggregateErrorMetrics:
    """Aggregate error metrics produced by utility analysis.

    Every attribute of this dataclass is an average across partitions.
    """
    abs_error_expected: float
    abs_error_variance: float
    abs_error_quantiles: List[float]
    rel_error_expected: float
    rel_error_variance: float
    rel_error_quantiles: List[float]

    # RMSE = sqrt(bias**2 + variance), more details in
    # https://en.wikipedia.org/wiki/Bias-variance_tradeoff.
    def absolute_rmse(self) -> float:
        """Returns the RMSE computed from the absolute error fields.

        The expected absolute error is used as the bias term and the
        absolute error variance as the variance term.
        """
        bias_squared = self.abs_error_expected**2
        return math.sqrt(bias_squared + self.abs_error_variance)

    def relative_rmse(self) -> float:
        """Returns the RMSE computed from the relative error fields.

        The expected relative error is used as the bias term and the
        relative error variance as the variance term.
        """
        bias_squared = self.rel_error_expected**2
        return math.sqrt(bias_squared + self.rel_error_variance)
@dataclass
class PartitionSelectionMetrics:
    """Aggregate metrics describing the partition selection step."""
    # Field order defines the positional order of the generated __init__.
    num_partitions: float
    dropped_partitions_expected: float
    dropped_partitions_variance: float
@dataclass
class AggregateMetrics:
    """Top-level container for aggregate utility analysis metrics.

    Attributes:
        aggregate_error_metrics: the aggregate error metrics.
        partition_selection_metrics: the aggregate partition selection
          metrics; None by default.
    """
    aggregate_error_metrics: AggregateErrorMetrics
    partition_selection_metrics: Optional[PartitionSelectionMetrics] = None