Skip to content

Commit

Permalink
Remove caches due to excessive memory use (Trusted-AI#317)
Browse files Browse the repository at this point in the history
* Remove caches due to excessive memory use

* Validate pandas objects passed into scan
  • Loading branch information
Adebayo-Oshingbesan committed Jul 5, 2022
1 parent db843a1 commit 007b403
Show file tree
Hide file tree
Showing 7 changed files with 17 additions and 124 deletions.
8 changes: 8 additions & 0 deletions aif360/detectors/mdss/MDSS.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,14 @@ def scan(self, coordinates: pd.DataFrame, expectations: pd.Series, outcomes: pd.
"""
np.random.seed(seed)

# Reset indexes
coordinates = coordinates.reset_index(drop = True)
expectations = expectations.reset_index(drop = True)
outcomes = outcomes.reset_index(drop = True)

assert len(coordinates) == len(expectations) == len(outcomes), \
f'Lengths of coordinates, expectations, and outcomes should be equal.'

# Check that the appropriate scoring function is used

if isinstance(self.scoring_function, BerkJones):
Expand Down
25 changes: 0 additions & 25 deletions aif360/detectors/mdss/ScoringFunctions/BerkJones.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,6 @@ def score(self, observed_sum: float, expectations: np.array, penalty: float, q:
"""
alpha = self.alpha

key = tuple([observed_sum, len(expectations), penalty, q, alpha])
ans = self.score_cache.get(key)
if ans is not None:
self.cache_counter['score'] += 1
return ans

if q < alpha:
q = alpha

Expand All @@ -57,7 +51,6 @@ def score(self, observed_sum: float, expectations: np.array, penalty: float, q:
)
if q == 1:
ans = observed_sum * np.log(q / alpha) - penalty
self.score_cache[key] = ans
return ans

a = observed_sum * np.log(q / alpha)
Expand All @@ -68,7 +61,6 @@ def score(self, observed_sum: float, expectations: np.array, penalty: float, q:
- penalty
)

self.score_cache[key] = ans
return ans

def qmle(self, observed_sum: float, expectations: np.array):
Expand All @@ -81,24 +73,15 @@ def qmle(self, observed_sum: float, expectations: np.array):
:return: q MLE
"""
alpha = self.alpha

key = tuple([observed_sum, len(expectations), alpha])
ans = self.qmle_cache.get(key)
if ans is not None:
self.cache_counter['qmle'] += 1
return ans

if len(expectations) == 0:
self.qmle_cache[key] = 0
return 0
else:
q = observed_sum / len(expectations)

if (q < alpha):
self.qmle_cache[key] = alpha
return alpha

self.qmle_cache[key] = q
return q

def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float):
Expand All @@ -110,13 +93,6 @@ def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float
:param penalty: penalty coefficient
"""
alpha = self.alpha

key = tuple([observed_sum, len(expectations), penalty, alpha])
ans = self.compute_qs_cache.get(key)
if ans is not None:
self.cache_counter['qs'] += 1
return ans

q_mle = self.qmle(observed_sum, expectations)

if self.score(observed_sum, expectations, penalty, q_mle) > 0:
Expand All @@ -134,5 +110,4 @@ def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float
q_max = 0

ans = [exist, q_mle, q_min, q_max]
self.compute_qs_cache[key] = ans
return ans
32 changes: 1 addition & 31 deletions aif360/detectors/mdss/ScoringFunctions/Bernoulli.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,7 @@ def score(self, observed_sum: float, expectations: np.array, penalty: float, q:
% (observed_sum, len(expectations), penalty, q)
)

key = tuple([observed_sum, expectations.tostring(), penalty, q])
ans = self.score_cache.get(key)
if ans is not None:
self.cache_counter['score'] += 1
return ans

ans = observed_sum * np.log(q) - np.log(1 - expectations + q * expectations).sum() - penalty
self.score_cache[key] = ans
return ans

def qmle(self, observed_sum: float, expectations: np.array):
Expand All @@ -50,16 +43,8 @@ def qmle(self, observed_sum: float, expectations: np.array):
:param observed_sum: sum of observed binary outcomes for all i
:param expectations: predicted outcomes for each data element i
"""
direction = self.direction

key = tuple([observed_sum, expectations.tostring()])
ans = self.qmle_cache.get(key)
if ans is not None:
self.cache_counter['qmle'] += 1
return ans

direction = self.direction
ans = optim.bisection_q_mle(self, observed_sum, expectations, direction=direction)
self.qmle_cache[key] = ans
return ans

def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float):
Expand All @@ -71,13 +56,6 @@ def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float
:param penalty: penalty coefficient
"""
direction = self.direction

key = tuple([observed_sum, expectations.tostring(), penalty])
ans = self.compute_qs_cache.get(key)
if ans is not None:
self.cache_counter['qs'] += 1
return ans

q_mle = self.qmle(observed_sum, expectations)

if self.score(observed_sum, expectations, penalty, q_mle) > 0:
Expand All @@ -95,7 +73,6 @@ def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float
exist, q_min, q_max = optim.direction_assertions(direction, q_min, q_max)

ans = [exist, q_mle, q_min, q_max]
self.compute_qs_cache[key] = ans
return ans

def q_dscore(self, observed_sum:float, expectations:np.array, q:float):
Expand All @@ -110,12 +87,5 @@ def q_dscore(self, observed_sum:float, expectations:np.array, q:float):
:param q: current value of q
:return: q dscore/dq
"""
key = tuple([observed_sum, expectations.tostring(), q])
ans = self.qdscore_cache.get(key)
if ans is not None:
self.cache_counter['qdscore'] += 1
return ans

ans = observed_sum - (q * expectations / (1 - expectations + q * expectations)).sum()
self.qdscore_cache[key] = ans
return ans
24 changes: 1 addition & 23 deletions aif360/detectors/mdss/ScoringFunctions/Gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,6 @@ def score(
:return: bias score for the current value of q
"""

key = tuple([observed_sum, expectations.sum(), penalty, q])
ans = self.score_cache.get(key)
if ans is not None:
self.cache_counter["score"] += 1
return ans

assumed_var = self.var
expected_sum = expectations.sum()
penalty /= self.var
Expand All @@ -56,20 +50,13 @@ def score(
ans = 0

ans -= penalty
self.score_cache[key] = ans

return ans

def qmle(self, observed_sum: float, expectations: np.array):
"""
Computes the q which maximizes score (q_mle).
"""
key = tuple([observed_sum, expectations.sum()])
ans = self.qmle_cache.get(key)
if ans is not None:
self.cache_counter["qmle"] += 1
return ans

expected_sum = expectations.sum()

# Deals with case where observed_sum = expected_sum = 0
Expand All @@ -78,8 +65,7 @@ def qmle(self, observed_sum: float, expectations: np.array):
else:
ans = observed_sum / expected_sum

assert np.isnan(ans) == False, f'{expected_sum}, {observed_sum}, {ans}'
self.qmle_cache[key] = ans
assert np.isnan(ans) == False, f'{expected_sum}, {observed_sum}, {ans}'
return ans

def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float):
Expand All @@ -94,13 +80,6 @@ def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float
direction = self.direction

q_mle = self.qmle(observed_sum, expectations)

key = tuple([observed_sum, expectations.sum(), penalty])
ans = self.compute_qs_cache.get(key)
if ans is not None:
self.cache_counter["qs"] += 1
return ans

q_mle_score = self.score(observed_sum, expectations, penalty, q_mle)

if q_mle_score > 0:
Expand All @@ -118,5 +97,4 @@ def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float
exist, q_min, q_max = optim.direction_assertions(direction, q_min, q_max)

ans = [exist, q_mle, q_min, q_max]
self.compute_qs_cache[key] = ans
return ans
29 changes: 0 additions & 29 deletions aif360/detectors/mdss/ScoringFunctions/Poisson.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,30 +32,16 @@ def score(self, observed_sum: float, expectations: np.array, penalty: float, q:
"observed_sum=%.2f, expectations of length=%d, penalty=%.2f, q=%.2f"
% (observed_sum, len(expectations), penalty, q)
)
key = tuple([observed_sum, expectations.sum(), penalty, q])
ans = self.score_cache.get(key)
if ans is not None:
self.cache_counter['score'] += 1
return ans

ans = observed_sum * np.log(q) + (expectations - q * expectations).sum() - penalty
self.score_cache[key] = ans
return ans

def qmle(self, observed_sum: float, expectations: np.array):
"""
Computes the q which maximizes score (q_mle).
"""
direction = self.direction

key = tuple([observed_sum, expectations.sum()])
ans = self.qmle_cache.get(key)
if ans is not None:
self.cache_counter['qmle'] += 1
return ans

ans = optim.bisection_q_mle(self, observed_sum, expectations, direction=direction)
self.qmle_cache[key] = ans
return ans

def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float):
Expand All @@ -68,15 +54,8 @@ def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float
"""

direction = self.direction

q_mle = self.qmle(observed_sum, expectations)

key = tuple([observed_sum, expectations.tostring(), penalty])
ans = self.compute_qs_cache.get(key)
if ans is not None:
self.cache_counter['qs'] += 1
return ans

if self.score(observed_sum, expectations, penalty, q_mle) > 0:
exist = 1
q_min = optim.bisection_q_min(self, observed_sum, expectations, penalty, q_mle)
Expand All @@ -92,7 +71,6 @@ def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float
exist, q_min, q_max = optim.direction_assertions(direction, q_min, q_max)

ans = [exist, q_mle, q_min, q_max]
self.compute_qs_cache[key] = ans
return ans

def q_dscore(self, observed_sum, expectations, q):
Expand All @@ -107,12 +85,5 @@ def q_dscore(self, observed_sum, expectations, q):
:param q: current value of q
:return: q dscore/dq
"""
key = tuple([observed_sum, expectations.sum(), q])
ans = self.qdscore_cache.get(key)
if ans is not None:
self.cache_counter['qdscore'] += 1
return ans

ans = observed_sum - (q * expectations).sum()
self.qdscore_cache[key] = ans
return ans
9 changes: 0 additions & 9 deletions aif360/detectors/mdss/ScoringFunctions/ScoringFunction.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,11 @@ def __init__(self, **kwargs):
Journal of Computational and Graphical Statistics, 25(2), 382-404.
"""
self.kwargs = kwargs
self._reset()
self.direction = kwargs.get('direction')

directions = ['positive', 'negative']
assert self.direction in directions, f"Expected one of {directions}, got {self.direction}"

def _reset(self):
self.score_cache = {}
self.dscore_cache = {}
self.qdscore_cache = {}
self.qmle_cache = {}
self.compute_qs_cache = {}
self.cache_counter = {"score": 0, "dscore": 0, "qdscore": 0, "qmle": 0, "qs": 0}

def score(
self, observed_sum: float, expectations: np.array, penalty: float, q: float
):
Expand Down
14 changes: 7 additions & 7 deletions examples/demo_mdss_detector.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@
{
"data": {
"text/plain": [
"'Our detected privileged group has a size of 147, we observe 0.5374149659863946 as the average risk of recidivism, but our model predicts 0.3827815971689547'"
"'Our detected privileged group has a size of 147, we observe 0.5374149659863946 as the average risk of recidivism, but our model predicts 0.38278159716895366'"
]
},
"execution_count": 12,
Expand Down Expand Up @@ -379,7 +379,7 @@
{
"data": {
"text/plain": [
"'Our detected privileged group has a size of 732, we observe 0.3770491803278688 as the average risk of recidivism, but our model predicts 0.44470388217799317'"
"'Our detected privileged group has a size of 732, we observe 0.3770491803278688 as the average risk of recidivism, but our model predicts 0.4447038821779929'"
]
},
"execution_count": 14,
Expand Down Expand Up @@ -784,7 +784,7 @@
{
"data": {
"text/plain": [
"'Our detected privileged group has a size of 321, we observe 7844.840295856697 as the mean insurance costs, but our model predicts 5420.49326277455'"
"'Our detected privileged group has a size of 321, we observe 7844.8402958566985 as the mean insurance costs, but our model predicts 5420.493262774548'"
]
},
"execution_count": 28,
Expand All @@ -809,7 +809,7 @@
{
"data": {
"text/plain": [
"'Our detected privileged group has a size of 115, we observe 21148.37389617392 as the mean insurance costs, but our model predicts 29694.035319112852'"
"'Our detected privileged group has a size of 115, we observe 21148.373896173915 as the mean insurance costs, but our model predicts 29694.035319112845'"
]
},
"execution_count": 29,
Expand Down Expand Up @@ -1152,7 +1152,7 @@
{
"data": {
"text/plain": [
"'Our detected privileged group has a size of 31607, we observe 5.155584909121915 as the mean temperature, but our model predicts 11.932678437519867'"
"'Our detected privileged group has a size of 31607, we observe 5.155584909121934 as the mean temperature, but our model predicts 11.93267843751985'"
]
},
"execution_count": 42,
Expand All @@ -1176,7 +1176,7 @@
{
"data": {
"text/plain": [
"'Our detected unprivileged group has a size of 55642, we observe 16.773802762911167 as the mean temperature, but our model predicts 11.932678437519867'"
"'Our detected unprivileged group has a size of 55642, we observe 16.773802762911078 as the mean temperature, but our model predicts 11.93267843751985'"
]
},
"execution_count": 43,
Expand Down Expand Up @@ -1533,7 +1533,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
"version": "3.8.12"
}
},
"nbformat": 4,
Expand Down

0 comments on commit 007b403

Please sign in to comment.