Improves code readability using PEP8

nicolasmota committed Jan 30, 2018
1 parent 42d2733 commit 61831f1b169ca0eb464106de21688cc0a6529a46
Showing with 44 additions and 23 deletions.
  1. +19 −11 dsio/anomaly_detectors.py
  2. +2 −0 dsio/exceptions.py
  3. +10 −8 dsio/generate_data.py
  4. +13 −4 dsio/update_formulae.py
@@ -7,7 +7,11 @@
from scipy.stats import norm
from collections import namedtuple
from dsio.update_formulae import update_effective_sample_size
-from dsio.update_formulae import convex_combination, rolling_window_update, decision_rule
+from dsio.update_formulae import (
+convex_combination,
+rolling_window_update,
+decision_rule
+)

from sklearn.base import BaseEstimator

@@ -16,7 +20,10 @@


class AnomalyMixin(object):
"""Mixin class for all anomaly detectors, compatible with BaseEstimator from scikit-learn."""
"""
Mixin class for all anomaly detectors,
compatible with BaseEstimator from scikit-learn.
"""
_estimator_type = "anomaly"

def fit_score(self, X):
@@ -57,14 +64,15 @@ def compute_confusion_matrix(detector_output, index_anomalies):
false_anomalies = index_detected.difference(index_true)
return {
'TPR': len(true_anomalies)/(1.0*len(index_anomalies)),
-'FPR': len(false_anomalies)/(1.0*len(detector_output))}
+'FPR': len(false_anomalies)/(1.0*len(detector_output))
+}


class Gaussian1D(BaseEstimator, AnomalyMixin):
def __init__(
-self,
-ff=1.0,
-threshold=THRESHOLD
+self,
+ff=1.0,
+threshold=THRESHOLD
):
self.ff = ff
self.threshold = threshold
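
The TPR/FPR formulae in the compute_confusion_matrix hunk above can be checked with a small standalone sketch. The toy index sets and the count of scored points below are made up for illustration; only the two rate formulae are taken from the diff.

# Illustrative sketch: exercising the TPR/FPR arithmetic shown above
# on a hypothetical detector run over 10 scored points.
index_anomalies = {2, 7}        # ground-truth anomaly positions
index_detected = {2, 5}         # positions the detector flagged
true_anomalies = index_detected.intersection(index_anomalies)
false_anomalies = index_detected.difference(index_anomalies)
n_scored = 10                   # stands in for len(detector_output)
tpr = len(true_anomalies) / (1.0 * len(index_anomalies))   # 0.5
fpr = len(false_anomalies) / (1.0 * n_scored)               # 0.1
print(tpr, fpr)
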
@@ -108,10 +116,10 @@ def flag_anomaly(self, x):
class Percentile1D(BaseEstimator, AnomalyMixin):

def __init__(
-self,
-ff=1.0,
-window_size = 300,
-threshold= THRESHOLD
+self,
+ff=1.0,
+window_size=300,
+threshold=THRESHOLD
):
self.ff = ff
self.window_size = window_size
@@ -122,7 +130,7 @@ def fit(self, x):
x = pd.Series(x)
self.__setattr__('sample_', x[:int(np.floor(self.window_size))])

-def update(self, x): # allows mini-batch
+def update(self, x):  # allows mini-batch
x = pd.Series(x)
window = rolling_window_update(
old=self.sample_, new=x,
@@ -54,10 +54,12 @@ class SensorsNotFoundError(DsioError):
msg = "Selected sensors not found in data"
code = 4


class ElasticsearchConnectionError(DsioError):
msg = "Cannot connect to Elasticsearch"
code = 5


class KibanaConfigNotFoundError(DsioError):
msg = "Kibana config index not found in Elasticsearch"
code = 6
@@ -7,10 +7,10 @@


def gen_data_with_obvious_anomalies(
-n=1000,
-anomalies=10,
-sigmas=5.0,
-filename=None
+n=1000,
+anomalies=10,
+sigmas=5.0,
+filename=None
):
"""
:param n: number of total samples, including anomalies, defaults to 1000
@@ -25,11 +25,13 @@ def gen_data_with_obvious_anomalies(

# we shift by 5 sigmas (or whatever the user specified) in the direction of the datapoint
# multiplying could end up with a normal value if the original value is small enough
-x[index_of_anomalies] = x[index_of_anomalies] + np.sign(x[index_of_anomalies]) * sigmas
+x[index_of_anomalies] = (
+x[index_of_anomalies] + np.sign(x[index_of_anomalies]) * sigmas
+)
if filename:
-pd.DataFrame(data=x, columns=['simulated_data']).to_csv(filename, index=False)
+pd.DataFrame(
+data=x, columns=['simulated_data']
+).to_csv(filename, index=False)
return None
else:
return x, index_of_anomalies
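
The two comments in this hunk explain why the anomalies are shifted additively: multiplying a small sample could still land inside the normal range, while adding sigmas in the direction of np.sign(x) always pushes the point further from zero. A minimal sketch of that shift, assuming standard-normal base data and the default-looking parameter values (n=1000, anomalies=10, sigmas=5.0):

import numpy as np

n, anomalies, sigmas = 1000, 10, 5.0
x = np.random.normal(size=n)                     # assumed base signal, not from the diff
index_of_anomalies = np.random.choice(n, size=anomalies, replace=False)
# additive shift in the direction of each datapoint, as in the hunk above
x[index_of_anomalies] = x[index_of_anomalies] + np.sign(x[index_of_anomalies]) * sigmas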


@@ -12,7 +12,7 @@
import numpy as np


-def convex_combination(a,b,weight):
+def convex_combination(a, b, weight):
"""
:param a: one summand (e.g., partial sum)
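
convex_combination simply returns (1-weight) * a + weight * b, as the next hunk shows. A quick illustrative call with made-up numbers:

def convex_combination(a, b, weight):
    return (1 - weight) * a + weight * b

running_mean, batch_mean = 10.0, 12.0
print(convex_combination(running_mean, batch_mean, weight=0.25))  # 10.5
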
@@ -27,7 +27,11 @@ def convex_combination(a,b,weight):
return (1-weight) * a + weight * b


-def update_effective_sample_size(effective_sample_size, batch_size, forgetting_factor):
+def update_effective_sample_size(
+effective_sample_size,
+batch_size,
+forgetting_factor
+):
"""
:param effective_sample_size:
@@ -39,8 +43,13 @@ def update_effective_sample_size(effective_sample_size, batch_size, forgetting_f
(2.0, 1.0)
"""
-updated_sample_size = effective_sample_size * forgetting_factor + batch_size
-weight = 1 - (effective_sample_size*1.0 - batch_size)/(effective_sample_size*1.0)
+updated_sample_size = (
+effective_sample_size * forgetting_factor + batch_size
+)
+weight = 1 - (
+(effective_sample_size*1.0 - batch_size) /
+(effective_sample_size*1.0)
+)
return updated_sample_size, weight
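
A worked example of the two formulae above, using illustrative inputs that happen to reproduce the (2.0, 1.0) result shown in the doctest: with one effective sample, a batch of one and forgetting_factor 1.0, the updated size is 1 * 1.0 + 1 = 2.0 and the weight is 1 - (1 - 1)/1 = 1.0, so a convex_combination using that weight would take the new batch estimate at full weight.

effective_sample_size, batch_size, forgetting_factor = 1.0, 1.0, 1.0
updated_sample_size = effective_sample_size * forgetting_factor + batch_size
weight = 1 - (effective_sample_size*1.0 - batch_size)/(effective_sample_size*1.0)
print(updated_sample_size, weight)  # 2.0 1.0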

