In [1]:
from scipy.stats import multivariate_normal
from pandas import read_csv, DataFrame, option_context
import numpy as np
from matplotlib import pyplot

## Config Vars

In [2]:
conf_error_vects_path = 'data/error_vects.csv'
shifts = 5
threshold = 2.8237531487356081629

## Load Dataset

In [3]:
error_vects_full = read_csv(conf_error_vects_path , header=0, index_col=0)

## Seperate data

In [4]:
train = error_vects_full.shape[0] * 1 // 2
    
error_vects = error_vects_full.iloc[:-train]

error_vects_test = error_vects_full.iloc[-train:]

In [5]:
error_vects.head()

Unnamed: 0,1,2,3,4,5,anomaly
40005,0.299401,0.289433,0.304116,0.282207,0.277893,0
40006,0.197175,0.204604,0.196968,0.206062,0.192897,0
40007,0.204985,0.18778,0.195564,0.188006,0.1968,0
40008,0.25641,0.265177,0.245307,0.256989,0.248547,0
40009,0.244769,0.231876,0.239966,0.221019,0.22963,0


## Calculate Means and covariance matrix of error vects

In [6]:
means = error_vects[error_vects.columns[:-1]].mean()
covariance = error_vects[error_vects.columns[:-1]].cov()

In [7]:
np.array(error_vects_test[error_vects_test.columns[:-1]].iloc[row])

NameError: name 'row' is not defined

## Creating the Model

In [None]:
g_model = multivariate_normal(mean=means, cov=covariance)

In [None]:
error_val = DataFrame(columns = ['error'])
for row in range(error_vects_test.shape[0]):
    val = g_model.pdf(np.array(error_vects_test[error_vects_test.columns[:-1]].iloc[row]))
#     print(val*10**35)
    error_val.loc[row] = val*10**35
error_val.index = error_vects_test.index
error_val[["anomaly"]] = error_vects_test[["anomaly"]]

In [None]:
with option_context('display.float_format', '{:0.20f}'.format):
    with option_context('display.max_rows', None, 'display.max_columns', None):
        print(error_val)


In [None]:
error_val_anomaly = error_val[error_val.anomaly == 1]
error_val_non_anomaly = error_val[error_val.anomaly == 0]

In [None]:
xmarks=[i for i in range(42000,50000+1,50)]

In [None]:
pyplot.figure()
pyplot.plot(error_val_anomaly['error'], label='Anomaly')
pyplot.plot(error_val_non_anomaly['error'], label='Non Anomaly')
pyplot.xticks(xmarks)
pyplot.xticks(rotation=90, fontsize=2)
pyplot.legend()
pyplot.title("Anomaly")
pyplot.savefig('graph.png', dpi=1000)
pyplot.show()

In [None]:
def thresh():
    def func(x):
        if(x.error > threshold):
            return 1
        return 0
    return func

error_val['pred'] = error_val.apply(thresh(), axis=1)
from sklearn.metrics import precision_recall_fscore_support as score, accuracy_score
accuracy_score(error_val['anomaly'], error_val['pred'], normalize=True, sample_weight=None)

In [None]:
# from sklearn.metrics import precision_recall_fscore_support as score, accuracy_score
accuracy_score(error_val['anomaly'], error_val['pred'], normalize=True, sample_weight=None)

In [None]:
with option_context('display.float_format', '{:0.20f}'.format):
    with option_context('display.max_rows', None, 'display.max_columns', None):
        print(error_val)

In [None]:
pyplot.figure()
pyplot.plot(error_val['anomaly'], label='Real')
pyplot.plot(error_val['pred'], label='Pred')
pyplot.legend()
pyplot.title("Anomaly")
pyplot.show()

In [None]:
from sklearn.metrics import precision_recall_fscore_support as score, accuracy_score
precision, recall, fscore, support = score(error_val['anomaly'], error_val['pred'])

print('precision: {}'.format(precision))
print('recall: {}'.format(recall))
print('fscore: {}'.format(fscore))
print('support: {}'.format(support))

In [None]:
accuracy_score(error_val['anomaly'], error_val['pred'], normalize=True, sample_weight=None)