In [None]:
pip install keract

In [None]:
import numpy
import matplotlib.pyplot as plt
from pandas import read_csv
import math
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.stattools import adfuller
from tensorflow.keras.layers import Activation
from tensorflow.keras import backend as K
from tensorflow.keras.utils import get_custom_objects
from tensorflow.keras import losses
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import Callback
from keract import get_activations, persist_to_json_file, load_activations_from_json_file
# Number of samples in one look-back window; the main script uses it as
# `look_back`, and the identify_anomaly* helpers use it as the chunk length.
period_size = 76
# Width of each sub-window inside a look-back window (passed as `tw`).
step_size = 38
# Quantile level used for both the model inputs and the targets (0.5 = median).
upper_q_threshold=0.5
# Lower quantile level; only used for `actual_quantile_interval` in the main script.
lower_q_threshold=0.0001
# JSON file the callback persists the keract activations to.
output = 'activations.json'
# CSV file the callback appends one row of 'lstm' activations to per epoch.
path='final.csv'
# Per-epoch history of the 'lstm' layer activations, filled by
# LogThirdLayerOutput: index 0, 1 and 2 go to the first three lists, any
# further index goes to `fourth_layer`.
first_layer=[]
second_layer=[]
third_layer=[]
fourth_layer=[]
class LogThirdLayerOutput(Callback):
    """Keras callback that logs the activations of the ``lstm`` layer per epoch.

    After every epoch the model is probed with a fixed all-ones input, the
    activations of the layer named ``'lstm'`` are appended to the module-level
    ``first_layer`` .. ``fourth_layer`` lists and to the CSV file ``path``,
    and the full activation dict is round-tripped through the JSON file
    ``output``.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Record the current activations for a constant probe input.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metrics dict supplied by Keras (unused).
        """
        # Constant probe of shape (1, 2, 1) so epochs are comparable.
        x = numpy.ones((1, 2, 1))
        activations = get_activations(self.model, x)

        # Activation index -> history list; index 3 and above share the last.
        histories = (first_layer, second_layer, third_layer, fourth_layer)

        row = ''
        for k, v in activations.items():
            print('key', k, 'value', v.tolist())
            if k != 'lstm':
                continue
            # Only the first (and only) sample of the probe batch is logged.
            lstm_array = v.tolist()[0]
            # Every value is prefixed with a comma, so the row starts with ','.
            row += ''.join(',' + str(value) for value in lstm_array)
            for i, value in enumerate(lstm_array):
                histories[min(i, len(histories) - 1)].append(value)

        # Use a context manager so the handle is flushed and closed each
        # epoch instead of being leaked.
        with open(path, 'a') as csv_file:
            csv_file.write(row + "\n")

        # persist the activations to the disk.
        persist_to_json_file(activations, output)

        # read them from the disk.
        activations2 = load_activations_from_json_file(output)

        # print them.
        print(list(activations.keys()))
        print(list(activations2.keys()))
        print('Dumped to {}.'.format(output))
def intersection(lst1, lst2):
    """Return the entries of ``lst1`` whose first element is contained in ``lst2``.

    Args:
        lst1: Sequence of indexable entries; only ``entry[0]`` is tested.
        lst2: Container checked with the ``in`` operator.

    Returns:
        A new list with the matching entries of ``lst1`` in original order.
    """
    return [entry for entry in lst1 if entry[0] in lst2]

def verify_stationarity(dataset):
    """Run ``adfuller`` on ``dataset`` and report whether it looks stationary.

    The test statistic, p-value and critical values are printed (the critical
    values twice, in two formats). The series is reported as non-stationary
    when the test statistic exceeds the first critical value listed in the
    result.

    Args:
        dataset: Series passed straight to ``adfuller``.

    Returns:
        ``False`` if the statistic is greater than the first critical value,
        ``True`` otherwise.
    """
    test_results = adfuller(dataset)

    print(f"ADF test statistic: {test_results[0]}")
    print(f"p-value: {test_results[1]}")
    print("Critical thresholds:")

    critical_values = test_results[4]
    for name, threshold in critical_values.items():
        print(f"\t{name}: {threshold}")

    # Second listing with 3-decimal formatting; the first entry is the
    # threshold the statistic is compared against.
    for position, (name, threshold) in enumerate(critical_values.items()):
        print('\t%s: %.3f' % (name, threshold))
        if position == 0:
            critical = threshold

    print('critical', critical)
    if test_results[0] > critical:
        print('non stationary')
        return False
    return True

def create_dataset(dataset, look_back=1, tw=3, quantile=None):
    """Build quantile features and targets from sliding windows of ``dataset``.

    For every start index ``i`` in ``range(len(dataset) - look_back - 1)`` the
    window ``dataset[i + 1:i + look_back + 1]`` is taken. The features are the
    quantiles of its consecutive ``tw``-sized sub-windows; the target is the
    quantile of the whole window.

    Args:
        dataset: Indexable sequence or array of observations.
        look_back: Number of observations per window.
        tw: Number of observations per sub-window within a window.
        quantile: Quantile level in [0, 1]. ``None`` (the default) uses the
            module-level ``upper_q_threshold``.

    Returns:
        Tuple ``(dataX, dataY)`` of numpy arrays: ``dataX`` holds one row of
        sub-window quantiles per window, ``dataY`` the whole-window quantile.
    """
    q = upper_q_threshold if quantile is None else quantile
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back - 1):
        window = dataset[i + 1:i + look_back + 1]
        dataX.append([
            numpy.quantile(window[j:j + tw], q)
            for j in range(0, len(window), tw)
        ])
        dataY.append(numpy.quantile(window, q))

    return numpy.array(dataX), numpy.array(dataY)
def identify_anomaly_quantiles(prediction_errors, period=None, upper_q=0.9, lower_q=0.1):
    """Flag prediction errors outside per-period quantile thresholds.

    The errors are processed in consecutive chunks of ``period`` elements.
    Within a chunk an error is flagged when it is above the ``upper_q``
    quantile of the chunk, or when it is negative and below the ``lower_q``
    quantile of the chunk.

    Args:
        prediction_errors: Sliceable sequence or array of errors.
        period: Chunk length. ``None`` (the default) uses the module-level
            ``period_size``.
        upper_q: Quantile level of the upper threshold.
        lower_q: Quantile level of the lower threshold.

    Returns:
        List of the flagged errors, in input order.
    """
    chunk_len = period_size if period is None else period
    anomaly_detection = []
    for start in range(0, len(prediction_errors), chunk_len):
        chunk = prediction_errors[start:start + chunk_len]
        upper_threshold = numpy.quantile(chunk, upper_q)
        lower_threshold = numpy.quantile(chunk, lower_q)
        for error in chunk:
            # The lower tail only counts when the error is also negative.
            if error > upper_threshold or (error < 0 and error < lower_threshold):
                anomaly_detection.append(error)

    return anomaly_detection

def identify_anomaly(prediction_errors, period=None, n_std=1.6):
    """Flag prediction errors farther than ``n_std`` deviations from the period mean.

    The errors are processed in consecutive chunks of ``period`` elements.
    Within a chunk an error is flagged when it lies above
    ``mean + n_std * std`` or below ``mean - n_std * std`` of that chunk.

    Args:
        prediction_errors: Sliceable sequence or array of errors.
        period: Chunk length. ``None`` (the default) uses the module-level
            ``period_size``.
        n_std: Half-width of the accepted band in standard deviations.

    Returns:
        List of the flagged errors, in input order.
    """
    chunk_len = period_size if period is None else period
    anomaly_detection = []
    for start in range(0, len(prediction_errors), chunk_len):
        chunk = prediction_errors[start:start + chunk_len]
        avg = numpy.average(chunk)
        margin = n_std * numpy.std(chunk)
        upper_threshold = avg + margin
        lower_threshold = avg - margin
        for error in chunk:
            if error > upper_threshold or error < lower_threshold:
                anomaly_detection.append(error)

    return anomaly_detection
# Script entry point: train an LSTM on quantile features of the training
# series, then slide over the full series, compare each prediction with the
# observation that follows the window and flag unusually large differences.
if __name__ == '__main__':
    # fix random seed for reproducibility
    numpy.random.seed(7)
    # load the dataset
    dataframe = read_csv('yahoo3_train.csv', usecols=[0], engine='python') # yahoo3_train contains all the normal datapoints in yahoo3
    dataset = dataframe.values
    # The stationarity check is run before and after the float32 cast; the
    # returned flag is not used further in this script.
    stationary=verify_stationarity(dataset)
    dataset = dataset.astype('float32')
    # normalize the dataset
    print('dataset', dataset)
    stationary = verify_stationarity(dataset)
    scaler = MinMaxScaler(feature_range=(0, 1))

    dataset = scaler.fit_transform(dataset)
    # split into train and test sets (first 30% for training, rest for test)
    train_size = int(len(dataset) * 0.3)
    test_size = len(dataset) - train_size
    train, test = dataset[0:train_size, :], dataset[train_size:len(dataset), :]
    # build (sub-window quantiles -> whole-window quantile) samples
    look_back = period_size
    tw = step_size
    # number of sub-windows per look-back window, i.e. LSTM time steps
    multi = look_back // tw
    trainX, trainY = create_dataset(train, look_back, tw)
    # NOTE(review): testX/testY are only reshaped below, never evaluated.
    testX, testY = create_dataset(test, look_back, tw)
    print(trainX)
    # reshape input to be [samples, time steps, features]
    trainX = numpy.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
    testX = numpy.reshape(testX, (testX.shape[0], testX.shape[1], 1))
    print(trainX)

    # Single LSTM layer with 4 units followed by a 1-unit dense output.
    modelq10 = Sequential()
    modelq10.add(LSTM(4, input_shape=(multi, 1), activation='tanh',recurrent_activation='tanh'))
    modelq10.add(Dense(1))

    modelq10.compile(loss=losses.logcosh, optimizer='adam')

    # The callback logs the 'lstm' activations after every epoch.
    modelq10.fit(trainX, trainY, epochs=50, batch_size=1, verbose=2, callbacks=[LogThirdLayerOutput()])

    # [i, j) is the sliding window over the full series; both ends advance
    # by one sample per iteration.
    i = 0
    j = look_back
    actual_quantile_interval = []
    steps = tw
    positive = True
    anomalies=[]
    finalres_q10 = []
    finalres_q90 = []
    dataframe = read_csv('yahoo3.csv', usecols=[0], engine='python')
    dataset = dataframe.values
    # NOTE(review): this re-fits the scaler on yahoo3.csv, so the scaling
    # differs from the one the model was trained with - confirm whether
    # `transform` was intended.
    dataset = scaler.fit_transform(dataset)
    ts = dataset
    ts_accumulate=[]
    comparison_dataset=[]
    while j <= len(dataset):
        q50_array = []


        temp = dataset[i:j]
        # Spread between the lower and upper quantile of the window shifted
        # by one sample; collected but not used later in this script.
        actual_quantile_interval.append(
            numpy.absolute(numpy.quantile(dataset[i + 1:j + 1], lower_q_threshold) - numpy.quantile(dataset[i + 1:j + 1], upper_q_threshold)))
        print('print here', temp)

        # One quantile per sub-window, each wrapped in its own list so the
        # result has the [time steps, features] layout.
        for m in range(0, len(temp), steps):
            q50array = []
            q50 = numpy.quantile(temp[m:m + steps], upper_q_threshold)
            q50array.append(q50)
            q50_array.append(q50array)

        # Extra outer list adds the batch dimension (a batch of one sample).
        final_q50_array = []
        final_q50_array.append(q50_array)
        print('final_q10_array', final_q50_array)
        q50_predict = modelq10.predict(final_q50_array)
        print('q50_predict', q50_predict)

        # Skip the trailing windows that have no observation at index j + 1.
        if j+1 < len(dataset) :

            # Prediction error against the observation at index j + 1.
            diff=q50_predict-dataset[j+1]
            print('data',dataset[j+1],'diff',diff)
            anomalies.append(diff)
            comparison_dataset.append(dataset[j+1])
        j = j + 1
        i = i + 1

    anomalies_array=[]

    # Keep the first row of every stored difference.
    for h in range(len(anomalies)):
        internal = anomalies[h]
        internal_array = []
        anomalies_array.append(internal[0])
    # NOTE(review): inverse_transform is applied to differences, not to
    # scaled values - presumably this also adds the scaler's offset; verify.
    anomalies_array = scaler.inverse_transform(anomalies_array)
    comparison_dataset=scaler.inverse_transform(comparison_dataset)
    print(anomalies_array)
    for itr in range(len(anomalies_array)):
        print('data',comparison_dataset[itr],'diff',anomalies_array[itr])

    # Flag errors outside the per-period mean +/- 1.6 * std band.
    anomalies = identify_anomaly(anomalies_array)
    print(anomalies)
