In [142]:
import json


In [143]:
#data = [datum["data"] for datum in data]
def smooth(x,window_len=11,window='flat'):
    """Smooth a 1-D signal by convolving it with a normalised window.

    The signal is extended at both ends with mirrored copies of its first and
    last ``window_len - 1`` samples (the end samples themselves are not
    repeated) so that edge transients are reduced, then convolved with the
    window scaled to unit sum.

    input:
        x: the input signal; any 1-D array-like (converted with ``np.asarray``)
        window_len: the dimension of the smoothing window; should be an odd integer
        window: the type of window from 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'
            flat window will produce a moving average smoothing.

    output:
        the smoothed signal as a NumPy array of length
        ``len(x) + window_len - 1``. If ``window_len < 3`` the input is
        returned unsmoothed (as an array).

    raises:
        ValueError: if ``x`` is not one-dimensional, if it has fewer than
            ``window_len`` samples, or if ``window`` is not a supported name.

    example:

    t = np.arange(-2, 2, 0.1)
    x = np.sin(t) + np.random.randn(len(t)) * 0.1
    y = smooth(x)

    see also:

    numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman, numpy.convolve
    scipy.signal.lfilter

    TODO: the window parameter could be the window itself if an array instead of a string
    NOTE: length(output) != length(input). For an odd window_len, trim with
          ``h = (window_len - 1) // 2; y[h:-h]`` to get an output aligned with
          and as long as the input.
    """
    # Accept lists/tuples as well as arrays; an ndarray passes through as-is.
    x = np.asarray(x)

    if x.ndim != 1:
        raise ValueError("smooth only accepts 1 dimension arrays.")

    if x.size < window_len:
        raise ValueError("Input vector needs to be bigger than window size.")

    # A window shorter than 3 samples cannot smooth anything.
    if window_len < 3:
        return x

    if window not in ('flat', 'hanning', 'hamming', 'bartlett', 'blackman'):
        raise ValueError("Window is one of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")

    # Mirror window_len - 1 samples onto each end of the signal.
    s = np.r_[x[window_len-1:0:-1], x, x[-2:-window_len-1:-1]]

    if window == 'flat':  # moving average
        w = np.ones(window_len, 'd')
    else:
        # `window` was validated above, so this resolves to np.hanning,
        # np.hamming, np.bartlett or np.blackman (no eval needed).
        w = getattr(np, window)(window_len)

    return np.convolve(w/w.sum(), s, mode='valid')

In [170]:
from scipy.signal import find_peaks, peak_prominences, peak_widths
import numpy as np
%matplotlib notebook
import matplotlib.pyplot as plt
import rrcf
from statistics import stdev, mean

from collections import defaultdict

# Robust Random Cut Forest settings; identical for every file and sensor, so
# they are defined once instead of being re-assigned inside the sensor loop.
num_trees = 50
shingle_size = 1
tree_size = 256
codisp_threshold = 10   # absolute CoDisp score above which a point is flagged
stdev_threshold = 3     # flag points this many sample std-devs above the mean score

for i in range(900, 1000, 100):
    # Load one JSON record per line, skipping blank lines so a trailing
    # newline does not crash json.loads.
    data = []
    with open(f"../data/slow_down-{i}.data.jsonl") as f:
        for line in f:
            if line.strip():
                data.append(json.loads(line))

    # Order records by time, then group the readings by device.
    data = sorted(data, key=lambda x: x["timestamp"])
    sensorDict = defaultdict(list)
    for d in data:
        sensorDict[d['device']].append(d['data'])

    # Number of sensors for which an anomaly was flagged at index >= i - 20.
    count = 0
    for sensor in sensorDict:
        x = np.array(sensorDict[sensor])
        x = smooth(x)

        # Peak statistics are not consumed by the detector below; they are
        # kept so `peaks` / `properties` stay available for inspection.
        peaks, properties = find_peaks(x, prominence=0.1, width=1)

        # Create a forest of empty trees
        forest = [rrcf.RCTree() for _ in range(num_trees)]

        # Use the "shingle" generator to create rolling window
        points = rrcf.shingle(x, shingle_size)

        # Average CoDisp (anomaly score) of each point, keyed by its index
        avg_codisp = {}

        # NOTE(review): smooth() returns len(input) + window_len - 1 samples,
        # so `index` is a position in the padded, smoothed series rather than
        # a raw sample number -- confirm the `i - 20` cut-off below accounts
        # for that offset.
        for index, point in enumerate(points):
            avg_codisp[index] = 0
            for tree in forest:
                # Keep each tree at a bounded size: drop the oldest point (FIFO)
                if len(tree.leaves) > tree_size:
                    tree.forget_point(index - tree_size)
                tree.insert_point(point, index=index)
                # Accumulate this tree's share of the forest-average score
                avg_codisp[index] += tree.codisp(index) / num_trees

            score = avg_codisp[index]

            # Decide whether (and by which rule) this point is flagged.
            flagged_by = None
            if score > codisp_threshold:
                flagged_by = "threshold"
            elif index > 2:
                # Scores seen so far, including the current point.
                history = list(avg_codisp.values())
                if mean(history) + stdev(history) * stdev_threshold < score:
                    flagged_by = "std"

            if flagged_by is None:
                continue

            if index >= i - 20:
                # Counted as a detection; stop scanning this sensor.
                count += 1
                break
            print(f"False positive via {flagged_by}")

    print(count)

False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive

False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive via threshold
False positive

KeyboardInterrupt: 

In [83]:
import numpy as np

In [None]:
%matplotlib notebook

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Plot the current contents of `data` as one series.
# NOTE(review): the loading cell leaves `data` as a list of parsed JSON
# records (indexed there by "timestamp", "device" and "data") -- confirm it
# has been reduced to numeric values before this cell is run.
plt.plot(np.array(data))

In [None]:
# Display every entry of `data` from position 199 to the end.
data[199:]