In [147]:
import numpy as np
arr = [2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2047.0, 2047.0, 2047.0, 2047.0, 2047.0, 2047.0, 2047.0, 2047.0, 2047.0, 2047.0, 2032.0, 2007.0, 1985.0, 1965.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 2.0, 957.0, 1099.0, 1145.0, 1143.0, 1149.0, 1152.0, 1153.0, 1158.0, 1161.0, 1163.0, 1167.0, 1173.0, 1175.0, 1178.0, 1180.0, 1185.0, 1188.0]
arr_q = [2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2047.0, 2047.0]

In [150]:
def dot_products_1(q, t):
    # concatenante n zeors for Time Series, 2n-m zeros for Query
    m, n = len(q), len(t)
    t_a = np.concatenate([t, np.zeros(n)])
    # reverse the Query
    q_r = q[::-1]
    q_ra = np.concatenate([q_r, np.zeros(2 * n - m)])
    q_raf = np.fft.fft(q_ra)
    t_af = np.fft.fft(t_a)
    qt = np.fft.ifft(q_raf * t_af)
    return qt

def dot_products_2(q, t):
    # concatenante n-m zeros for Query
    m, n = len(q), len(t)
    # reverse the Query
    q_r = q[::-1]
    q_ra = np.concatenate([q_r, np.zeros(n - m)])
    q_raf = np.fft.fft(q_ra)
    t_af = np.fft.fft(t)
    qt = np.fft.ifft(q_raf * t_af)
    return qt

def mass_v2(x, y):
    #x is the data, y is the query
    n, m = len(x), len(y)

    #%compute y stats -- O(n)
    meany = np.mean(y);
    sigmay = np.std(y);

    #compute x stats -- O(n)
    #compute the average of the first m elements in 'x'
    def running_mean(x, N):
        cumsum = numpy.cumsum(numpy.insert(x, 0, np.zeros(N)))
        return (cumsum[N:] - cumsum[:-N]) / float(N)

    def running_std(x, N):
        x2 = np.power(x, 2)
        cumsum2 = numpy.cumsum(numpy.insert(x2, 0, np.zeros(N)))
        return (cumsum2[N:] - cumsum2[:-N]) / float(N) - running_mean(x, N)

    meanx = running_mean(x, n)
    sigmax = running_std(x, n)

    #The main trick of getting dot products in O(n log n) time
    z = dot_products_2(y, x)
    dist = 2*(m-(z[m-1:n]-m*meanx[m-1:n]*meany)/(sigmax[m-1:n]*sigmay))

    dist = np.sqrt(dist);
    return np.abs(dist)



print("size of Q: ", arr_q.__len__())
print("size of T: ", arr.__len__())
mass_v2(arr, arr_q)

size of Q:  19
size of T:  64


array([6.14835667, 6.14836848, 6.15389031, 6.15664938, 6.15830423,
       6.15940722, 6.16019494, 6.16078567, 6.16124599, 6.16161341,
       6.16192138, 6.16219052, 6.16242028, 6.16261305, 6.16377271,
       6.16492102, 6.16492164, 6.16492168, 6.16492171, 6.16492291,
       6.16492411, 6.16492532, 6.16492596, 6.16492717, 6.16492837,
       6.16493015, 6.16493135, 6.16493198, 6.16493318, 6.16439317,
       6.16379463, 6.16372444, 6.16373496, 6.16376546, 6.16379224,
       6.16382053, 6.16384689, 6.16387158, 6.16389663, 6.16392038,
       6.16394156, 6.16396292, 6.16398493, 6.1640062 , 6.16402598,
       6.16404472])

In [149]:
def mass_v1(q, t):
    m, n = len(q), len(t)
    # Z-normalization of Query
    q = (q-np.mean(q)) / np.std(q)
    qt = dot_products_1(q, t)
    #compute the mean and standard deviation of Time Series
    sum_q = np.sum(q)
    sum_q2 = np.sum(np.power(q,2))

    #cache a cumulative sum of values
    cum_sum_t = np.cumsum(t)
    cum_sum_t2 = np.cumsum(np.power(t,2))

    #sum of x and x square for [0, n-m] subsequences of length m
    sumt2 = cum_sum_t2[m-1:] - cum_sum_t2[:- m+1]
    sumt = cum_sum_t[m-1:] - cum_sum_t[:- m+1]
    meant = sumt / m
    #standard deviation of every subsequence of length m
    sigmat2 = (sumt2 / m) - (np.power(meant,2))
    sigmat = np.sqrt(sigmat2)

    dist = (sumt2 - 2 * sumt * meant + m * (np.power(meant,2))) / sigmat2 - 2 * (qt[m-1:n] - sum_q * meant) / sigmat + sum_q2
    dist = np.sqrt(dist)
    return np.abs(dist)
mass_v1(arr_q, arr)

array([3.44784577, 0.04684951, 2.81084486, 3.57221328, 4.02372682,
       4.33756427, 4.57537758, 4.76607701, 4.92607051, 5.06356473,
       5.19310798, 5.32470353, 5.4523899 , 5.57303375, 6.74510428,
       7.73276608, 7.73323708, 7.73323708, 7.659427  , 7.51633952,
       7.39479322, 7.28868568, 7.19356337, 7.10746815, 7.0279404 ,
       6.95388045, 6.88336695, 6.81553974, 6.75027256, 5.96844278,
       4.49064991, 3.10129587, 3.33744234, 3.87063879, 4.21708375,
       4.47778587, 4.68098459, 4.84610288, 4.98911516, 5.11326778,
       5.22038395, 5.31982002, 5.4156408 , 5.5063289 , 5.59198877,
       5.67345898])

In [153]:
import time
t1 = time.clock()
print(mass_v1(arr_q, arr))
t2 = time.clock()
print (str(t2-t1))
print(mass_v2(arr, arr_q))
t3 = time.clock()

print (str(t3-t2))


[3.44784577 0.04684951 2.81084486 3.57221328 4.02372682 4.33756427
 4.57537758 4.76607701 4.92607051 5.06356473 5.19310798 5.32470353
 5.4523899  5.57303375 6.74510428 7.73276608 7.73323708 7.73323708
 7.659427   7.51633952 7.39479322 7.28868568 7.19356337 7.10746815
 7.0279404  6.95388045 6.88336695 6.81553974 6.75027256 5.96844278
 4.49064991 3.10129587 3.33744234 3.87063879 4.21708375 4.47778587
 4.68098459 4.84610288 4.98911516 5.11326778 5.22038395 5.31982002
 5.4156408  5.5063289  5.59198877 5.67345898]
0.004516000000002407
[6.14835667 6.14836848 6.15389031 6.15664938 6.15830423 6.15940722
 6.16019494 6.16078567 6.16124599 6.16161341 6.16192138 6.16219052
 6.16242028 6.16261305 6.16377271 6.16492102 6.16492164 6.16492168
 6.16492171 6.16492291 6.16492411 6.16492532 6.16492596 6.16492717
 6.16492837 6.16493015 6.16493135 6.16493198 6.16493318 6.16439317
 6.16379463 6.16372444 6.16373496 6.16376546 6.16379224 6.16382053
 6.16384689 6.16387158 6.16389663 6.16392038 6.16394156 6.1639

In [154]:
def running_mean(x, N):
        cumsum = numpy.cumsum(numpy.insert(x, 0, np.zeros(N)))
        return (cumsum[N:] - cumsum[:-N]) / float(N)
print(arr)
running_mean(arr, 3)

[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2047.0, 2047.0, 2047.0, 2047.0, 2047.0, 2047.0, 2047.0, 2047.0, 2047.0, 2047.0, 2032.0, 2007.0, 1985.0, 1965.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 2.0, 957.0, 1099.0, 1145.0, 1143.0, 1149.0, 1152.0, 1153.0, 1158.0, 1161.0, 1163.0, 1167.0, 1173.0, 1175.0, 1178.0, 1180.0, 1185.0, 1188.0]


array([6.66666667e-01, 1.33333333e+00, 2.00000000e+00, 2.00000000e+00,
       2.00000000e+00, 2.00000000e+00, 2.00000000e+00, 2.00000000e+00,
       2.00000000e+00, 2.00000000e+00, 2.00000000e+00, 1.66666667e+00,
       1.66666667e+00, 1.66666667e+00, 2.00000000e+00, 2.00000000e+00,
       2.00000000e+00, 2.00000000e+00, 6.83666667e+02, 1.36533333e+03,
       2.04700000e+03, 2.04700000e+03, 2.04700000e+03, 2.04700000e+03,
       2.04700000e+03, 2.04700000e+03, 2.04700000e+03, 2.04700000e+03,
       2.04200000e+03, 2.02866667e+03, 2.00800000e+03, 1.98566667e+03,
       1.31766667e+03, 6.56666667e+02, 2.33333333e+00, 2.00000000e+00,
       2.00000000e+00, 2.00000000e+00, 2.00000000e+00, 2.00000000e+00,
       2.00000000e+00, 2.00000000e+00, 2.00000000e+00, 1.66666667e+00,
       1.66666667e+00, 1.66666667e+00, 2.00000000e+00, 3.20333333e+02,
       6.86000000e+02, 1.06700000e+03, 1.12900000e+03, 1.14566667e+03,
       1.14800000e+03, 1.15133333e+03, 1.15433333e+03, 1.15733333e+03,
      

In [84]:
import os
import numpy
import collections
import pickle
import sys
import psutil as ps
import random
import csv

def load(directory, option):
        if option.lower() == 'raw data':
            dirname = "/RawData/"
            extension = ".csv"
        
        files_list = [f for f in os.listdir(directory + dirname) if f.lower().endswith(extension)]
        list_objects = []
        for file in files_list:
            path = directory + dirname + file
            an_object = numpy.genfromtxt(path)
            list_objects.extend(an_object)
        # return a list[Array_TimeSeries]
        return list_objects
        
        #path = directory + dirname + "Chlorine_TRAIN.csv"
        #an_object = pickle.load(open(path, "rb"))
        #return numpy.genfromtxt(path, dtype=numpy.str)
file = load("/Users/Jingwei/Desktop/TestDataset","raw data")
file

[array([ 1.        ,  2.6173319 ,  3.230977  ,  2.8508    ,  2.7514559 ,
         2.3456567 ,  2.2745974 ,  1.9897641 ,  1.848995  ,  1.4532902 ,
         1.3171126 ,  1.1622984 ,  0.97909615,  0.767475  , -0.27062548,
         1.494378  ,  1.4456604 ,  1.2830421 ,  1.1554104 ,  0.7314147 ,
         0.53897859,  0.26044735,  0.05832891, -0.30428806, -0.60378256,
        -0.92625117, -1.0126142 ,  1.855898  ,  1.5001129 ,  1.3892802 ,
         0.9980904 ,  0.6891325 ,  0.35188677,  0.07876438, -0.41917567,
        -0.7472362 , -1.0089426 , -1.000945  , -1.0750905 ,  0.95166232,
         1.4071658 ,  0.83867565,  0.91775112, -0.18871151, -0.76322644,
        -0.57788537, -1.0485214 , -0.68798786, -0.94220792, -1.0462616 ,
        -1.025915  ,  0.41774349,  1.337181  ,  0.96330081,  0.65598369,
        -0.14523554, -0.58696302, -0.8708398 , -0.89158561, -1.0500553 ,
        -1.058458  , -1.0933881 , -1.0150176 , -1.128092  ,  1.0929972 ,
         0.71907064,  0.46546846,  0.03024612, -0.3

In [90]:
data = [float('inf')]
if (data[0]> 1):
    data[0] = + 1
data

[1]

In [105]:
k = 10
keys = range(0, k)
dict.fromkeys(keys, 0)

{0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 8: 0, 9: 0}