# Experiment on Different Time-series Similarity Measures 1

### Loading

In [1]:
# load image and libraries
%matplotlib inline
import pandas as pd
from matplotlib import cm
import matplotlib.pyplot as plt
import numpy as np
from itertools import groupby
from scipy import signal
from sklearn import preprocessing

root_path = './processed_datasets/'
datasets = ['has', 'sp', 'fp', 'rb', 'sd', 'sr', 'hasb', 'ihas']

# load ground truth
ori_data_X = []
ori_data_y = []
sketch_X = []
for dataset in datasets:
    file_name = root_path + 'original_' + dataset  
    ori_data_X.append(np.load(file_name + '_X' + '.npy'))
    ori_data_y.append(np.load(file_name + '_y' + '.npy'))
    file_name = root_path + 'sketch_' + dataset + '.npy'
    sketch_X.append(np.load(file_name, allow_pickle=True)[:100])
print(f"number of loaded samples per class: {[len(x) for x in sketch_X]}")
print(f"Original data: {len(ori_data_X)} datasets")
print(f"Sketch data: {len(sketch_X)} datasets")

number of loaded samples per class: [100, 100, 100, 100, 100, 100, 100, 100]
Original data: 8 datasets
Sketch data: 8 datasets


### Sliding Window

In [2]:
def sliding_window(ori_series, clip_series, measure):
    """
    Compute the similarities of the original and the clipped series using sliding window
    input: original time series, clipped series, similarity measure function
    output: similarity_distribution, matching result, i.e., starting and ending points
    """
    ori_len = ori_series.shape[0]
    clip_len = clip_series.shape[0]
    if ori_len < clip_len:
        return None

    # compute the similarity between the original and the clipped series
    dist = []
    # compute the similarity between the original and the clipped series using sliding window
    for i in range(ori_len - clip_len + 1):
        dist.append(measure(ori_series[i:i+clip_len], clip_series))
    # find the maximum similarity and the corresponding starting and ending points
    min_idx = np.argmin(dist)
    return dist, [min_idx, min_idx + clip_len - 1]


In [3]:
def pointwise_exp(ori_data_X, ori_data_y, sketch_X, measure):
    results = []
    for i, dataset in enumerate(datasets):
        original = ori_data_X[i]
        label = ori_data_y[i]
        dummy_record = []
        for sample in sketch_X[i]:
            clip =  signal.resample(sample, label[1]-label[0]+1)
            sim_dist, pred_loc = sliding_window(original, clip, measure)
            dummy_record.append([sim_dist, pred_loc])
        results.append(dummy_record)
    return results

### Experiment 1.1: Euclidean Distance

#### Define measure

In [4]:
def euclidean_distance(x,y):
    t = preprocessing.Normalizer()
    x = np.expand_dims(x, axis=0)
    y = np.expand_dims(y, axis=0)
    return np.linalg.norm(t.transform(x) - t.transform(y))

#### Get results

In [6]:
results_eu = pointwise_exp(ori_data_X, ori_data_y, sketch_X, euclidean_distance)

KeyboardInterrupt: 

#### Anlysis

In [None]:
for i in range(len(results_eu)):
    print(f"Dataset: {datasets[i]}")
    print(f"Number of samples: {len(results_eu[i])}")
    print(f"Average distance: {np.mean([np.max(x[0]) for x in results_eu[i]])}")
    print(f"Average location error w.r.t. segment length: {np.mean([np.abs(x[1][0] - ori_data_y[i][0])/ (ori_data_y[i][1]-ori_data_y[i][0]+1)*100 for x in results_eu[i]])}%")
print(f'Average distance: {np.mean([np.mean([np.max(x[0]) for x in results_eu[i]]) for i in range(len(results_eu))])}')
print('--------------------Overall:------------------------------------')
print(f'Average location error w.r.t. segment length: {np.mean([np.mean([np.abs(x[1][0] - ori_data_y[i][0])/ (ori_data_y[i][1]-ori_data_y[i][0]+1)*100 for x in results_eu[i]]) for i in range(len(results_eu))])}%')
print(f'Average location error w.r.t. total length: {np.mean([np.mean([np.abs(x[1][0] - ori_data_y[i][0])/ ori_data_X[i].shape[0]*100 for x in results_eu[i]]) for i in range(len(results_eu))])}%')

for i in range(len(results_eu)):
    plt.figure()
    plt.title(f"Dataset: {datasets[i]}")
    plt.xlabel("Time")
    plt.ylabel("Similarity")
    plt.plot(results_eu[i][0][0])
    plt.plot(results_eu[i][0][1], [0.5, 0.5], marker='*', ls='none')
    plt.plot(ori_data_y[i], [0.5, 0.5], marker='o', color='r', ls='none')
    plt.show()


In [2]:
#Smoothing
import pandas as pd
def smoother(series,smoothing):
    series_df = pd.DataFrame(series,columns=['Data'])
    return series_df.ewm(smoothing).mean().to_numpy() 
# test = np.array([1, 2, 3, 4,2,5,2,232,323,23,2,3,23,2,3])
# op = smoother(test,0.5)
# print(op)

In [52]:
#Qetch Algorithm -- inclomplete --
def width(series):
    # Should return width of series -  Size of a 1D array is the same as the length.
    return series.size
    
def height(series):
    #Finds Height of time series based difference in max and minimum values.
    h = np.max(series) - np.min(series) 
    return h


def heightGlobal(series):
    hmax = 0
    hmin = 999
    for i in series:
        hmax = max(np.max(i),hmax)
        hmin = min(np.min(i),hmin)

    h = hmax - hmin
    return h

def widthGlobal(series):
    return series.size

In [4]:
def Split_Correcter(split_arr,h_threshold):
    # Checks if the height is less than 1% of total height and mergers small segments.
    corr_split = []
    p = 0
    buff = []
    split_at = []
    counter = 0
    for i in split_arr:
        if(len(i)==1 or (height(i)<h_threshold)):
            buff.append(i)
        else:
            if(len(buff)>0):
                buff.append(i)
                temp = np.concatenate(buff)
                corr_split.append(temp)

                split_at.append(counter) #Starting position of segment is noted
                counter+=temp.size
                buff = []
            else:
                split_at.append(counter) #Starting position of segment is noted
                counter+=i.size
                corr_split.append(i)

    split_at.append(counter) # Adding the end position of the last segment

    # print("Final",corr_split[:5],"Number of segments:",len(corr_split))
    return corr_split,len(corr_split),split_at

In [5]:
def split_based_derivative(series):

    h_threshold = 0.01 * height(series)
    diff_arr = np.diff(series)
    sign_arr = np.sign(diff_arr)
    p = 0
    split_indices = []
    split_at = []
    for i in range(0,len(sign_arr)):
        if(i==0):
            p = sign_arr[i]
        else:
            if((sign_arr[i] == 0) or (sign_arr[i]==1)):
                if(p==-1):
                    split_indices.append(i)
                    p = sign_arr[i]
            elif((sign_arr[i] == -1) and ((p==1) or (p==0))):
                split_indices.append(i)
                p = sign_arr[i]
                
    # print(series[:10],diff_arr[:10])
    split_arr = np.split(series, split_indices, axis=0)
    # print(len(split_arr))
    # print(len(series))
    # print(series[:10])
    # print(diff_arr[:10])
    # print(sign_arr[:10])
    # print(split_indices)
    # print(split_arr[:3])

    #print("Before Split Correcter")
    corrected_split,k,split_at = Split_Correcter(split_arr,h_threshold)
    return corrected_split,k,split_at


In [6]:
def get_LDE(sketch_split,Candidate_split,Gx,Gy):
    Rx = width(Candidate_split)/(Gx * width(sketch_split))    
    Ry = height(Candidate_split)/(Gy * height(sketch_split))  
    return (np.log(Rx)**2)+(np.log(Ry)**2)

# from scipy.spatial.distance import cityblock
# print(cityblock(x1, x2))

def get_ShapeError(sketch_split,candidate_split,Gy):

    Ni = min(candidate_split.size,sketch_split.size)
    Sum_of_Shape = 0 
    #print("Candidate split", candidate_split,"sketch split", sketch_split, "NI",Ni,"size:",sketch_split.size,candidate_split.size)


    #print("NI",Ni,"sketch and candidate size:",sketch_split.size,candidate_split.size)

    resampled_sketch_split = signal.resample(sketch_split,Ni)
    resampled_candidate_split = signal.resample(candidate_split,Ni)

    Ry = height(resampled_candidate_split)/(Gy * height(resampled_sketch_split)) 


    for i in range(0,Ni):
        Sum_of_Shape += abs(((Gy*Ry*resampled_sketch_split[i]) - resampled_candidate_split[i])/height(candidate_split))        

    return Sum_of_Shape/Ni
    
def calculateDistance(Sketch, Candidate,k):
    Sketch = np.array(Sketch)
    Candidate = np.array(Candidate)

    #print("calculateDistance: Length of the Candidate and Sketch Segment",Candidate.size,Sketch.size,k)

    # Calculating Global non uniform Scaling factors
    Gx = widthGlobal(Candidate)/widthGlobal(Sketch)
    Gy = heightGlobal(Candidate)/heightGlobal(Sketch)
    # Calculating Local distortion and shape errors
    LDE = 0
    SE = 0
    for i in range(0,k-1):
        LDE += get_LDE(Sketch[i],Candidate[i],Gx,Gy)
        SE += get_ShapeError(Sketch[i],Candidate[i],Gy)

    # Calculating total error
    Dist = LDE + SE
    return Dist

In [None]:
np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)

#### Precision


In [65]:
def result_interpreter_precision(results,curve):

    op = []
    sorted_results = sorted(results, key=lambda x: x[0])

    j = 0
    if(sorted_results[0][0] == 999):
        return [0,0,0]
    while(j<5):
        if(sorted_results[j][3]==curve):
            if(j==0):
                return [1,1,1]
            elif(j<3):
                return [0,1,1]
            elif(j<5):
                return [0,0,1]
    return [0,0,0]

In [70]:
def qetch_plus_Precision(ori_data_X, ori_data_y, sketch_X, smooth_val_stepsize,curve):
    z = 0
    prec_output = []
    while(z<=100):
        ResultDistanceObject = []
        original = ori_data_X[curve]
        testing_sketch = sketch_X[curve][z]
        #Smoothing by factor of smooth_val_stepsize
        smoothed_candidate_list = []
        smooth_value_list = []
        smooth_value = 0
        while(smooth_value < 1):
            smoothed_candidate_list.append(smoother(original,smooth_value))
            smooth_value_list.append(smooth_value)
            smooth_value += smooth_val_stepsize
        Sketch_split_at = []
        split_sketch,k,Sketch_split_at = split_based_derivative(testing_sketch)

        for a in range(len(smoothed_candidate_list)):
            Candidate_split_at = []
            #Segments Loaded Data into T segments
            split_original,T,Candidate_split_at = split_based_derivative(smoothed_candidate_list[a])
           # print("T value and k Value are: ",T,k)
            if(T<k):
                ResultDistanceObject.append([999,[0,0],smooth_value_list[a],i])
                print("not possible") #Need to address case where this happens -> Smoothen Sketches with too much K
                continue
            itr = 0
            while(itr<=T-k):
                candidate_segments = split_original[itr:k+itr]
                query_segment = split_sketch
                itr+=1
                DistanceObject = []
                distance = calculateDistance(query_segment,candidate_segments,k)
                #Add the starting and ending position Identified
                start_pos = Candidate_split_at[itr]
                end_pos = Candidate_split_at[itr+1]
                DistanceObject.append([distance,[start_pos,end_pos],smooth_value_list[a],i])

            ResultDistanceObject.append(min(DistanceObject, key = lambda sublist: sublist[0])) # Will Contain a list of T-k minimum distances
        prec_output.append(result_interpreter_precision(ResultDistanceObject,curve)) #Should contain 8 best minimum distances
        print("--- Completed a sketch --- ")
        z+=50

    s1,s3,s5 = 0,0,0
    for q in prec_output:
        s1+=q[0]
        s3+=q[1]
        s5 += q[2]
    l = len(prec_output)
    s1 = s1/len
    s3 = s3/len
    s5 = s5/len
    precision_op = [s1,s3,s5]

    return precision_op

In [69]:
def qetch_plus_tester_precision(curve):
    smooth_val_stepsize = 0.05
    # for i in range(len(datasets)):
    return qetch_plus_Precision(ori_data_X, ori_data_y, sketch_X, smooth_val_stepsize,curve)


In [62]:
#datasets = ['has', 'sp', 'fp', 'rb', 'sd', 'sr', 'hasb', 'ihas']
precision = []
for i in range(0,8):
    precision.append(qetch_plus_tester_precision(i))
    print("Completed single type")

not possible


KeyboardInterrupt: 

In [None]:
print("hello")

#### Accuracy

In [74]:
# d['Average location error (%)'].append(np.mean([np.abs(x[1][0] - ori_data_y[i][0])/ ori_data_X[i].shape[0]*100 for x in results[i]]))
def result_interpreter_accuracy(results,ori_data_y,curve,length_of_original):

    accuracy = np.mean([np.abs(x[1][0] - ori_data_y[i][0])/length_of_original*100 for x in results])

    return accuracy

In [99]:
def qetch_plus_accuracy(ori_data_X, ori_data_y, sketch_X, smooth_val_stepsize,curve):
    z = 0
    results = []
    while(z<=100):
        ResultDistanceObject = []
        original = ori_data_X[curve]
        length_of_original = original.shape[0]
        testing_sketch = sketch_X[curve][z]

        #Smoothing by factor of smooth_val_stepsize
        smoothed_candidate_list = []
        smooth_value_list = []
        smooth_value = 0
        while(smooth_value < 1):
            smoothed_candidate_list.append(smoother(original,smooth_value))
            smooth_value_list.append(smooth_value)
            smooth_value += smooth_val_stepsize
        print("smoothed output size ",len(smoothed_candidate_list),len(smoothed_candidate_list[0]))

        Sketch_split_at = []
        split_sketch,k,Sketch_split_at = split_based_derivative(testing_sketch)

        for a in range(0,len(smoothed_candidate_list)):
            Candidate_split_at = []
            #Segments Loaded Data into T segments
            split_original,T,Candidate_split_at = split_based_derivative(smoothed_candidate_list[a])
            print("OP after smoothing: ",len(split_original[0]))
            print("T value and k Value are: ",T,k)
            if(T<k):
                ResultDistanceObject.append([999,[0,0],smooth_value_list[a],i])
                print("not possible") #Need to address case where this happens -> Smoothen Sketches with too much K
                continue
            itr = 0
            while(itr<=T-k):
                candidate_segments = split_original[itr:k+itr]
                query_segment = split_sketch
                itr+=1
                DistanceObject = []
                distance = calculateDistance(query_segment,candidate_segments,k)
                #Add the starting and ending position Identified
                start_pos = Candidate_split_at[itr]
                end_pos = Candidate_split_at[itr+1]
                DistanceObject.append([distance,[start_pos,end_pos],smooth_value_list[a],i])

            ResultDistanceObject.append(min(DistanceObject, key = lambda sublist: sublist[0])) # Will Contain a list of T-k minimum distances

        results.append(min(ResultDistanceObject, key = lambda sublist: sublist[0])) # Should contain 100 minimum distances.
        z+=20
    return result_interpreter_accuracy(results,ori_data_y,curve,length_of_original)

In [100]:
def qetch_plus_tester_accuracy(curve):
    smooth_val_stepsize = 0.05

    # for i in range(len(datasets)):
    return qetch_plus_accuracy(ori_data_X, ori_data_y, sketch_X, smooth_val_stepsize,curve)

In [101]:
qetch_plus_tester_accuracy(0)

smoothed output size  20 414
OP after smoothing:  414 [[0.10452962]
 [0.10452962]
 [0.10452962]
 [0.11149826]
 [0.1358885 ]
 [0.1358885 ]
 [0.13240418]
 [0.1358885 ]
 [0.1533101 ]
 [0.17770035]] [[0.10452962]
 [0.10452962]
 [0.10452962]
 [0.11149826]
 [0.1358885 ]
 [0.1358885 ]
 [0.13240418]
 [0.1358885 ]
 [0.1533101 ]
 [0.17770035]]
T value and k Value are:  1 16
not possible
OP after smoothing:  414 [[0.10452962]
 [0.10452962]
 [0.10452962]
 [0.11116645]
 [0.13471127]
 [0.13583244]
 [0.13256743]
 [0.13573036]
 [0.15247297]
 [0.17649904]] [[0.10452962]
 [0.10452962]
 [0.10452962]
 [0.11116645]
 [0.13471127]
 [0.13583244]
 [0.13256743]
 [0.13573036]
 [0.15247297]
 [0.17649904]]
T value and k Value are:  1 16
not possible
OP after smoothing:  414 [[0.10452962]
 [0.10452962]
 [0.10452962]
 [0.11086518]
 [0.1336138 ]
 [0.13568171]
 [0.13270214]
 [0.13559883]
 [0.15169999]
 [0.17533668]] [[0.10452962]
 [0.10452962]
 [0.10452962]
 [0.11086518]
 [0.1336138 ]
 [0.13568171]
 [0.13270214]
 [0.1

  if((sign_arr[i] == 0) or (sign_arr[i]==1)):
  elif((sign_arr[i] == -1) and ((p==1) or (p==0))):
  if((sign_arr[i] == 0) or (sign_arr[i]==1)):
  elif((sign_arr[i] == -1) and ((p==1) or (p==0))):
  if((sign_arr[i] == 0) or (sign_arr[i]==1)):
  elif((sign_arr[i] == -1) and ((p==1) or (p==0))):
  if((sign_arr[i] == 0) or (sign_arr[i]==1)):
  elif((sign_arr[i] == -1) and ((p==1) or (p==0))):
  if((sign_arr[i] == 0) or (sign_arr[i]==1)):
  elif((sign_arr[i] == -1) and ((p==1) or (p==0))):


 414 [[0.10452962]
 [0.10452962]
 [0.10452962]
 [0.11116645]
 [0.13471127]
 [0.13583244]
 [0.13256743]
 [0.13573036]
 [0.15247297]
 [0.17649904]] [[0.10452962]
 [0.10452962]
 [0.10452962]
 [0.11116645]
 [0.13471127]
 [0.13583244]
 [0.13256743]
 [0.13573036]
 [0.15247297]
 [0.17649904]]
T value and k Value are:  1 7
not possible
OP after smoothing:  414 [[0.10452962]
 [0.10452962]
 [0.10452962]
 [0.11086518]
 [0.1336138 ]
 [0.13568171]
 [0.13270214]
 [0.13559883]
 [0.15169999]
 [0.17533668]] [[0.10452962]
 [0.10452962]
 [0.10452962]
 [0.11086518]
 [0.1336138 ]
 [0.13568171]
 [0.13270214]
 [0.13559883]
 [0.15169999]
 [0.17533668]]
T value and k Value are:  1 7
not possible
OP after smoothing:  414 [[0.10452962]
 [0.10452962]
 [0.10452962]
 [0.11059106]
 [0.13258967]
 [0.13545823]
 [0.13280253]
 [0.13548598]
 [0.15098522]
 [0.17421577]] [[0.10452962]
 [0.10452962]
 [0.10452962]
 [0.11059106]
 [0.13258967]
 [0.13545823]
 [0.13280253]
 [0.13548598]
 [0.15098522]
 [0.17421577]]
T value and k

IndexError: index 100 is out of bounds for axis 0 with size 100

In [None]:
#datasets = ['has', 'sp', 'fp', 'rb', 'sd', 'sr', 'hasb', 'ihas']
accuracy = []
for i in range(0,8):
    accuracy.append(qetch_plus_tester_precision(i))
    print("Accuracy is:",accuracy[i])