# Experiment on Different Time-series Similarity Measures 1

### Loading

In [1]:
# load image and libraries
%matplotlib inline
import pandas as pd
from matplotlib import cm
import matplotlib.pyplot as plt
import numpy as np
from itertools import groupby
from scipy import signal
from sklearn import preprocessing

root_path = './processed_datasets/'
datasets = ['has', 'sp', 'fp', 'rb', 'sd', 'sr', 'hasb', 'ihas']

# load ground truth
ori_data_X = []
ori_data_y = []
sketch_X = []
for dataset in datasets:
    file_name = root_path + 'original_' + dataset  
    ori_data_X.append(np.load(file_name + '_X' + '.npy'))
    ori_data_y.append(np.load(file_name + '_y' + '.npy'))
    file_name = root_path + 'sketch_' + dataset + '.npy'
    sketch_X.append(np.load(file_name, allow_pickle=True)[:100])
print(f"number of loaded samples per class: {[len(x) for x in sketch_X]}")
print(f"Original data: {len(ori_data_X)} datasets")
print(f"Sketch data: {len(sketch_X)} datasets")

number of loaded samples per class: [100, 100, 100, 100, 100, 100, 100, 100]
Original data: 8 datasets
Sketch data: 8 datasets


### Sliding Window

In [2]:
def sliding_window(ori_series, clip_series, measure):
    """
    Compute the similarities of the original and the clipped series using sliding window
    input: original time series, clipped series, similarity measure function
    output: similarity_distribution, matching result, i.e., starting and ending points
    """
    ori_len = ori_series.shape[0]
    clip_len = clip_series.shape[0]
    if ori_len < clip_len:
        return None

    # compute the similarity between the original and the clipped series
    dist = []
    # compute the similarity between the original and the clipped series using sliding window
    for i in range(ori_len - clip_len + 1):
        dist.append(measure(ori_series[i:i+clip_len], clip_series))
    # find the maximum similarity and the corresponding starting and ending points
    min_idx = np.argmin(dist)
    return dist, [min_idx, min_idx + clip_len - 1]


In [3]:
def pointwise_exp(ori_data_X, ori_data_y, sketch_X, measure):
    results = []
    for i, dataset in enumerate(datasets):
        original = ori_data_X[i]
        label = ori_data_y[i]
        dummy_record = []
        for sample in sketch_X[i]:
            clip =  signal.resample(sample, label[1]-label[0]+1)
            sim_dist, pred_loc = sliding_window(original, clip, measure)
            dummy_record.append([sim_dist, pred_loc])
        results.append(dummy_record)
    return results

### Experiment 1.1: Euclidean Distance

#### Define measure

In [4]:
def euclidean_distance(x,y):
    t = preprocessing.Normalizer()
    x = np.expand_dims(x, axis=0)
    y = np.expand_dims(y, axis=0)
    return np.linalg.norm(t.transform(x) - t.transform(y))

#### Get results

In [6]:
results_eu = pointwise_exp(ori_data_X, ori_data_y, sketch_X, euclidean_distance)

KeyboardInterrupt: 

#### Anlysis

In [None]:
for i in range(len(results_eu)):
    print(f"Dataset: {datasets[i]}")
    print(f"Number of samples: {len(results_eu[i])}")
    print(f"Average distance: {np.mean([np.max(x[0]) for x in results_eu[i]])}")
    print(f"Average location error w.r.t. segment length: {np.mean([np.abs(x[1][0] - ori_data_y[i][0])/ (ori_data_y[i][1]-ori_data_y[i][0]+1)*100 for x in results_eu[i]])}%")
print(f'Average distance: {np.mean([np.mean([np.max(x[0]) for x in results_eu[i]]) for i in range(len(results_eu))])}')
print('--------------------Overall:------------------------------------')
print(f'Average location error w.r.t. segment length: {np.mean([np.mean([np.abs(x[1][0] - ori_data_y[i][0])/ (ori_data_y[i][1]-ori_data_y[i][0]+1)*100 for x in results_eu[i]]) for i in range(len(results_eu))])}%')
print(f'Average location error w.r.t. total length: {np.mean([np.mean([np.abs(x[1][0] - ori_data_y[i][0])/ ori_data_X[i].shape[0]*100 for x in results_eu[i]]) for i in range(len(results_eu))])}%')

for i in range(len(results_eu)):
    plt.figure()
    plt.title(f"Dataset: {datasets[i]}")
    plt.xlabel("Time")
    plt.ylabel("Similarity")
    plt.plot(results_eu[i][0][0])
    plt.plot(results_eu[i][0][1], [0.5, 0.5], marker='*', ls='none')
    plt.plot(ori_data_y[i], [0.5, 0.5], marker='o', color='r', ls='none')
    plt.show()


In [2]:
#Smoothing
import pandas as pd
def smoother(series,smoothing):
    series_df = pd.DataFrame(series,columns=['Data'])
    return series_df.ewm(smoothing).mean().to_numpy() 
# test = np.array([1, 2, 3, 4,2,5,2,232,323,23,2,3,23,2,3])
# op = smoother(test,0.5)
# print(op)

In [3]:
#Qetch Algorithm -- inclomplete --
def width(series):
    # Should return width of series -  Size of a 1D array is the same as the length.
    return series.size
    
def height(series):
    #Finds Height of time series based difference in max and minimum values.
    h = np.max(series) - np.min(series) 
    return h


def heightGlobal(series):
    hmax = 0
    hmin = 999
    for i in series:
        hmax = max(np.max(i),hmax)
        hmin = min(np.min(i),hmin)

    h = hmax - hmin
    return h


def widthGlobal(series):
    return series.size

In [4]:
def Split_Correcter(split_arr,h_threshold):
    # Checks if the height is less than 1% of total height and mergers small segments.
    corr_split = []
    p = 0
    buff = []
    split_at = []
    counter = 0
    for i in split_arr:
        if(len(i)==1 or (height(i)<h_threshold)):
            buff.append(i)
        else:
            if(len(buff)>0):
                buff.append(i)
                temp = np.concatenate(buff)
                corr_split.append(temp)

                split_at.append(counter) #Starting position of segment is noted
                counter+=temp.size
                buff = []
            else:
                split_at.append(counter) #Starting position of segment is noted
                counter+=i.size
                corr_split.append(i)

    split_at.append(counter) # Adding the end position of the last segment

    # print("Final",corr_split[:5],"Number of segments:",len(corr_split))
    return corr_split,len(corr_split),split_at

In [5]:
def split_based_derivative(series):

    h_threshold = 0.01 * height(series)
    diff_arr = np.diff(series)
    sign_arr = np.sign(diff_arr)
    p = 0
    split_indices = []
    split_at = []
    for i in range(0,len(sign_arr)):
        if(i==0):
            p = sign_arr[i]
        else:
            if((sign_arr[i] == 0) or (sign_arr[i]==1)):
                if(p==-1):
                    split_indices.append(i)
                    p = sign_arr[i]
            elif((sign_arr[i] == -1) and ((p==1) or (p==0))):
                split_indices.append(i)
                p = sign_arr[i]
                
    # print(series[:10],diff_arr[:10])
    split_arr = np.split(series, split_indices, axis=0)
    # print(len(split_arr))
    # print(len(series))
    # print(series[:10])
    # print(diff_arr[:10])
    # print(sign_arr[:10])
    # print(split_indices)
    # print(split_arr[:3])

    #print("Before Split Correcter")
    corrected_split,k,split_at = Split_Correcter(split_arr,h_threshold)
    return corrected_split,k,split_at


In [6]:
def get_LDE(sketch_split,Candidate_split,Gx,Gy):
    Rx = width(Candidate_split)/(Gx * width(sketch_split))    
    Ry = height(Candidate_split)/(Gy * height(sketch_split))  
    return (np.log(Rx)**2)+(np.log(Ry)**2)

# from scipy.spatial.distance import cityblock
# print(cityblock(x1, x2))

def get_ShapeError(sketch_split,candidate_split,Gy):

    Ni = min(candidate_split.size,sketch_split.size)
    Sum_of_Shape = 0 
    #print("Candidate split", candidate_split,"sketch split", sketch_split, "NI",Ni,"size:",sketch_split.size,candidate_split.size)


    #print("NI",Ni,"sketch and candidate size:",sketch_split.size,candidate_split.size)

    resampled_sketch_split = signal.resample(sketch_split,Ni)
    resampled_candidate_split = signal.resample(candidate_split,Ni)

    Ry = height(resampled_candidate_split)/(Gy * height(resampled_sketch_split)) 


    for i in range(0,Ni):
        Sum_of_Shape += abs(((Gy*Ry*resampled_sketch_split[i]) - resampled_candidate_split[i])/height(candidate_split))        

    return Sum_of_Shape/Ni
    
def calculateDistance(Sketch, Candidate,k):
    Sketch = np.array(Sketch)
    Candidate = np.array(Candidate)

    #print("calculateDistance: Length of the Candidate and Sketch Segment",Candidate.size,Sketch.size,k)

    # Calculating Global non uniform Scaling factors
    Gx = widthGlobal(Candidate)/widthGlobal(Sketch)
    Gy = heightGlobal(Candidate)/heightGlobal(Sketch)
    # Calculating Local distortion and shape errors
    LDE = 0
    SE = 0
    for i in range(0,k-1):
        LDE += get_LDE(Sketch[i],Candidate[i],Gx,Gy)
        SE += get_ShapeError(Sketch[i],Candidate[i],Gy)

    # Calculating total error
    Dist = LDE + SE
    return Dist

In [7]:
def result_interpreter(results,ori_data_y,curve):

    op1 = min(results, key = lambda sublist: sublist[0])
    print("The sketch is predicted as part of the ",datasets[op1[3]])
    print("Smoothing level:",op1[2])
    print("Calculated Distance",op1[0])
    print("Calculated Starting and ending positions",op1[1])
    print("Expected Starting and ending positions",ori_data_y[op1[3]])

    print("What it should be ----")
    op = results[curve]
    print("The sketch is predicted as part of the ",datasets[op[3]])
    print("Smoothing level:",op[2])
    print("Calculated Distance",op[0])
    print("Calculated Starting and ending positions",op[1])
    print("Expected Starting and ending positions",ori_data_y[op[3]])
    return op

In [73]:
result_interpreter(ressults_qe,ori_data_y,0)

The sketch is predicted as part of the  sp
Smoothing level: 0.9500000000000003
Calculated Distance [35.84125892]
Calculated Starting and ending positions [301, 310]
Expected Starting and ending positions [247 359]
What it should be ----
The sketch is predicted as part of the  has
Smoothing level: 0.9500000000000003
Calculated Distance [65.09744962]
Calculated Starting and ending positions [83, 94]
Expected Starting and ending positions [107 243]


[array([65.09744962]), [83, 94], 0.9500000000000003, 0]

In [12]:
def qetch_plus(ori_data_X, ori_data_y, sketch_X, smooth_val_stepsize,curve):
    z = 0
    while(z<=100):
        results = []
        for i, dataset in enumerate(datasets):
            original = ori_data_X[i]
            ResultDistanceObject = []
            Candidate_split_at = []
            Sketch_split_at = []
            #Segments Loaded Data into T segments
            # print("Before splitting Original")
            split_original,T,Candidate_split_at = split_based_derivative(original)
            # print(" T value",T)

            testing_sketch = sketch_X[curve][z]

            # print("Before splitting sketch")
            #Assuming input is converted to a modifiable bezier curve
            split_sketch,k,Sketch_split_at = split_based_derivative(testing_sketch)

            # print("T value and k Value are: ",T,k)
            if(T<k):
                print("not possible") #Need to address case where this happens -> Smoothen Sketches with too much K
                continue
            itr = 0
            while(itr<=T-k):
                candidate_segments = split_original[itr:k+itr]
                query_segment = split_sketch
                itr+=1
                smooth_value = 0.1
                while(smooth_value < 1):
                    smoothed_candidate_segments = []
                    DistanceObject = []
                    for l in range(0,len(candidate_segments)):
                        smoothed_candidate_segments.append(smoother(candidate_segments[l],smooth_value))

                    #print("Smoothed Candidate segments: ",len(smoothed_candidate_segments))
                    # for l in range(0,len(smoothed_candidate_segments)):
                    distance = calculateDistance(query_segment,smoothed_candidate_segments,k)
                    #Add the starting and ending position Identified
                    start_pos = Candidate_split_at[itr]
                    end_pos = Candidate_split_at[itr+1]
                    DistanceObject.append([distance,[start_pos,end_pos],smooth_value,i])

                    smooth_value += smooth_val_stepsize

                ResultDistanceObject.append(min(DistanceObject, key = lambda sublist: sublist[0])) # Will Contain a list of 10 minimum distances -> min distance in each segment
                    # print(len(l))
                # print(len(smoothed_candidate_segments[0]))

            results.append(min(ResultDistanceObject, key = lambda sublist: sublist[0]))
        result_interpreter(results,ori_data_y,curve)
        print("--- Completed a sketch --- ")
        z+=15
    return

In [18]:
def qetch_plus_tester(curve):
    smooth_val_stepsize = 0.05
    # for i in range(len(datasets)):
    qetch_plus(ori_data_X, ori_data_y, sketch_X, smooth_val_stepsize,curve)


In [14]:
np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)

In [15]:
qetch_plus_tester()

not possible
The sketch is predicted as part of the  fp
Smoothing level: 0.9500000000000003
Calculated Distance [107.26191651]
Calculated Starting and ending positions [170, 180]
Expected Starting and ending positions [131 229]
What it should be ----
The sketch is predicted as part of the  has
Smoothing level: 0.9500000000000003
Calculated Distance [178.03258257]
Calculated Starting and ending positions [33, 41]
Expected Starting and ending positions [107 243]
--- Completed a sketch --- 
The sketch is predicted as part of the  sr
Smoothing level: 0.9500000000000003
Calculated Distance [44.34431884]
Calculated Starting and ending positions [73, 80]
Expected Starting and ending positions [170 230]
What it should be ----
The sketch is predicted as part of the  has
Smoothing level: 0.9500000000000003
Calculated Distance [80.64543137]
Calculated Starting and ending positions [61, 83]
Expected Starting and ending positions [107 243]
--- Completed a sketch --- 
The sketch is predicted as part

In [17]:
qetch_plus_tester()

not possible
The sketch is predicted as part of the  sp
Smoothing level: 0.9500000000000003
Calculated Distance [38.55183921]
Calculated Starting and ending positions [262, 271]
Expected Starting and ending positions [247 359]
What it should be ----
The sketch is predicted as part of the  sp
Smoothing level: 0.9500000000000003
Calculated Distance [38.55183921]
Calculated Starting and ending positions [262, 271]
Expected Starting and ending positions [247 359]
--- Completed a sketch --- 
not possible
The sketch is predicted as part of the  fp
Smoothing level: 0.9500000000000003
Calculated Distance [46.18790757]
Calculated Starting and ending positions [223, 230]
Expected Starting and ending positions [131 229]
What it should be ----
The sketch is predicted as part of the  sp
Smoothing level: 0.9500000000000003
Calculated Distance [50.63611965]
Calculated Starting and ending positions [262, 271]
Expected Starting and ending positions [247 359]
--- Completed a sketch --- 
not possible
The

In [19]:
qetch_plus_tester(2)

not possible
The sketch is predicted as part of the  sp
Smoothing level: 0.9500000000000003
Calculated Distance [30.26354166]
Calculated Starting and ending positions [148, 154]
Expected Starting and ending positions [247 359]
What it should be ----
The sketch is predicted as part of the  fp
Smoothing level: 0.9500000000000003
Calculated Distance [30.5178949]
Calculated Starting and ending positions [148, 154]
Expected Starting and ending positions [131 229]
--- Completed a sketch --- 
not possible
The sketch is predicted as part of the  sp
Smoothing level: 0.9500000000000003
Calculated Distance [36.6336712]
Calculated Starting and ending positions [41, 52]
Expected Starting and ending positions [247 359]
What it should be ----
The sketch is predicted as part of the  fp
Smoothing level: 0.9500000000000003
Calculated Distance [38.56647755]
Calculated Starting and ending positions [41, 51]
Expected Starting and ending positions [131 229]
--- Completed a sketch --- 
not possible
The sketc

In [20]:
qetch_plus_tester(3)

not possible
The sketch is predicted as part of the  hasb
Smoothing level: 0.9500000000000003
Calculated Distance [333.3862642]
Calculated Starting and ending positions [188, 193]
Expected Starting and ending positions [ 80 145]
What it should be ----
The sketch is predicted as part of the  sd
Smoothing level: 0.9500000000000003
Calculated Distance [372.90661532]
Calculated Starting and ending positions [19, 25]
Expected Starting and ending positions [162 211]
--- Completed a sketch --- 
The sketch is predicted as part of the  sr
Smoothing level: 0.9500000000000003
Calculated Distance [38.00572599]
Calculated Starting and ending positions [294, 319]
Expected Starting and ending positions [170 230]
What it should be ----
The sketch is predicted as part of the  rb
Smoothing level: 0.9500000000000003
Calculated Distance [79.31706365]
Calculated Starting and ending positions [67, 94]
Expected Starting and ending positions [118 246]
--- Completed a sketch --- 
The sketch is predicted as par

In [21]:
qetch_plus_tester(4)

The sketch is predicted as part of the  sd
Smoothing level: 0.9500000000000003
Calculated Distance [59.21706468]
Calculated Starting and ending positions [181, 192]
Expected Starting and ending positions [162 211]
What it should be ----
The sketch is predicted as part of the  sd
Smoothing level: 0.9500000000000003
Calculated Distance [59.21706468]
Calculated Starting and ending positions [181, 192]
Expected Starting and ending positions [162 211]
--- Completed a sketch --- 
The sketch is predicted as part of the  sd
Smoothing level: 0.9500000000000003
Calculated Distance [28.64041036]
Calculated Starting and ending positions [309, 335]
Expected Starting and ending positions [162 211]
What it should be ----
The sketch is predicted as part of the  sd
Smoothing level: 0.9500000000000003
Calculated Distance [28.64041036]
Calculated Starting and ending positions [309, 335]
Expected Starting and ending positions [162 211]
--- Completed a sketch --- 
The sketch is predicted as part of the  rb

  Ry = height(resampled_candidate_split)/(Gy * height(resampled_sketch_split))
  Ry = height(resampled_candidate_split)/(Gy * height(resampled_sketch_split))
  Ry = height(resampled_candidate_split)/(Gy * height(resampled_sketch_split))
  Ry = height(resampled_candidate_split)/(Gy * height(resampled_sketch_split))
  Ry = height(resampled_candidate_split)/(Gy * height(resampled_sketch_split))
  Ry = height(resampled_candidate_split)/(Gy * height(resampled_sketch_split))
  Ry = height(resampled_candidate_split)/(Gy * height(resampled_sketch_split))
  Ry = height(resampled_candidate_split)/(Gy * height(resampled_sketch_split))
  Ry = height(resampled_candidate_split)/(Gy * height(resampled_sketch_split))
  Ry = height(resampled_candidate_split)/(Gy * height(resampled_sketch_split))
  Ry = height(resampled_candidate_split)/(Gy * height(resampled_sketch_split))
  Ry = height(resampled_candidate_split)/(Gy * height(resampled_sketch_split))
  Ry = height(resampled_candidate_split)/(Gy * heigh

The sketch is predicted as part of the  sr
Smoothing level: 0.9500000000000003
Calculated Distance [41.63504098]
Calculated Starting and ending positions [319, 327]
Expected Starting and ending positions [170 230]
What it should be ----
The sketch is predicted as part of the  sd
Smoothing level: 0.9500000000000003
Calculated Distance [58.22637416]
Calculated Starting and ending positions [298, 309]
Expected Starting and ending positions [162 211]
--- Completed a sketch --- 
The sketch is predicted as part of the  sd
Smoothing level: 0.9500000000000003
Calculated Distance [40.80922085]
Calculated Starting and ending positions [264, 267]
Expected Starting and ending positions [162 211]
What it should be ----
The sketch is predicted as part of the  sd
Smoothing level: 0.9500000000000003
Calculated Distance [40.80922085]
Calculated Starting and ending positions [264, 267]
Expected Starting and ending positions [162 211]
--- Completed a sketch --- 
The sketch is predicted as part of the  sr

In [22]:
qetch_plus_tester(5)

not possible
The sketch is predicted as part of the  sd
Smoothing level: 0.9500000000000003
Calculated Distance [87.93052515]
Calculated Starting and ending positions [78, 101]
Expected Starting and ending positions [162 211]
What it should be ----
The sketch is predicted as part of the  hasb
Smoothing level: 0.9500000000000003
Calculated Distance [128.30646308]
Calculated Starting and ending positions [278, 285]
Expected Starting and ending positions [ 80 145]
--- Completed a sketch --- 
The sketch is predicted as part of the  sp
Smoothing level: 0.9500000000000003
Calculated Distance [43.51906879]
Calculated Starting and ending positions [197, 202]
Expected Starting and ending positions [247 359]
What it should be ----
The sketch is predicted as part of the  sr
Smoothing level: 0.9500000000000003
Calculated Distance [48.43866611]
Calculated Starting and ending positions [86, 89]
Expected Starting and ending positions [170 230]
--- Completed a sketch --- 
not possible
The sketch is pr

In [23]:
qetch_plus_tester(6)

not possible
The sketch is predicted as part of the  fp
Smoothing level: 0.9500000000000003
Calculated Distance [107.26191651]
Calculated Starting and ending positions [170, 180]
Expected Starting and ending positions [131 229]
What it should be ----
The sketch is predicted as part of the  ihas
Smoothing level: 0.9500000000000003
Calculated Distance [142.81176762]
Calculated Starting and ending positions [203, 206]
Expected Starting and ending positions [170 254]
--- Completed a sketch --- 
The sketch is predicted as part of the  sp
Smoothing level: 0.9500000000000003
Calculated Distance [32.60427453]
Calculated Starting and ending positions [310, 317]
Expected Starting and ending positions [247 359]
What it should be ----
The sketch is predicted as part of the  hasb
Smoothing level: 0.9500000000000003
Calculated Distance [62.87496508]
Calculated Starting and ending positions [307, 316]
Expected Starting and ending positions [ 80 145]
--- Completed a sketch --- 
The sketch is predicted

In [27]:
qetch_plus_tester(7)

not possible
The sketch is predicted as part of the  ihas
Smoothing level: 0.9500000000000003
Calculated Distance [71.08337477]
Calculated Starting and ending positions [165, 168]
Expected Starting and ending positions [170 254]
What it should be ----


IndexError: list index out of range

In [25]:
qetch_plus_tester(8)

IndexError: list index out of range

In [26]:
qetch_plus_tester(9)

IndexError: list index out of range