# Experiment on Different Time-series Similarity Measures 1

### Loading

In [5]:
# load libraries and datasets
%matplotlib inline
import pandas as pd
from matplotlib import cm
import matplotlib.pyplot as plt
import numpy as np
from itertools import groupby
from scipy import signal
from sklearn import preprocessing

root_path = './processed_datasets/'
datasets = ['has', 'sp', 'fp', 'rb', 'sd', 'sr', 'hasb', 'ihas']

# Ground-truth arrays and sketches; each list is index-aligned with `datasets`.
ori_data_X, ori_data_y, sketch_X = [], [], []
for dataset in datasets:
    # Every dataset ships an `original_<name>_X.npy` / `original_<name>_y.npy` pair.
    file_name = f"{root_path}original_{dataset}"
    ori_data_X.append(np.load(f"{file_name}_X.npy"))
    ori_data_y.append(np.load(f"{file_name}_y.npy"))
    # Sketches are stored in `sketch_<name>.npy`; only the first 100 entries are kept.
    # NOTE(review): allow_pickle=True unpickles arbitrary objects - only load trusted files.
    file_name = f"{root_path}sketch_{dataset}.npy"
    sketch_X.append(np.load(file_name, allow_pickle=True)[:100])
print(f"number of loaded samples per class: {list(map(len, sketch_X))}")
print(f"Original data: {len(ori_data_X)} datasets")
print(f"Sketch data: {len(sketch_X)} datasets")

number of loaded samples per class: [100, 100, 100, 100, 100, 100, 100, 100]
Original data: 8 datasets
Sketch data: 8 datasets


### Sliding Window

In [6]:
def sliding_window(ori_series, clip_series, measure):
    """
    Slide `clip_series` over `ori_series` and score every window with `measure`.

    input: original time series, clipped series, measure(window, clip) -> score
    output: None when the original series is shorter than the clip; otherwise
            (scores, [start, end]) where `scores` holds one score per window
            start and [start, end] are the inclusive bounds of the window with
            the smallest score (the first such window on ties)
    """
    window = clip_series.shape[0]
    n_starts = ori_series.shape[0] - window + 1
    # not even one full window fits into the original series
    if n_starts < 1:
        return None

    # one score per possible window start
    scores = [measure(ori_series[start:start + window], clip_series)
              for start in range(n_starts)]
    # the score is treated as a distance, so the best match is the minimum
    best_start = np.argmin(scores)
    return scores, [best_start, best_start + window - 1]


In [7]:
def pointwise_exp(ori_data_X, ori_data_y, sketch_X, measure):
    """
    Run the sliding-window matching experiment for every dataset.

    The three data arguments are index-aligned lists (one entry per dataset).
    They are iterated directly, so the function does not rely on any
    notebook-level global.

    input:
        ori_data_X: list of original time series
        ori_data_y: list of labels; label[0] and label[1] are treated as the
                    inclusive start and end index of the labelled segment
        sketch_X:   list of sketch collections, one collection per dataset
        measure:    measure(window, clip) -> distance, forwarded to
                    sliding_window
    output: one list per dataset containing [sim_dist, pred_loc] for every
            sketch sample, as returned by sliding_window
    raises: ValueError if a labelled segment is longer than its original
            series (sliding_window cannot place the clip)
    """
    results = []
    for original, label, sketches in zip(ori_data_X, ori_data_y, sketch_X):
        dataset_record = []
        for sample in sketches:
            # stretch/shrink the sketch to the length of the labelled segment
            clip = signal.resample(sample, label[1] - label[0] + 1)
            match = sliding_window(original, clip, measure)
            if match is None:
                # sliding_window returns None when the clip does not fit
                raise ValueError(
                    "labelled segment is longer than the original series; "
                    "cannot run the sliding window"
                )
            sim_dist, pred_loc = match
            dataset_record.append([sim_dist, pred_loc])
        results.append(dataset_record)
    return results

### Experiment 1.1: Euclidean Distance

#### Define measure

In [8]:
def euclidean_distance(x,y):
    """
    Euclidean distance between `x` and `y` after normalising each of them.

    Both inputs are turned into single-row 2-D arrays and passed through
    scikit-learn's `Normalizer` with its default settings (per-row L2
    normalisation according to the scikit-learn documentation); the norm of
    the difference of the two normalised rows is returned.
    """
    normalizer = preprocessing.Normalizer()
    # Normalizer works on 2-D input, so each series becomes one row
    x_unit, y_unit = (normalizer.transform(np.expand_dims(series, axis=0))
                      for series in (x, y))
    return np.linalg.norm(x_unit - y_unit)

#### Get results

In [9]:
# Run the sliding-window experiment on every dataset, scoring windows with euclidean_distance.
results_eu = pointwise_exp(ori_data_X, ori_data_y, sketch_X, euclidean_distance)

#### Analysis

In [None]:
# Per-dataset summary of the Euclidean-distance experiment. The per-dataset
# means are collected so the overall figures below reuse them instead of
# recomputing every comprehension.
avg_dists = []        # mean over samples of each sample's largest window distance
seg_err_pcts = []     # mean start-offset error as % of the labelled segment length
total_err_pcts = []   # mean start-offset error as % of the full series length
for i in range(len(results_eu)):
    true_start = ori_data_y[i][0]
    seg_len = ori_data_y[i][1] - ori_data_y[i][0] + 1
    # absolute offset between predicted and labelled start, one value per sketch sample
    start_errs = [np.abs(x[1][0] - true_start) for x in results_eu[i]]
    # NOTE(review): this averages np.max of each distance profile, while the
    # match itself is chosen with argmin in sliding_window - confirm that the
    # maximum (not the minimum) is the intended statistic.
    avg_dists.append(np.mean([np.max(x[0]) for x in results_eu[i]]))
    seg_err_pcts.append(np.mean([err / seg_len * 100 for err in start_errs]))
    total_err_pcts.append(np.mean([err / ori_data_X[i].shape[0] * 100 for err in start_errs]))
    print(f"Dataset: {datasets[i]}")
    print(f"Number of samples: {len(results_eu[i])}")
    print(f"Average distance: {avg_dists[-1]}")
    print(f"Average location error w.r.t. segment length: {seg_err_pcts[-1]}%")

# Overall figures are the unweighted mean of the per-dataset means. The header
# is printed first so that every overall figure appears underneath it.
print('--------------------Overall:------------------------------------')
print(f'Average distance: {np.mean(avg_dists)}')
print(f'Average location error w.r.t. segment length: {np.mean(seg_err_pcts)}%')
print(f'Average location error w.r.t. total length: {np.mean(total_err_pcts)}%')

# One figure per dataset: distance profile of the first sketch sample, with the
# predicted bounds ('*') and the labelled bounds (red 'o') drawn at y = 0.5.
for i in range(len(results_eu)):
    first_profile, first_pred_loc = results_eu[i][0]
    plt.figure()
    plt.title(f"Dataset: {datasets[i]}")
    plt.xlabel("Time")
    plt.ylabel("Similarity")
    plt.plot(first_profile)
    plt.plot(first_pred_loc, [0.5, 0.5], marker='*', ls='none')
    plt.plot(ori_data_y[i], [0.5, 0.5], marker='o', color='r', ls='none')
    plt.show()


In [14]:
#Smoothing
import pandas as pd
def smoother(series,smoothing):
    """
    Exponentially weighted moving average of `series`.

    `smoothing` is passed as the first positional argument of
    `DataFrame.ewm` (the `com` parameter in the pandas documentation).
    The series is wrapped in a single-column frame, so the result is a 2-D
    array with one column.
    """
    frame = pd.DataFrame(series, columns=['Data'])
    smoothed = frame.ewm(smoothing).mean()
    return smoothed.to_numpy()
# test = np.array([1, 2, 3, 4,2,5,2,232,323,23,2,3,23,2,3])
# op = smoother(test,0.5)
# print(op)

In [15]:
#Qetch Algorithm -- incomplete --
def width(series):
    """Return the number of elements of `series` (its `.size`); for a 1-D array this equals its length."""
    n_points = series.size
    return n_points
    
def height(series):
    """Return the value range of `series`: largest minus smallest value, with NaNs ignored."""
    top, bottom = np.nanmax(series), np.nanmin(series)
    return top - bottom

def get_LDE(sketch_split,Candidate_split,Gx,Gy):
    """
    Local distortion error between one sketch section and one candidate section.

    The candidate/sketch width ratio is divided by the global factor `Gx`, the
    height ratio by `Gy`; the result is the sum of the squared natural logs of
    the two rescaled ratios.
    """
    width_ratio = width(Candidate_split) / (Gx * width(sketch_split))
    height_ratio = height(Candidate_split) / (Gy * height(sketch_split))
    log_rx, log_ry = np.log(width_ratio), np.log(height_ratio)
    return log_rx**2 + log_ry**2


# from scipy.spatial.distance import cityblock
# print(cityblock(x1, x2))

def get_ShapeError(sketch_split,candidate_split,Gy,Ni):
    """
    Shape error between a sketch section and a candidate section.

    Both sections are resampled to `Ni` points. The sketch is rescaled
    vertically by `Gy * Ry`, where `Ry` is the height ratio of the resampled
    sections divided by `Gy`. The result is the mean absolute point-wise
    difference, expressed relative to the height of the (un-resampled)
    candidate section.

    input: sketch section, candidate section, global vertical scale Gy,
           number of resampling points Ni
    output: mean normalised absolute deviation over the Ni points
    """
    resampled_sketch = signal.resample(sketch_split, Ni)
    resampled_candidate = signal.resample(candidate_split, Ni)

    Ry = height(resampled_candidate) / (Gy * height(resampled_sketch))

    # The resampled arrays hold exactly Ni points at indices 0 .. Ni-1, so all
    # of them are compared at once; a 1-based index range would skip the first
    # point and read one element past the end.
    deviations = np.abs(
        (Gy * Ry * resampled_sketch - resampled_candidate) / height(candidate_split)
    )
    # axis=0 sums over the points only, leaving any trailing axes untouched
    return np.sum(deviations, axis=0) / Ni
    
def calculateDistance(Sketch, Candidate,k,Ni):
    """
    Total matching error between a sketch and a candidate series.

    The error is the shape error of the whole pair plus the sum of the local
    distortion errors of `k` corresponding sections.

    input:
        Sketch, Candidate: the two series to compare (numpy arrays)
        k:  number of sections each series is divided into for the local
            distortion error
        Ni: number of points used when resampling for the shape error
    output: LDE + SE

    NOTE(review): sections are obtained by cutting both series into `k`
    contiguous, near-equal parts; replace this once a proper segmentation
    method is available.
    """
    # Global non-uniform scaling factors between candidate and sketch
    Gx = width(Candidate)/width(Sketch)
    Gy = height(Candidate)/height(Sketch)

    # Shape error of the whole pair
    SE = get_ShapeError(Sketch,Candidate,Gy,Ni)

    # Local distortion errors, one per pair of corresponding sections.
    # get_LDE expects (sketch section, candidate section, Gx, Gy): sections
    # must be sub-series (a single element has zero height), and the scaling
    # factors must be passed in that order.
    LDE = 0
    if k > 0:
        sketch_sections = np.array_split(Sketch, k)
        candidate_sections = np.array_split(Candidate, k)
        for sketch_section, candidate_section in zip(sketch_sections, candidate_sections):
            LDE += get_LDE(sketch_section, candidate_section, Gx, Gy)

    # Total error
    return LDE + SE


In [56]:

 
def qetch(ori_data_X, ori_data_y, sketch_X, smoothing, k,Ni):
    """
    Qetch-style matching experiment (work in progress).

    For every dataset the original series is cut into `k` contiguous parts.
    Every sketch sample is smoothed at ten levels, and for each smoothed
    version the part of the original with the smallest calculateDistance
    value is recorded.

    input:
        ori_data_X: list of original series, one per dataset
        ori_data_y: list of labels (currently not used)
        sketch_X:   list of sketch collections, index-aligned with ori_data_X
        smoothing:  currently not used; the smoothing levels are fixed below
        k:          number of parts the original series is split into
        Ni:         number of resampling points forwarded to calculateDistance
    output: one list per dataset; each contains, for every (sample, smoothing
            level) pair, the entry [distance, original part, smoothed sketch]
            with the smallest distance

    TODO: implement height checker - is it done already?
    TODO: sampling?
    """
    # Explicit grid 0.1, 0.2, ..., 1.0. Building it by repeatedly adding 0.1 to
    # a float yields ten values only because of rounding, so the count is
    # pinned here instead.
    smoothing_levels = [0.1 * step for step in range(1, 11)]

    results = []
    # Iterate the arguments directly rather than a notebook-level dataset list.
    for original, sketches in zip(ori_data_X, sketch_X):
        # The split does not depend on the sample, so it is done once per
        # dataset; array_split also accepts lengths that are not a multiple of k.
        # TODO: come up with a proper segmenting method that takes derivatives
        # and height into account.
        split_original = np.array_split(original, k, axis=0)

        ResultDistanceObject = []
        for sample in sketches:
            # one smoothed copy of the sketch per smoothing level
            smoothed_sample = [smoother(sample, level) for level in smoothing_levels]

            for smoothed in smoothed_sample:
                DistanceObject = []  # distances of this smoothed sketch to every part
                for split_orig_element in split_original:
                    # NOTE(review): calculateDistance is declared as
                    # (Sketch, Candidate, ...) but receives the original part
                    # first and the sketch second - confirm the intended order.
                    distance = calculateDistance(split_orig_element, smoothed, k, Ni)
                    DistanceObject.append([distance, split_orig_element, smoothed])

                # keep only the best-matching part for this smoothing level
                ResultDistanceObject.append(min(DistanceObject, key=lambda sublist: sublist[0]))

        results.append(ResultDistanceObject)
    return results


In [48]:
# Qetch run configuration.
# smooth_val is passed as `smoothing`; NOTE(review): qetch() never reads that argument.
smooth_val = 1
# Ni: number of points both series are resampled to when computing the shape error.
Ni = 100
# k: number of parts each original series is split into inside qetch().
k = 6
ressults_qe = qetch(ori_data_X, ori_data_y, sketch_X, smooth_val, k,Ni)

old [array([0.10452962, 0.10452962, 0.10452962, 0.11149826, 0.1358885 ,
       0.1358885 , 0.13240418, 0.1358885 , 0.1533101 , 0.17770035,
       0.18815331, 0.19860627, 0.20209059, 0.16027875, 0.14634146,
       0.14634146, 0.16376307, 0.16376307, 0.17421603, 0.19512195,
       0.19860627, 0.19512195, 0.18118467, 0.17770035, 0.17770035,
       0.16724739, 0.16376307, 0.1533101 , 0.1533101 , 0.15679443,
       0.16376307, 0.16724739, 0.17421603, 0.18815331, 0.18466899,
       0.17421603, 0.17421603, 0.18466899, 0.18466899, 0.18815331,
       0.19163763, 0.19163763, 0.18118467, 0.15679443, 0.13240418,
       0.12195122, 0.1184669 , 0.1184669 , 0.12195122, 0.12543554,
       0.1184669 , 0.10452962, 0.09756098, 0.11149826, 0.11498258,
       0.11149826, 0.11149826, 0.1184669 , 0.14982578, 0.1533101 ,
       0.16027875, 0.16376307, 0.16027875, 0.15679443, 0.1533101 ,
       0.1533101 , 0.1533101 , 0.12543554, 0.12543554, 0.12891986,
       0.12891986, 0.1358885 , 0.13937282, 0.14634146, 0.

In [55]:
# Inspect the original series at index 7, i.e. the 'ihas' entry of `datasets`.
print(ori_data_X[7])

[0.46808511 0.46808511 0.63297872 0.62765957 0.59574468 0.59042553
 0.59574468 0.62234043 0.61702128 0.59042553 0.60638298 0.63297872
 0.68085106 0.73404255 0.85106383 0.85106383 0.84042553 0.84042553
 0.84042553 0.83510638 0.77659574 0.75       0.76595745 0.80851064
 0.79787234 0.81382979 0.81382979 0.78191489 0.78191489 0.79787234
 0.80319149 0.78191489 0.78191489 0.78723404 0.79787234 0.84042553
 0.87234043 0.87765957 0.87234043 0.87234043 0.88829787 0.87234043
 0.86702128 0.86170213 0.82446809 0.7712766  0.74468085 0.75
 0.75       0.7287234  0.72340426 0.71808511 0.75531915 0.7606383
 0.81382979 0.82446809 0.81914894 0.81382979 0.80851064 0.82446809
 0.83510638 0.85106383 0.86170213 0.88297872 0.93617021 0.94680851
 0.92553191 0.92553191 0.94148936 1.         1.         0.93085106
 0.88297872 0.84042553 0.79787234 0.79787234 0.81382979 0.80851064
 0.75       0.7287234  0.70744681 0.65425532 0.57446809 0.56914894
 0.56914894 0.56382979 0.54787234 0.54787234 0.54787234 0.53723404
 0