# IMPORT

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import time
import math

# METHOD

### Euclidean Distance(For Numpy Array)

In [2]:
def eu_distance(e1,e2):
    return math.sqrt(math.fsum(list(map(lambda x: x**2 ,e1-e2))))

### Generate Data and Results for each File

In [3]:
def read_all_lines(file) :
    with open(file, 'rt') as fd:
        convert = [e.split() for e in fd.readlines()]
        all_data = []
        all_results = []
        for line in convert:
            data = line
            result = data.pop(-1)
            all_data.append(data)
            all_results.append(result)
            
        float_data = []
        for element in all_data:
            float_data.append([float(e) for e in element])
        float_data = np.array(float_data)
        all_results = np.array(all_results)
        
        normalized_data = []
        for element in float_data:
            mean = math.fsum(element)/len(element)
            std = math.sqrt(sum((element-mean)**2)/(len(element)-1))
            normalized_data.append((element - mean)/std)
        normalized_data = np.array(normalized_data)
        return normalized_data, all_results

### Dynamic Time Warping

In [4]:
def dtw_distance(s1, s2):
    DTW={}

    for i in range(len(s1)):
        DTW[(i, -1)] = float('inf')
    for i in range(len(s2)):
        DTW[(-1, i)] = float('inf')
    DTW[(-1, -1)] = 0

    for i in range(len(s1)):
        for j in range(len(s2)):
            dist= (s1[i]-s2[j])**2
            DTW[(i, j)] = dist + min(DTW[(i-1, j)],DTW[(i, j-1)], DTW[(i-1, j-1)])

    return np.sqrt(DTW[len(s1)-1, len(s2)-1])

### DBA

In [5]:
from __future__ import division
from functools import reduce


__author__ ="Francois Petitjean"

def performDBA(series, n_iterations=30):
    n_series = len(series)
    max_length = reduce(max, map(len, series))

    cost_mat = np.zeros((max_length, max_length))
    delta_mat = np.zeros((max_length, max_length))
    path_mat = np.zeros((max_length, max_length), dtype=np.int8)

    medoid_ind = approximate_medoid_index(series,cost_mat,delta_mat)
    center = series[medoid_ind]

    for i in range(0,n_iterations):
        center = DBA_update(center, series, cost_mat, path_mat, delta_mat)

    return center

def approximate_medoid_index(series,cost_mat,delta_mat):
    if len(series)<=50:
        indices = range(0,len(series))
    else:
        indices = np.random.choice(range(0,len(series)),50,replace=False)

    medoid_ind = -1
    best_ss = 1e20
    for index_candidate in indices:
        candidate = series[index_candidate]
        ss = sum_of_squares(candidate,series,cost_mat,delta_mat)
        if(medoid_ind==-1 or ss<best_ss):
            best_ss = ss
            medoid_ind = index_candidate
    return medoid_ind

def sum_of_squares(s,series,cost_mat,delta_mat):
    return sum(map(lambda t:squared_DTW(s,t,cost_mat,delta_mat),series))

def DTW(s,t,cost_mat,delta_mat):
    return np.sqrt(squared_DTW(s,t,cost_mat,delta_mat))

def squared_DTW(s,t,cost_mat,delta_mat):
    s_len = len(s)
    t_len = len(t)
    length = len(s)
    fill_delta_mat_dtw(s, t, delta_mat)
    cost_mat[0, 0] = delta_mat[0, 0]
    for i in range(1, s_len):
        cost_mat[i, 0] = cost_mat[i-1, 0]+delta_mat[i, 0]

    for j in range(1, t_len):
        cost_mat[0, j] = cost_mat[0, j-1]+delta_mat[0, j]

    for i in range(1, s_len):
        for j in range(1, t_len):
            diag,left,top =cost_mat[i-1, j-1], cost_mat[i, j-1], cost_mat[i-1, j]
            if(diag <=left):
                if(diag<=top):
                    res = diag
                else:
                    res = top
            else:
                if(left<=top):
                    res = left
                else:
                    res = top
            cost_mat[i, j] = res+delta_mat[i, j]
    return cost_mat[s_len-1,t_len-1]

def fill_delta_mat_dtw(center, s, delta_mat):
    slim = delta_mat[:len(center),:len(s)]
    np.subtract.outer(center, s,out=slim)
    np.square(slim, out=slim)

def DBA_update(center, series, cost_mat, path_mat, delta_mat):
    options_argmin = [(-1, -1), (0, -1), (-1, 0)]
    updated_center = np.zeros(center.shape)
    n_elements = np.array(np.zeros(center.shape), dtype=int)
    center_length = len(center)
    for s in series:
        s_len = len(s)
        fill_delta_mat_dtw(center, s, delta_mat)
        cost_mat[0, 0] = delta_mat[0, 0]
        path_mat[0, 0] = -1

        for i in range(1, center_length):
            cost_mat[i, 0] = cost_mat[i-1, 0]+delta_mat[i, 0]
            path_mat[i, 0] = 2

        for j in range(1, s_len):
            cost_mat[0, j] = cost_mat[0, j-1]+delta_mat[0, j]
            path_mat[0, j] = 1

        for i in range(1, center_length):
            for j in range(1, s_len):
                diag,left,top =cost_mat[i-1, j-1], cost_mat[i, j-1], cost_mat[i-1, j]
                if(diag <=left):
                    if(diag<=top):
                        res = diag
                        path_mat[i,j] = 0
                    else:
                        res = top
                        path_mat[i,j] = 2
                else:
                    if(left<=top):
                        res = left
                        path_mat[i,j] = 1
                    else:
                        res = top
                        path_mat[i,j] = 2

                cost_mat[i, j] = res+delta_mat[i, j]

        i = center_length-1
        j = s_len-1

        while(path_mat[i, j] != -1):
            updated_center[i] += s[j]
            n_elements[i] += 1
            move = options_argmin[path_mat[i, j]]
            i += move[0]
            j += move[1]
        assert(i == 0 and j == 0)
        updated_center[i] += s[j]
        n_elements[i] += 1

    return np.divide(updated_center, n_elements)

### SubClassSplitting

In [6]:
def subClassSplitting(mrcp_data,noise_data,mrcp_results,noise_results,threshold):
    sum_mrcp_list = list()
    for each_mrcp_data in mrcp_data:
        sum_mrcp_list.append(math.fsum(each_mrcp_data))
    avg_mrcp = float(math.fsum(sum_mrcp_list)/len(mrcp_data))

    sum_noise_list = list()
    for each_noise_data in noise_data:
        sum_noise_list.append(math.fsum(each_noise_data))
    avg_noise = float(math.fsum(sum_noise_list)/len(noise_data))

#         print('Avg_MRCP: ', avg_mrcp, '\nAvg_Noise: ', avg_noise)

    mrcp_pivot = -1
    mrcp_min = float('inf')
    for i in range(len(sum_mrcp_list)):
        abs_subs = abs(sum_mrcp_list[i] - avg_mrcp)
        if  abs_subs < mrcp_min :
            mrcp_pivot = i
            mrcp_min = abs_subs

    noise_pivot = -1
    noise_min = float('inf')
    for i in range(len(sum_noise_list)):
        abs_subs = abs(sum_noise_list[i] - avg_noise)
        if  abs_subs < noise_min :
            noise_pivot = i
            noise_min = abs_subs

#         print('MRCP_pivot: ', mrcp_pivot, '\nNoise_pivot: ', noise_pivot)

    #MRCP-------------------------------------------------------------------------------------------------

    dist = []
    for each_mrcp_data,idx in zip(mrcp_data,range(len(mrcp_data))):
        dist.append([eu_distance(each_mrcp_data,mrcp_data[mrcp_pivot]),idx])

    sorted_dist = sorted(dist,key=lambda x:x[0])
#         print('Sorted_Dist_MRCP: ',sorted_dist[:3])

    diff = []
    for i in range(1,len(sorted_dist)):
        diff.append(sorted_dist[i][0]-sorted_dist[i-1][0]);
    diff = np.array(diff)
#         print('Diff_MRCP: ',diff[:3])

    T_mean = math.fsum(diff)/len(diff)
    T_std = math.sqrt(math.fsum((diff-T_mean)**2)/(len(diff)-1)) 
    T = T_std/2
#         print('T_MRCP: ',T)

    Class = []
    temp_c = []
    temp_c.append(sorted_dist[0][1])
    for i in range(len(diff)):
        if(diff[i] > T):
            Class.append(temp_c)
            temp_c = []
            temp_c.append(sorted_dist[i+1][1])
        else:
            temp_c.append(sorted_dist[i+1][1])

    selected_mrcp_class = list()
    removed_mrcp_class = list()
    for e in Class:
        if len(e)>threshold: selected_mrcp_class.append(e)
        else: removed_mrcp_class.append(e)

#     mrcp_avg = []
#     for i in range(len(selected_mrcp_class)):
#         l = []
#         for e in selected_mrcp_class[i]:
#             l.append(mrcp_data[e])
#         mrcp_avg.append(performDBA(l))

    l = list()
    for i in range(len(removed_mrcp_class)):
        for e in removed_mrcp_class[i]:
            l.append(e)
    l = np.array(l)
    filtered_mrcp_data = np.delete(mrcp_data,l,axis=0)

    #Noise-------------------------------------------------------------------------------------------------

    dist = []
    for each_noise_data,idx in zip(noise_data,range(len(noise_data))):
        dist.append([eu_distance(each_noise_data,noise_data[noise_pivot]),idx])

    sorted_dist = sorted(dist,key=lambda x:x[0])
#         print('Sorted_Dist_Noise: ',sorted_dist[:3])

    diff = []
    for i in range(1,len(sorted_dist)):
        diff.append(sorted_dist[i][0]-sorted_dist[i-1][0]);
    diff = np.array(diff)
#         print('Diff_Noise: ',diff[:3])

    T_mean = math.fsum(diff)/len(diff)
    T_std = math.sqrt(math.fsum((diff-T_mean)**2)/(len(diff)-1)) 
    T = T_std/2
#         print('T_Noise: ',T)

    Class = []
    temp_c = []
    temp_c.append(sorted_dist[0][1])
    for i in range(len(diff)):
        if(diff[i] > T):
            Class.append(temp_c)
            temp_c = []
            temp_c.append(sorted_dist[i+1][1])
        else:
            temp_c.append(sorted_dist[i+1][1])

    selected_noise_class = list()
    removed_noise_class = list()
    for e in Class:
        if len(e)>threshold: selected_noise_class.append(e)
        else: removed_noise_class.append(e)

#     noise_avg = []
#     for i in range(len(selected_noise_class)):
#         l = []
#         for e in selected_noise_class[i]:
#             l.append(noise_data[e])
#         noise_avg.append(performDBA(l))

    l = list()
    for i in range(len(removed_noise_class)):
        for e in removed_noise_class[i]:
            l.append(e)
    l = np.array(l)
    filtered_noise_data = np.delete(noise_data,l,axis=0)
    
    return filtered_mrcp_data,filtered_noise_data

### Calculate F1 Score

In [7]:
def score_eu(test_data,test_results,mrcp_avg,noise_avg):
    e_results = list()
    TP = 0
    TN = 0
    FP = 0
    FN = 0
    for each_test_data in test_data:
        e_class = None
        min_dist = float('inf')
        for avg in mrcp_avg:
            dist = eu_distance(each_test_data, avg) 
            if dist < min_dist:
                min_dist = dist
                e_class = 'MRCP'
        for avg in noise_avg:
            dist = eu_distance(each_test_data, avg) 
            if dist < min_dist:
                min_dist = dist
                e_class = 'Noise'
        e_results.append(e_class)
    e_results = np.array(e_results)
    for i in range(len(test_data)):
        if e_results[i] == 'MRCP' and test_results[i] == 'MRCP' :
            TP += 1
        elif e_results[i] == 'MRCP' and test_results[i] == 'Noise' :
            FP += 1
        elif e_results[i] == 'Noise' and test_results[i] == 'MRCP' :
            FN += 1
        elif e_results[i] == 'Noise' and test_results[i] == 'Noise' :
            TN += 1
    return TP,FP,FN,TN

def score_dtw(test_data,test_results,mrcp_avg,noise_avg):
    e_results = list()
    TP = 0
    TN = 0
    FP = 0
    FN = 0
    for each_test_data in test_data:
        e_class = None
        min_dist = float('inf')
        for avg in mrcp_avg:
            dist = dtw_distance(each_test_data, avg) 
            if dist < min_dist:
                min_dist = dist
                e_class = 'MRCP'
        for avg in noise_avg:
            dist = dtw_distance(each_test_data, avg) 
            if dist < min_dist:
                min_dist = dist
                e_class = 'Noise'
        e_results.append(e_class)
    e_results = np.array(e_results)
    for i in range(len(test_data)):
        if e_results[i] == 'MRCP' and test_results[i] == 'MRCP' :
            TP += 1
        elif e_results[i] == 'MRCP' and test_results[i] == 'Noise' :
            FP += 1
        elif e_results[i] == 'Noise' and test_results[i] == 'MRCP' :
            FN += 1
        elif e_results[i] == 'Noise' and test_results[i] == 'Noise' :
            TN += 1
    return TP,FP,FN,TN

def cal_f1(TP,FP,FN,TN):
    precision = TP/(TP+FP)
    recall = TP/(TP+FN)
    accuracy = (TP+TN)/(TP+TN+FP+FN)*100
    f1_score = (2*precision*recall)/(precision+recall)*100
    return f1_score

# IMPORT DATA

### Generate Data and Results for each File

In [8]:
start_time = time.time()
print('***************************************(( 1-NN DTW with Filtered Data ))***************************************')
for par in range(1,10):
    all_data , all_results = read_all_lines('all data/participant_'+str(par)+'.txt')
    print('Data Participant:',par,'got',len(all_data),'Data')
    for threshold in range(5,6):
        print('( Threshold: {} )'.format(threshold))
        TP = 0
        TN = 0
        FP = 0
        FN = 0
        for i in range(len(all_data)):
            test_data = all_data[i]
            test_result = all_results[i]
            data = np.delete(all_data,i,0)
            results = np.delete(all_results,i,0)

            mrcp_data = list()
            noise_data = list()
            mrcp_result = list()
            noise_result = list()

            for each_data, each_result in zip(data,results):
                if each_result == 'MRCP':
                    mrcp_data.append(each_data)
                    mrcp_result.append(each_result)
                else:
                    noise_data.append(each_data)
                    noise_result.append(each_result)

            mrcp_data = np.array(mrcp_data)
            noise_data = np.array(noise_data)
            mrcp_results = np.array(mrcp_result)
            noise_results = np.array(noise_result)

            filtered_mrcp_data,filtered_noise_data = subClassSplitting(mrcp_data,noise_data,mrcp_results,noise_results,threshold)

            temp_TP,temp_FP,temp_FN,temp_TN = score_dtw([test_data],[test_result],filtered_mrcp_data,filtered_noise_data)
            TP += temp_TP
            FP += temp_FP
            FN += temp_FN
            TN += temp_TN
    
        print('TP:', TP)
        print('FP:', FP)
        print('FN:', FN)
        print('TN:', TN)
        f1_score = cal_f1(TP,FP,FN,TN)
        print('F1_Score:',f1_score)
    print('----------------------------------------------------------------------------------------------------')
elapsed_time = time.time() - start_time
print("Took {} hours".format(elapsed_time/3600))

***************************************(( 1-NN DTW with Filtered Data ))***************************************
Data Participant: 1 got 347 Data
( Threshold: 5 )
TP: 135
FP: 49
FN: 49
TN: 114
F1_Score: 73.36956521739133
----------------------------------------------------------------------------------------------------
Data Participant: 2 got 351 Data
( Threshold: 5 )
TP: 168
FP: 33
FN: 13
TN: 137
F1_Score: 87.95811518324606
----------------------------------------------------------------------------------------------------
Data Participant: 3 got 347 Data
( Threshold: 5 )
TP: 166
FP: 4
FN: 21
TN: 156
F1_Score: 92.99719887955182
----------------------------------------------------------------------------------------------------
Data Participant: 4 got 346 Data
( Threshold: 5 )
TP: 102
FP: 101
FN: 76
TN: 67
F1_Score: 53.54330708661418
----------------------------------------------------------------------------------------------------
Data Participant: 5 got 331 Data
( Threshold: 5 )
TP:

In [9]:
start_time = time.time()
print('***************************************(( 1-NN DTW with Filtered Data ))***************************************')
for par in range(1,10):
    all_data , all_results = read_all_lines('all data/participant_'+str(par)+'.txt')
    print('Data Participant:',par,'got',len(all_data),'Data')
    for threshold in range(3,4):
        print('( Threshold: {} )'.format(threshold))
        TP = 0
        TN = 0
        FP = 0
        FN = 0
        for i in range(len(all_data)):
            test_data = all_data[i]
            test_result = all_results[i]
            data = np.delete(all_data,i,0)
            results = np.delete(all_results,i,0)

            mrcp_data = list()
            noise_data = list()
            mrcp_result = list()
            noise_result = list()

            for each_data, each_result in zip(data,results):
                if each_result == 'MRCP':
                    mrcp_data.append(each_data)
                    mrcp_result.append(each_result)
                else:
                    noise_data.append(each_data)
                    noise_result.append(each_result)

            mrcp_data = np.array(mrcp_data)
            noise_data = np.array(noise_data)
            mrcp_results = np.array(mrcp_result)
            noise_results = np.array(noise_result)

            filtered_mrcp_data,filtered_noise_data = subClassSplitting(mrcp_data,noise_data,mrcp_results,noise_results,threshold)

            temp_TP,temp_FP,temp_FN,temp_TN = score_dtw([test_data],[test_result],filtered_mrcp_data,filtered_noise_data)
            TP += temp_TP
            FP += temp_FP
            FN += temp_FN
            TN += temp_TN
    
        print('TP:', TP)
        print('FP:', FP)
        print('FN:', FN)
        print('TN:', TN)
        f1_score = cal_f1(TP,FP,FN,TN)
        print('F1_Score:',f1_score)
    print('----------------------------------------------------------------------------------------------------')
elapsed_time = time.time() - start_time
print("Took {} hours".format(elapsed_time/3600))

***************************************(( 1-NN DTW with Filtered Data ))***************************************
Data Participant: 1 got 347 Data
( Threshold: 3 )
TP: 134
FP: 42
FN: 50
TN: 121
F1_Score: 74.44444444444444
----------------------------------------------------------------------------------------------------
Data Participant: 2 got 351 Data
( Threshold: 3 )
TP: 176
FP: 8
FN: 5
TN: 162
F1_Score: 96.43835616438356
----------------------------------------------------------------------------------------------------
Data Participant: 3 got 347 Data
( Threshold: 3 )
TP: 169
FP: 5
FN: 18
TN: 155
F1_Score: 93.62880886426592
----------------------------------------------------------------------------------------------------
Data Participant: 4 got 346 Data
( Threshold: 3 )
TP: 81
FP: 90
FN: 97
TN: 78
F1_Score: 46.418338108882516
----------------------------------------------------------------------------------------------------
Data Participant: 5 got 331 Data
( Threshold: 3 )
TP: 14