In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from common import *
from preprocess import base
import sys

In [12]:
def DTWDistance1(s1, s2):
    """Return the unconstrained dynamic-time-warping distance between two sequences.

    The local cost of aligning ``s1[i]`` with ``s2[j]`` is their squared
    difference; the result is the square root of the cheapest cumulative
    alignment cost.

    Parameters
    ----------
    s1, s2 : sequences supporting ``len()`` and integer indexing.

    Returns
    -------
    ``np.sqrt`` of the accumulated cost at the last pair of elements.
    """
    rows, cols = len(s1), len(s2)
    unreachable = float('inf')

    # Table is shifted by one: acc[r + 1][c + 1] is the cost of aligning
    # s1[:r + 1] with s2[:c + 1]; row 0 / column 0 are the boundary cells.
    acc = [[unreachable] * (cols + 1) for _ in range(rows + 1)]
    acc[0][0] = 0

    for r in range(rows):
        for c in range(cols):
            step_cost = (s1[r] - s2[c]) ** 2
            cheapest_prev = min(acc[r][c + 1], acc[r + 1][c], acc[r][c])
            acc[r + 1][c + 1] = step_cost + cheapest_prev

    return np.sqrt(acc[rows][cols])


def DTWDistanceW(s1, s2, w):
    """Return the dynamic-time-warping distance restricted to a band of half-width ``w``.

    Only cells with ``|i - j| <= w`` are evaluated. The local cost of
    aligning ``s1[i]`` with ``s2[j]`` is their squared difference; the result
    is the square root of the cheapest cumulative cost inside the band.

    Parameters
    ----------
    s1, s2 : sequences supporting ``len()`` and integer indexing.
    w : int
        Requested half-width of the band. It is widened to at least
        ``abs(len(s1) - len(s2))`` so the final cell can be reached.

    Returns
    -------
    ``np.sqrt`` of the accumulated cost at the last pair of elements
    (``inf`` if only one of the sequences is empty).
    """
    n, m = len(s1), len(s2)
    w = max(w, abs(n - m))

    # Every cell starts unreachable; only the virtual origin costs nothing.
    inf = float('inf')
    DTW = {}
    for i in range(-1, n):
        for j in range(-1, m):
            DTW[(i, j)] = inf
    DTW[(-1, -1)] = 0

    for i in range(n):
        # The upper bound is i + w + 1 because range() is exclusive: the band
        # must include j == i + w. Without the +1, w == 0 yields an empty band
        # and a longer s2 leaves the final cell unreachable (result inf).
        for j in range(max(0, i - w), min(m, i + w + 1)):
            dist = (s1[i] - s2[j]) ** 2
            DTW[(i, j)] = dist + min(DTW[(i - 1, j)], DTW[(i, j - 1)], DTW[(i - 1, j - 1)])

    return np.sqrt(DTW[(n - 1, m - 1)])


def cluster(data, threshold=600, window=10):
    """Group the columns of ``data`` by windowed DTW distance.

    For every column, the set of columns whose ``DTWDistanceW`` distance to it
    is below ``threshold`` is collected (a column is compared with itself as
    well). Each distinct set is stored once; sets are compared ignoring
    order. The resulting groups are per-column neighbourhoods, so different
    groups may share column names.

    This single definition replaces two identical copies that differed only
    in the default threshold (1000, then 600). The later copy shadowed the
    earlier one, so 600 was the default actually in effect and is kept here.

    Parameters
    ----------
    data : table-like object exposing ``.columns`` and column access via
        ``data[name]`` (a pandas DataFrame in this notebook).
    threshold : number
        A column joins a group when its distance is strictly below this value.
    window : int
        Band half-width passed to ``DTWDistanceW`` (previously hard-coded to 10).

    Returns
    -------
    dict
        Maps consecutive integer ids (starting at 0, in discovery order) to
        lists of column names, each list in column order.
    """
    varnames = data.columns
    clust = {}
    for name in varnames:
        # Distance from the current column to every column, in column order.
        distances = [DTWDistanceW(data[name], data[other], window) for other in varnames]
        name_list = [other for other, dist in zip(varnames, distances) if dist < threshold]

        # Keep this neighbourhood only if the same set was not recorded already.
        members = set(name_list)
        if not any(members == set(existing) for existing in clust.values()):
            clust[len(clust)] = name_list

    return clust


def cluster_batch(data_dir):
    """Run ``cluster`` on every CSV file listed for ``data_dir`` and print the groups.

    For each file, the CSV is loaded, its ``'BTSJ'`` column is removed (a
    ``KeyError`` is raised if the column is absent), the remaining columns are
    clustered with the default settings, and each resulting group is printed
    on its own line after a banner naming the file.

    Parameters
    ----------
    data_dir : str
        Directory handed to ``base.get_files_csv``. The returned entries are
        joined onto ``data_dir`` -- presumably they are bare file names;
        verify against ``preprocess.base``.

    Returns
    -------
    None. Results are only printed.
    """
    filelist = base.get_files_csv(data_dir)
    for file in filelist:
        # os.path.join works whether or not data_dir ends with a separator;
        # plain string concatenation silently built a wrong path without one.
        data = pd.read_csv(os.path.join(data_dir, file))
        del data['BTSJ']
        result = cluster(data)
        print("-------------------------------------------------------")
        print("********process file: {}*********".format(file))
        for key in result:
            print(result[key])

In [13]:
# Dataset directories below are resolved relative to the kernel's current working directory.
root_dir = os.getcwd()

In [14]:
# Cluster every CSV under the 'data_0134' dataset directory.
data_no = 'data_0134'
# The trailing "/" is kept on the directory string passed to cluster_batch.
data_dir = root_dir + "/{}/smooth_mean_interpolate_bin_mean/".format(data_no)
cluster_batch(data_dir)
# Printed only after cluster_batch has returned, i.e. once the whole directory is done.
print("---------------processing for data：{}".format(data_no))

-------------------------------------------------------
********process file: 00_233_134_2016-05-20.csv*********
['ZD_CNT']
['ZD_LCG']
['ZD_TFG']
['ZD_JHG']
['ZD_LLJ', 'ZD_SPEED', 'ZX_BJ_1', 'ZX_BJ_2', 'ZX_BJ_3', 'ZX_BJ_4', 'ZX_BJ_5', 'ZX_BJ_6']
['ZD_LLJ', 'ZD_SPEED', 'ZX_BJ_1', 'ZX_SDMC_1', 'ZX_BJ_2', 'ZX_SDMC_2', 'ZX_BJ_3', 'ZX_SDMC_3', 'ZX_BJ_4', 'ZX_SDMC_4', 'ZX_BJ_5', 'ZX_SDMC_5', 'ZX_BJ_6', 'ZX_SDMC_6']
['ZX_HW1_1', 'ZX_HW2_1', 'ZX_HW1_2', 'ZX_HW2_2', 'ZX_HW1_3', 'ZX_HW2_3', 'ZX_HW1_4', 'ZX_HW2_4', 'ZX_HW1_5', 'ZX_HW2_5', 'ZX_HW1_6', 'ZX_HW2_6', 'ZX_WD_1_1', 'ZX_WD_1_2', 'ZX_WD_1_3', 'ZX_WD_1_4', 'ZX_WD_1_5', 'ZX_WD_1_6', 'ZX_WD_2_1', 'ZX_WD_2_2', 'ZX_WD_2_3', 'ZX_WD_2_4', 'ZX_WD_2_5', 'ZX_WD_2_6', 'ZX_WD_3_1', 'ZX_WD_3_2', 'ZX_WD_3_3', 'ZX_WD_3_4', 'ZX_WD_3_5', 'ZX_WD_3_6', 'ZX_WD_4_1', 'ZX_WD_4_2', 'ZX_WD_4_3', 'ZX_WD_4_4', 'ZX_WD_4_5', 'ZX_WD_4_6', 'ZX_WD_5_1', 'ZX_WD_5_2', 'ZX_WD_5_3', 'ZX_WD_5_4', 'ZX_WD_5_5', 'ZX_WD_5_6', 'ZX_WD_6_1', 'ZX_WD_6_2', 'ZX_WD_6_3', 'ZX_WD_6_4', 

In [15]:
# Cluster every CSV under the 'data_0141' dataset directory.
data_no = 'data_0141'
# The trailing "/" is kept on the directory string passed to cluster_batch.
data_dir = root_dir + "/{}/smooth_mean_interpolate_bin_mean/".format(data_no)
cluster_batch(data_dir)
# Printed only after cluster_batch has returned, i.e. once the whole directory is done.
print("---------------processing for data：{}".format(data_no))

-------------------------------------------------------
********process file: 00_233_0141_2016-05-20.csv*********
['ZD_CNT']
['ZD_LCG']
['ZD_TFG']
['ZD_JHG']
['ZD_LLJ', 'ZD_SPEED', 'ZX_BJ_1', 'ZX_BJ_2', 'ZX_BJ_3', 'ZX_BJ_4', 'ZX_BJ_5', 'ZX_BJ_6']
['ZX_HW1_1', 'ZX_HW2_1', 'ZX_HW1_2', 'ZX_HW2_2', 'ZX_HW1_3', 'ZX_HW2_3', 'ZX_HW1_4', 'ZX_HW2_4', 'ZX_HW1_5', 'ZX_HW2_5', 'ZX_HW1_6', 'ZX_HW2_6', 'ZX_WD_1_1', 'ZX_WD_1_2', 'ZX_WD_1_3', 'ZX_WD_1_4', 'ZX_WD_1_5', 'ZX_WD_1_6', 'ZX_WD_2_1', 'ZX_WD_2_2', 'ZX_WD_2_3', 'ZX_WD_2_4', 'ZX_WD_2_5', 'ZX_WD_2_6', 'ZX_WD_3_1', 'ZX_WD_3_2', 'ZX_WD_3_3', 'ZX_WD_3_4', 'ZX_WD_3_5', 'ZX_WD_3_6', 'ZX_WD_4_1', 'ZX_WD_4_2', 'ZX_WD_4_3', 'ZX_WD_4_4', 'ZX_WD_4_5', 'ZX_WD_4_6', 'ZX_WD_5_1', 'ZX_WD_5_2', 'ZX_WD_5_3', 'ZX_WD_5_4', 'ZX_WD_5_5', 'ZX_WD_5_6', 'ZX_WD_6_1', 'ZX_WD_6_2', 'ZX_WD_6_3', 'ZX_WD_6_4', 'ZX_WD_6_5', 'ZX_WD_6_6']
['ZX_SDMC_1', 'ZX_SDMC_2', 'ZX_SDMC_3', 'ZX_SDMC_4', 'ZX_SDMC_5', 'ZX_SDMC_6']
-------------------------------------------------------
*****

In [16]:
# Cluster every CSV under the 'data_0192' dataset directory.
data_no = 'data_0192'
# The trailing "/" is kept on the directory string passed to cluster_batch.
data_dir = root_dir + "/{}/smooth_mean_interpolate_bin_mean/".format(data_no)
cluster_batch(data_dir)
# Printed only after cluster_batch has returned, i.e. once the whole directory is done.
print("---------------processing for data：{}".format(data_no))

-------------------------------------------------------
********process file: 00_233_0192_2016-05-20.csv*********
['ZD_CNT']
['ZD_LCG']
['ZD_TFG']
['ZD_JHG']
['ZD_LLJ', 'ZX_BJ_1', 'ZX_BJ_2', 'ZX_BJ_3', 'ZX_BJ_4', 'ZX_BJ_5', 'ZX_BJ_6']
['ZD_SPEED']
['ZX_HW1_1', 'ZX_HW2_1', 'ZX_HW1_2', 'ZX_HW2_2', 'ZX_HW1_3', 'ZX_HW2_3', 'ZX_HW1_4', 'ZX_HW2_4', 'ZX_HW1_5', 'ZX_HW2_5', 'ZX_HW1_6', 'ZX_HW2_6', 'ZX_WD_1_1', 'ZX_WD_1_2', 'ZX_WD_1_3', 'ZX_WD_1_4', 'ZX_WD_1_5', 'ZX_WD_1_6', 'ZX_WD_2_1', 'ZX_WD_2_2', 'ZX_WD_2_3', 'ZX_WD_2_4', 'ZX_WD_2_5', 'ZX_WD_2_6', 'ZX_WD_3_1', 'ZX_WD_3_2', 'ZX_WD_3_3', 'ZX_WD_3_4', 'ZX_WD_3_5', 'ZX_WD_3_6', 'ZX_WD_4_1', 'ZX_WD_4_2', 'ZX_WD_4_3', 'ZX_WD_4_4', 'ZX_WD_4_5', 'ZX_WD_4_6', 'ZX_WD_5_1', 'ZX_WD_5_2', 'ZX_WD_5_3', 'ZX_WD_5_4', 'ZX_WD_5_5', 'ZX_WD_5_6', 'ZX_WD_6_1', 'ZX_WD_6_2', 'ZX_WD_6_3', 'ZX_WD_6_4', 'ZX_WD_6_5', 'ZX_WD_6_6']
['ZX_SDMC_1', 'ZX_SDMC_2', 'ZX_SDMC_3', 'ZX_SDMC_4', 'ZX_SDMC_5', 'ZX_SDMC_6']
-------------------------------------------------------
****

In [17]:
# Cluster every CSV under the 'data_0394' dataset directory.
data_no = 'data_0394'
# The trailing "/" is kept on the directory string passed to cluster_batch.
data_dir = root_dir + "/{}/smooth_mean_interpolate_bin_mean/".format(data_no)
cluster_batch(data_dir)
# Printed only after cluster_batch has returned, i.e. once the whole directory is done.
print("---------------processing for data：{}".format(data_no))

-------------------------------------------------------
********process file: 00_233_0394_2016-05-20.csv*********
['ZD_CNT']
['ZD_LCG']
['ZD_TFG']
['ZD_JHG']
['ZD_LLJ', 'ZD_SPEED', 'ZX_BJ_1', 'ZX_SDMC_1', 'ZX_BJ_2', 'ZX_SDMC_2', 'ZX_BJ_3', 'ZX_SDMC_3', 'ZX_BJ_4', 'ZX_SDMC_4', 'ZX_BJ_5', 'ZX_SDMC_5', 'ZX_BJ_6', 'ZX_SDMC_6']
['ZX_HW1_1', 'ZX_HW2_1', 'ZX_HW1_2', 'ZX_HW2_2', 'ZX_HW1_3', 'ZX_HW2_3', 'ZX_HW1_4', 'ZX_HW2_4', 'ZX_HW1_5', 'ZX_HW2_5', 'ZX_HW1_6', 'ZX_HW2_6', 'ZX_WD_1_1', 'ZX_WD_1_2', 'ZX_WD_1_3', 'ZX_WD_1_4', 'ZX_WD_1_5', 'ZX_WD_1_6', 'ZX_WD_2_1', 'ZX_WD_2_2', 'ZX_WD_2_3', 'ZX_WD_2_4', 'ZX_WD_2_5', 'ZX_WD_2_6', 'ZX_WD_3_1', 'ZX_WD_3_2', 'ZX_WD_3_3', 'ZX_WD_3_4', 'ZX_WD_3_5', 'ZX_WD_3_6', 'ZX_WD_4_1', 'ZX_WD_4_2', 'ZX_WD_4_3', 'ZX_WD_4_4', 'ZX_WD_4_5', 'ZX_WD_4_6', 'ZX_WD_5_1', 'ZX_WD_5_2', 'ZX_WD_5_3', 'ZX_WD_5_4', 'ZX_WD_5_5', 'ZX_WD_5_6', 'ZX_WD_6_1', 'ZX_WD_6_2', 'ZX_WD_6_3', 'ZX_WD_6_4', 'ZX_WD_6_5', 'ZX_WD_6_6']
-------------------------------------------------------
******

In [18]:
# Cluster every CSV under the 'data_0790' dataset directory.
data_no = 'data_0790'
# The trailing "/" is kept on the directory string passed to cluster_batch.
data_dir = root_dir + "/{}/smooth_mean_interpolate_bin_mean/".format(data_no)
cluster_batch(data_dir)
# Printed only after cluster_batch has returned, i.e. once the whole directory is done.
print("---------------processing for data：{}".format(data_no))

-------------------------------------------------------
********process file: 00_233_0790_2016-05-20.csv*********
['ZD_CNT']
['ZD_LCG']
['ZD_TFG']
['ZD_JHG']
['ZD_LLJ', 'ZX_BJ_1', 'ZX_BJ_2', 'ZX_BJ_3', 'ZX_BJ_4', 'ZX_BJ_5', 'ZX_BJ_6']
['ZD_SPEED']
['ZX_HW1_1', 'ZX_HW2_1', 'ZX_HW1_2', 'ZX_HW2_2', 'ZX_HW1_3', 'ZX_HW2_3', 'ZX_HW1_4', 'ZX_HW2_4', 'ZX_HW1_5', 'ZX_HW2_5', 'ZX_HW1_6', 'ZX_HW2_6', 'ZX_WD_1_1', 'ZX_WD_1_2', 'ZX_WD_1_3', 'ZX_WD_1_4', 'ZX_WD_1_5', 'ZX_WD_1_6', 'ZX_WD_2_1', 'ZX_WD_2_2', 'ZX_WD_2_3', 'ZX_WD_2_4', 'ZX_WD_2_5', 'ZX_WD_2_6', 'ZX_WD_3_1', 'ZX_WD_3_2', 'ZX_WD_3_3', 'ZX_WD_3_4', 'ZX_WD_3_5', 'ZX_WD_3_6', 'ZX_WD_4_1', 'ZX_WD_4_2', 'ZX_WD_4_3', 'ZX_WD_4_4', 'ZX_WD_4_5', 'ZX_WD_4_6', 'ZX_WD_5_1', 'ZX_WD_5_2', 'ZX_WD_5_3', 'ZX_WD_5_4', 'ZX_WD_5_5', 'ZX_WD_5_6', 'ZX_WD_6_1', 'ZX_WD_6_2', 'ZX_WD_6_3', 'ZX_WD_6_4', 'ZX_WD_6_5', 'ZX_WD_6_6']
['ZX_SDMC_1', 'ZX_SDMC_3', 'ZX_SDMC_5']
['ZX_SDMC_2']
['ZX_SDMC_1', 'ZX_SDMC_3', 'ZX_SDMC_4', 'ZX_SDMC_5', 'ZX_SDMC_6']
['ZX_SDMC_3', 'ZX_S

In [None]:

# Batch run: cluster the CSV files of all five dataset directories in one loop.
# NOTE(review): this repeats what the individual per-dataset cells do; the
# cell has no execution count (In [None]), so it was not run in the saved state.
data_no_list = ['data_0134', 'data_0141', 'data_0192', 'data_0394', 'data_0790']
for data_no in data_no_list:
    data_dir = root_dir + "/{}/smooth_mean_interpolate_bin_mean/".format(data_no)
    cluster_batch(data_dir)
    # Emitted after each directory has been fully processed.
    print("finish to process dir：{}".format(data_no))


In [11]:
# Scratch check of the order-insensitive comparison that cluster() uses to
# detect duplicate groups: two lists of column names are compared as sets.
test1 = ['ZX_HW1_1', 'ZX_HW2_1', 'ZX_HW1_2', 'ZX_HW2_2', 'ZX_HW1_3', 'ZX_HW2_3', 'ZX_HW1_4', 'ZX_HW2_4', 'ZX_HW1_5', 'ZX_HW2_5', 'ZX_HW1_6', 'ZX_HW2_6', 'ZX_WD_1_1', 'ZX_WD_1_2', 'ZX_WD_1_3', 'ZX_WD_1_4', 'ZX_WD_1_5', 'ZX_WD_1_6', 'ZX_WD_2_1', 'ZX_WD_2_2', 'ZX_WD_2_3', 'ZX_WD_2_4', 'ZX_WD_2_5', 'ZX_WD_2_6', 'ZX_WD_3_1', 'ZX_WD_3_2', 'ZX_WD_3_3', 'ZX_WD_3_4', 'ZX_WD_3_5', 'ZX_WD_3_6', 'ZX_WD_4_1', 'ZX_WD_4_2', 'ZX_WD_4_3', 'ZX_WD_4_4', 'ZX_WD_4_5', 'ZX_WD_4_6', 'ZX_WD_5_1', 'ZX_WD_5_2', 'ZX_WD_5_3', 'ZX_WD_5_4', 'ZX_WD_5_5', 'ZX_WD_5_6', 'ZX_WD_6_1', 'ZX_WD_6_2', 'ZX_WD_6_3', 'ZX_WD_6_4', 'ZX_WD_6_5', 'ZX_WD_6_6']
test2 = ['ZX_HW1_1', 'ZX_HW2_1', 'ZX_HW1_2', 'ZX_HW2_2', 'ZX_HW1_3', 'ZX_HW2_3', 'ZX_HW1_4', 'ZX_HW2_4', 'ZX_HW1_5', 'ZX_HW2_5', 'ZX_HW1_6', 'ZX_HW2_6', 'ZX_WD_1_1', 'ZX_WD_1_2', 'ZX_WD_1_3', 'ZX_WD_1_4', 'ZX_WD_1_5', 'ZX_WD_1_6', 'ZX_WD_2_1', 'ZX_WD_2_2', 'ZX_WD_2_3', 'ZX_WD_2_4', 'ZX_WD_2_5', 'ZX_WD_2_6', 'ZX_WD_3_1', 'ZX_WD_3_2', 'ZX_WD_3_3', 'ZX_WD_3_4', 'ZX_WD_3_5', 'ZX_WD_3_6', 'ZX_WD_4_1', 'ZX_WD_4_2', 'ZX_WD_4_3', 'ZX_WD_4_4', 'ZX_WD_4_5', 'ZX_WD_4_6', 'ZX_WD_5_1', 'ZX_WD_5_2', 'ZX_WD_5_3', 'ZX_WD_5_4', 'ZX_WD_5_5', 'ZX_WD_5_6', 'ZX_WD_6_1', 'ZX_WD_6_2', 'ZX_WD_6_3', 'ZX_WD_6_4', 'ZX_WD_6_5', 'ZX_WD_6_6']
# Bare last expression so the notebook displays the comparison result.
set(test1) == set(test2)

True