In [None]:
import os
from threading import Thread
from time import sleep, time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.cluster.hierarchy import fcluster
from scipy.stats import zscore

%load_ext autoreload
%autoreload 2

from data_processing import DataIterator
from dtw import dtw as cur_dtw
from dtw_wrapper import DtwWrapper
from testing import TestFactory, ClusteredInfo

In [None]:
def norm_1(x, y):
    """Return the order-1 norm of ``x - y``.

    For 1-D inputs this is the sum of absolute differences (Manhattan
    distance). Note that ``np.linalg.norm`` treats ``ord=1`` on 2-D input
    as a matrix norm (maximum absolute column sum), not an element-wise sum.
    """
    difference = x - y
    return np.linalg.norm(difference, 1)
def norm_2(x, y):
    """Return the order-2 norm of ``x - y``.

    For 1-D inputs this is the Euclidean distance. Note that
    ``np.linalg.norm`` treats ``ord=2`` on 2-D input as a matrix norm
    (largest singular value), not the Frobenius norm.
    """
    difference = x - y
    return np.linalg.norm(difference, 2)

# Rewriting data in our format

In [None]:
# NOTE: disabled one-off conversion, kept for reference only. It reshapes each row of
# preprocessed_large.csv (label in column 0, then columns 1-200 / 201-400 / 401-600)
# into three per-channel rows with columns obj, ch, label, 0..199.
# `DataFrame.append` used below was removed in pandas 2.0; switch to `pd.concat`
# if this cell is ever revived.
# data = pd.read_csv("../data/preprocessed_large.csv", header=None)

# indexes = pd.Index(range(0, 200))
# data_preprocessed = pd.DataFrame(columns=["obj", "ch", "label", *indexes])

# l = []
# for j in data.index:
#     df = pd.DataFrame({"obj": [j for i in range(3)], 
#                   "label": [data.loc[j, 0] for i in range(3)]},
#                  columns=["obj", "label", *indexes])
#     df.index.name = "ch"
#     df.loc[0, indexes] = data.loc[j, 1:200].values
#     df.loc[1, indexes] = data.loc[j, 201:400].values
#     df.loc[2, indexes] = data.loc[j, 401:600].values 
#     df.reset_index(inplace=True)
#     data_preprocessed = data_preprocessed.append(df, sort=False)
    
# data_preprocessed.reset_index(drop=True, inplace=True)


In [None]:
# Load the preprocessed dataset; the first CSV column becomes the row index.
data = pd.read_csv("../data/preprocessed_3ch_50point_overlap0.csv", index_col=0)
# NOTE(review): 50 presumably matches the "50point" window length in the file name —
# confirm against DataIterator's signature.
it = DataIterator(data, 50, random_state=42)
tests = TestFactory(it=it)
# The return value is kept in `x`, but the later cells shown here read `tests.X`
# and `tests.infos` rather than `x`.
x = tests.set_sample(2048)

In [None]:
# Fill the DTW distances for both point metrics, L1 first and then L2.
# A fresh dtw_args dict is built for every wrapper, and `wrap` ends up bound to
# the wrapper created for norm_2.
for point_metric in (norm_1, norm_2):
    wrap = DtwWrapper(
        tests.X,
        hash(tests.infos),
        cur_dtw,
        point_metric,
        dtw_args={"z_normalize": True, "l": 0.2},
    )
    wrap.fill_distances(n_threads=8)

In [None]:
# Ratio of negative entries to entries that are not equal to -1000 in a stored
# distance file.
# NOTE(review): -1000 looks like a placeholder value and the numeric suffix of the
# file name looks like the hash passed to DtwWrapper — confirm both; if the suffix
# is a hash, this hard-coded path may not match a fresh run.
distance_file = "../data/distances/dtwnorm_1z_normalizeTruel0.2-43544539162478229291.csv"
tmp = np.genfromtxt(distance_file)
negative_count = (tmp < 0).sum()
not_minus_1000_count = (tmp != -1000).sum()
negative_count / not_minus_1000_count

In [None]:
# Run the DTW test with norm_2 as the point distance, using the same dtw_args as the
# distance-filling cell and an explicit cluster_dist="weighted".
# The result is stored in `info`, which the next cell clusters and inspects.
info = tests.test_dtw(cur_dtw, norm_2, 
                      dtw_args={"z_normalize": True, "l": 0.2}, cluster_dist="weighted")

In [None]:
# Cluster with parameter 50, then count the labels that fall into each cluster.
info.cluster(50)
info.stats  # bare expression in mid-cell: evaluated but not displayed
index = info.stats.head(10).index.values

classifier_stat = {}
for i in info.stats.index:
    # Positions of the samples assigned to cluster i
    member_positions = np.where(info.clusters_labels == i)[0]
    member_labels = pd.Series(info.label[member_positions])
    classifier_stat[i] = member_labels.value_counts()

# One column per cluster, one row per label; show at most the first 30 clusters.
label_table = pd.DataFrame(classifier_stat).fillna(0)
label_table.iloc[:, :30]

In [None]:
# Run the DTW test with norm_1 as the point distance. Unlike the norm_2 run, no
# cluster_dist is passed, so test_dtw falls back to whatever its default is.
# This rebinds `info`, replacing the norm_2 result.
info = tests.test_dtw(cur_dtw, norm_1, 
                      dtw_args={"z_normalize": True, "l": 0.2},)

In [None]:
# Cluster with parameter 24, then count the labels that fall into each cluster.
info.cluster(24)
info.stats  # bare expression in mid-cell: evaluated but not displayed
index = info.stats.head(10).index.values

classifier_stat = {}
for i in info.stats.index:
    # Positions of the samples assigned to cluster i
    member_positions = np.where(info.clusters_labels == i)[0]
    member_labels = pd.Series(info.label[member_positions])
    classifier_stat[i] = member_labels.value_counts()

# One column per cluster, one row per label; show at most the first 30 clusters.
label_table = pd.DataFrame(classifier_stat).fillna(0)
label_table.iloc[:, :30]

# Rewriting accelerometer dataset 2
Source data: [MotionSense `B_Accelerometer_data.zip`](https://github.com/mmalekzadeh/motion-sense/blob/master/data/B_Accelerometer_data.zip)

In [None]:
# Re-cut the accelerometer recordings into fixed-length, non-overlapping windows.
# Output layout: one row per (window, channel) with columns
# obj, ch, label, 0..SAMPLE_SIZE-1, written to ../data/clustering/accelerometer2.csv.
SAMPLE_SIZE = 200
ACCEL_DIR = "../data/B_Accelerometer_data/"

# Sorted so that object ids are assigned identically on every run
# (os.listdir returns entries in arbitrary order).
dirs = sorted(os.listdir(ACCEL_DIR))
# The first three characters of a directory name are used as the class label.
labels = np.unique([name[:3] for name in dirs])
d = {label: i for i, label in enumerate(labels)}
counter = 0

indexes = pd.Index(range(SAMPLE_SIZE))
windows = []

for dir_name in dirs:
    dir_path = os.path.join(ACCEL_DIR, dir_name)
    for file_name in sorted(os.listdir(dir_path)):
        recording = pd.read_csv(os.path.join(dir_path, file_name), index_col=0)
        # Rows become channels (x, y, z -> 0, 1, 2), columns become time steps.
        # The channel id stays in the index, so it cannot leak into the sliced
        # signal values.
        channels = recording.T.rename({"x": 0, "y": 1, "z": 2})
        n_points = channels.shape[-1]

        # Full windows only; a trailing remainder shorter than SAMPLE_SIZE is dropped.
        for start in range(0, n_points - SAMPLE_SIZE + 1, SAMPLE_SIZE):
            window = pd.DataFrame(
                channels.iloc[:, start:start + SAMPLE_SIZE].to_numpy(),
                columns=indexes,
            )
            # Inserted back to front so the final column order is obj, ch, label, 0..N-1.
            window.insert(0, "label", d[dir_name[:3]])
            window.insert(0, "ch", channels.index.to_numpy())
            window.insert(0, "obj", counter)
            windows.append(window)
            counter += 1

# Concatenate once instead of appending inside the loop: DataFrame.append was
# removed in pandas 2.0 and copied the whole frame on every call.
if windows:
    data_preprocessed = pd.concat(windows, ignore_index=True)
else:
    # pd.concat raises on an empty list; keep the expected (empty) schema instead.
    data_preprocessed = pd.DataFrame(columns=["obj", "ch", "label", *indexes])

data_preprocessed.to_csv("../data/clustering/accelerometer2.csv")