In [1]:
import numpy as np
import pandas as pd
from os import listdir
import random
import time
from os.path import isfile, join
np.random.seed(42)
random.seed(42)

In [2]:
path='train'
classes=['wave', 'inf', 'eight', 'circle', 'beat3', 'beat4']
features=['time','Ax','Ay','Az','Wx','Wy','Wz']

In [3]:
tr_data={}
tr_all=pd.DataFrame(columns=features[1:])

for i in listdir(path):
    
    df=pd.read_csv((path+'/'+i),header=None,sep='\t', lineterminator='\n')
    i=classes[np.where([x in i for x in classes])[0][0]]
    df=df.rename(columns=dict(enumerate(features)))
    df=df.drop(['time'],axis=1)
    
    tr_all=tr_all.append(df,ignore_index=True)
    if i in tr_data:
        tr_data[i]=tr_data[i].append(df,ignore_index=True)
    else:
        tr_data[i]=df

In [4]:
display(tr_all.head())

Unnamed: 0,Ax,Ay,Az,Wx,Wy,Wz
0,2.169327,7.736588,5.010849,-0.039505,-0.013031,0.039139
1,2.121719,7.796097,4.929916,-0.039505,-0.00502,0.02739
2,2.119339,7.75325,4.968002,-0.035233,0.009399,0.022049
3,2.021744,7.69136,5.063217,-0.029892,0.032898,0.012436
4,2.021744,7.672318,4.979904,-0.022949,0.053192,0.009766


In [5]:
tr_data

{'beat3':              Ax        Ay        Az        Wx        Wy        Wz
 0      2.169327  7.736588  5.010849 -0.039505 -0.013031  0.039139
 1      2.121719  7.796097  4.929916 -0.039505 -0.005020  0.027390
 2      2.119339  7.753250  4.968002 -0.035233  0.009399  0.022049
 3      2.021744  7.691360  5.063217 -0.029892  0.032898  0.012436
 4      2.021744  7.672318  4.979904 -0.022949  0.053192  0.009766
 ...         ...       ...       ...       ...       ...       ...
 13426  2.014603  7.086746  5.729721  0.045914 -0.019135  0.085190
 13427  2.090775  6.993912  5.586899  0.055527 -0.037292  0.093735
 13428  2.119339  6.898697  5.610703  0.059799 -0.072540  0.104950
 13429  2.190750  6.848709  5.674973  0.052322 -0.114731  0.120438
 13430  2.140762  6.855850  5.577377  0.036301 -0.153183  0.133255
 
 [13431 rows x 6 columns],
 'beat4':              Ax        Ay        Az        Wx        Wy        Wz
 0      2.773941  8.895828  1.114182  0.027756  0.044952 -0.125229
 1      2.91676

### k-means

In [6]:
def euclidean(A,B):
    return np.sqrt(np.sum((A - B)**2, axis=1))

In [7]:
def kmeans(dt,kc,th):
    start = time.time()
    centers=dt.iloc[random.sample(range(len(dt)), kc),:].to_numpy(float)
    result=np.zeros((kc,6))
    min_d=100
    dt=(dt.sample(frac=1)).to_numpy(float)
    iters=0
    while iters<=30 and min_d>th:
        threshold=[]
        sums=np.zeros((len(dt),6))
        nums=np.zeros((len(dt)))
        
        for p in range(len(dt[:,0])):
            dist=np.argmin(euclidean(centers,dt[p,:]))
            sums[dist,:]+=dt[p,:]
            nums[dist]+=1
            
        for c in range(kc):
            result[c]=sums[c]/nums[c]
            threshold.append(np.sqrt(np.sum((result[c]-centers[c])**2)))
            centers[c]=result[c]
            
        min_d=np.min(threshold)
        iters+=1
    end = time.time()
    print('seconds',end - start)   
    print(min_d)
    return result

In [8]:
res=kmeans(tr_all,50,0.001)
display(res.shape)
display(res)

seconds 13.117950201034546
0.0


(50, 6)

array([[-7.61023545e+00, -3.00780802e+00,  3.42749952e+00,
        -3.40480006e-01,  9.10028186e-02, -2.34541550e-01],
       [ 7.70370232e+00,  1.88813404e+00,  9.91011131e+00,
         3.16651204e+00,  4.03242700e-01,  1.07995596e+00],
       [-6.24195788e+00,  2.58132675e+00,  9.99708557e+00,
         3.31229195e+00, -2.26992937e+00, -2.05803729e+00],
       [-1.03944804e+01,  3.62267560e+00, -7.21130598e-02,
        -8.62463543e-01, -1.43829336e+00, -9.94525173e-01],
       [-5.41319532e+00,  3.93612218e+00,  4.72162258e+00,
         3.96742711e-01, -3.47210835e-01,  1.17282577e-01],
       [-6.17643459e-01, -6.78782647e-01,  2.39701597e+00,
        -3.04729631e+00,  4.83019873e-01, -2.46313591e-02],
       [ 2.44296739e+00,  8.72470454e+00,  2.01522283e+00,
         7.74656398e-02, -8.07530066e-02,  1.43517659e-04],
       [ 1.26303573e+01, -7.15202933e+00,  1.66150277e+00,
        -3.33154345e+00,  3.04493609e+00, -7.05317671e+00],
       [ 5.86771371e+00,  3.69228322e+00,  1.633

In [9]:
pd.DataFrame(res,columns=features[1:]).to_csv('train_centers.csv',index=False)

### k-nn

In [10]:
def knn(data,centers):
    data=data.to_numpy(float)
    new = np.zeros((len(data)))
    for p in range(len(data)):
        new[p]=np.argmin(euclidean(data[p,:],centers))
         
    return new

In [11]:
tr_new = {}
for c in classes:
    tr_new[c]=pd.DataFrame(knn(tr_data[c],res),columns=['centers'])
    tr_new[c].to_csv('train/'+c+'.csv',index=False)

In [12]:
display(tr_new)

{'wave':        centers
 0          6.0
 1          6.0
 2          6.0
 3          6.0
 4          6.0
 ...        ...
 14525     41.0
 14526     41.0
 14527     41.0
 14528     41.0
 14529     41.0
 
 [14530 rows x 1 columns], 'inf':        centers
 0         39.0
 1         39.0
 2         39.0
 3         39.0
 4         39.0
 ...        ...
 12412     37.0
 12413     37.0
 12414     37.0
 12415     37.0
 12416     37.0
 
 [12417 rows x 1 columns], 'eight':        centers
 0         47.0
 1         47.0
 2         47.0
 3         47.0
 4         47.0
 ...        ...
 14296     37.0
 14297     37.0
 14298     37.0
 14299     37.0
 14300     37.0
 
 [14301 rows x 1 columns], 'circle':        centers
 0          0.0
 1          0.0
 2          0.0
 3          0.0
 4          0.0
 ...        ...
 10622      4.0
 10623      4.0
 10624      4.0
 10625      4.0
 10626      4.0
 
 [10627 rows x 1 columns], 'beat3':        centers
 0         35.0
 1         35.0
 2         35.0
 3         35