In [46]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from sklearn.model_selection import train_test_split  
from sklearn.neighbors import KNeighborsClassifier  
from sklearn.metrics import classification_report, confusion_matrix

In [7]:

data = pd.read_csv ("equipmentsData.csv", header=0);


In [30]:
## Set the timestamp as index
data['time'] = pd.to_datetime (data['time'])
data = data.set_index(data.time)

## Resample
resampled = data.resample('min').agg({'class':['mean', 'count'],
                                      'voltage': 'mean',
                                      'current':['mean','var'],
                                      'activepower': 'mean',
                                      'reactivepower': 'mean',
                                      'powerfactor': 'mean',
                                      'fundamentalpower': 'mean',
                                      'hamonicpower':['mean','var']
                                     })
resampled.columns = resampled.columns.map('_'.join)
resampled = resampled.query('class_count > 50')
resampled = resampled.drop('class_count', axis=1)
resampled.head(9999)

Unnamed: 0_level_0,class_mean,voltage_mean,current_mean,current_std,activepower_mean,reactivepower_mean,powerfactor_mean,fundamentalpower_mean,hamonicpower_mean,hamonicpower_std
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019-03-20 05:09:00,1.0,212.99895,0.612017,0.000385,130.158167,-7.214167,1.0,130.025667,0.132667,0.006342
2019-03-20 05:10:00,1.0,213.159867,0.612608,0.00048,130.383333,-7.218333,1.0,130.246833,0.135167,0.005039
2019-03-20 05:20:00,1.0,214.880767,0.618023,0.000814,132.615001,-7.111833,1.0,132.477833,0.136333,0.005197
2019-03-20 05:21:00,1.0,214.8677,0.618152,0.00056,132.633334,-7.104833,1.0,132.498,0.134667,0.005031
2019-03-20 05:22:00,1.0,215.15478,0.619571,0.000494,133.112882,-7.128814,1.0,132.977627,0.134746,0.006786
2019-03-20 05:23:00,1.0,215.039085,0.61928,0.000201,132.97983,-7.126102,1.0,132.845424,0.134915,0.005042
2019-03-20 05:24:00,1.0,214.931153,0.618941,0.000641,132.838983,-7.124237,1.0,132.707627,0.130678,0.005529
2019-03-20 05:25:00,1.0,215.546576,0.620678,0.000339,133.595594,-7.161356,1.0,133.462712,0.132712,0.004484
2019-03-20 05:26:00,1.0,215.768138,0.621405,0.000287,133.887069,-7.182931,1.0,133.755,0.13069,0.00413
2019-03-20 05:27:00,1.0,215.7482,0.621475,0.000454,133.8935,-7.183833,1.0,133.765333,0.128,0.005462


In [31]:
# minMax normalization 
normalized=(resampled-resampled.min())/(resampled.max()-resampled.min())
normalized

Unnamed: 0_level_0,class_mean,voltage_mean,current_mean,current_std,activepower_mean,reactivepower_mean,powerfactor_mean,fundamentalpower_mean,hamonicpower_mean,hamonicpower_std
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019-03-20 05:09:00,0.0,0.0,0.978751,0.004803,0.961703,0.57982,1.0,0.961598,0.991404,0.053878
2019-03-20 05:10:00,0.0,0.02029,0.979973,0.006977,0.963803,0.579449,1.0,0.963667,0.994549,0.031269
2019-03-20 05:20:00,0.0,0.237278,0.991165,0.01455,0.984612,0.588944,1.0,0.98453,0.996016,0.033998
2019-03-20 05:21:00,0.0,0.235631,0.99143,0.008779,0.984783,0.589568,1.0,0.984718,0.99392,0.031123
2019-03-20 05:22:00,0.0,0.271829,0.994364,0.007285,0.989254,0.58743,1.0,0.989203,0.994019,0.061582
2019-03-20 05:23:00,0.0,0.257241,0.993761,0.000626,0.988014,0.587672,1.0,0.987967,0.994232,0.031317
2019-03-20 05:24:00,0.0,0.243631,0.993061,0.010621,0.9867,0.587838,1.0,0.986679,0.988902,0.03976
2019-03-20 05:25:00,0.0,0.32123,0.996651,0.003766,0.993755,0.584529,1.0,0.99374,0.991461,0.02163
2019-03-20 05:26:00,0.0,0.349167,0.998154,0.002596,0.996473,0.582605,1.0,0.996473,0.988917,0.015496
2019-03-20 05:27:00,0.0,0.346653,0.998298,0.006378,0.996533,0.582525,1.0,0.99657,0.985533,0.038597


In [51]:

X = normalized.iloc[:, 1:].values  #values
y = normalized.iloc[:, 0].values    # labels


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20) 
classifier = KNeighborsClassifier(n_neighbors=5)  
classifier.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=5, p=2,
           weights='uniform')

In [52]:
y_pred = classifier.predict(X_test)  
print(confusion_matrix(y_test, y_pred))  
print(classification_report(y_test, y_pred)) 

[[8 0]
 [0 4]]
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00         8
         1.0       1.00      1.00      1.00         4

   micro avg       1.00      1.00      1.00        12
   macro avg       1.00      1.00      1.00        12
weighted avg       1.00      1.00      1.00        12

