In [1]:
# Dependencies
# sktime: 0.9.0
# scikit-learn: 0.24.2
# numpy: 1.19.3
# pandas: 1.1.5
# joblib: 1.1.0

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import csv

from math import sqrt

from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score

from sklearn.ensemble import RandomForestClassifier

from sklearn.pipeline import make_pipeline
from sktime.transformations.panel.tsfresh import TSFreshFeatureExtractor

import joblib

In [2]:
# Accumulators filled by the loading loop: one entry per recording.
# X collects the feature series, y the matching class labels.
X, y = [], []

In [3]:
# Load Data2.csv ... Data7.csv and turn every CSV row into one univariate
# "vibration" series plus its class label.
#
# Row layout as read below: columns 0-2 hold the X/Y/Z samples as a bracketed,
# ", "-separated list; column 3 holds the label.
#
# X and y are (re)created here so the cell is idempotent: re-running it no
# longer appends a second copy of the data, and it still works after X/y have
# been converted to pandas objects further down.
X = []
y = []

for i in range(2, 8):
    path = "./data/formatData/Data" + str(i) + '.csv'
    with open(path, newline='') as csvFile:
        for row in csv.reader(csvFile):
            # Strip the surrounding brackets and parse each axis as float64.
            # The builtin `float` replaces the `np.float` alias (deprecated in
            # NumPy 1.20, removed in 1.24); the resulting dtype is unchanged.
            arrX = np.array(row[0][1:-1].split(', ')).astype(float)
            arrY = np.array(row[1][1:-1].split(', ')).astype(float)
            arrZ = np.array(row[2][1:-1].split(', ')).astype(float)

            # Relative acceleration: Euclidean norm of the sample-to-sample
            # difference across the three axes, scaled by 100.
            # The X axis defines the series length, so Y/Z are cut to it first.
            n = arrX.size
            dx = np.diff(arrX)
            dy = np.diff(arrY[:n])
            dz = np.diff(arrZ[:n])
            vibrate = np.sqrt(dx * dx + dy * dy + dz * dz) * 100

            # One-element list -> one column holding a Series per row once X
            # is wrapped in a DataFrame.
            X.append([pd.Series(vibrate)])

            # A label of 2 is stored as '0~2'; every other label is kept as the
            # raw CSV string.
            # NOTE(review): presumably class 2 represents a merged 0-2 range -
            # confirm against the data-preparation step.
            if int(row[3]) == 2:
                y.append('0~2')
            else:
                y.append(row[3])

In [4]:
# Single seed shared by the train/validation split and the random forest,
# so repeated runs give the same split and the same model.
random_state_num = 49

In [5]:
# Wrap the accumulated lists in pandas containers: X becomes a one-column
# DataFrame whose cells each hold a Series, y becomes a Series of labels.
# Both names are rebound in place, so this cell expects the list versions
# produced by the loading loop as input.
X, y = pd.DataFrame(X), pd.Series(y)

In [6]:
# Hold out 12.5% of the samples for validation; stratifying on y keeps the
# class proportions the same in both parts.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.125,
    random_state=random_state_num,
    stratify=y,
)
# Shapes in the order: X_train, y_train, X_valid, y_valid.
print(*(part.shape for part in (X_train, y_train, X_valid, y_valid)))

(1049, 1) (1049,) (150, 1) (150,)


In [7]:
# Build the tsfresh feature extractor with the "efficient" feature set and
# fit it on the training panel to preview the generated feature table.
# The same instance `t` is later used as the first step of the pipeline;
# `Xt` is only inspected here (no later visible cell reads it).
t = TSFreshFeatureExtractor(default_fc_parameters="efficient", show_warnings=False)
Xt = t.fit_transform(X_train)
# Bare last expression so the notebook renders the first rows as a table.
Xt.head()

Feature Extraction: 100%|████████████████████████████████████████████████████████████████| 5/5 [00:59<00:00, 11.87s/it]


Unnamed: 0,0__variance_larger_than_standard_deviation,0__has_duplicate_max,0__has_duplicate_min,0__has_duplicate,0__sum_values,0__abs_energy,0__mean_abs_change,0__mean_change,0__mean_second_derivative_central,0__median,...,0__permutation_entropy__dimension_5__tau_1,0__permutation_entropy__dimension_6__tau_1,0__permutation_entropy__dimension_7__tau_1,0__query_similarity_count__query_None__threshold_0.0,"0__matrix_profile__feature_""min""__threshold_0.98","0__matrix_profile__feature_""max""__threshold_0.98","0__matrix_profile__feature_""mean""__threshold_0.98","0__matrix_profile__feature_""median""__threshold_0.98","0__matrix_profile__feature_""25""__threshold_0.98","0__matrix_profile__feature_""75""__threshold_0.98"
547,1.0,0.0,0.0,1.0,406.165663,14864.094585,13.115031,-2.494272,-0.377597,16.173542,...,2.70805,2.639057,2.564949,0.0,0.739362,4.571277,3.592298,4.320541,4.016876,4.385688
39,1.0,0.0,0.0,1.0,118.793893,861.686643,2.540539,0.159533,0.0,5.547479,...,2.615631,2.639057,2.564949,0.0,0.739362,4.571277,3.592298,4.320541,4.016876,4.385688
514,1.0,0.0,0.0,0.0,442.385503,12540.618103,11.862579,0.514064,-0.318913,19.6133,...,2.70805,2.639057,2.564949,0.0,0.739362,4.571277,3.592298,4.320541,4.016876,4.385688
377,1.0,0.0,1.0,1.0,116.318412,784.750335,1.713749,0.04651,0.214262,5.547479,...,2.70805,2.639057,2.564949,0.0,0.739362,4.571277,3.592298,4.320541,4.016876,4.385688
671,1.0,0.0,0.0,0.0,713.255232,29189.635019,9.924329,1.560682,-0.042968,37.21362,...,2.615631,2.639057,2.564949,0.0,0.739362,4.571277,3.592298,4.320541,4.016876,4.385688


In [8]:
# Two-step pipeline: tsfresh feature extraction followed by a seeded
# random forest, so raw series can be passed straight to fit/predict.
forest = RandomForestClassifier(random_state=random_state_num)
classifier = make_pipeline(t, forest)

In [9]:
# 7-fold cross-validated accuracy on the training part only; feature
# extraction is re-run inside every fold because it is a pipeline step.
scores = cross_val_score(
    classifier,
    X_train,
    y_train,
    scoring="accuracy",
    cv=7,
)
# Per-fold scores on the first line(s), their mean on the next.
print(scores, scores.mean(), sep="\n")

Feature Extraction: 100%|████████████████████████████████████████████████████████████████| 5/5 [00:50<00:00, 10.19s/it]
Feature Extraction: 100%|████████████████████████████████████████████████████████████████| 5/5 [00:10<00:00,  2.10s/it]
Feature Extraction: 100%|████████████████████████████████████████████████████████████████| 5/5 [00:51<00:00, 10.24s/it]
Feature Extraction: 100%|████████████████████████████████████████████████████████████████| 5/5 [00:11<00:00,  2.22s/it]
Feature Extraction: 100%|████████████████████████████████████████████████████████████████| 5/5 [00:48<00:00,  9.75s/it]
Feature Extraction: 100%|████████████████████████████████████████████████████████████████| 5/5 [00:10<00:00,  2.03s/it]
Feature Extraction: 100%|████████████████████████████████████████████████████████████████| 5/5 [00:47<00:00,  9.54s/it]
Feature Extraction: 100%|████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00,  1.99s/it]
Feature Extraction: 100%|███████████████

[0.8        0.86       0.86       0.79333333 0.79333333 0.81333333
 0.81879195]
0.8198274209012464


In [10]:
# Train the full pipeline on the training part, then report its accuracy
# on the held-out validation part.
classifier.fit(X_train, y_train)
valid_accuracy = classifier.score(X_valid, y_valid)
print(f"accuracy: {valid_accuracy}")

Feature Extraction: 100%|████████████████████████████████████████████████████████████████| 5/5 [01:04<00:00, 12.84s/it]
Feature Extraction: 100%|████████████████████████████████████████████████████████████████| 5/5 [00:10<00:00,  2.18s/it]


accuracy: 0.8466666666666667


In [11]:
# Predict the validation labels once and keep them in y_pred for the cells
# below, then print per-class precision / recall / F1.
# (A single weighted F1 is available via f1_score(..., average="weighted").)
y_pred = classifier.predict(X_valid)
report = classification_report(y_valid, y_pred)
print(report)

Feature Extraction: 100%|████████████████████████████████████████████████████████████████| 5/5 [00:10<00:00,  2.17s/it]


              precision    recall  f1-score   support

         0~2       0.85      0.88      0.86        25
           3       0.79      0.76      0.78        25
           4       0.92      0.92      0.92        25
           5       0.85      0.92      0.88        25
           6       0.82      0.72      0.77        25
           7       0.85      0.88      0.86        25

    accuracy                           0.85       150
   macro avg       0.85      0.85      0.85       150
weighted avg       0.85      0.85      0.85       150



In [12]:
# Cross-check: accuracy computed from the stored predictions; it matches the
# value printed earlier by classifier.score(X_valid, y_valid).
print(accuracy_score(y_valid, y_pred))

0.8466666666666667


In [13]:
# Copy predictions and ground-truth labels into plain NumPy arrays.
# No later visible cell reads these two names.
valid_predict = np.array(y_pred)
valid_reality = np.array(y_valid)

In [14]:
# Persist the fitted pipeline (feature extractor + random forest) to the file
# 'modelwithTsFresh' in the current working directory.
# joblib files are pickle-based: only load them from a trusted source, and
# with the library versions listed in the dependency comment of the first cell.
# The call is the last expression, so the list of written files is displayed.
joblib.dump(classifier, 'modelwithTsFresh')

['modelwithTsFresh']