In [2]:
import numpy as np
import pandas as pd
import os
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.ensemble import RandomForestClassifier

def compute_time_domain_features(signal):
    """Compute ten time-domain summary statistics of a one-dimensional signal.

    Parameters
    ----------
    signal : array-like
        Numeric samples (list, tuple, NumPy array, pandas Series, ...).
        The input is converted to a float64 NumPy array up front so that
        plain Python sequences are accepted and small integer dtypes cannot
        overflow when the samples are squared for the RMS.

    Returns
    -------
    list
        ``[mean, std_dev, variance, max, min, range, rms, kurtosis,
        skewness, zero_crossings]`` in exactly this order.

    Raises
    ------
    ValueError
        If ``signal`` contains no samples.

    Notes
    -----
    * ``np.std`` / ``np.var`` are used with their defaults, while kurtosis and
      skewness come from pandas.
    * The NumPy reductions propagate NaN, whereas pandas skips NaN by default,
      so a signal containing NaN yields a mix of NaN and non-NaN features.
    * ``np.sign`` maps an exact zero to 0, so a zero sample sitting between
      two non-zero samples is counted as two sign changes.
    """
    samples = np.asarray(signal, dtype=np.float64)
    if samples.size == 0:
        raise ValueError("signal must contain at least one sample")

    as_series = pd.Series(samples)
    highest = np.max(samples)
    lowest = np.min(samples)

    return [
        np.mean(samples),                        # mean
        np.std(samples),                         # std_dev
        np.var(samples),                         # variance
        highest,                                 # max
        lowest,                                  # min
        highest - lowest,                        # range
        np.sqrt(np.mean(samples ** 2)),          # rms (squared in float64)
        as_series.kurtosis(),                    # kurtosis
        as_series.skew(),                        # skewness
        (np.diff(np.sign(samples)) != 0).sum(),  # zero_crossings
    ]

def process_files_in_directory(directory):
    """Extract time-domain features from every column of every CSV under a directory.

    The tree rooted at ``directory`` is walked recursively. Each file whose
    name ends with ``.csv`` is read with ``pd.read_csv`` and every one of its
    columns is passed, as a NumPy array, to ``compute_time_domain_features``.

    Sub-directories and files are visited in sorted name order, so the rows
    are returned in the same order regardless of how the filesystem happens
    to enumerate directory entries. Downstream code that splits the rows with
    a fixed random seed therefore gets the same split on every machine.

    Parameters
    ----------
    directory : str or path-like
        Root of the directory tree to scan.

    Returns
    -------
    list of list
        One feature vector per (file, column) pair; empty if no ``.csv``
        file is found.
    """
    features_list = []
    for root, dirs, files in os.walk(directory):
        # os.walk reports entries in arbitrary order; sorting ``dirs`` in
        # place fixes the order in which the top-down walk descends.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith('.csv'):
                continue
            data = pd.read_csv(os.path.join(root, file))
            # Each column is treated as an independent one-dimensional signal.
            # If a file holds several columns, it should be clarified which
            # of them is actually meant to be used.
            for column in data.columns:
                features_list.append(
                    compute_time_domain_features(data[column].values)
                )
    return features_list

def main(data_directory):
    """Assemble the feature table and class labels from per-class folders.

    Each immediate sub-directory of ``data_directory`` is treated as one
    class. Sub-directories are taken in sorted name order and numbered
    0, 1, 2, ...; entries that are not directories are ignored and do not
    consume a label. Every feature row produced by
    ``process_files_in_directory`` for a sub-directory gets that
    sub-directory's label.

    Parameters
    ----------
    data_directory : str or path-like
        Directory whose sub-directories hold the CSV files of each class.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``features_df`` with one row per signal and the ten feature columns,
        and ``labels_df`` with a single ``class_label`` column, row-aligned
        with ``features_df``.
    """
    feature_names = [
        'mean', 'std_dev', 'variance', 'max', 'min', 'range',
        'rms', 'kurtosis', 'skewness', 'zero_crossings'
    ]

    # Lazily yield only those entries that are directories, in sorted order.
    candidate_paths = (
        os.path.join(data_directory, entry)
        for entry in sorted(os.listdir(data_directory))
    )
    class_dirs = (path for path in candidate_paths if os.path.isdir(path))

    rows = []
    targets = []
    for label, class_dir in enumerate(class_dirs):
        rows_for_class = process_files_in_directory(class_dir)
        rows += rows_for_class
        targets += [label] * len(rows_for_class)

    # Build DataFrames holding all computed features and their labels.
    features_df = pd.DataFrame(rows, columns=feature_names)
    labels_df = pd.DataFrame(targets, columns=['class_label'])
    return features_df, labels_df



In [3]:
# Build the feature and label tables from the 'data' directory; main returns
# them together as a (features_df, labels_df) tuple.
all_dataframes = main('data')
# NOTE(review): this prints the whole tuple as plain text, and the next cell
# calls main('data') again, so the dataset is processed twice. Consider
# unpacking the result once and previewing it with .head() instead.
print(all_dataframes)

(            mean   std_dev   variance        max        min      range  \
0       0.178448  1.018942   1.038242   5.099650  -0.909797   6.009447   
1       0.167435  1.138147   1.295378   4.616021  -0.282516   4.898537   
2       9.605697  0.400064   0.160051   9.806650   8.418014   1.388636   
3      -0.098641  0.123732   0.015310   0.407014  -0.320823   0.727837   
4      -0.131202  0.047251   0.002233   0.023942  -0.244209   0.268151   
...          ...       ...        ...        ...        ...        ...   
19381 -11.146126  5.205216  27.094278   0.181959 -25.201366  25.383325   
19382   0.233036  7.809926  60.994952  32.599450 -16.903065  49.502515   
19383   1.131979  4.141624  17.153053  11.113884  -7.359776  18.473660   
19384 -10.937192  5.351603  28.639651   0.181959 -25.201366  25.383325   
19385   0.260170  7.808017  60.965136  32.599450 -16.903065  49.502515   

             rms   kurtosis  skewness  zero_crossings  
0       1.034450  18.543171  4.044308               5 

In [4]:
# Build the feature/label tables and hand plain NumPy arrays to scikit-learn.
features_df, labels_df = main('data')

X = features_df.to_numpy()
y = labels_df.to_numpy().flatten()  # single label column -> 1-D vector

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable for a given row order.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


In [5]:
# Fit a support-vector classifier with scikit-learn's default settings.
# NOTE(review): the features are passed in unscaled although their magnitudes
# differ widely; SVC is sensitive to feature scale, so standardising the
# inputs (e.g. a StandardScaler pipeline) is worth evaluating.
clf = svm.SVC()
clf.fit(X_train, y_train)


In [6]:
# Predict class labels for the held-out rows; the bare name on the last line
# makes the notebook display the resulting array.
y_pred = clf.predict(X_test)
y_pred

array([1, 1, 1, ..., 1, 3, 1], dtype=int64)

In [7]:
# First 20 predicted labels, for an eyeball comparison with the true labels.
y_pred[:20]


array([1, 1, 1, 0, 1, 3, 1, 3, 3, 3, 1, 1, 3, 0, 1, 3, 3, 3, 0, 1],
      dtype=int64)

In [8]:
# First 20 true labels of the held-out set.
y_test[:20]


array([1, 1, 1, 0, 1, 3, 1, 3, 3, 3, 3, 1, 3, 0, 1, 3, 3, 3, 0, 1],
      dtype=int64)

In [9]:
# Score the SVM predictions on the held-out set. Precision, recall and F1 are
# averaged over the classes, weighted by each class's support.
# NOTE(review): zero_division=1 assigns a score of 1.0 wherever the metric is
# undefined for a class (e.g. precision for a class that is never predicted),
# which can inflate the weighted value; confirm this is intended.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(
    y_test, y_pred, zero_division=1, average='weighted'
)
recall = recall_score(
    y_test, y_pred, zero_division=1, average='weighted'
)
f1 = f1_score(y_test, y_pred, average='weighted')

print(
    f"Процент правильних відповідей: {accuracy * 100:.2f}%",
    f"Precision: {precision * 100:.2f}%",
    f"Recall: {recall * 100:.2f}%",
    f"F1 Score: {f1 * 100:.2f}%",
    sep="\n",
)


Процент правильних відповідей: 91.16%
Precision: 91.56%
Recall: 91.16%
F1 Score: 90.11%


In [13]:
# Train a random forest on the same split (seeded so the fit is repeatable)
# and predict the held-out rows.
clf_rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred_rf = clf_rf.predict(X_test)

# Same metrics as for the SVM: class-support-weighted precision/recall/F1.
# NOTE(review): zero_division=1 assigns a score of 1.0 wherever the metric is
# undefined for a class, which can inflate the weighted value; confirm this
# is intended.
accuracy_rf = accuracy_score(y_test, y_pred_rf)
precision_rf = precision_score(
    y_test, y_pred_rf, zero_division=1, average='weighted'
)
recall_rf = recall_score(
    y_test, y_pred_rf, zero_division=1, average='weighted'
)
f1_rf = f1_score(y_test, y_pred_rf, average='weighted')

print(
    f"Процент правильних відповідей Random Forest: {accuracy_rf * 100:.2f}%",
    f"Precision (R Forest): {precision_rf * 100:.2f}%",
    f"Recall (R Forest): {recall_rf * 100:.2f}%",
    f"F1 (R Forest): {f1_rf * 100:.2f}%",
    sep="\n",
)

Процент правильних відповідей Random Forest: 98.68%
Precision (R Forest): 98.69%
Recall (R Forest): 98.68%
F1 (R Forest): 98.67%
