In [2]:
import glob
import os
import sys

def directoryListing(dirPath, depth=1):
    """Print an indented tree of the files and directories under ``dirPath``.

    Each entry is printed on its own line as ``[F]`` (regular file), ``[D]``
    (directory) or ``[?]`` (anything else), followed by ``depth`` dashes and
    the entry name. Directories are descended into recursively with
    ``depth + 1``. Entries whose name starts with ``.git`` are skipped.

    Args:
        dirPath: Directory to list.
        depth: Nesting level of the entries of ``dirPath``; it controls how
            many dashes are printed. Defaults to 1.

    Raises:
        OSError: If ``dirPath`` itself cannot be listed. A failure while
            handling one entry (including listing a subdirectory) is reported
            on stdout and the walk continues with the next entry.
    """
    # os.listdir() returns entries in arbitrary order; sort them so the
    # printed tree is reproducible across runs and platforms.
    for item in sorted(os.listdir(dirPath)):
        if item.startswith('.git'):
            continue  # ignore files and directories whose name starts with .git
        filepath = os.path.join(dirPath, item)
        marker = '-' * depth
        try:
            if os.path.isfile(filepath):
                print(f"[F] {marker} {item}")
            elif os.path.isdir(filepath):
                print(f"[D] {marker} {item}")
                directoryListing(filepath, depth + 1)
            else:
                print(f"[?] {marker} {item}")
        except OSError as exc:
            # Say which entry failed and why instead of a bare "ERROR".
            print(f"ERROR {filepath}: {exc}")

# Print the tree of the current working directory (depth starts at 1).
directoryListing("./")

[F] - a.ipynb
[D] - Data
[D] -- 12k_DE
[F] --- B007_0.mat
[F] --- B007_1.mat
[F] --- B007_2.mat
[F] --- B007_3.mat
[F] --- B014_0.mat
[F] --- B014_1.mat
[F] --- B014_2.mat
[F] --- B014_3.mat
[F] --- B021_0.mat
[F] --- B021_1.mat
[F] --- B021_2.mat
[F] --- B021_3.mat
[F] --- B028_0.mat
[F] --- B028_1.mat
[F] --- B028_2.mat
[F] --- B028_3.mat
[F] --- IR007_0.mat
[F] --- IR007_1.mat
[F] --- IR007_2.mat
[F] --- IR007_3.mat
[F] --- IR014_0.mat
[F] --- IR014_1.mat
[F] --- IR014_2.mat
[F] --- IR014_3.mat
[F] --- IR021_0.mat
[F] --- IR021_1.mat
[F] --- IR021_2.mat
[F] --- IR021_3.mat
[F] --- IR028_0.mat
[F] --- IR028_1.mat
[F] --- IR028_2.mat
[F] --- IR028_3.mat
[F] --- OR007@12_0.mat
[F] --- OR007@12_1.mat
[F] --- OR007@12_2.mat
[F] --- OR007@12_3.mat
[F] --- OR007@3_0.mat
[F] --- OR007@3_1.mat
[F] --- OR007@3_2.mat
[F] --- OR007@3_3.mat
[F] --- OR007@6_0.mat
[F] --- OR007@6_1.mat
[F] --- OR007@6_2.mat
[F] --- OR007@6_3.mat
[F] --- OR014@6_0.mat
[F] --- OR014@6_1.mat
[F] --- OR014@6_2.mat
[F]

In [38]:
import scipy.io as sio
import numpy as np
import pandas as pd

def load_mat_file(filepath):
    """Load a MATLAB ``.mat`` file and return its ``DE_time`` signal as a 1-D array.

    The file is read with ``scipy.io.loadmat`` and the first variable whose
    name contains the substring ``'DE_time'`` is returned, flattened.

    Args:
        filepath: Path to the ``.mat`` file.

    Returns:
        numpy.ndarray: Flattened (1-D) copy of the matching variable.

    Raises:
        KeyError: If no variable name in the file contains ``'DE_time'``.
    """
    mat_data = sio.loadmat(filepath)
    # Match on the substring rather than an exact name
    # (presumably the variable prefix varies per recording - confirm).
    for key, value in mat_data.items():
        if 'DE_time' in key:
            return value.flatten()
    # Fail loudly here: an implicit None would only surface later as a
    # confusing error in whichever caller first touches the data.
    raise KeyError(f"no variable containing 'DE_time' found in {filepath!r}")

# Split a signal into fixed-length windows and compute summary statistics
# for each window.
def extract_segment_features(data, segment_length):
    """Return one row of summary statistics per full window of ``data``.

    ``data`` is cut into consecutive, non-overlapping windows of
    ``segment_length`` samples; trailing samples that do not fill a whole
    window are ignored.

    Args:
        data: 1-D sequence of samples (anything supporting ``len`` and slicing).
        segment_length: Number of samples per window.

    Returns:
        pandas.DataFrame: One row per window with the columns ``mean``,
        ``std``, ``max``, ``min``, ``skewness`` and ``kurtosis``. When there
        is no full window the frame is empty and has no columns.
    """
    def summarize(window):
        # Statistics of a single window; skewness/kurtosis come from pandas.
        as_series = pd.Series(window)
        return {
            'mean': np.mean(window),
            'std': np.std(window),
            'max': np.max(window),
            'min': np.min(window),
            'skewness': as_series.skew(),
            'kurtosis': as_series.kurt(),
        }

    full_windows = len(data) // segment_length
    rows = [
        summarize(data[idx * segment_length:(idx + 1) * segment_length])
        for idx in range(full_windows)
    ]
    return pd.DataFrame(rows)

# Segmentation settings
sampling_frequency = 12000  # samples per second
segment_time = 1  # length of one analysis window, in seconds
segment_length = sampling_frequency * segment_time  # samples per window
# NOTE(review): this assumes the recordings under ./Data/Normal are sampled at
# 12 kHz as well - confirm, otherwise one window does not span segment_time.

# glob.glob() returns matches in arbitrary order; sort so the row order of the
# resulting table is reproducible.
normal_files = sorted(glob.glob('./Data/Normal/*.mat'))
if not normal_files:
    # Without this, pd.concat below fails with "No objects to concatenate",
    # which does not point at the real problem (a wrong or empty data folder).
    raise FileNotFoundError("no .mat files found under ./Data/Normal/")
normal_features_list = []

for file in normal_files:
    data = load_mat_file(file)
    # Use the configured window size instead of repeating the literal 12000.
    segment_features = extract_segment_features(data, segment_length=segment_length)
    segment_features['label'] = 0  # normal data is labelled 0
    normal_features_list.append(segment_features)

# Merge the per-file feature tables into one frame
normal_features_df = pd.concat(normal_features_list, ignore_index=True)
# Redundant with the per-file assignment above but harmless; kept so the whole
# table is guaranteed to carry a uniform label column.
normal_features_df['label'] = 0

In [44]:
# Load the faulty recordings and extract the same per-window statistics.
# Only a fixed number of files is used (per the original note, to take the
# same amount of data as for the normal class).
n_faulty_files = 14
# glob.glob() returns matches in arbitrary order, so sort before slicing;
# otherwise the selected files can differ between machines.
faulty_files = sorted(glob.glob('./Data/12k_DE/*.mat'))[:n_faulty_files]
if not faulty_files:
    # Without this, pd.concat below fails with "No objects to concatenate".
    raise FileNotFoundError("no .mat files found under ./Data/12k_DE/")
faulty_features_list = []

for file in faulty_files:
    data = load_mat_file(file)
    # Reuse the window size configured earlier in the notebook instead of
    # repeating the literal 12000.
    segment_features = extract_segment_features(data, segment_length=segment_length)
    segment_features['label'] = 1  # faulty data is labelled 1
    faulty_features_list.append(segment_features)

# Merge the per-file feature tables into one frame
faulty_features_df = pd.concat(faulty_features_list, ignore_index=True)

In [45]:
# Sanity check: (rows, columns) of the faulty feature table; the columns are
# the six statistics plus 'label'.
faulty_features_df.shape

(140, 7)

In [46]:
# Stack the normal and faulty feature tables into a single dataset.
combined_df = pd.concat(
    [normal_features_df, faulty_features_df],
    ignore_index=True,
)

# Separate the target column (y) from the feature columns (X).
y = combined_df['label']
X = combined_df.drop(columns='label')

In [48]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

# Split into training and test sets (80% train, 20% test).
# stratify=y keeps the class ratio identical in both splits, so the per-class
# support in the report does not depend on the random seed.
# NOTE(review): windows cut from the same recording can end up in both splits;
# consider a group-aware split by source file (the feature table does not
# currently record which file a row came from).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Build and fit the SVM model
model = SVC(kernel='linear', random_state=42)
model.fit(X_train, y_train)

# Predict on the held-out test data
y_pred = model.predict(X_test)

# Evaluate the model
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Confusion Matrix:
[[28  0]
 [ 0 28]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        28
           1       1.00      1.00      1.00        28

    accuracy                           1.00        56
   macro avg       1.00      1.00      1.00        56
weighted avg       1.00      1.00      1.00        56

