<a href="https://colab.research.google.com/github/TheCaveOfAdullam/study2/blob/main/apple33.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
import os
import time  # time 모듈 import
import psutil  # psutil 라이브러리 import
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from keras.utils import to_categorical
import keras.backend as K

In [3]:
# 기본 경로 설정
base_dir = '/content/drive/MyDrive/marine_motor'
categories = ['normal', 'fault_BB', 'fault_RI', 'fault_SM']

# 데이터 로드 및 전처리 함수 정의
def load_data(base_dir, split):
    X = []
    y = []
    split_dir = os.path.join(base_dir, split)
    for category in categories:
        category_dir = os.path.join(split_dir, category)
        for file in os.listdir(category_dir):
            file_path = os.path.join(category_dir, file)
            data = pd.read_csv(file_path, header=None).values
            data = pd.to_numeric(data.flatten(), errors='coerce').reshape(-1, data.shape[1])
            data = np.nan_to_num(data).astype('float32')  # NaN 값을 0으로 대체하고, float32로 변환
            X.append(data)
            y.append(category)
    return np.array(X), np.array(y)

In [4]:
# 데이터 로드
X_train, y_train = load_data(base_dir, 'train')
X_val, y_val = load_data(base_dir, 'validation')
X_test, y_test = load_data(base_dir, 'test')

In [5]:
# 데이터 차원 변경 (CNN 입력 형식에 맞게)
X_train = np.expand_dims(X_train, axis=-1)
X_val = np.expand_dims(X_val, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)

# 레이블 인코딩
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_val_encoded = label_encoder.transform(y_val)
y_test_encoded = label_encoder.transform(y_test)

# 원-핫 인코딩
y_train_categorical = to_categorical(y_train_encoded)
y_val_categorical = to_categorical(y_val_encoded)
y_test_categorical = to_categorical(y_test_encoded)

In [6]:
# 기본 CNN 모델 정의
def create_model():
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(len(categories), activation='softmax'))
    return model

model = create_model()

# 모델 컴파일
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [7]:
# 모델 학습
history = model.fit(X_train, y_train_categorical, epochs=10, batch_size=32, validation_data=(X_val, y_val_categorical))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [8]:
# # CNN 모델 정의
# model = Sequential()
# model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
# model.add(MaxPooling1D(pool_size=2))
# model.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
# model.add(MaxPooling1D(pool_size=2))
# model.add(Flatten())
# model.add(Dense(100, activation='relu'))
# model.add(Dropout(0.5))
# model.add(Dense(len(categories), activation='softmax'))

# # 모델 컴파일
# model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [9]:
# # 모델 학습
# history = model.fit(X_train, y_train_categorical, epochs=10, batch_size=32, validation_data=(X_val, y_val_categorical))

In [10]:
# 모델 사이즈 확인
model.save('model.h5')
model_size = os.path.getsize('model.h5') / (1024 * 1024)  # 모델 사이즈를 MB 단위로 변환
print(f"Model Size: {model_size:.2f} MB")

  saving_api.save_model(


Model Size: 439.50 MB


In [11]:
# 메모리 사용량 확인
process = psutil.Process(os.getpid())
memory_usage = process.memory_info().rss / (1024 * 1024)  # 메모리 사용량을 MB 단위로 변환
print(f"Memory Usage: {memory_usage:.2f} MB")

Memory Usage: 7233.63 MB


In [12]:
# 추론 시간 측정
start_time = time.time()
y_pred_categorical = model.predict(X_test)
end_time = time.time()
inference_time = end_time - start_time
print(f"Inference Time: {inference_time:.2f} seconds")

y_pred = np.argmax(y_pred_categorical, axis=1)

Inference Time: 8.69 seconds


In [13]:
# 검증 데이터 평가
val_loss, val_accuracy = model.evaluate(X_val, y_val_categorical)
print(f"Validation Loss: {val_loss}")
print(f"Validation Accuracy: {val_accuracy}")

Validation Loss: 0.045065268874168396
Validation Accuracy: 0.9880239367485046


In [14]:
# 테스트 데이터 평가
test_loss, test_accuracy = model.evaluate(X_test, y_test_categorical)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

Test Loss: 0.04388662427663803
Test Accuracy: 0.9872499704360962


In [15]:
# 혼동 행렬 및 성능 지표 출력 (검증 데이터)
y_pred_val = model.predict(X_val)
y_pred_val_classes = np.argmax(y_pred_val, axis=1)

conf_matrix_val = confusion_matrix(y_val_encoded, y_pred_val_classes)
class_report_val = classification_report(y_val_encoded, y_pred_val_classes, target_names=categories)

print("Confusion Matrix (Validation):")
print(conf_matrix_val)
print("\nClassification Report (Validation):")
print(class_report_val)

Confusion Matrix (Validation):
[[504   0   0   0]
 [  0 500   0   0]
 [ 10   0 490   0]
 [  0   0  14 486]]

Classification Report (Validation):
              precision    recall  f1-score   support

      normal       0.98      1.00      0.99       504
    fault_BB       1.00      1.00      1.00       500
    fault_RI       0.97      0.98      0.98       500
    fault_SM       1.00      0.97      0.99       500

    accuracy                           0.99      2004
   macro avg       0.99      0.99      0.99      2004
weighted avg       0.99      0.99      0.99      2004



In [16]:
# 혼동 행렬 및 성능 지표 출력 (테스트 데이터)
conf_matrix_test = confusion_matrix(y_test_encoded, y_pred)
class_report_test = classification_report(y_test_encoded, y_pred, target_names=categories)

print("Confusion Matrix (Test):")
print(conf_matrix_test)
print("\nClassification Report (Test):")
print(class_report_test)

Confusion Matrix (Test):
[[ 998    0    2    0]
 [   0 1000    0    0]
 [  32    0  959    9]
 [   0    0    8  992]]

Classification Report (Test):
              precision    recall  f1-score   support

      normal       0.97      1.00      0.98      1000
    fault_BB       1.00      1.00      1.00      1000
    fault_RI       0.99      0.96      0.97      1000
    fault_SM       0.99      0.99      0.99      1000

    accuracy                           0.99      4000
   macro avg       0.99      0.99      0.99      4000
weighted avg       0.99      0.99      0.99      4000



In [17]:
# # 새로운 CSV 파일 분류
# def classify_new_file(file_path, model):
#     data = pd.read_csv(file_path, header=None).values
#     data = np.nan_to_num(data).astype('float32')  # NaN 값을 0으로 대체하고, float32로 변환
#     data = np.expand_dims(data, axis=(0, -1))
#     prediction = model.predict(data)
#     predicted_class = np.argmax(prediction, axis=1)
#     return label_encoder.inverse_transform(predicted_class)

# # 예시: 새로운 파일 분류
# new_file_path = 'path_to_new_csv_file.csv'
# predicted_category = classify_new_file(new_file_path, model)
# print(f'The predicted category for the new file is: {predicted_category[0]}')