In [46]:
import glob
import os
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

In [None]:
# -------------------
# 0. 데이터 경로
# -------------------
train_folder = r"G:\다른 컴퓨터\KHU PC\HI Lab\0. Projects\0. On going\4. Secret Noise Analyzing project\1. 실험자료\train1"

test_file    = r"G:\다른 컴퓨터\KHU PC\HI Lab\0. Projects\0. On going\4. Secret Noise Analyzing project\1. 실험자료\train2-wire\wire_metal.csv"

import os

print("폴더 존재 여부:", os.path.exists(train_folder))
print("폴더 내용:", os.listdir(train_folder))



폴더 존재 여부: True
폴더 내용: ['250709_Noise_hand+metal.csv', '250709_Noise_water.csv', 'hand+metal.csv', 'water.csv', 'skin', 'dielectric', 'metal']


In [52]:

# -------------------
# 1. 슬라이딩 윈도우 함수
# -------------------
def create_windows(data, window_size=500, step=250):
    windows = []
    for start in range(0, len(data)-window_size+1, step):
        end = start + window_size
        windows.append(data[start:end])
    return np.array(windows)

# -------------------
# 2. 훈련 데이터셋 불러오기
# -------------------
X, y = [], []

# train1/하위폴더/*.csv 불러오기
all_files = glob.glob(os.path.join(train_folder, "*", "*.csv"))
print("발견한 CSV 개수:", len(all_files))

# 하위 폴더 이름 = 클래스명
all_classes = sorted({os.path.basename(os.path.dirname(f)).lower() for f in all_files})
label_map = {cls: idx for idx, cls in enumerate(all_classes)}
print("자동 생성된 라벨 맵:", label_map)

print("불러온 파일 예시:", all_files[:3])
for f in all_files[:3]:
    folder_name = os.path.basename(os.path.dirname(f)).lower()
    print("파일:", f)
    print("폴더명:", folder_name)
    print("라벨맵 키들:", label_map.keys())

for f in all_files:
    df = pd.read_csv(f, header=None)
    signal = df.values.flatten()

    # 정규화
    signal = (signal - np.mean(signal)) / np.std(signal)

    # 윈도우 자르기
    windows = create_windows(signal, window_size=500, step=250)

    # 라벨 = 상위 폴더 이름
    folder_name = os.path.basename(os.path.dirname(f)).lower()
    label = label_map[folder_name]

    for w in windows:
        X.append(w)
        y.append(label)

X = np.array(X)
y = np.array(y)

if len(y) == 0:
    raise ValueError("🚨 라벨 데이터가 비어 있습니다. 폴더 구조와 CSV 파일명을 확인하세요.")

# 라벨 분포 출력
unique, counts = np.unique(y, return_counts=True)
inv_label_map = {v: k for k, v in label_map.items()}
print("라벨별 데이터 개수 분포:")
for u, c in zip(unique, counts):
    print(f" - {inv_label_map[u]} : {c} 개")

# 입력 형태 맞추기
X = np.expand_dims(X, -1)
y = to_categorical(y, num_classes=len(label_map))

# -------------------
# 3. Train/Test 분할
# -------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# -------------------
# 4. 모델 정의 및 학습
# -------------------
model = Sequential()
model.add(LSTM(64, input_shape=(X.shape[1], 1)))
model.add(Dense(32, activation="relu"))
model.add(Dense(y.shape[1], activation="softmax"))

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# 모델 저장
model.save("rnn_classifier.h5")

# -------------------
# 5. 새로운 CSV 예측 함수
# -------------------
def classify_csv(filepath, model_path="rnn_classifier.h5"):
    model = load_model(model_path)

    df = pd.read_csv(filepath, header=None)
    signal = df.values.flatten()

    # 정규화
    signal = (signal - np.mean(signal)) / np.std(signal)

    # 윈도우 생성
    windows = create_windows(signal, window_size=500, step=250)
    windows = np.expand_dims(windows, -1)

    # 예측
    preds = model.predict(windows)
    pred_classes = np.argmax(preds, axis=1)

    # 가장 많이 나온 클래스 → 최종 판정
    final_class = np.bincount(pred_classes).argmax()

    inv_label_map = {v: k for k, v in label_map.items()}
    return inv_label_map[final_class]

# -------------------
# 6. 실전 데이터 테스트
# -------------------
result = classify_csv(test_file)
print("이 CSV는:", result)


발견한 CSV 개수: 8
자동 생성된 라벨 맵: {'dielectric': 0, 'metal': 1, 'skin': 2}
불러온 파일 예시: ['G:\\다른 컴퓨터\\KHU PC\\HI Lab\\0. Projects\\0. On going\\4. Secret Noise Analyzing project\\1. 실험자료\\train1\\skin\\skin_2.csv', 'G:\\다른 컴퓨터\\KHU PC\\HI Lab\\0. Projects\\0. On going\\4. Secret Noise Analyzing project\\1. 실험자료\\train1\\skin\\skin_1.csv', 'G:\\다른 컴퓨터\\KHU PC\\HI Lab\\0. Projects\\0. On going\\4. Secret Noise Analyzing project\\1. 실험자료\\train1\\dielectric\\dielectric_1.csv']
파일: G:\다른 컴퓨터\KHU PC\HI Lab\0. Projects\0. On going\4. Secret Noise Analyzing project\1. 실험자료\train1\skin\skin_2.csv
폴더명: skin
라벨맵 키들: dict_keys(['dielectric', 'metal', 'skin'])
파일: G:\다른 컴퓨터\KHU PC\HI Lab\0. Projects\0. On going\4. Secret Noise Analyzing project\1. 실험자료\train1\skin\skin_1.csv
폴더명: skin
라벨맵 키들: dict_keys(['dielectric', 'metal', 'skin'])
파일: G:\다른 컴퓨터\KHU PC\HI Lab\0. Projects\0. On going\4. Secret Noise Analyzing project\1. 실험자료\train1\dielectric\dielectric_1.csv
폴더명: dielectric
라벨맵 키들: dict_keys(['dielectri

  super().__init__(**kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 95ms/step - accuracy: 0.4673 - loss: 1.0813 - val_accuracy: 0.5118 - val_loss: 1.0728
Epoch 2/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 85ms/step - accuracy: 0.4970 - loss: 1.0581 - val_accuracy: 0.5118 - val_loss: 1.0444
Epoch 3/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 82ms/step - accuracy: 0.4970 - loss: 1.0451 - val_accuracy: 0.5118 - val_loss: 1.0305
Epoch 4/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 85ms/step - accuracy: 0.4970 - loss: 1.0391 - val_accuracy: 0.5118 - val_loss: 1.0287
Epoch 5/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 85ms/step - accuracy: 0.4970 - loss: 1.0414 - val_accuracy: 0.5118 - val_loss: 1.0267
Epoch 6/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 79ms/step - accuracy: 0.4970 - loss: 1.0374 - val_accuracy: 0.5118 - val_loss: 1.0248
Epoch 7/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━











[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
이 CSV는: dielectric
