In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Conv1D, Flatten, Dropout, MaxPooling1D, Concatenate, Input
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# 중력 가속도 (m/s²)
GRAVITY = 9.81  

# Inertial Signals 데이터 로드 함수
def load_inertial_data():
    """
    UCI HAR Inertial Signals 데이터를 로드하는 함수.
    
    출력:
        X_data (numpy array): 가속도 원본 데이터 (128, 3)
        feature_data (numpy array): 추가 피처 (128, 5)
        y_df (DataFrame): 활동 라벨
    """
    data_dir = "E:/dataset/HAR/UCI-HAR/train/Inertial Signals/"

    # Inertial Signals 가속도 데이터 로드
    acc_x = np.loadtxt(os.path.join(data_dir, "body_acc_x_train.txt"))
    acc_y = np.loadtxt(os.path.join(data_dir, "body_acc_y_train.txt"))
    acc_z = np.loadtxt(os.path.join(data_dir, "body_acc_z_train.txt"))

    # 활동 라벨 로드
    y_path = "E:/dataset/HAR/UCI-HAR/train/y_train.txt"
    y_df = pd.read_csv(y_path, delim_whitespace=True, header=None, names=["Activity"])

    # 추가 피처 계산
    Theta = np.degrees(np.arccos(acc_z / GRAVITY))
    Theta_XY = np.degrees(np.arctan2(acc_y, acc_x))
    Magnitude = np.sqrt(acc_x**2 + acc_y**2 + acc_z**2)
    Azimuth = np.degrees(np.arctan2(acc_y, acc_x))
    Elevation = np.degrees(np.arctan2(np.sqrt(acc_x**2 + acc_y**2), acc_z))
    
    feature_data = np.stack((Theta, Theta_XY, Magnitude, Azimuth, Elevation), axis=-1)
    X_data = np.stack((acc_x, acc_y, acc_z), axis=-1)  # (샘플, 128, 3) 형태
    
    return X_data, feature_data, y_df

# 데이터 로드
X_data, feature_data, y_df = load_inertial_data()

# 활동 라벨 매핑
activity_labels = {
    1: "걷기",
    2: "계단 오르기",
    3: "계단 내리기",
    4: "앉음",
    5: "서있음",
    6: "누움"
}
y_df["Activity"] = y_df["Activity"].map(activity_labels)

# 정적 행위만 필터링
static_activities = ["앉음", "서있음", "누움"]
filtered_indices = y_df["Activity"].isin(static_activities)
X_data = X_data[filtered_indices]
feature_data = feature_data[filtered_indices]
y_df = y_df[filtered_indices].reset_index(drop=True)

# 라벨 인코딩
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y_df["Activity"])

# 데이터 정규화
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_data.reshape(-1, X_data.shape[-1])).reshape(X_data.shape)
X_test_scaled = scaler.transform(X_data.reshape(-1, X_data.shape[-1])).reshape(X_data.shape)

# 시퀀스 데이터 변환 함수
def split_sequences(sequences, n_steps):
    X, y = list(), list()
    for i in range(len(sequences)):
        end_ix = i + n_steps
        if end_ix > len(sequences):
            break
        seq_x, seq_y = sequences[i:end_ix, :-1], sequences[end_ix-1, -1]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)

# 시퀀스 변환 적용
y_train_array = np.array(y_train)
train_set = np.c_[X_train, y_train_array]
y_test_array = np.array(y_test)
test_set = np.c_[X_test, y_test_array]

X_train_seq, y_train_seq = split_sequences(train_set, n_steps=5)
X_test_seq, y_test_seq = split_sequences(test_set, n_steps=5)

# 데이터 분할
X_train, X_test, F_train, F_test, y_train, y_test = train_test_split(X_train_scaled, feature_data, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)

# CNN 모델 정의 (두 개의 CNN 분기)
input_cnn = Input(shape=(X_train_seq.shape[1], X_train_seq.shape[2]))
c1 = Conv1D(filters=64, kernel_size=3, activation='relu')(input_cnn)
c1 = MaxPooling1D(pool_size=2)(c1)
c1 = Conv1D(filters=128, kernel_size=3, activation='relu')(c1)
c1 = MaxPooling1D(pool_size=2)(c1)
c1 = Flatten()(c1)

input_feature_cnn = Input(shape=(X_train_seq.shape[1], F_train.shape[1]))
c2 = Conv1D(filters=32, kernel_size=3, activation='relu')(input_feature_cnn)
c2 = MaxPooling1D(pool_size=2)(c2)
c2 = Conv1D(filters=64, kernel_size=3, activation='relu')(c2)
c2 = MaxPooling1D(pool_size=2)(c2)
c2 = Flatten()(c2)

combined = Concatenate()([c1, c2])
dense1 = Dense(128, activation='relu')(combined)
dropout = Dropout(0.5)(dense1)
output = Dense(3, activation='softmax')(dropout)

model = Model(inputs=[input_cnn, input_feature_cnn], outputs=output)

# 모델 컴파일
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# 모델 학습
model.fit([X_train_seq, F_train], y_train_seq, epochs=20, batch_size=32, validation_data=([X_test_seq, F_test], y_test_seq))

# 모델 평가
loss, accuracy = model.evaluate([X_test_seq, F_test], y_test_seq)
print(f"CNN + Feature CNN 기반 정적 행위 인식 정확도: {accuracy:.2%}")


  y_df = pd.read_csv(y_path, delim_whitespace=True, header=None, names=["Activity"])


ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 3 dimension(s) and the array at index 1 has 2 dimension(s)