In [4]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import regularizers
from imblearn.over_sampling import SMOTE
from scipy.interpolate import interp1d
from scipy.spatial.transform import Rotation

# 데이터 로드
data = pd.read_csv('./원본 데이터/transformed_train.csv')

# 라벨 인코딩
label_encoder = LabelEncoder()
data['activity'] = label_encoder.fit_transform(data['activity'])

# 데이터 정규화
mean = np.mean(data[['x-accl', 'y-accl', 'z-accl']], axis=0)
std = np.std(data[['x-accl', 'y-accl', 'z-accl']], axis=0)
data[['x-accl', 'y-accl', 'z-accl']] = (data[['x-accl', 'y-accl', 'z-accl']] - mean) / std

# 데이터 분할 (7:3 비율로 train/test 분할)
train_data, test_data = train_test_split(data, test_size=0.3, random_state=42, stratify=data['activity'])

# 시계열 데이터 증강 함수들
def time_warp(x, sigma=0.2, knot=4):
    orig_steps = np.arange(x.shape[0])
    random_warps = np.random.normal(loc=1.0, scale=sigma, size=(knot+2,))
    warp_steps = np.zeros(knot+2)
    warp_steps[0] = 0
    warp_steps[-1] = x.shape[0] - 1
    warp_steps[1:-1] = np.sort(np.random.randint(1, x.shape[0]-1, size=(knot,)))
    warper = interp1d(warp_steps, random_warps, bounds_error=False, fill_value='extrapolate')
    warped_steps = warper(orig_steps)
    return np.interp(orig_steps, warped_steps, x.T).T

def magnitude_warp(x, sigma=0.2, knot=4):
    orig_steps = np.arange(x.shape[0])
    random_warps = np.random.normal(loc=1.0, scale=sigma, size=(knot+2,))
    warp_steps = np.zeros(knot+2)
    warp_steps[0] = 0
    warp_steps[-1] = x.shape[0] - 1
    warp_steps[1:-1] = np.sort(np.random.randint(1, x.shape[0]-1, size=(knot,)))
    warper = interp1d(warp_steps, random_warps, bounds_error=False, fill_value='extrapolate')
    warped_steps = warper(orig_steps)
    return x * warped_steps[:, np.newaxis]

def window_slice(x, reduce_ratio=0.9):
    target_length = int(reduce_ratio * x.shape[0])
    if target_length >= x.shape[0]:
        return x
    starts = np.random.randint(low=0, high=x.shape[0] - target_length + 1)
    return x[starts:starts + target_length]

def jitter(x, sigma=0.05):
    return x + np.random.normal(loc=0., scale=sigma, size=x.shape)

def scaling(x, sigma=0.1):
    factor = np.random.normal(loc=1., scale=sigma, size=(x.shape[-1],))
    return x * factor

def rotation(x):
    axis = np.random.uniform(low=-1, high=1, size=3)
    angle = np.random.uniform(low=-np.pi, high=np.pi)
    rot = Rotation.from_rotvec(angle * axis)
    return rot.apply(x)


def augment_data(x, y):
    augmented_x = []
    augmented_y = []
    for i in range(len(x)):
        sample = x[i].reshape(-1, 3)  # reshape to (n_features/3, 3)
        label = y[i]
        
        augmented_x.append(sample.flatten())
        augmented_y.append(label)

        # Time warping
        warped = time_warp(sample)
        augmented_x.append(warped.flatten())
        augmented_y.append(label)

        # Magnitude warping
        warped = magnitude_warp(sample)
        augmented_x.append(warped.flatten())
        augmented_y.append(label)

        # Window slicing
        sliced = window_slice(sample)
        if len(sliced) > 0:
            augmented_x.append(sliced.flatten())
            augmented_y.append(label)

        # Jittering
        jittered = jitter(sample)
        augmented_x.append(jittered.flatten())
        augmented_y.append(label)

        # Scaling
        scaled = scaling(sample)
        augmented_x.append(scaled.flatten())
        augmented_y.append(label)

        # Rotation
        rotated = rotation(sample)
        augmented_x.append(rotated.flatten())
        augmented_y.append(label)

    return np.array(augmented_x), np.array(augmented_y)


# 데이터 증강 적용
x_train = train_data.iloc[:, 2:].values
y_train = train_data['activity'].values
x_train = x_train.reshape(x_train.shape[0], -1)  # reshape to 2D
x_train_augmented, y_train_augmented = augment_data(x_train, y_train)

# SMOTE 적용 (데이터 불균형 해결)
smote = SMOTE(random_state=42)
x_train_resampled, y_train_resampled = smote.fit_resample(x_train_augmented, y_train_augmented)

# 데이터 세그먼트화 및 라벨링 함수 (-1, TIME_PERIODS, 3)
TIME_PERIODS = 80
STEP_DISTANCE = 40

def data_segments(data, labels):
    segments = []
    segment_labels = []
    for i in range(0, len(data) - TIME_PERIODS, STEP_DISTANCE):
        X = data.iloc[i:i + TIME_PERIODS].values
        segment_labels.append(labels.iloc[i])
        segments.append(X)

    return np.array(segments), np.array(segment_labels)

x_train, y_train = data_segments(pd.DataFrame(train_data_resampled), pd.Series(train_labels_resampled))
x_test, y_test = data_segments(test_data.iloc[:, 2:], test_data['activity'])

# 라벨 원-핫 인코딩
y_train = tf.keras.utils.to_categorical(y_train)
y_test = tf.keras.utils.to_categorical(y_test)

# 1D CNN 모델 생성
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(TIME_PERIODS, 3)),
    tf.keras.layers.Conv1D(filters=50, kernel_size=11, activation='relu', kernel_regularizer=regularizers.l2(0.05)),
    tf.keras.layers.MaxPool1D(),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv1D(filters=5, kernel_size=5, activation='relu', kernel_regularizer=regularizers.l2(0.05)),
    tf.keras.layers.MaxPool1D(),
    tf.keras.layers.Dropout(rate=0.7),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=5, activation='softmax', kernel_regularizer=regularizers.l2(0.05))
])

# 모델 컴파일 및 학습
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.001,
    decay_steps=10000,
    decay_rate=0.9
)
opt = tf.keras.optimizers.RMSprop(learning_rate=lr_schedule)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
ret = model.fit(x_train, y_train, epochs=100, batch_size=400, validation_data=(x_test, y_test), verbose=2)

# 모델 평가 및 결과 출력
train_loss, train_acc = model.evaluate(x_train, y_train, verbose=2)
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)

print(f'Train Accuracy: {train_acc}')
print(f'Test Accuracy: {test_acc}')

# 혼동 행렬 및 분류 보고서 출력
y_pred = np.argmax(model.predict(x_test), axis=-1)
y_true = np.argmax(y_test, axis=-1)

print(confusion_matrix(y_true, y_pred))
print(classification_report(y_true, y_pred, target_names=label_encoder.classes_))

# 학습 곡선 시각화
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.title("Accuracy")
plt.plot(ret.history['accuracy'], "b-", label="train accuracy")
plt.plot(ret.history['val_accuracy'], "r-", label="val accuracy")
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend(loc="best")

plt.subplot(1, 2, 2)
plt.title("Loss")
plt.plot(ret.history['loss'], "b-", label="train loss")
plt.plot(ret.history['val_loss'], "r-", label="val loss")
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend(loc="best")

plt.tight_layout()
plt.show()


ValueError: low >= high