# **1. 필요한 라이브러리 불러오기**

In [1]:
import pandas as pd
import numpy as np
from scipy.fft import fft
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# **2. 데이터 전처리 및 특징공학**

In [2]:
# 모든 열을 숫자형으로 변환하는 함수
def convert_to_numeric(df):
    df = df.apply(pd.to_numeric, errors='coerce')  # 숫자형으로 변환 불가한 데이터는 NaN으로 대체
    df = df.dropna(axis=1)  # NaN이 포함된 열은 제거
    return df

# 데이터 로드 및 전처리
def load_and_preprocess_data(file_path):
    data = pd.read_csv(file_path)
    features = data[['gyros_forearm_x', 'gyros_forearm_y', 'gyros_forearm_z']]
    labels = data['classe']

    # 이동 평균 및 이동 표준 편차 추가
    def add_moving_features(data, window_size=5):
        for column in ['gyros_forearm_x', 'gyros_forearm_y', 'gyros_forearm_z']:
            data[f'{column}_ma'] = data[column].rolling(window=window_size).mean().fillna(0)
            data[f'{column}_std'] = data[column].rolling(window=window_size).std().fillna(0)
        return data

    # 파생 특징 추가
    def add_derivative_features(data):
        for column in ['gyros_forearm_x', 'gyros_forearm_y', 'gyros_forearm_z']:
            data[f'{column}_deriv'] = data[column].diff().fillna(0)
        return data

    # 에너지 특징 추가
    def add_energy_features(data):
        for column in ['gyros_forearm_x', 'gyros_forearm_y', 'gyros_forearm_z']:
            data[f'{column}_energy'] = data[column]**2
        return data

    # 피크 및 크레스트 요소 추가
    def add_peak_features(data):
        for column in ['gyros_forearm_x', 'gyros_forearm_y', 'gyros_forearm_z']:
            data[f'{column}_peak'] = data[column].rolling(window=5).max().fillna(0)
            data[f'{column}_crest'] = data[column].max() / (data[column].std() + 1e-10)
        return data

    # 방향성 및 각도 변화 추가
    def add_angular_features(data):
        data['xy_angle'] = np.arctan2(data['gyros_forearm_y'], data['gyros_forearm_x'])
        data['xz_angle'] = np.arctan2(data['gyros_forearm_z'], data['gyros_forearm_x'])
        data['yz_angle'] = np.arctan2(data['gyros_forearm_z'], data['gyros_forearm_y'])
        return data

    # 통계적 특징 추가
    def add_statistical_features(data):
        for column in ['gyros_forearm_x', 'gyros_forearm_y', 'gyros_forearm_z']:
            data[f'{column}_mean'] = data[column].mean()
            data[f'{column}_std'] = data[column].std()
            data[f'{column}_median'] = data[column].median()
            data[f'{column}_skew'] = data[column].skew()
            data[f'{column}_kurtosis'] = data[column].kurtosis()
        return data

    # 이상치 탐지 추가
    def add_outlier_features(data):
        from scipy.stats import zscore
        for column in ['gyros_forearm_x', 'gyros_forearm_y', 'gyros_forearm_z']:
            data[f'{column}_outlier'] = np.abs(zscore(data[column])) > 3
        return data

    # 푸리에 변환 적용
    def fourier_transform_features(data, columns):
        from scipy.fft import fft
        for column in columns:
            fft_values = fft(data[column].values)  # 데이터를 numpy 배열로 변환
            amplitude_spectrum = np.abs(fft_values)
            data[f'{column}_fft_mean'] = np.mean(amplitude_spectrum)
            data[f'{column}_fft_max'] = np.max(amplitude_spectrum)
            data[f'{column}_fft_min'] = np.min(amplitude_spectrum)
        return data

    # 모든 특징 공학 기법 적용
    features = add_moving_features(features)
    features = add_derivative_features(features)
    features = add_energy_features(features)
    features = add_peak_features(features)
    features = add_angular_features(features)
    features = add_statistical_features(features)
    features = add_outlier_features(features)
    features = fourier_transform_features(features, ['gyros_forearm_x', 'gyros_forearm_y', 'gyros_forearm_z'])

    # 숫자형으로 변환
    features = convert_to_numeric(features)

    # 클래스 레이블 인코딩
    label_encoder = LabelEncoder()
    labels = label_encoder.fit_transform(labels)

    return features, labels

# 데이터 로드
training_data_path = './원본 데이터/pml-training.csv'
features, labels = load_and_preprocess_data(training_data_path)

# Train/Test 분할
x_train, x_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# 데이터 유형 확인 및 변환
x_train = x_train.values.astype(np.float32)
x_test = x_test.values.astype(np.float32)

# PyTorch TensorDataset 생성
train_dataset = TensorDataset(torch.tensor(x_train).float(), torch.tensor(y_train).long())
test_dataset = TensorDataset(torch.tensor(x_test).float(), torch.tensor(y_test).long())

# DataLoader 생성
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


  data = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[f'{column}_ma'] = data[column].rolling(window=window_size).mean().fillna(0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[f'{column}_std'] = data[column].rolling(window=window_size).std().fillna(0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[f'{col

# **3. 데이터셋 준비 및 모델 정의**

In [3]:
# CNN-LSTM 모델 정의
class CNN_LSTM(nn.Module):
    def __init__(self, input_size, num_classes):
        super(CNN_LSTM, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=input_size, out_channels=64, kernel_size=1, stride=1)  # 커널 크기 1로 설정
        self.bn1 = nn.BatchNorm1d(64)
        self.pool = nn.AdaptiveMaxPool1d(10)  # 입력 크기에 따라 동적으로 풀링
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=1, stride=1)
        self.bn2 = nn.BatchNorm1d(128)
        self.lstm1 = nn.LSTM(input_size=128, hidden_size=64, num_layers=1, batch_first=True)
        self.lstm2 = nn.LSTM(input_size=64, hidden_size=32, num_layers=1, batch_first=True)
        self.fc = nn.Linear(32, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.pool(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.pool(x)
        x, _ = self.lstm1(x.transpose(1, 2))  # LSTM expects input as (batch, seq_len, input_size)
        x, _ = self.lstm2(x)
        x = x[:, -1, :]  # Get the output of the last time step
        x = self.fc(x)
        return x

# 모델 초기화
input_size = 48  # 실제 feature 수로 설정
num_classes = len(set(y_train))  # 클래스 수로 설정
model = CNN_LSTM(input_size=input_size, num_classes=num_classes)

# **4. 모델 학습 및 평가**

In [4]:
# 손실 함수 및 옵티마이저 정의
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 학습 루프
epochs = 10
for epoch in range(epochs):
    model.train()
    train_loss, correct_train = 0, 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()

        inputs = inputs.unsqueeze(1)  # (batch_size, 1, features)
        inputs = inputs.transpose(1, 2)  # (batch_size, features, 1) -> (batch_size, 1, features)

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs, 1)
        correct_train += (predicted == labels).sum().item()

    train_loss /= len(train_loader.dataset)
    train_accuracy = correct_train / len(train_loader.dataset)

    print(f'Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}')

    model.eval()
    test_loss, correct_test = 0, 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.unsqueeze(1)  # (batch_size, 1, features)
            inputs = inputs.transpose(1, 2)  # (batch_size, features, 1) -> (batch_size, 1, features)

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            test_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            correct_test += (predicted == labels).sum().item()

    test_loss /= len(test_loader.dataset)
    test_accuracy = correct_test / len(test_loader.dataset)

    print(f'Epoch {epoch+1}/{epochs}, Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Epoch 1/10, Train Loss: 1.5110, Train Accuracy: 0.3391
Epoch 1/10, Test Loss: 1.6188, Test Accuracy: 0.2833
Epoch 2/10, Train Loss: 1.4646, Train Accuracy: 0.3732
Epoch 2/10, Test Loss: 1.6733, Test Accuracy: 0.2833
Epoch 3/10, Train Loss: 1.4427, Train Accuracy: 0.3863
Epoch 3/10, Test Loss: 1.6176, Test Accuracy: 0.2245
Epoch 4/10, Train Loss: 1.4266, Train Accuracy: 0.3954
Epoch 4/10, Test Loss: 1.7273, Test Accuracy: 0.1781
Epoch 5/10, Train Loss: 1.4160, Train Accuracy: 0.3994
Epoch 5/10, Test Loss: 1.7770, Test Accuracy: 0.1952
Epoch 6/10, Train Loss: 1.4050, Train Accuracy: 0.4019
Epoch 6/10, Test Loss: 2.2768, Test Accuracy: 0.1761
Epoch 7/10, Train Loss: 1.3972, Train Accuracy: 0.4075
Epoch 7/10, Test Loss: 1.7377, Test Accuracy: 0.2189
Epoch 8/10, Train Loss: 1.3843, Train Accuracy: 0.4144
Epoch 8/10, Test Loss: 1.6495, Test Accuracy: 0.2810
Epoch 9/10, Train Loss: 1.3781, Train Accuracy: 0.4158
Epoch 9/10, Test Loss: 1.9944, Test Accuracy: 0.2833
Epoch 10/10, Train Loss: 1.3