In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
import torch.optim as optim

In [2]:
# 데이터 전처리 및 특징 추출 코드
def load_and_preprocess_data(file_path):
    data = pd.read_csv(file_path)

    # 필요한 특징 선택
    features = data[['gyros_forearm_x', 'gyros_forearm_y', 'gyros_forearm_z']]
    labels = data['classe']

    # 가속도 계산 (각속도의 변화율)
    features['gyros_forearm_x_acc'] = np.gradient(features['gyros_forearm_x'])
    features['gyros_forearm_y_acc'] = np.gradient(features['gyros_forearm_y'])
    features['gyros_forearm_z_acc'] = np.gradient(features['gyros_forearm_z'])

    # 자이로스코프 신호의 1차 및 2차 미분
    features['gyros_forearm_x_diff'] = np.diff(features['gyros_forearm_x'], prepend=0)
    features['gyros_forearm_y_diff'] = np.diff(features['gyros_forearm_y'], prepend=0)
    features['gyros_forearm_z_diff'] = np.diff(features['gyros_forearm_z'], prepend=0)

    # 신호 에너지 계산 (RMS)
    features['gyros_forearm_x_energy'] = np.sqrt(np.mean(features['gyros_forearm_x']**2))
    features['gyros_forearm_y_energy'] = np.sqrt(np.mean(features['gyros_forearm_y']**2))
    features['gyros_forearm_z_energy'] = np.sqrt(np.mean(features['gyros_forearm_z']**2))

    # 이동 평균 및 표준 편차
    window_size = 5
    features['gyros_forearm_x_ma'] = features['gyros_forearm_x'].rolling(window=window_size).mean()
    features['gyros_forearm_y_ma'] = features['gyros_forearm_y'].rolling(window=window_size).mean()
    features['gyros_forearm_z_ma'] = features['gyros_forearm_z'].rolling(window=window_size).mean()

    features['gyros_forearm_x_std'] = features['gyros_forearm_x'].rolling(window=window_size).std()
    features['gyros_forearm_y_std'] = features['gyros_forearm_y'].rolling(window=window_size).std()
    features['gyros_forearm_z_std'] = features['gyros_forearm_z'].rolling(window=window_size).std()

    # 켤측값 및 inf 값 처리
    features = features.replace([np.inf, -np.inf], np.nan) # inf 값을 NaN으로 변환
    features = features.fillna(0) # NaN 값을 0으로 대체

    # 클래스 레이블 인코딩
    label_encoder = LabelEncoder()
    labels = label_encoder.fit_transform(labels)

    return features, labels

# 데이터 로드
training_data_path = './원본 데이터/pml-training.csv'
features, labels = load_and_preprocess_data(training_data_path)

# Train/Test 분할
x_train, x_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# 표준화
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# PyTorch TensorDatset 생성
train_dataset = TensorDataset(torch.tensor(x_train).float(), torch.tensor(y_train).long())
test_dataset = TensorDataset(torch.tensor(x_test).float(), torch.tensor(y_test).long())

# DataLoader 생성
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

  data = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  features['gyros_forearm_x_acc'] = np.gradient(features['gyros_forearm_x'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  features['gyros_forearm_y_acc'] = np.gradient(features['gyros_forearm_y'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  features['gyros_forearm

In [3]:
class CNN_LSTM(nn.Module):
    def __init__(self, input_channels, num_classes):
        super(CNN_LSTM, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=input_channels, out_channels=64, kernel_size=1, stride=1)
        self.bn1 = nn.BatchNorm1d(64)
        # MaxPool1d 제거
        # self.pool = nn.MaxPool1d(kernel_size=2, stride=1)  # 이 레이어를 제거하거나 적절히 조정합니다.
        
        self.lstm1 = nn.LSTM(input_size=64, hidden_size=128, num_layers=1, batch_first=True)
        self.lstm2 = nn.LSTM(input_size=128, hidden_size=128, num_layers=1, batch_first=True)
        self.dropout = nn.Dropout(0.5)
        
        self.fc1 = nn.Linear(128, 64)
        self.fc2 = nn.Linear(64, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        # x = self.pool(x)  # MaxPool1d를 사용하지 않거나 대체 방법을 고려합니다.
        
        x = x.permute(0, 2, 1)  # (batch_size, sequence_length, features) 형태로 변환
        
        x, _ = self.lstm1(x)
        x = self.dropout(x)
        x, _ = self.lstm2(x)
        
        x = x[:, -1, :]  # 마지막 LSTM의 출력을 사용
        x = self.fc1(x)
        x = self.fc2(x)
        return x

In [4]:
# 모델 초기화
input_channels = x_train.shape[1]  # 실제 입력 데이터의 채널 수를 반영
model = CNN_LSTM(input_channels=input_channels, num_classes=6)

# 손실 함수 및 옵티마이저 정의
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [5]:
# 학습 루프
epochs = 500
for epoch in range(epochs):
    model.train()
    train_loss, correct_train = 0, 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        
        # 입력 데이터의 차원을 확인하여 필요한 경우 3D로 변환
        if inputs.dim() == 2:  # inputs의 차원이 (batch_size, features)인 경우
            inputs = inputs.unsqueeze(1)  # (batch_size, 1, features)

        inputs = inputs.transpose(1, 2)  # (batch_size, 1, features) -> (batch_size, features, 1)
        
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs, 1)
        correct_train += (predicted == labels).sum().item()

    train_loss /= len(train_loader.dataset)
    train_accuracy = correct_train / len(train_loader.dataset)

    # 검증 루프
    model.eval()
    test_loss, correct_test = 0, 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            if inputs.dim() == 2:
                inputs = inputs.unsqueeze(1)
                
            inputs = inputs.transpose(1, 2)  # (batch_size, 1, features) -> (batch_size, features, 1)
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            test_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            correct_test += (predicted == labels).sum().item()

    test_loss /= len(test_loader.dataset)
    test_accuracy = correct_test / len(test_loader.dataset)

    print(f'Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, '
          f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')

Epoch 1/100, Train Loss: 1.5397, Train Accuracy: 0.3168, Test Loss: 1.4887, Test Accuracy: 0.3465
Epoch 2/100, Train Loss: 1.4761, Train Accuracy: 0.3620, Test Loss: 1.5168, Test Accuracy: 0.3399
Epoch 3/100, Train Loss: 1.4540, Train Accuracy: 0.3757, Test Loss: 1.4387, Test Accuracy: 0.3865
Epoch 4/100, Train Loss: 1.4430, Train Accuracy: 0.3860, Test Loss: 1.4164, Test Accuracy: 0.3964
Epoch 5/100, Train Loss: 1.4320, Train Accuracy: 0.3896, Test Loss: 1.4403, Test Accuracy: 0.3931
Epoch 6/100, Train Loss: 1.4236, Train Accuracy: 0.3959, Test Loss: 1.4016, Test Accuracy: 0.3982
Epoch 7/100, Train Loss: 1.4200, Train Accuracy: 0.3951, Test Loss: 1.4003, Test Accuracy: 0.4059
Epoch 8/100, Train Loss: 1.4121, Train Accuracy: 0.4010, Test Loss: 1.5050, Test Accuracy: 0.3669
Epoch 9/100, Train Loss: 1.4032, Train Accuracy: 0.4063, Test Loss: 1.3864, Test Accuracy: 0.4125
Epoch 10/100, Train Loss: 1.3979, Train Accuracy: 0.4104, Test Loss: 1.3658, Test Accuracy: 0.4257
Epoch 11/100, Train