In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
import torch.optim as optim

In [2]:
# 데이터 로드 및 전처리 함수
def load_and_preprocess_data(file_path):
    data = pd.read_csv(file_path)

    # 필요한 특징 선택
    features = data[['gyros_forearm_x', 'gyros_forearm_y', 'gyros_forearm_z']]
    labels = data['classe']

    # 기본 통계량 특징 추가 (각 행별로 계산)
    features['gyros_forearm_x_mean'] = features['gyros_forearm_x'].mean(axis=0)
    features['gyros_forearm_y_mean'] = features['gyros_forearm_y'].mean(axis=0)
    features['gyros_forearm_z_mean'] = features['gyros_forearm_z'].mean(axis=0)
    
    features['gyros_forearm_x_std'] = features['gyros_forearm_x'].std(axis=0)
    features['gyros_forearm_y_std'] = features['gyros_forearm_y'].std(axis=0)
    features['gyros_forearm_z_std'] = features['gyros_forearm_z'].std(axis=0)
    
    features['gyros_forearm_x_max'] = features['gyros_forearm_x'].max(axis=0)
    features['gyros_forearm_y_max'] = features['gyros_forearm_y'].max(axis=0)
    features['gyros_forearm_z_max'] = features['gyros_forearm_z'].max(axis=0)
    
    features['gyros_forearm_x_min'] = features['gyros_forearm_x'].min(axis=0)
    features['gyros_forearm_y_min'] = features['gyros_forearm_y'].min(axis=0)
    features['gyros_forearm_z_min'] = features['gyros_forearm_z'].min(axis=0)

    # 이동 평균 추가
    window_size = 5
    features['gyros_forearm_x_ma'] = features['gyros_forearm_x'].rolling(window=window_size).mean().fillna(0)
    features['gyros_forearm_y_ma'] = features['gyros_forearm_y'].rolling(window=window_size).mean().fillna(0)
    features['gyros_forearm_z_ma'] = features['gyros_forearm_z'].rolling(window=window_size).mean().fillna(0)

    # 변화율 특징 추가
    features['gyros_forearm_x_roc'] = features['gyros_forearm_x'].pct_change().fillna(0)
    features['gyros_forearm_y_roc'] = features['gyros_forearm_y'].pct_change().fillna(0)
    features['gyros_forearm_z_roc'] = features['gyros_forearm_z'].pct_change().fillna(0)

    # 결측값 및 inf 값 처리
    features = features.replace([np.inf, -np.inf], np.nan)
    features = features.fillna(0)

    # 클래스 레이블 인코딩
    label_encoder = LabelEncoder()
    labels = label_encoder.fit_transform(labels)

    return features, labels

# 데이터 로드
training_data_path = './원본 데이터/pml-training.csv'
features, labels = load_and_preprocess_data(training_data_path)

  data = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  features['gyros_forearm_x_mean'] = features['gyros_forearm_x'].mean(axis=0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  features['gyros_forearm_y_mean'] = features['gyros_forearm_y'].mean(axis=0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  features['gyros_forea

In [3]:
# Train/Test 분할
x_train, x_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# 표준화
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# PyTorch TensorDataset 생성
train_dataset = TensorDataset(torch.tensor(x_train).float(), torch.tensor(y_train).long())
test_dataset = TensorDataset(torch.tensor(x_test).float(), torch.tensor(y_test).long())

# DataLoader 생성
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [4]:
class CNN_LSTM(nn.Module):
    def __init__(self, input_channels, num_classes):
        super(CNN_LSTM, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=input_channels, out_channels=64, kernel_size=1, stride=1)
        self.bn1 = nn.BatchNorm1d(64)
        self.pool = nn.MaxPool1d(kernel_size=1, stride=1)
        
        self.lstm1 = nn.LSTM(input_size=64, hidden_size=128, num_layers=1, batch_first=True)
        self.lstm2 = nn.LSTM(input_size=128, hidden_size=128, num_layers=1, batch_first=True)
        self.dropout = nn.Dropout(0.5)
        
        self.fc1 = nn.Linear(128, 64)
        self.fc2 = nn.Linear(64, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.pool(x)
        
        x, _ = self.lstm1(x)
        x = self.dropout(x)
        x, _ = self.lstm2(x)
        
        x = x[:, -1, :]  # Use the output from the last LSTM
        x = self.fc1(x)
        x = self.fc2(x)
        return x

In [5]:
# 모델 초기화
input_channels = 21  # 실제 입력 데이터의 채널 수를 21로 고정
model = CNN_LSTM(input_channels=input_channels, num_classes=6)

# 손실 함수 및 옵티마이저 정의
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [6]:
# 학습 루프
epochs = 50
for epoch in range(epochs):
    model.train()
    train_loss, correct_train = 0, 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        
        # 입력 데이터의 차원을 확인하여 필요한 경우 3D로 변환
        if inputs.dim() == 2:  # inputs의 차원이 (batch_size, features)인 경우
            inputs = inputs.unsqueeze(1)  # (batch_size, 1, features)

        inputs = inputs.transpose(1, 2)  # (batch_size, 1, features) -> (batch_size, features, 1)
        
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs, 1)
        correct_train += (predicted == labels).sum().item()

    train_loss /= len(train_loader.dataset)
    train_accuracy = correct_train / len(train_loader.dataset)

    # 검증 루프
    model.eval()
    test_loss, correct_test = 0, 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            if inputs.dim() == 2:
                inputs = inputs.unsqueeze(1)
                
            inputs = inputs.transpose(1, 2)  # (batch_size, features, 1) -> (batch_size, 1, features)
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            test_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            correct_test += (predicted == labels).sum().item()

    test_loss /= len(test_loader.dataset)
    test_accuracy = correct_test / len(test_loader.dataset)

    print(f'Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, '
          f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')

RuntimeError: input.size(-1) must be equal to input_size. Expected 64, got 1