# **1. 필요한 라이브러리 불러오기**

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

# **2. 데이터 읽기 및 전처리**

In [2]:
def feature_engineering(df):
    for axis in ['x', 'y', 'z']:
        df[f'gyros_forearm_{axis}_mean'] = df[f'gyros_forearm_{axis}'].rolling(window=3).mean()
        df[f'gyros_forearm_{axis}_std'] = df[f'gyros_forearm_{axis}'].rolling(window=3).std()
        df[f'gyros_forearm_{axis}_max'] = df[f'gyros_forearm_{axis}'].rolling(window=3).max()
        df[f'gyros_forearm_{axis}_min'] = df[f'gyros_forearm_{axis}'].rolling(window=3).min()
        df[f'gyros_forearm_{axis}_cv'] = df[f'gyros_forearm_{axis}_std'] / df[f'gyros_forearm_{axis}_mean']
    return df

def calculate_rate_of_change(df):
    for axis in ['x', 'y', 'z']:
        df[f'gyros_forearm_{axis}_roc'] = df[f'gyros_forearm_{axis}'].diff().fillna(0)
    return df

def calculate_total_movement(df):
    df['total_movement'] = (df['gyros_forearm_x']**2 + df['gyros_forearm_y']**2 + df['gyros_forearm_z']**2)**0.5
    return df

def detect_outliers(df):
    from scipy.stats import zscore
    for axis in ['x', 'y', 'z']:
        df[f'gyros_forearm_{axis}_outlier'] = (zscore(df[f'gyros_forearm_{axis}']) > 3).astype(int)
    return df

# **3. 데이터 로드 및 전처리**

In [3]:
def load_and_preprocess_data(file_path):
    data = pd.read_csv(file_path)
    features = data[['gyros_forearm_x', 'gyros_forearm_y', 'gyros_forearm_z']]
    labels = data['classe']
    
    # 특징 공학 함수 호출
    features = feature_engineering(features)
    features = calculate_rate_of_change(features)
    features = calculate_total_movement(features)
    features = detect_outliers(features)

    # 모든 데이터가 float 타입이 되도록 변환
    features = features.astype(float)
    
    # 라벨을 숫자로 변환 (필요에 따라)
    labels = LabelBinarizer().fit_transform(labels)
    
    # 표준화
    features = (features - features.mean()) / features.std()
    
    return features, labels

# 데이터 로드
training_data_path = './원본 데이터/pml-training.csv'
features, labels = load_and_preprocess_data(training_data_path)

  data = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'gyros_forearm_{axis}_mean'] = df[f'gyros_forearm_{axis}'].rolling(window=3).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'gyros_forearm_{axis}_std'] = df[f'gyros_forearm_{axis}'].rolling(window=3).std()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
 

# **4. 데이터 분할 및 타입 변환**

In [4]:
# 학습 및 테스트 데이터 분할
x_train, x_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# DataFrame을 NumPy 배열로 변환한 후 PyTorch Tensor로 변환
train_dataset = TensorDataset(torch.tensor(x_train.values).float(), torch.tensor(y_train).long())
test_dataset = TensorDataset(torch.tensor(x_test.values).float(), torch.tensor(y_test).long())

# DataLoader 생성
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# **5. PyTorch 모델 정의**

In [5]:
print(x_train.shape)

(15697, 25)


In [6]:
# CNN-LSTM 모델 정의
class CNN_LSTM(nn.Module):
    def __init__(self, input_size, num_classes):
        super(CNN_LSTM, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=25, out_channels=64, kernel_size=2, stride=1, padding=1)  # padding 추가
        self.bn1 = nn.BatchNorm1d(64)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=1, padding=0)  # padding을 0으로 설정
        
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=64, kernel_size=2, stride=1, padding=1)  # padding 추가
        self.bn2 = nn.BatchNorm1d(64)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=1, padding=0)  # padding을 0으로 설정

        self.lstm1 = nn.LSTM(input_size=64, hidden_size=64, batch_first=True)
        self.lstm2 = nn.LSTM(input_size=64, hidden_size=64, batch_first=True)
        
        self.fc1 = nn.Linear(64, 128)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, num_classes)
    
    def forward(self, x):
        x = x.permute(0, 2, 1)  # [batch_size, channels, length]로 변환
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.pool(x)
        
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.pool(x)

        x = x.permute(0, 2, 1)  # [batch_size, length, channels]로 변환
        x, _ = self.lstm1(x)
        x = self.dropout(x)
        x, _ = self.lstm2(x)
        
        x = self.fc1(x[:, -1, :])  # 마지막 타임스텝의 출력을 사용
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# 모델 인스턴스 생성
input_shape = x_train.shape[1]  # 입력 데이터의 특징 수를 input_size로 사용
num_classes = len(np.unique(y_train))  # 고유 클래스 수
model = CNN_LSTM(input_size=input_shape, num_classes=num_classes)

# **6. 모델 학습 및 평가**

In [7]:
for epoch in range(epochs):
    model.train()
    train_loss, correct_train = 0, 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        if inputs.dim() == 2:  # 입력 데이터 차원 확인 및 추가
            inputs = inputs.unsqueeze(1)
        
        outputs = model(inputs)
        
        # 만약 labels가 원-핫 인코딩된 경우, 아래 코드로 인덱스를 추출
        if labels.dim() > 1:
            labels = labels.argmax(dim=1)
        
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs, 1)
        correct_train += (predicted == labels).sum().item()

    train_loss /= len(train_loader.dataset)
    train_accuracy = correct_train / len(train_loader.dataset)

    model.eval()
    test_loss, correct_test = 0, 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            if inputs.dim() == 2:  # 입력 데이터 차원 확인 및 추가
                inputs = inputs.unsqueeze(1)
            
            outputs = model(inputs)
            
            if labels.dim() > 1:
                labels = labels.argmax(dim=1)
            
            loss = criterion(outputs, labels)
            test_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            correct_test += (predicted == labels).sum().item()

    test_loss /= len(test_loader.dataset)
    test_accuracy = correct_test / len(test_loader.dataset)

    print(f'Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, '
          f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')


RuntimeError: 0D or 1D target tensor expected, multi-target not supported