In [17]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score

# 1. Load the gyroscope CSV; header=0 consumes the first row as column names.
gyro_data = pd.read_csv('./원본 데이터/자이로 데이터.csv', header=0, low_memory=False)

# Parse RegisterDate; values that cannot be parsed become NaT instead of raising.
gyro_data['RegisterDate'] = pd.to_datetime(gyro_data['RegisterDate'], errors='coerce')

# Drop rows whose timestamp could not be parsed (NaT).
gyro_data = gyro_data.dropna(subset=['RegisterDate'])

# Time-based features: seconds since the previous row (0 for the first row),
# plus the hour and minute of each timestamp.
# NOTE(review): diff() follows the file's row order — assumes rows are already
# chronological; confirm against the data source.
gyro_data['time_diff'] = gyro_data['RegisterDate'].diff().dt.total_seconds().fillna(0)
gyro_data['hour'] = gyro_data['RegisterDate'].dt.hour
gyro_data['minute'] = gyro_data['RegisterDate'].dt.minute

# 2. Collect the three gyroscope axes and the time-based features.
# Columns are selected by position (2, 3, 4); presumably X, Y, Z — verify against the CSV layout.
gyro_features = gyro_data.iloc[:, [2, 3, 4]].values
time_features = gyro_data[['time_diff', 'hour', 'minute']].values

# Standardise each feature group with its OWN scaler. Re-fitting a single
# scaler on the time features would overwrite the gyro statistics, making it
# impossible to inverse-transform or to scale new gyro data consistently.
gyro_scaler = StandardScaler()
gyro_features_scaled = gyro_scaler.fit_transform(gyro_features)
time_scaler = StandardScaler()
time_features_scaled = time_scaler.fit_transform(time_features)
# Backward-compatible alias: `scaler` used to end up fitted on the time features.
scaler = time_scaler

# Concatenate scaled gyro axes and scaled time features column-wise.
gyro_features_combined = np.hstack((gyro_features_scaled, time_features_scaled))

# 3. Sequence generation: number of consecutive rows per window.
sequence_length = 128

def create_sequences(data, sequence_length):
    """Split ``data`` into consecutive, non-overlapping windows.

    Rows are grouped along the first axis into windows of exactly
    ``sequence_length`` rows; trailing rows that do not fill a complete
    window are discarded.

    Args:
        data: Array-like whose first axis is the row (time) axis.
        sequence_length: Number of rows per window; must be positive.

    Returns:
        A new ``numpy.ndarray`` of shape
        ``(n_windows, sequence_length, *data.shape[1:])``. When ``data`` has
        fewer than ``sequence_length`` rows the result has ``n_windows == 0``
        but keeps the trailing dimensions, so downstream shape handling
        stays consistent.

    Raises:
        ValueError: If ``sequence_length`` is not positive.
    """
    if sequence_length <= 0:
        raise ValueError("sequence_length must be a positive integer")

    rows = np.asarray(data)
    n_windows = len(rows) // sequence_length
    usable_rows = n_windows * sequence_length
    windows = rows[:usable_rows].reshape(n_windows, sequence_length, *rows.shape[1:])
    # Copy so the result never aliases the caller's array.
    return windows.copy()

# Cut the combined feature matrix into fixed-length windows.
gyro_sequences = create_sequences(
    data=gyro_features_combined,
    sequence_length=sequence_length,
)

# Convert the windows to a float32 tensor for PyTorch.
gyro_tensor = torch.tensor(gyro_sequences, dtype=torch.float32)

# Hold out 20% of the windows for validation (fixed seed for reproducibility).
X_train, X_val = train_test_split(
    gyro_tensor,
    random_state=42,
    test_size=0.2,
)

# 4. CNN-LSTM model definition
class CNNLSTMModel(nn.Module):
    """1-D convolution over time followed by a bidirectional LSTM classifier.

    The input to ``forward`` is laid out as ``(batch, time, features)``: it is
    permuted to channels-first for ``Conv1d`` and back again for the
    ``batch_first`` LSTM. The LSTM output at the final time step is passed
    through dropout and a linear layer that produces ``output_size`` scores.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers, dropout_rate):
        super().__init__()
        # kernel_size=5 with padding=2 keeps the sequence length unchanged.
        self.conv1 = nn.Conv1d(
            in_channels=input_size,
            out_channels=64,
            kernel_size=5,
            padding=2,
        )
        self.lstm = nn.LSTM(
            input_size=64,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
        )
        # Bidirectional LSTM emits forward and backward states concatenated.
        self.fc = nn.Linear(in_features=hidden_size * 2, out_features=output_size)
        self.dropout = nn.Dropout(p=dropout_rate)

    def forward(self, x):
        """Return per-class scores of shape ``(batch, output_size)``."""
        channels_first = x.permute(0, 2, 1)  # (batch, features, time) for Conv1d
        conv_features = F.relu(self.conv1(channels_first))
        time_major = conv_features.permute(0, 2, 1)  # back to (batch, time, channels)
        lstm_output, _state = self.lstm(time_major)
        final_step = lstm_output[:, -1, :]
        return self.fc(self.dropout(final_step))

# 5. Hyperparameters and model construction
input_size = 6  # 3 gyroscope axes + 3 time-based features
hidden_size = 64
output_size = 6  # number of activity classes in the UCI-HAR data
num_layers = 1
dropout_rate = 0.3
learning_rate = 0.001
batch_size = 32
num_epochs = 100

model = CNNLSTMModel(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    num_layers=num_layers,
    dropout_rate=dropout_rate,
)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# 6. Datasets and data loaders.
# Every window is paired with the placeholder class 0 (dummy labels for now).
train_dataset = TensorDataset(
    X_train,
    torch.zeros(X_train.shape[0], dtype=torch.long),
)
val_dataset = TensorDataset(
    X_val,
    torch.zeros(X_val.shape[0], dtype=torch.long),
)

# Only the training loader shuffles; validation order stays fixed.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size)

# 7. Training and evaluation
def _run_epoch(model, loader, loss_fn, opt=None):
    """Run one pass over ``loader``; update weights only when ``opt`` is given.

    Returns ``(mean batch loss, accuracy)`` for the pass.
    """
    total_loss = 0.0
    correct = 0
    seen = 0
    for X_batch, y_batch in loader:
        outputs = model(X_batch)
        # Use the labels supplied by the loader rather than hard-coded zeros,
        # so real labels work as soon as the dataset provides them.
        loss = loss_fn(outputs, y_batch)
        if opt is not None:
            opt.zero_grad()
            loss.backward()
            opt.step()
        total_loss += loss.item()

        predicted = outputs.argmax(dim=1)
        correct += (predicted == y_batch).sum().item()
        seen += y_batch.size(0)
    return total_loss / len(loader), correct / seen


def train_model(model, train_loader, val_loader, epochs, *, opt=None, loss_fn=None,
                patience=None, checkpoint_path="best_model_with_time.pth"):
    """Train ``model`` and checkpoint the weights with the lowest validation loss.

    Args:
        model: Module mapping a feature batch to per-class scores.
        train_loader: Iterable of ``(features, labels)`` training batches.
        val_loader: Iterable of ``(features, labels)`` validation batches.
        epochs: Maximum number of epochs to run.
        opt: Optimizer to step; defaults to the module-level ``optimizer``.
        loss_fn: Loss callable; defaults to the module-level ``criterion``.
        patience: If given, stop once validation loss has failed to improve
            for this many consecutive epochs. ``None`` (default) always runs
            all ``epochs``.
        checkpoint_path: File that receives the best ``state_dict``.

    Returns:
        Dict with per-epoch lists under ``"train_loss"``, ``"val_loss"``,
        ``"train_acc"`` and ``"val_acc"``.

    Raises:
        ZeroDivisionError: If a loader yields no batches.
    """
    # Fall back to the notebook-level objects only when none were passed in.
    opt = optimizer if opt is None else opt
    loss_fn = criterion if loss_fn is None else loss_fn

    history = {"train_loss": [], "val_loss": [], "train_acc": [], "val_acc": []}
    best_val_loss = float('inf')
    stale_epochs = 0

    for epoch in range(epochs):
        model.train()
        train_loss, train_acc = _run_epoch(model, train_loader, loss_fn, opt)

        model.eval()
        with torch.no_grad():
            val_loss, val_acc = _run_epoch(model, val_loader, loss_fn)

        history["train_loss"].append(train_loss)
        history["train_acc"].append(train_acc)
        history["val_loss"].append(val_loss)
        history["val_acc"].append(val_acc)

        print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}")

        # Checkpoint on improvement; optionally stop early after `patience`
        # consecutive epochs without improvement.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            stale_epochs = 0
            torch.save(model.state_dict(), checkpoint_path)
        else:
            stale_epochs += 1
            if patience is not None and stale_epochs >= patience:
                print(f"Early stopping at epoch {epoch+1}: no improvement for {stale_epochs} epoch(s)")
                break

    return history

# 8. Start training.
train_model(model, train_loader, val_loader, epochs=num_epochs)

# 9. Prediction and visualisation: restore the checkpoint written during
# training, then put the model into inference mode.
model.load_state_dict(torch.load("best_model_with_time.pth"))
model.train(False)

def predict_in_batches(model, data_tensor, batch_size):
    """Predict a class index for every sample, ``batch_size`` samples at a time.

    The model is switched to eval mode and run without gradient tracking; the
    predicted class is the argmax over dimension 1 of the model output.

    Args:
        model: Module mapping a batch to per-class scores.
        data_tensor: Tensor whose first dimension indexes samples.
        batch_size: Number of samples per forward pass; must be positive.

    Returns:
        1-D tensor of predicted class indices, one per sample. An empty
        ``data_tensor`` yields an empty ``torch.long`` tensor instead of
        failing inside ``torch.cat``.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    model.eval()
    n_samples = data_tensor.size(0)
    if n_samples == 0:
        # torch.cat([]) raises, so return an explicit empty result.
        return torch.empty(0, dtype=torch.long, device=data_tensor.device)

    with torch.no_grad():
        predictions = [
            torch.argmax(model(data_tensor[start:start + batch_size]), dim=1)
            for start in range(0, n_samples, batch_size)
        ]
    return torch.cat(predictions)

# Run batched prediction over every window.
predicted_classes = predict_in_batches(model, gyro_tensor, batch_size)

# Expand one label per window to one label per row. Windows only cover
# complete blocks of `sequence_length` rows, so the repeated labels can be
# shorter than gyro_data; rows without a window get the sentinel below
# (assigning a shorter array to a DataFrame column raises ValueError).
NO_PREDICTION = -1
window_labels = np.repeat(predicted_classes.cpu().numpy(), sequence_length)
predicted_labels_expanded = np.full(len(gyro_data), NO_PREDICTION, dtype=window_labels.dtype)
n_covered = min(len(window_labels), len(gyro_data))
predicted_labels_expanded[:n_covered] = window_labels[:n_covered]

# 10. Attach the predicted labels to the original rows and visualise them.
gyro_data['predicted_label'] = predicted_labels_expanded

# Scatter plot of the predicted label against the row position.
plt.figure(figsize=(10, 6))
plt.scatter(range(len(gyro_data)), gyro_data['predicted_label'], s=1, c='orange', label='Predicted Labels')
plt.xlabel('Index')
plt.ylabel('Predicted Labels')
plt.title('Predicted Behavior Labels')
plt.legend()
plt.show()

# 11. Save the labelled data.
gyro_data.to_csv('./원본 데이터/자이로 데이터_with_final_labels.csv', index=False)

# 12. Accuracy, computed only over rows that actually received a prediction.
# NOTE(review): assumes the CSV provides a 'cluster_label' column — confirm.
has_prediction = gyro_data['predicted_label'] != NO_PREDICTION
accuracy = accuracy_score(
    gyro_data.loc[has_prediction, 'cluster_label'],
    gyro_data.loc[has_prediction, 'predicted_label'],
)
print(f"Accuracy: {accuracy * 100:.2f}%")

Epoch [1/100], Train Loss: 0.0242, Val Loss: 0.0000, Train Acc: 0.9981, Val Acc: 1.0000


KeyboardInterrupt: 