> 목표: UCI-HAR 데이터와 자이로 데이터 간의 차이를 줄이고, 자이로 데이터에 적합한 행동 라벨을 추가한다.

## 1. 라이브러리 및 데이터셋 로드

In [1]:
# 1. 라이브러리 및 데이터셋 로드
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, f1_score
from sklearn.cluster import KMeans
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from scipy.fft import fft
from scipy.signal import butter, filtfilt, resample
from scipy.stats import skew, entropy

# Select the compute device: CUDA when torch reports it as available, otherwise CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
# Report whether a CUDA device is visible, naming device 0 when it is.
print(
    f"GPU is available: {torch.cuda.get_device_name(0)}"
    if torch.cuda.is_available()
    else "GPU is not available."
)

GPU is available: NVIDIA GeForce GTX 1650


### 1. 저역통과 필터 및 데이터 전처리 함수 ###

In [3]:
# Low-pass filter design
def butter_lowpass(cutoff, fs, order=4):
    """Design a digital Butterworth low-pass filter.

    Args:
        cutoff: Cut-off frequency, in the same unit as ``fs``.
        fs: Sampling frequency.
        order: Filter order passed to ``scipy.signal.butter``.

    Returns:
        The ``(b, a)`` coefficient pair returned by ``scipy.signal.butter``.
    """
    # butter() expects the cut-off as a fraction of the Nyquist frequency.
    nyquist = 0.5 * fs
    return butter(order, cutoff / nyquist, btype='low', analog=False)

def apply_lowpass_filter(data, cutoff, fs):
    """Low-pass filter ``data`` along axis 0.

    The coefficients come from ``butter_lowpass(cutoff, fs)`` and are applied
    with ``scipy.signal.filtfilt``.

    Args:
        data: Array-like signal; filtering runs along its first axis.
        cutoff: Cut-off frequency forwarded to ``butter_lowpass``.
        fs: Sampling frequency forwarded to ``butter_lowpass``.

    Returns:
        The filtered array produced by ``filtfilt``.
    """
    numerator, denominator = butter_lowpass(cutoff, fs)
    smoothed = filtfilt(numerator, denominator, data, axis=0)
    return smoothed

# Min-max normalisation of windowed segments
def normalize_and_reshape_segments(segments):
    """Min-max scale every channel of a 3-D segment array.

    For each index along the last axis, the 2-D slice ``segments[:, :, axis]``
    is passed to ``MinMaxScaler.fit_transform`` and written into the output.
    Despite the name, the array is not reshaped: the result has the same shape
    as the input.

    The output buffer is always floating point. Previously it inherited the
    input dtype, so integer input silently truncated the scaled values.

    Args:
        segments: Array-like indexed as ``[segment, step, channel]``.

    Returns:
        A new floating-point array with the same shape as ``segments``.
    """
    segments = np.asarray(segments)
    # Keep the caller's float precision; promote anything else to float64.
    if np.issubdtype(segments.dtype, np.floating):
        out_dtype = segments.dtype
    else:
        out_dtype = np.float64

    scaler = MinMaxScaler()
    normalized_segments = np.zeros(segments.shape, dtype=out_dtype)
    for axis in range(segments.shape[2]):
        # NOTE(review): the slice is (segments, steps), so the scaler treats
        # each time step as a separate feature column rather than scaling the
        # channel as a whole -- confirm this is the intended normalisation.
        normalized_segments[:, :, axis] = scaler.fit_transform(segments[:, :, axis])
    return normalized_segments

# Sliding-window segmentation
def create_sliding_windows(data, window_size=128, stride=64):
    """Cut ``data`` into fixed-length, possibly overlapping windows.

    Windows start at ``0, stride, 2 * stride, ...`` and only full windows are
    kept; trailing samples that do not fill a window are dropped.

    Args:
        data: Array-like whose first axis is the sample axis.
        window_size: Number of samples per window; must be positive.
        stride: Offset between consecutive window starts; must be positive.

    Returns:
        Array of shape ``(num_windows, window_size, *data.shape[1:])``. When
        ``data`` is shorter than one window the result is empty but keeps that
        layout, so callers can still index the window and channel axes.

    Raises:
        ValueError: If ``window_size`` or ``stride`` is not positive.
    """
    if window_size <= 0 or stride <= 0:
        raise ValueError("window_size and stride must be positive")

    data = np.asarray(data)
    starts = range(0, len(data) - window_size + 1, stride)
    if len(starts) == 0:
        # np.array([]) would collapse to shape (0,); keep the trailing axes.
        return np.empty((0, window_size) + data.shape[1:], dtype=data.dtype)
    return np.stack([data[start:start + window_size] for start in starts])

### 2. UCI-HAR 데이터 로드 및 자이로 데이터 전처리 ###

In [None]:
# Feature engineering helpers
def calculate_features(df, axis):
    """Add RMS, skewness and entropy columns for one signal column.

    Each statistic is a single scalar computed over the whole ``df[axis]``
    column, so every row of a new column holds the same value. ``df`` is
    modified in place and also returned.

    Args:
        df: DataFrame containing the column ``axis``.
        axis: Name of the column to summarise (e.g. ``'X'``).

    Returns:
        The same ``df`` with ``rms_<axis>``, ``skew_<axis>`` and
        ``entropy_<axis>`` added, in that order.
    """
    signal = df[axis]
    summary = {
        'rms': np.sqrt(np.mean(signal**2)),
        'skew': signal.skew(),
        # scipy's entropy normalises its input; magnitudes keep it non-negative.
        'entropy': entropy(np.abs(signal)),
    }
    for stat_name, stat_value in summary.items():
        df[f'{stat_name}_{axis}'] = stat_value
    return df

# Windowed FFT feature
def calculate_fft_stft(df, axis, window_size=128):
    """Add an ``fft_<axis>`` column holding one windowed-FFT summary value.

    The column ``df[axis]`` is cut into consecutive, non-overlapping windows
    of ``window_size`` samples (an incomplete trailing window is ignored). For
    each window the magnitudes of the first 10 FFT bins are taken, the
    magnitudes are averaged across windows, and only bin 0 of that average is
    stored, broadcast to every row. ``df`` is modified in place.

    Args:
        df: DataFrame containing the column ``axis``.
        axis: Name of the signal column.
        window_size: Samples per FFT window.

    Returns:
        The same ``df`` with the ``fft_<axis>`` column added.

    Raises:
        ValueError: If no complete window fits into the data.
    """
    samples = df[axis].values
    chunks = (samples[start:start + window_size] for start in range(0, len(df), window_size))
    # Keep only full windows; magnitudes of the first 10 bins per window.
    spectra = [np.abs(np.fft.fft(chunk))[:10] for chunk in chunks if len(chunk) == window_size]

    if not spectra:
        raise ValueError("FFT 계산을 위한 유효한 윈도우가 없습니다. 데이터가 충분한지 확인하세요.")

    mean_spectrum = np.mean(np.array(spectra), axis=0)
    # Only the first bin of the averaged spectrum is kept as the feature.
    df[f'fft_{axis}'] = mean_spectrum[0]

    return df


# Column-wise min-max normalisation
def normalize_data(df, columns_to_scale):
    """Min-max scale the requested columns that actually exist in ``df``.

    Names in ``columns_to_scale`` that are missing from ``df`` are skipped.
    ``df`` is modified in place and also returned.

    Args:
        df: DataFrame to normalise.
        columns_to_scale: Iterable of candidate column names.

    Returns:
        The same ``df`` with the present columns replaced by their scaled
        values.
    """
    scaler = MinMaxScaler()
    present = [name for name in columns_to_scale if name in df.columns]
    if not present:
        return df
    df[present] = scaler.fit_transform(df[present])
    return df

# Shared preprocessing pipeline
def preprocess_data(X_data):
    """Turn raw three-axis samples into a 15-column feature matrix.

    The input is flattened to rows of ``(X, Y, Z)``. FFT features are added
    first (``calculate_fft_stft``), then RMS/skew/entropy features
    (``calculate_features``), and the engineered columns are min-max scaled
    with ``normalize_data``. The raw ``X``/``Y``/``Z`` columns are not scaled.

    Args:
        X_data: Array whose total size is a multiple of 3; it is reshaped to
            ``(-1, 3)``.

    Returns:
        ``numpy`` array with one row per sample and the columns
        ``X, Y, Z, rms_*, skew_*, entropy_*, fft_*`` (each group in X/Y/Z
        order).
    """
    axes = ['X', 'Y', 'Z']
    gyro_df = pd.DataFrame(X_data.reshape(-1, 3), columns=axes)

    # FFT features are computed before the statistical features.
    for axis in axes:
        gyro_df = calculate_fft_stft(gyro_df, axis)
    for axis in axes:
        gyro_df = calculate_features(gyro_df, axis)

    # NOTE(review): both helpers store one scalar per column, so each
    # engineered column is constant across rows; min-max scaling a constant
    # column presumably carries no information -- confirm this is intended.
    columns_to_scale = [f'{stat}_{axis}' for stat in ('fft', 'rms', 'skew', 'entropy') for axis in axes]
    gyro_df = normalize_data(gyro_df, columns_to_scale)

    # Final feature layout: raw axes first, then each feature group.
    feature_columns = axes + [f'{stat}_{axis}' for stat in ('rms', 'skew', 'entropy', 'fft') for axis in axes]
    return gyro_df[feature_columns].values

# UCI-HAR loader
def load_ucihar_data():
    """Load the UCI-HAR training gyroscope signals and labels.

    Reads the three ``body_gyro_{x,y,z}_train.txt`` files and ``y_train.txt``
    from the dataset directory, stacks the axes on a new last dimension and
    runs ``preprocess_data`` on the result.

    Returns:
        Tuple ``(X_train_features, labels_train)``. Labels are shifted by -1
        so that they start at 0.

    NOTE(review): ``preprocess_data`` flattens the windows into individual
    samples, so the feature matrix has one row per sample while the labels
    stay one per window (the recorded run printed ``(941056, 15)`` versus
    ``(7352, 1)``). The two cannot be paired row-for-row as returned.
    """
    uci_har_path = './원본 데이터/UCI HAR Dataset/'
    signal_dir = uci_har_path + 'train/Inertial Signals/'

    # Raw string for the regex separator: '\s' is not a valid escape in a
    # normal string literal and triggers a warning on current Python versions.
    whitespace = r'\s+'
    gyro_axes = [
        pd.read_csv(f'{signal_dir}body_gyro_{axis}_train.txt', sep=whitespace, header=None).values
        for axis in ('x', 'y', 'z')
    ]
    labels_train = pd.read_csv(uci_har_path + 'train/y_train.txt', sep=whitespace, header=None).values - 1

    # Combine the axes on the last dimension before feature engineering.
    X_train = np.stack(gyro_axes, axis=-1)
    X_train_features = preprocess_data(X_train)

    return X_train_features, labels_train

# Gyroscope CSV preprocessing
def preprocess_gyro_data(gyro_path):
    """Read a gyroscope CSV and return its engineered features as a tensor.

    Args:
        gyro_path: Path to a CSV file with ``X``, ``Y`` and ``Z`` columns.

    Returns:
        ``torch.float32`` tensor built from the output of ``preprocess_data``.
    """
    raw_frame = pd.read_csv(gyro_path)
    xyz = raw_frame[['X', 'Y', 'Z']].values
    features = preprocess_data(xyz)

    # Convert the feature matrix to a tensor.
    return torch.tensor(features, dtype=torch.float32)

# Load and preprocess the UCI-HAR training data, then report the array shapes.
X_train_features, y_train = load_ucihar_data()
print("UCI-HAR 데이터 로드 완료")
print(f"X_train_features 형태: {X_train_features.shape}")
print(f"y_train 형태: {y_train.shape}")

# Preprocess the unlabeled gyroscope recording.
gyro_data_tensor = preprocess_gyro_data('./원본 데이터/자이로 데이터.csv')
print("자이로 데이터 전처리 완료")
print(f"gyro_data_tensor 형태: {gyro_data_tensor.shape}")

# Convert the training arrays to tensors on the selected device.
# NOTE(review): the recorded output shows 941056 feature rows but 7352 labels,
# so these two tensors do not line up row-for-row -- confirm before training.
X_train_tensor = torch.tensor(X_train_features, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)
print("데이터 Tensor 변환 완료")

UCI-HAR 데이터 로드 완료
X_train_features 형태: (941056, 15)
y_train 형태: (7352, 1)


### 3. 데이터 증강 ###

In [None]:
# Data augmentation
def add_gaussian_noise(data, noise_factor=0.05):
    """Return ``data`` with zero-mean Gaussian noise added.

    Args:
        data: Input tensor.
        noise_factor: Multiplier applied to standard-normal noise of the same
            shape as ``data``.

    Returns:
        A new tensor ``data + noise``; the input is not modified.
    """
    scaled_noise = noise_factor * torch.randn_like(data)
    return data + scaled_noise

def time_warp(data, sigma=0.2, knot=4):
    """Randomly stretch and compress each sample along its time axis.

    A random "speed" curve is drawn per sample at ``knot + 2`` evenly spaced
    knots (values around 1.0 with standard deviation ``sigma``) and linearly
    interpolated to every time step. Its running sum gives warped time
    positions, rescaled so the first step stays at 0 and the last at
    ``steps - 1``. Every feature of the sample is then linearly resampled
    from those positions back onto the regular grid.

    The previous version called ``torch.interp``, which PyTorch does not
    provide, so it failed on first use; interpolation now goes through
    ``numpy.interp``. The warped positions also start at 0, so ``sigma=0``
    returns the input unchanged.

    Args:
        data: Tensor of shape ``(samples, steps, features)``. A 2-D tensor is
            treated as ``(samples, 1, features)``.
        sigma: Standard deviation of the random speed values.
        knot: Number of interior knots of the speed curve.

    Returns:
        Tensor with the dtype and device of ``data``. As before, a result
        whose time axis has length 1 is returned with that axis squeezed out.
    """
    if len(data.shape) == 2:
        data = data.unsqueeze(1)  # -> (samples, 1, features)

    num_samples, num_steps = data.shape[0], data.shape[1]
    if num_steps < 2:
        # Nothing to warp with fewer than two time steps.
        ret = data.clone()
        return ret.squeeze(1) if ret.shape[1] == 1 else ret

    steps = np.arange(num_steps, dtype=np.float64)
    knot_positions = np.linspace(0.0, num_steps - 1.0, knot + 2)
    # Drawn with torch so that torch.manual_seed controls the augmentation.
    knot_speeds = (1.0 + sigma * torch.randn(num_samples, knot + 2)).numpy().astype(np.float64)

    source = data.detach().cpu().to(torch.float64).numpy()
    warped = np.empty_like(source)
    for i, pattern in enumerate(source):
        # Clamp to a small positive speed so the positions strictly increase,
        # which numpy.interp requires of its x-coordinates.
        speed = np.maximum(np.interp(steps, knot_positions, knot_speeds[i]), 1e-6)
        positions = np.cumsum(speed) - speed[0]
        positions *= (num_steps - 1) / positions[-1]
        # numpy.interp is 1-D, so resample each feature separately.
        for dim in range(pattern.shape[1]):
            warped[i, :, dim] = np.interp(steps, positions, pattern[:, dim])

    ret = torch.as_tensor(warped, dtype=data.dtype, device=data.device)
    return ret.squeeze(1) if ret.shape[1] == 1 else ret

# Report the shape of the training tensor before splitting.
print(f"X_train_tensor 원본 형태: {X_train_tensor.shape}")

# Split the augmented data into training and validation sets and wrap both in
# DataLoaders.
# NOTE(review): X_train_augmented / y_train_augmented are never assigned in the
# visible source (add_gaussian_noise and time_warp are defined but not called),
# so this block presumably ends up in the except branch -- confirm.
# NOTE(review): UCIHARData is defined in a later cell; that cell has to run
# before this one.
try:
    # Convert the tensors to NumPy arrays for scikit-learn.
    X_train_augmented_np = X_train_augmented.cpu().numpy()
    y_train_augmented_np = y_train_augmented.cpu().numpy()

    # Stratified 80/20 split with a fixed random seed.
    X_train_np, X_val_np, y_train_np, y_val_np = train_test_split(
        X_train_augmented_np,
        y_train_augmented_np,
        test_size=0.2,
        random_state=42,
        stratify=y_train_augmented_np
    )

    # Convert the split arrays back to tensors on the selected device.
    X_train_tensor = torch.tensor(X_train_np, dtype=torch.float32).to(device)
    X_val_tensor = torch.tensor(X_val_np, dtype=torch.float32).to(device)
    y_train_tensor = torch.tensor(y_train_np, dtype=torch.long).to(device)
    y_val_tensor = torch.tensor(y_val_np, dtype=torch.long).to(device)

    print("학습/검증 데이터 분리 완료")
    print(f"X_train_tensor 형태: {X_train_tensor.shape}")
    print(f"X_val_tensor 형태: {X_val_tensor.shape}")

    # Build the datasets and DataLoaders; only the training loader shuffles.
    train_dataset = UCIHARData(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

    val_dataset = UCIHARData(X_val_tensor, y_val_tensor)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    print("DataLoader 생성 완료")

# Any failure above is only printed, so later cells may find train_loader and
# val_loader undefined.
except Exception as e:
    print(f"오류 발생: {e}")
    print(f"X_train_augmented 형태: {X_train_augmented.shape if 'X_train_augmented' in locals() else 'Not defined'}")
    print(f"y_train_augmented 형태: {y_train_augmented.shape if 'y_train_augmented' in locals() else 'Not defined'}")


### 4. 데이터셋 및 모델 정의 ###

In [None]:
# Dataset wrapper
class UCIHARData(Dataset):
    """Pair a feature container ``X`` with a label tensor ``y``.

    The dataset length is ``len(y)``. Each item is ``(X[idx], y[idx])`` with
    the label flattened to 1-D via ``view(-1)``.
    """

    def __init__(self, X, y):
        self.X, self.y = X, y

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        features = self.X[idx]
        # view(-1) turns a scalar or (1,) label into a 1-D tensor.
        label = self.y[idx].view(-1)
        return features, label

## 5. LSTM, GRU, CNN-LSTM, BiLSTM, Transformer 모델 정의 및 성능 개선(앙상블)

In [None]:
# LSTM classifier
class LSTMModel(nn.Module):
    """Stacked LSTM followed by batch norm, dropout and a linear classifier.

    The LSTM is built with ``batch_first=True`` and ``forward`` classifies
    from the output at the last position of the sequence axis, so the input
    is expected as ``(batch, seq_len, input_size)``.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout_prob):
        super().__init__()
        # Kept as attributes so callers can inspect the configuration.
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=dropout_prob,
        )
        self.batch_norm = nn.BatchNorm1d(hidden_size)
        self.dropout = nn.Dropout(dropout_prob)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        sequence_out, _ = self.lstm(x)
        last_step = sequence_out[:, -1, :]
        return self.fc(self.dropout(self.batch_norm(last_step)))

# GRU classifier
class GRUModel(nn.Module):
    """Stacked GRU followed by batch norm, dropout and a linear classifier.

    The GRU is built with ``batch_first=True`` and ``forward`` classifies
    from the output at the last position of the sequence axis, so the input
    is expected as ``(batch, seq_len, input_size)``.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout_prob):
        super().__init__()
        # Kept as attributes so callers can inspect the configuration.
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=dropout_prob,
        )
        self.batch_norm = nn.BatchNorm1d(hidden_size)
        self.dropout = nn.Dropout(dropout_prob)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        sequence_out, _ = self.gru(x)
        last_step = sequence_out[:, -1, :]
        return self.fc(self.dropout(self.batch_norm(last_step)))

# CNN-LSTM classifier
class CNNLSTMModel(nn.Module):
    """1-D convolution and max-pooling front end feeding a stacked LSTM.

    ``forward`` swaps the last two axes before the convolution and back
    afterwards, so the input is expected as ``(batch, seq_len, input_size)``.
    The pooling layer (kernel 2, stride 2) shortens the sequence before the
    LSTM sees it.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout_prob):
        super().__init__()
        self.input_size = input_size
        self.conv1 = nn.Conv1d(input_size, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.lstm = nn.LSTM(
            64,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=dropout_prob,
        )
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        # Conv1d wants channels on axis 1: (batch, input_size, seq_len).
        channels_first = x.permute(0, 2, 1)
        pooled = self.pool(F.relu(self.conv1(channels_first)))
        # Back to (batch, steps, channels) for the batch-first LSTM.
        sequence_out, _ = self.lstm(pooled.permute(0, 2, 1))
        return self.fc(sequence_out[:, -1, :])

# BiLSTM classifier
class BiLSTMModel(nn.Module):
    """Bidirectional stacked LSTM with batch norm, dropout and a linear head.

    The LSTM is built with ``batch_first=True`` and ``bidirectional=True``,
    so its per-step output and the following layers are ``2 * hidden_size``
    wide. Input is expected as ``(batch, seq_len, input_size)``.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout_prob):
        super().__init__()
        self.input_size = input_size
        self.bilstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=dropout_prob,
            bidirectional=True,
        )
        # Forward and backward directions are concatenated.
        doubled = hidden_size * 2
        self.batch_norm = nn.BatchNorm1d(doubled)
        self.dropout = nn.Dropout(dropout_prob)
        self.fc = nn.Linear(doubled, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        sequence_out, _ = self.bilstm(x)
        # NOTE(review): at the last position the backward half has presumably
        # processed only one step -- confirm this pooling choice is intended.
        last_step = sequence_out[:, -1, :]
        return self.fc(self.dropout(self.batch_norm(last_step)))

# Transformer classifier
class TransformerModel(nn.Module):
    """Transformer-encoder classifier with a learned positional encoding.

    Input of shape ``(batch, seq_len, input_size)`` is projected to
    ``d_model`` features, a learned positional encoding is added, and the
    sequence is encoded in ``(seq_len, batch, d_model)`` layout. The logits
    are computed from the encoder output at the last time step.

    Args:
        input_size: Number of features per time step.
        num_heads: Attention heads per encoder layer.
        num_layers: Number of encoder layers.
        num_classes: Number of output classes.
        dropout_prob: Dropout probability inside the encoder layers.
        d_model: Embedding width (default 128, the value that used to be
            hard-coded).
        max_len: Longest supported sequence (default 180, the value that used
            to be hard-coded).
    """

    def __init__(self, input_size, num_heads, num_layers, num_classes, dropout_prob,
                 d_model=128, max_len=180):
        super().__init__()
        self.input_size = input_size
        self.d_model = d_model
        self.max_len = max_len
        self.input_embedding = nn.Linear(input_size, d_model)
        self.positional_encoding = nn.Parameter(torch.randn(1, max_len, d_model))
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=num_heads, dropout=dropout_prob)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers)
        self.fc = nn.Linear(d_model, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``.

        Raises:
            ValueError: If the sequence is longer than ``max_len``.
        """
        seq_len = x.size(1)
        if seq_len > self.max_len:
            # Otherwise the sliced encoding is shorter than the input and the
            # addition fails with an opaque broadcasting error.
            raise ValueError(
                f"sequence length {seq_len} exceeds max_len={self.max_len} of the positional encoding"
            )

        # (batch, seq_len, input_size) -> (batch, seq_len, d_model)
        x = self.input_embedding(x)
        x = x + self.positional_encoding[:, :seq_len, :]
        # The encoder layer is not batch-first: (seq_len, batch, d_model).
        x = self.transformer(x.permute(1, 0, 2))
        # Classify from the last time step only.
        return self.fc(x[-1, :, :])

## 6. 앙상블 모델 정의

In [None]:
# Input feature count per time step.
# NOTE(review): preprocess_data produces 15 feature columns per row, while the
# models below are built for 3 input features -- confirm which one is intended.
input_size = 3  # X, Y, Z axes

# Instantiate the five ensemble members on the selected device.
models = [
    LSTMModel(input_size=input_size, hidden_size=128, num_layers=3, num_classes=6, dropout_prob=0.5).to(device),
    GRUModel(input_size=input_size, hidden_size=128, num_layers=3, num_classes=6, dropout_prob=0.5).to(device),
    CNNLSTMModel(input_size=input_size, hidden_size=128, num_layers=3, num_classes=6, dropout_prob=0.5).to(device),
    BiLSTMModel(input_size=input_size, hidden_size=128, num_layers=3, num_classes=6, dropout_prob=0.5).to(device),
    TransformerModel(input_size=input_size, num_heads=4, num_layers=2, num_classes=6, dropout_prob=0.5).to(device)
]

# Print the configured input size of every model as a sanity check.
print(f"Input size: {input_size}")
for i, model in enumerate(models):
    print(f"Model {i+1} input size: {model.input_size if hasattr(model, 'input_size') else 'N/A'}")

# Ensemble prediction by weighted soft voting
def ensemble_predict(models, X_unlabeled, weights=None):
    """Predict class ids by weighted soft voting over several models.

    Every model is switched to eval mode, run without gradient tracking, and
    its softmax probabilities (over dim 1) are multiplied by its weight. The
    class with the largest weighted sum wins.

    Args:
        models: Non-empty sequence of ``nn.Module`` classifiers that return
            logits of shape ``(batch, num_classes)``.
        X_unlabeled: Input batch accepted by every model.
        weights: One weight per model. ``None`` (default) weights all models
            equally, for any number of models. Extra trailing weights are
            ignored.

    Returns:
        1-D tensor of predicted class indices, one per batch row.

    Raises:
        ValueError: If ``models`` is empty or fewer weights than models are
            given.
    """
    if not models:
        raise ValueError("ensemble_predict requires at least one model")
    if weights is None:
        # Built per call: avoids a shared mutable default and adapts to the
        # number of models.
        weights = [1] * len(models)
    elif len(weights) < len(models):
        raise ValueError(
            f"expected at least {len(models)} weights, got {len(weights)}"
        )

    weighted_sum = None
    for weight, model in zip(weights, models):
        model.eval()
        # Run the batch on the device that holds the model's parameters.
        first_param = next(model.parameters(), None)
        batch = X_unlabeled if first_param is None else X_unlabeled.to(first_param.device)
        with torch.no_grad():
            probabilities = torch.softmax(model(batch), dim=1)
        contribution = weight * probabilities
        weighted_sum = contribution if weighted_sum is None else weighted_sum + contribution

    return torch.argmax(weighted_sum, dim=1)

## 7. 손실 함수, 옵티마이저, 스케줄러 및 모델 학습

In [None]:
# Class weights: the reciprocal of each class's sample count.
# NOTE(review): class_sample_count is not defined anywhere in the visible
# source -- confirm where it is supposed to come from.
class_weights = 1.0 / torch.tensor(class_sample_count, dtype=torch.float32)
class_weights = class_weights.to(device)

# Cross-entropy loss using the class weights above.
criterion = nn.CrossEntropyLoss(weight=class_weights)

# One Adam optimizer and one cosine-annealing scheduler per model.
optimizers = [
    torch.optim.Adam(model.parameters(), lr=0.000001, weight_decay=1e-4) for model in models
]

# NOTE(review): eta_min (1e-4) is larger than the initial lr (1e-6), so the
# schedule presumably raises the learning rate instead of decaying it --
# confirm this is intended.
schedulers = [
    torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=0.0001) for optimizer in optimizers
]

# Checkpoint writer
def save_checkpoint(epoch, model, optimizer, filename="checkpoint.pth.tar"):
    """Write the epoch number, model weights and optimizer state to disk.

    Args:
        epoch: Epoch number stored under the ``'epoch'`` key.
        model: Module whose ``state_dict()`` is stored under ``'state_dict'``.
        optimizer: Optimizer whose ``state_dict()`` is stored under
            ``'optimizer'``.
        filename: Destination passed to ``torch.save``.
    """
    torch.save(
        dict(
            epoch=epoch,
            state_dict=model.state_dict(),
            optimizer=optimizer.state_dict(),
        ),
        filename,
    )

# Best validation accuracy (percent) seen so far across all models and runs.
best_val_accuracy = 0.0


# Training loop for all ensemble members
def train_model(models, train_loader, val_loader, criterion, optimizers, schedulers, num_epochs, patience):
    """Train every model on the same batches, with per-model checkpoints.

    Each epoch trains all models on every training batch, evaluates them on
    the validation loader, steps each scheduler once, and saves a checkpoint
    ``best_model_<k>.pth.tar`` whenever model ``k`` beats its own best
    validation accuracy. Relies on the module-level ``device`` and
    ``save_checkpoint``.

    Fixes over the previous version:
      * The best accuracy is tracked per model. It used to be one value
        shared by all models, so a model that never beat the strongest one
        never got a checkpoint file.
      * Patience counts epochs in which no model improved. It used to be
        incremented once per model per epoch.
      * Labels are flattened with ``reshape(-1)``. ``squeeze()`` turned a
        batch of size 1 into a 0-d tensor, which has no ``size(0)``.

    Args:
        models: Sequence of ``nn.Module`` classifiers.
        train_loader: Iterable yielding ``(inputs, labels)`` training batches.
        val_loader: Iterable yielding ``(inputs, labels)`` validation batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizers: One optimizer per model, in the same order.
        schedulers: One learning-rate scheduler per model, in the same order.
        num_epochs: Maximum number of epochs.
        patience: Consecutive epochs without any improvement before stopping.

    Side effects:
        Writes checkpoint files, prints progress, and raises the module-level
        ``best_val_accuracy`` to the best accuracy reached by any model.
    """
    global best_val_accuracy
    num_models = len(models)
    # -inf guarantees that every model gets a checkpoint after epoch 1.
    best_per_model = [float("-inf")] * num_models
    epochs_without_improvement = 0

    for epoch in range(num_epochs):
        for model in models:
            model.train()

        running_loss = [0.0] * num_models
        correct = [0] * num_models
        total = 0

        # Training pass
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            # Flatten (batch, 1) labels to (batch,), also for a batch of one.
            labels = labels.to(device).reshape(-1)
            total += labels.size(0)

            for j, model in enumerate(models):
                optimizers[j].zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                # Clip gradients to keep the recurrent models stable.
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizers[j].step()

                running_loss[j] += loss.item()
                correct[j] += (outputs.argmax(dim=1) == labels).sum().item()

        for j in range(num_models):
            accuracy = 100 * correct[j] / total
            print(f"Epoch [{epoch+1}/{num_epochs}], Model {j+1} Training Loss: {running_loss[j]/len(train_loader):.4f}, Training Accuracy: {accuracy:.2f}%")

        # Validation pass
        for model in models:
            model.eval()

        val_loss = [0.0] * num_models
        val_correct = [0] * num_models
        val_total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs = inputs.to(device)
                labels = labels.to(device).reshape(-1)
                val_total += labels.size(0)

                for j, model in enumerate(models):
                    outputs = model(inputs)
                    val_loss[j] += criterion(outputs, labels).item()
                    val_correct[j] += (outputs.argmax(dim=1) == labels).sum().item()

        improved_this_epoch = False
        for j, model in enumerate(models):
            val_accuracy = (val_correct[j] / val_total) * 100
            val_loss[j] = val_loss[j] / len(val_loader)

            print(f"Model {j+1} Validation Accuracy: {val_accuracy:.2f}%, Validation Loss: {val_loss[j]:.4f}")

            # One scheduler step per epoch and model.
            schedulers[j].step()

            # Checkpoint the model when it beats its own best.
            if val_accuracy > best_per_model[j]:
                best_per_model[j] = val_accuracy
                best_val_accuracy = max(best_val_accuracy, val_accuracy)
                save_checkpoint(epoch, model, optimizers[j], filename=f"best_model_{j+1}.pth.tar")
                print(f"Best model {j+1} saved with accuracy: {val_accuracy:.2f}%")
                improved_this_epoch = True

        # Early stopping counts whole epochs without any improvement.
        if improved_this_epoch:
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
        if epochs_without_improvement >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

        torch.cuda.empty_cache()

# Run training for up to 1000 epochs with an early-stopping patience of 200.
train_model(models, train_loader, val_loader, criterion, optimizers, schedulers, num_epochs=1000, patience=200)

## 8. 슬라이딩 윈도우와 예측

In [None]:
# Restore each model from its checkpoint file and switch it to eval mode.
# Expects one best_model_<k>.pth.tar per model in the working directory.
for i, model in enumerate(models):
    checkpoint = torch.load(f'best_model_{i+1}.pth.tar')
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

# Move the gyroscope features to the selected device.
gyro_data_tensor = gyro_data_tensor.to(device)

print(f"gyro_data_tensor shape: {gyro_data_tensor.shape}")

# Predict in fixed-size batches.
# NOTE(review): preprocess_gyro_data presumably returns a 2-D (rows, 15)
# tensor, while the models index a 3-D (batch, seq, features) output; no
# sliding windows are created here -- confirm the expected input layout.
batch_size = 32
all_predictions = []

for i in range(0, len(gyro_data_tensor), batch_size):
    batch = gyro_data_tensor[i:i+batch_size]
    # Zero-pad the final, shorter batch up to batch_size.
    if len(batch) < batch_size:
        padding = torch.zeros(batch_size - len(batch), *batch.shape[1:], device=device)
        batch = torch.cat([batch, padding], dim=0)
    batch_predictions = ensemble_predict(models, batch)
    # Drop the predictions that belong to padding rows.
    all_predictions.append(batch_predictions[:len(gyro_data_tensor[i:i+batch_size])])

# Concatenate the per-batch predictions into one NumPy array on the CPU.
all_predictions = torch.cat(all_predictions).cpu().numpy()

# Map per-window predictions back onto individual samples
def map_predictions_to_original(original_data_length, predictions, window_size, stride, num_classes=6):
    """Turn per-window class predictions into one label per original sample.

    Window ``i`` covers samples ``[i * stride, i * stride + window_size)``.
    Each window casts one vote for its predicted class on every sample it
    covers, and each sample takes the class with the most votes (ties go to
    the lowest class index).

    Samples after the last window receive no votes. They used to fall back to
    class 0 silently; they now inherit the label of the nearest covered
    sample before them.

    Args:
        original_data_length: Number of samples in the original recording.
        predictions: Iterable of integer class ids, one per window.
        window_size: Samples per window.
        stride: Offset between consecutive window starts.
        num_classes: Number of classes (default 6, the value that used to be
            hard-coded).

    Returns:
        1-D integer array of length ``original_data_length``. It is all
        zeros when ``predictions`` is empty.
    """
    votes = np.zeros((original_data_length, num_classes))

    for i, pred in enumerate(predictions):
        start = i * stride
        if start >= original_data_length:
            break  # this window and all later ones lie past the data
        # Slicing clips the window end at the data length.
        votes[start:start + window_size, int(pred)] += 1

    # Majority vote per sample.
    final_predictions = np.argmax(votes, axis=1)

    covered = votes.any(axis=1)
    if covered.any() and not covered.all():
        # Index of the most recent covered sample at or before each position
        # (-1 while none has been seen yet).
        last_covered = np.maximum.accumulate(
            np.where(covered, np.arange(original_data_length), -1)
        )
        fillable = ~covered & (last_covered >= 0)
        final_predictions[fillable] = final_predictions[last_covered[fillable]]

    return final_predictions

# Reload the raw gyroscope recording.
original_gyro_data = pd.read_csv('./원본 데이터/자이로 데이터.csv')

# Build one predicted label per original row from the window predictions.
original_predictions = map_predictions_to_original(len(original_gyro_data), all_predictions, window_size=128, stride=64)

# Translate class ids into activity names.
activity_labels = {0: "walking", 1: "walking_upstairs", 2: "walking_downstairs", 3: "sitting", 4: "standing", 5: "laying"}
predicted_activities = [activity_labels[label] for label in original_predictions]

# Show the first ten predictions.
for i, activity in enumerate(predicted_activities[:10]):
    print(f"Sample {i}: {activity}")

# Attach the predictions to the raw data.
original_gyro_data['Predicted_Activity'] = predicted_activities

# Save the labeled data next to the original file.
original_gyro_data.to_csv('./원본 데이터/자이로 데이터_예측.csv', index=False)

# Bar chart of how often each activity was predicted.
plt.figure(figsize=(12, 6))
activity_counts = pd.Series(predicted_activities).value_counts()
sns.barplot(x=activity_counts.index, y=activity_counts.values)
plt.title('Predicted Activity Distribution')
plt.xlabel('Activity')
plt.ylabel('Count')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Plot how the predicted activity changes over time.
plt.figure(figsize=(20, 6))
# Plot the class ids themselves. The previous version plotted pd.factorize()
# codes, which are numbered by order of first appearance and therefore did not
# match the tick labels below, which follow the class-id order of
# activity_labels.
plt.plot(original_predictions)
plt.title('Activity Changes Over Time')
plt.xlabel('Time')
plt.ylabel('Activity')
plt.yticks(list(activity_labels.keys()), list(activity_labels.values()))
plt.tight_layout()
plt.show()

print("예측 완료 및 결과 저장됨")

## 9. 성능 평가 및 시각화

In [None]:
# Reload the gyroscope data and attach the predicted activity names.
gyro_data = pd.read_csv('./원본 데이터/자이로 데이터.csv')
gyro_data['Predicted_Activity'] = predicted_activities[:len(gyro_data)]  # trim to the data length

# Save the labeled gyroscope data (same output path as the previous cell).
gyro_data.to_csv('./원본 데이터/자이로 데이터_예측.csv', index=False)

# Count plot of the predicted activities.
plt.figure(figsize=(10, 6))
sns.countplot(x=gyro_data['Predicted_Activity'])
plt.title('Predicted Activity Distribution')
plt.xticks(rotation=45)
plt.show()

# Relative frequency of each predicted activity.
print(gyro_data['Predicted_Activity'].value_counts(normalize=True))

# Cluster the feature rows into 6 groups for comparison with the predictions.
kmeans = KMeans(n_clusters=6, random_state=42)
kmeans_labels = kmeans.fit_predict(gyro_data_tensor.cpu().numpy().reshape(gyro_data_tensor.shape[0], -1))

# Compare the cluster assignments with the ensemble predictions.
# NOTE(review): k-means cluster ids are arbitrary indices; without a
# cluster-to-class matching step this score presumably does not measure
# agreement -- confirm before reporting it as accuracy.
kmeans_accuracy = accuracy_score(all_predictions, kmeans_labels)
print(f"K-Means Clustering Accuracy: {kmeans_accuracy:.2f}")

# Confusion matrix and per-class report, treating the ensemble predictions as
# the reference labels.
cm = confusion_matrix(all_predictions, kmeans_labels)
ConfusionMatrixDisplay(cm).plot()
plt.show()

print(classification_report(all_predictions, kmeans_labels))