In [7]:
import pandas as pd
import numpy as np
from numpy.fft import fft
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d
from scipy.signal import resample
from scipy.stats import skew, kurtosis, entropy, uniform
from scipy.signal import find_peaks, stft
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import KFold, RandomizedSearchCV, train_test_split
from sklearn.metrics import accuracy_score, f1_score
from skorch import NeuralNetClassifier
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from collections import Counter
import zipfile

# **1. 데이터 로드 및 전처리**

In [8]:
# Load the recorded gyroscope data and parse RegisterDate as datetime.
gyro_data = pd.read_csv('./원본 데이터/자이로 데이터.csv')
gyro_data['RegisterDate'] = pd.to_datetime(gyro_data['RegisterDate'])

# Path of the UCI-HAR archive; members are read directly from the ZIP
# without extracting them to disk.
uci_har_zip_path = './원본 데이터/UCI HAR Dataset.zip'


def _read_gyro_axis(zip_ref, axis):
    """Read the body-gyro training windows of one axis from the open archive.

    Args:
        zip_ref: an open ``zipfile.ZipFile`` for the UCI-HAR archive.
        axis: lowercase axis letter used in the member name ('x', 'y' or 'z').

    Returns:
        2-D NumPy array with one row per line of the whitespace-separated file.
    """
    member = f'UCI HAR Dataset/train/Inertial Signals/body_gyro_{axis}_train.txt'
    with zip_ref.open(member) as axis_file:
        # Raw string: '\s' inside a plain literal is an invalid escape sequence.
        return pd.read_csv(axis_file, sep=r'\s+', header=None).values


with zipfile.ZipFile(uci_har_zip_path, 'r') as zip_ref:
    gyro_x_train = _read_gyro_axis(zip_ref, 'x')
    gyro_y_train = _read_gyro_axis(zip_ref, 'y')
    gyro_z_train = _read_gyro_axis(zip_ref, 'z')

# Sanity check of the loaded shapes.
print(f"Gyro X train shape: {gyro_x_train.shape}")
print(f"Gyro Y train shape: {gyro_y_train.shape}")
print(f"Gyro Z train shape: {gyro_z_train.shape}")

# Collapse every window (row) to its mean, giving one X/Y/Z value per window.
uci_har_gyro_df = pd.DataFrame({
    'X': np.mean(gyro_x_train, axis=1),
    'Y': np.mean(gyro_y_train, axis=1),
    'Z': np.mean(gyro_z_train, axis=1)
})

Gyro X train shape: (7352, 128)
Gyro Y train shape: (7352, 128)
Gyro Z train shape: (7352, 128)


# **2. 주파수 업샘플링 (UCI-HAR 데이터를 50Hz에서 100Hz로 업샘플링)**

In [9]:
current_freq = 50   # sampling rate attributed to the UCI-HAR series (Hz)
desired_freq = 100  # sampling rate of the recorded gyroscope data (Hz)

# NOTE(review): each row of uci_har_gyro_df is the mean of one window, so
# treating consecutive rows as 50 Hz samples is an assumption - confirm.
n_samples_uci = len(uci_har_gyro_df)

# Output length follows from the frequency ratio instead of a hard-coded 2x.
# The label expansion later in the notebook (np.repeat(y_train, 2)) must use
# the same factor.
n_samples_upsampled = n_samples_uci * desired_freq // current_freq

# Both time axes span the same duration; only the number of points differs.
duration = n_samples_uci / current_freq
t_current = np.linspace(0, duration, num=n_samples_uci)
t_new = np.linspace(0, duration, num=n_samples_upsampled)

# Linear interpolation of every axis onto the denser time axis.
uci_har_gyro_df_upsampled = pd.DataFrame({
    axis: np.interp(t_new, t_current, uci_har_gyro_df[axis])
    for axis in ['X', 'Y', 'Z']
})

# Keep only as many recorded gyro rows as there are UCI-HAR rows.
gyro_sliced = gyro_data.iloc[:n_samples_uci].copy()

# **3. 데이터 정규화**
***
> MinMaxScaler를 사용하여 자이로 데이터와 UCI-HAR 데이터를 [0,1] 범위로 정규화

In [10]:
# Min-max scale the raw X/Y/Z columns of every frame to [0, 1].
# fit_transform refits the scaler on each call, so every frame is scaled
# against its own min/max.
axis_columns = ['X', 'Y', 'Z']
scaler = MinMaxScaler()
gyro_sliced[axis_columns] = scaler.fit_transform(gyro_sliced[axis_columns])
uci_har_gyro_df[axis_columns] = scaler.fit_transform(uci_har_gyro_df[axis_columns])

# The training features are built from uci_har_gyro_df_upsampled, which was
# interpolated before this cell runs; scaling only uci_har_gyro_df would leave
# the training inputs unscaled while the gyro inputs are in [0, 1].
uci_har_gyro_df_upsampled[axis_columns] = scaler.fit_transform(
    uci_har_gyro_df_upsampled[axis_columns]
)

# **4. 특성 엔지니어링 함수 정의**
***
> RMS(root mean square, 제곱평균제곱근), Skewness(왜도), Kurtosis(첨도), Entropy(불확실성) 및 피크 탐지 계산 함수 정의

In [11]:
# Root-mean-square helper.
def rms(values):
    """Return the root mean square of ``values``.

    Args:
        values: any array-like of numbers (list, NumPy array, pandas Series).

    Returns:
        The square root of the mean of the squared values, as a NumPy float.
    """
    # Converting to a float array lets plain sequences work and avoids
    # integer overflow when squaring integer input.
    samples = np.asarray(values, dtype=float)
    return np.sqrt(np.mean(samples ** 2))

# Histogram-based entropy helper.
def calc_entropy(values):
    """Return the Shannon entropy of ``values`` estimated from a 30-bin histogram.

    The histogram counts are turned into bin probabilities before the small
    smoothing constant is added. Adding the constant to density values instead
    would make the result depend on the numeric scale of the data, because
    density values shrink as the value range grows.

    Args:
        values: array-like of numbers.

    Returns:
        Entropy (natural log) of the smoothed bin probabilities, or NaN when
        ``values`` is empty.
    """
    counts = np.histogram(values, bins=30)[0]
    total = counts.sum()
    if total == 0:
        # No samples: the distribution is undefined.
        return float('nan')
    probabilities = counts / total
    # scipy's entropy renormalises its input, so the smoothing term only has
    # to keep empty bins strictly positive.
    return entropy(probabilities + 1e-6)

# FFT feature helper.
def fft_features(values, n=10):
    """Return the mean magnitude of the first ``n`` FFT bins of ``values``.

    The bins are taken in FFT output order (starting at the DC bin), not
    sorted by magnitude.
    """
    magnitudes = np.abs(fft(values))
    leading_bins = magnitudes[:n]
    return np.mean(leading_bins)

# STFT feature helper.
def stft_features(values, n=10):
    """Return the mean magnitude over all cells of the STFT of ``values``.

    ``n`` is accepted for signature symmetry with ``fft_features`` but is not
    used by the computation.
    """
    spectrogram = stft(values)[2]          # third return value is Zxx
    flat_magnitudes = np.abs(spectrogram).flatten()
    return np.mean(flat_magnitudes)

# Per-axis statistics (RMS, skewness, kurtosis, entropy) and peak flags.
def calculate_features(df, axis):
    """Add statistical feature columns for one axis to ``df`` in place.

    The four statistics are scalars computed over the whole column, so each
    of ``rms_<axis>``, ``skew_<axis>``, ``kurtosis_<axis>`` and
    ``entropy_<axis>`` holds the same value in every row. ``peaks_<axis>`` is
    a 0/1 flag marking the rows that ``find_peaks(..., height=0)`` reports.

    Args:
        df: DataFrame containing a column named ``axis``; it is modified.
        axis: name of the signal column, e.g. 'X'.

    Returns:
        The same ``df`` object, with the new columns added.
    """
    signal = df[axis]
    df[f'rms_{axis}'] = rms(signal)
    df[f'skew_{axis}'] = skew(signal)
    df[f'kurtosis_{axis}'] = kurtosis(signal)
    df[f'entropy_{axis}'] = calc_entropy(signal)

    # find_peaks returns positional indices. Writing them through df.loc would
    # treat them as index labels, which is only correct for a default
    # RangeIndex, so the flags are built positionally instead.
    peaks, _ = find_peaks(signal.to_numpy(), height=0)
    peak_flags = np.zeros(len(df), dtype=np.int64)
    peak_flags[peaks] = 1
    df[f'peaks_{axis}'] = peak_flags

    return df

# Fourier (FFT) and short-time Fourier (STFT) summary features.
def calculate_fft_stft(df, axis, n=10):
    """Add ``fft_<axis>`` and ``stft_<axis>`` columns to ``df`` in place.

    Both values are scalars computed over the whole column, so each new column
    is constant. ``n`` is forwarded to ``fft_features`` only.

    Returns:
        The same ``df`` object, with the two columns added.
    """
    samples = df[axis].values

    df[f'fft_{axis}'] = fft_features(samples, n=n)
    df[f'stft_{axis}'] = stft_features(samples)

    return df

# Compute every feature defined above for all three axes of both frames.
# Both helpers modify the frame they receive and return that same frame.
for axis in ['X', 'Y', 'Z']:
    uci_har_gyro_df_upsampled = calculate_fft_stft(
        calculate_features(uci_har_gyro_df_upsampled, axis), axis
    )
    gyro_sliced = calculate_fft_stft(calculate_features(gyro_sliced, axis), axis)

# Min-max scale the spectral summary columns; the scaler is refit per frame.
# NOTE(review): fft_*/stft_* are assigned from a single scalar per frame, so
# each column is constant; min-max scaling a constant column is expected to
# yield all zeros - confirm this is intended.
scaler = MinMaxScaler()
columns_to_scale = ['fft_X', 'fft_Y', 'fft_Z', 'stft_X', 'stft_Y', 'stft_Z']
for frame in (uci_har_gyro_df_upsampled, gyro_sliced):
    frame[columns_to_scale] = scaler.fit_transform(frame[columns_to_scale])

# **5. 특성 데이터 구축 및 학습 데이터 준비**

In [12]:
# Column layout of the model input (21 features). One shared list guarantees
# that the training frame and the gyro frame use the same order.
feature_columns = [
    'X', 'Y', 'Z',
    'rms_X', 'rms_Y', 'rms_Z',
    'skew_X', 'skew_Y', 'skew_Z',
    'entropy_X', 'entropy_Y', 'entropy_Z',
    'fft_X', 'fft_Y', 'fft_Z',
    'peaks_X', 'peaks_Y', 'peaks_Z',
    'stft_X', 'stft_Y', 'stft_Z',
]

X_train_features = uci_har_gyro_df_upsampled[feature_columns].to_numpy(dtype=float)
X_gyro_features = gyro_sliced[feature_columns].to_numpy(dtype=float)

# Read the activity labels straight from the archive.
with zipfile.ZipFile(uci_har_zip_path, 'r') as zip_ref:
    with zip_ref.open('UCI HAR Dataset/train/y_train.txt') as y_train_file:
        y_train = pd.read_csv(y_train_file, header=None).values.flatten()

# Sanity check of the label shape.
print(f"y_train shape: {y_train.shape}")

# Every original row became two rows during upsampling, so repeat each label
# twice; then shift labels to start at 0 if they start above 0.
y_train_upsampled = np.repeat(y_train, 2)
if y_train_upsampled.min() > 0:
    y_train_upsampled = y_train_upsampled - 1

assert len(X_train_features) == len(y_train_upsampled), \
    "feature rows and labels must have the same length"

# Hold out 20% of the rows for validation.
X_train_features, X_val, y_train_upsampled, y_val = train_test_split(X_train_features,
                                                                     y_train_upsampled,
                                                                     test_size=0.2,
                                                                     random_state=42)

# y_val comes from the already shifted labels. Shifting it again whenever its
# minimum is above 0 would corrupt the labels if the validation split happened
# to contain no class 0, so no second shift is applied.

# Add a sequence axis of length 1: (batch_size, sequence_length, input_size).
X_train_features = X_train_features.reshape(X_train_features.shape[0], 1, X_train_features.shape[1])
X_val = X_val.reshape(X_val.shape[0], 1, X_val.shape[1])

# Tensor datasets and loaders used by the manual training loop.
train_dataset = TensorDataset(torch.tensor(X_train_features, dtype=torch.float32), torch.tensor(y_train_upsampled, dtype=torch.long))
val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.long))

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64)

y_train shape: (7352,)


# **6. 모델 정의 (LSTM, GRU, CNN-LSTM, BiLSTM, Transformer)**
***
> 각 모델은 행동 인지 분류를 수행하기 위한 다른 신경망 구조로 정의됨

In [13]:
# 모델 정의 (LSTM, GRU, CNN-LSTM, BiLSTM, Transformer)
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear classifier on the last time step.

    Input is expected as (batch, seq_len, input_size) because the LSTM is
    built with ``batch_first=True``; output is (batch, num_classes) logits.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        # Explicit all-zero initial hidden and cell states on the input's device.
        state_shape = (self.lstm.num_layers, x.size(0), self.lstm.hidden_size)
        initial_state = (
            torch.zeros(state_shape, device=x.device),
            torch.zeros(state_shape, device=x.device),
        )
        sequence_out, _ = self.lstm(x, initial_state)
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

class GRUModel(nn.Module):
    """Stacked GRU followed by a linear classifier on the last time step.

    Input is expected as (batch, seq_len, input_size) because the GRU is
    built with ``batch_first=True``; output is (batch, num_classes) logits.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout):
        super().__init__()
        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        # Explicit all-zero initial hidden state on the input's device.
        initial_hidden = torch.zeros(
            (self.gru.num_layers, x.size(0), self.gru.hidden_size),
            device=x.device,
        )
        sequence_out, _ = self.gru(x, initial_hidden)
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

class CNNLSTMModel(nn.Module):
    """1-D convolution over the time axis, then an LSTM and a linear classifier.

    Args:
        input_size: number of features per time step; used as the number of
            input channels of the convolution.
        hidden_size: LSTM hidden size.
        num_layers: number of stacked LSTM layers.
        num_classes: number of output logits.
        dropout: dropout between LSTM layers.

    Input is (batch, seq_len, input_size); output is (batch, num_classes).
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout):
        super(CNNLSTMModel, self).__init__()
        # in_channels follows input_size instead of a hard-coded 21, so the
        # model works for any feature count passed by the caller.
        self.conv = nn.Conv1d(in_channels=input_size, out_channels=64, kernel_size=3, padding=1)
        self.lstm = nn.LSTM(64, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # Conv1d wants (batch, channels, seq_len), so swap the last two axes.
        x = x.permute(0, 2, 1)
        x = self.conv(x)
        # Back to (batch, seq_len, out_channels) for the batch-first LSTM.
        x = x.permute(0, 2, 1)
        out, _ = self.lstm(x)
        # Classify from the output of the last time step.
        out = self.fc(out[:, -1, :])
        return out

class BiLSTMModel(nn.Module):
    """Bidirectional stacked LSTM with a linear classifier on the last time step.

    The classifier input has ``hidden_size * 2`` features because the forward
    and backward outputs are concatenated. Input is (batch, seq_len,
    input_size); output is (batch, num_classes) logits.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout):
        super().__init__()
        self.bilstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
            dropout=dropout,
        )
        self.fc = nn.Linear(in_features=hidden_size * 2, out_features=num_classes)

    def forward(self, x):
        # Two directions per layer, hence num_layers * 2 initial states.
        state_shape = (self.bilstm.num_layers * 2, x.size(0), self.bilstm.hidden_size)
        initial_state = (
            torch.zeros(state_shape, device=x.device),
            torch.zeros(state_shape, device=x.device),
        )
        sequence_out, _ = self.bilstm(x, initial_state)
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

class TransformerModel(nn.Module):
    """Transformer encoder classifier over (batch, seq_len, input_size) input.

    Args:
        input_size: number of features per time step.
        hidden_size: model width (``d_model``); must be divisible by ``num_heads``.
        num_heads: number of attention heads.
        num_layers: number of encoder layers.
        num_classes: number of output logits.
        dropout: dropout used inside the encoder layers.
        max_len: longest supported sequence. Defaults to ``input_size`` so the
            learned positional parameter keeps the shape
            (1, input_size, hidden_size) it had before this argument existed.

    Raises:
        ValueError: from ``forward`` when the sequence is longer than ``max_len``.
    """

    def __init__(self, input_size, hidden_size, num_heads, num_layers, num_classes, dropout,
                 max_len=None):
        super(TransformerModel, self).__init__()
        if max_len is None:
            max_len = input_size
        self.embedding = nn.Linear(input_size, hidden_size)
        self.positional_encoding = nn.Parameter(torch.zeros(1, max_len, hidden_size))
        # batch_first=True: the input is (batch, seq, feature). Without it the
        # encoder treats axis 0 as the sequence and attends across the samples
        # of a batch.
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=hidden_size, nhead=num_heads, dropout=dropout, batch_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        seq_len = x.size(1)
        if seq_len > self.positional_encoding.size(1):
            raise ValueError(
                f"sequence length {seq_len} exceeds max_len {self.positional_encoding.size(1)}"
            )
        # Slice the positional parameter to the actual sequence length; adding
        # the full parameter would broadcast a length-1 sequence up to max_len.
        x = self.embedding(x) + self.positional_encoding[:, :seq_len, :]
        x = self.transformer(x)
        # Classify from the output of the last time step.
        out = self.fc(x[:, -1, :])
        return out

# **7. 모델 학습 및 평가**
***
> 선택한 모델을 사용해 학습 및 검증 데이터를 학습시키고, 최적의 모델을 찾음

In [14]:
# Write the per-epoch training history to a tab-separated log file.
def log_training_results(train_losses, val_losses, train_accuracies, val_accuracies, filename='training_log.txt'):
    """Write one header line plus one line per epoch to ``filename``.

    Epochs are numbered from 1. Losses are written with four decimals and
    accuracies with two decimals followed by '%'. An existing file is
    overwritten.
    """
    history = zip(train_losses, val_losses, train_accuracies, val_accuracies)
    with open(filename, 'w') as log_file:
        log_file.write("Epoch\tTrain Loss\tVal Loss\tTrain Accuracy\tVal Accuracy\n")
        log_file.writelines(
            f"{epoch}\t{train_loss:.4f}\t{val_loss:.4f}\t{train_acc:.2f}%\t{val_acc:.2f}%\n"
            for epoch, (train_loss, val_loss, train_acc, val_acc) in enumerate(history, 1)
        )

# Persist a training checkpoint.
def save_checkpoint(model, optimizer, epoch, filename):
    """Save the model state, optimizer state and epoch number to ``filename``.

    The file holds a dict with the keys 'model_state_dict',
    'optimizer_state_dict' and 'epoch'.
    """
    torch.save(
        dict(
            model_state_dict=model.state_dict(),
            optimizer_state_dict=optimizer.state_dict(),
            epoch=epoch,
        ),
        filename,
    )

# Restore a training checkpoint.
def load_checkpoint(filename, model, optimizer):
    """Load a checkpoint written by ``save_checkpoint`` into ``model`` and ``optimizer``.

    Args:
        filename: path of the checkpoint file.
        model: module whose parameters are overwritten.
        optimizer: optimizer whose state is overwritten.

    Returns:
        Tuple ``(model, optimizer, epoch)`` where ``epoch`` is the value stored
        in the checkpoint.

    Raises:
        FileNotFoundError: if ``filename`` does not exist.
    """
    # map_location='cpu' lets a checkpoint saved on a GPU be read on a machine
    # without one; load_state_dict then copies the values into the existing
    # parameters and optimizer state.
    checkpoint = torch.load(filename, map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']
    return model, optimizer, epoch

# Early-stopping tracker.
class EarlyStopping:
    """Flag when the validation loss has stopped improving.

    A call counts as an improvement unless ``val_loss`` is greater than
    ``best_loss - min_delta``. After ``patience`` consecutive calls without
    improvement, ``early_stop`` is set to True.
    """

    def __init__(self, patience=5, min_delta=0):
        self.patience, self.min_delta = patience, min_delta
        self.counter = 0          # consecutive calls without improvement
        self.best_loss = None     # lowest accepted validation loss so far
        self.early_stop = False

    def __call__(self, val_loss):
        # The first observation only seeds the baseline.
        if self.best_loss is None:
            self.best_loss = val_loss
            return

        stalled = val_loss > self.best_loss - self.min_delta
        if not stalled:
            self.best_loss = val_loss
            self.counter = 0
            return

        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True

# Training and evaluation loop.
def _run_validation(model, val_loader, criterion, device):
    """Evaluate ``model`` on ``val_loader`` without gradients.

    Returns:
        Tuple ``(mean_loss, accuracy_percent, y_true, y_pred)`` where the last
        two are flat Python lists of integer class ids.
    """
    model.eval()
    loss_sum = 0.0
    y_true, y_pred = [], []
    with torch.no_grad():
        for features, labels in val_loader:
            features = features.to(device)
            labels = labels.to(device)
            outputs = model(features)
            loss_sum += criterion(outputs, labels).item()
            y_pred.extend(outputs.argmax(dim=1).cpu().tolist())
            y_true.extend(labels.cpu().tolist())

    # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
    mean_loss = loss_sum / max(len(val_loader), 1)
    n_correct = sum(int(p == t) for p, t in zip(y_pred, y_true))
    accuracy_percent = 100 * n_correct / max(len(y_true), 1)
    return mean_loss, accuracy_percent, y_true, y_pred


def train_and_evaluate_model(model, train_loader, val_loader, num_epochs, learning_rate):
    """Train ``model`` with Adam and cross-entropy, validating after every epoch.

    A checkpoint named 'checkpoint_v2.0.pth' is loaded when present and
    training resumes after the epoch stored in it. A checkpoint is written
    after every epoch, training stops early when the validation loss stalls,
    and the history is written to 'training_log.txt'.

    Args:
        model: torch module producing class logits.
        train_loader: iterable of ``(features, labels)`` training batches.
        val_loader: iterable of ``(features, labels)`` validation batches.
        num_epochs: index at which training stops (exclusive upper bound).
        learning_rate: Adam learning rate.

    Returns:
        Tuple ``(accuracy, f1, train_losses, val_losses, train_accuracies,
        val_accuracies)``. ``accuracy`` and the weighted ``f1`` are computed
        from the predictions of the last validation pass; accuracies in the
        history lists are percentages.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    early_stopping = EarlyStopping(patience=3, min_delta=0.01)

    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []

    # Resume from a checkpoint when one exists.
    # NOTE(review): the same file name is used for every model passed in;
    # confirm the checkpoint matches the architecture being trained.
    try:
        model, optimizer, last_epoch = load_checkpoint('checkpoint_v2.0.pth', model, optimizer)
        # Checkpoints written by this function store the index of the epoch
        # that just finished, so training continues with the next one.
        start_epoch = last_epoch + 1
        print(f"Continuing training from epoch {start_epoch}")
    except FileNotFoundError:
        print("No checkpoint found, starting from epoch 0")
        start_epoch = 0

    # Batches are moved to wherever the model's parameters live.
    device = next(model.parameters()).device
    y_true, y_pred = [], []

    for epoch in range(start_epoch, num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        # Training pass.
        for features, labels in train_loader:
            features = features.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(features)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # Checkpoint after every epoch.
        save_checkpoint(model, optimizer, epoch, f'checkpoint_epoch_{epoch}.pth')

        train_loss = running_loss / max(len(train_loader), 1)
        train_acc = 100 * correct / max(total, 1)
        train_losses.append(train_loss)
        train_accuracies.append(train_acc)

        # Validation pass.
        val_loss, val_acc, y_true, y_pred = _run_validation(model, val_loader, criterion, device)
        val_losses.append(val_loss)
        val_accuracies.append(val_acc)

        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')

        early_stopping(val_loss)
        if early_stopping.early_stop:
            print(f"Early stopping at epoch {epoch+1}")
            break

    # If the loop never ran (checkpoint already at num_epochs), evaluate once
    # so that the final metrics are still based on real predictions.
    if not val_losses:
        _, _, y_true, y_pred = _run_validation(model, val_loader, criterion, device)

    # Final metrics use label arrays, not the correct/total counters.
    accuracy = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='weighted')

    log_training_results(train_losses, val_losses, train_accuracies, val_accuracies, 'training_log.txt')

    return accuracy, f1, train_losses, val_losses, train_accuracies, val_accuracies

# **8. 학습 곡선 시각화**

In [15]:
def plot_learning_curves(train_losses, val_losses, train_accuracies, val_accuracies):
    """Plot loss and accuracy histories side by side and show the figure.

    The x axis is the epoch number starting at 1; its length is taken from
    ``train_losses``.
    """
    epochs = range(1, len(train_losses) + 1)

    # (subplot position, train series, train label, val series, val label, title, y label)
    panels = [
        (1, train_losses, 'Train Loss', val_losses, 'Validation Loss',
         'Loss Over Epochs', 'Loss'),
        (2, train_accuracies, 'Train Accuracy', val_accuracies, 'Validation Accuracy',
         'Accuracy Over Epochs', 'Accuracy (%)'),
    ]

    plt.figure(figsize=(12, 5))
    for position, train_series, train_label, val_series, val_label, title, y_label in panels:
        plt.subplot(1, 2, position)
        plt.plot(epochs, train_series, label=train_label)
        plt.plot(epochs, val_series, label=val_label)
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.legend()

    plt.tight_layout()
    plt.show()

# **9. 모델 정의**

In [16]:
# Number of distinct activity classes found in y_train.
num_classes = len(np.unique(y_train))

# Constructor arguments shared by every architecture; the feature count is
# the last axis of the (batch, seq, feature) training array.
_shared_model_kwargs = dict(
    input_size=X_train_features.shape[2],
    hidden_size=128,
    num_layers=2,
    num_classes=num_classes,
    dropout=0.3,
)

# One instance per architecture, created in the same order as before.
models = [
    model_cls(**_shared_model_kwargs)
    for model_cls in (LSTMModel, GRUModel, CNNLSTMModel, BiLSTMModel)
]
models.append(TransformerModel(num_heads=4, **_shared_model_kwargs))



# **10. RandomizedSearchCV 하이퍼파라미터 최적화**

In [None]:
# Best hyperparameters and cross-validation scores, keyed by model class name.
best_params_per_model = {}
best_scores_per_model = {}

# The training arrays do not change between models, so cast them once.
X_train_np = X_train_features.astype(np.float32)
y_train_np = y_train_upsampled.astype(np.int64)

# Run a randomized hyperparameter search for every architecture.
for model in models:
    model_name = model.__class__.__name__
    print(f"Optimizing model: {model_name}")

    # Search space shared by all architectures.
    params = {
        'lr': uniform(0.0001, 0.01),
        'module__hidden_size': [64, 128, 256],
        'module__num_layers': [1, 2, 3],
        'module__dropout': [0.2, 0.3, 0.5],
        'optimizer__weight_decay': [1e-4, 1e-5, 0],
        'batch_size': [16, 32, 64]
    }
    if isinstance(model, TransformerModel):
        # Only the transformer constructor takes num_heads.
        params['module__num_heads'] = [4, 8]

    # Wrap the architecture for scikit-learn. The class is passed rather than
    # the instance because every candidate is built from the module__ arguments.
    net = NeuralNetClassifier(
        module=type(model),
        module__input_size=X_train_features.shape[2],
        module__num_classes=num_classes,
        criterion=nn.CrossEntropyLoss,
        optimizer=optim.Adam,
        max_epochs=20,
        iterator_train__shuffle=True,
        device='cuda' if torch.cuda.is_available() else 'cpu',
        verbose=1  # log every epoch
    )

    rs = RandomizedSearchCV(
        net,
        params,
        refit=True,
        cv=KFold(n_splits=5, shuffle=True, random_state=42),
        scoring='accuracy',
        n_iter=10,
        verbose=2,
        random_state=42
    )

    try:
        rs.fit(X_train_np, y_train_np)
    except RuntimeError as e:
        print(f"Error occurred during model fitting: {e}")
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        # Only CUDA failures are retried on the CPU. Anything else is re-raised,
        # because continuing would read best_params_ from an unfitted search.
        if "CUDA" not in str(e):
            raise
        net.set_params(device='cpu')
        rs.fit(X_train_np, y_train_np)

    # Release cached GPU memory before the next architecture.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    best_params_per_model[model_name] = rs.best_params_
    best_scores_per_model[model_name] = rs.best_score_

    print(f"Best parameters for {model_name}: {rs.best_params_}")
    print(f"Best cross-validation accuracy for {model_name}: {rs.best_score_:.4f}")



Optimizing model: LSTMModel
Fitting 5 folds for each of 10 candidates, totalling 50 fits
  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m1.7720[0m       [32m0.2545[0m        [35m1.7219[0m  2.2033
      2        [36m1.6143[0m       [32m0.2853[0m        [35m1.5507[0m  1.8249
      3        [36m1.5427[0m       [32m0.3071[0m        [35m1.5142[0m  1.7082
      4        [36m1.5162[0m       [32m0.3321[0m        [35m1.4900[0m  1.2988
      5        [36m1.4951[0m       0.3151        [35m1.4832[0m  1.3342
      6        [36m1.4786[0m       [32m0.3480[0m        1.4875  1.3481
      7        [36m1.4659[0m       0.3172        [35m1.4601[0m  1.3021
      8        [36m1.4582[0m       0.3092        [35m1.4434[0m  1.9335
      9        [36m1.4495[0m       0.3204        [35m1.4376[0m  2.5328
     10        [36m1.4374[0m       0.3459        [35m1.4128[0m  4.7385
     11    



  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m1.7801[0m       [32m0.2981[0m        [35m1.7486[0m  1.8621
      2        [36m1.6443[0m       [32m0.3018[0m        [35m1.5526[0m  1.2799
      3        [36m1.5361[0m       0.2933        [35m1.5408[0m  1.3080
      4        [36m1.5067[0m       [32m0.3055[0m        [35m1.4830[0m  1.3413
      5        [36m1.4904[0m       [32m0.3241[0m        [35m1.4779[0m  1.3424
      6        [36m1.4759[0m       0.2869        1.4787  1.2955
      7        [36m1.4759[0m       0.3151        [35m1.4667[0m  1.3081
      8        [36m1.4532[0m       [32m0.3273[0m        [35m1.4336[0m  1.3053
      9        [36m1.4467[0m       [32m0.3422[0m        [35m1.4221[0m  1.5420
     10        [36m1.4405[0m       [32m0.3698[0m        1.4232  1.7812
     11        [36m1.4301[0m       0.3459        [35m1.4164[0m  2.4854
     12        



  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m1.7831[0m       [32m0.2317[0m        [35m1.7584[0m  1.3076
      2        [36m1.6390[0m       [32m0.2816[0m        [35m1.5591[0m  1.3114
      3        [36m1.5510[0m       [32m0.3108[0m        [35m1.5104[0m  1.7831
      4        [36m1.5082[0m       0.2880        [35m1.5010[0m  1.8141
      5        [36m1.4864[0m       [32m0.3278[0m        [35m1.4971[0m  1.8711
      6        [36m1.4769[0m       0.2875        [35m1.4859[0m  1.2881
      7        [36m1.4694[0m       0.3193        [35m1.4727[0m  1.3051
      8        [36m1.4626[0m       [32m0.3496[0m        [35m1.4389[0m  1.3007
      9        [36m1.4495[0m       0.3464        [35m1.4294[0m  1.2733
     10        [36m1.4421[0m       0.3459        [35m1.4218[0m  1.2914
     11        [36m1.4330[0m       0.3353        1.4245  1.9486
     12        1.4337    



  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m1.7744[0m       [32m0.2862[0m        [35m1.7319[0m  1.7517
      2        [36m1.6247[0m       [32m0.2931[0m        [35m1.5499[0m  1.3013
      3        [36m1.5370[0m       0.2894        1.5838  1.2914
      4        [36m1.5162[0m       [32m0.3075[0m        [35m1.5057[0m  1.2420
      5        [36m1.4922[0m       [32m0.3208[0m        1.5197  1.2629
      6        [36m1.4838[0m       0.3181        [35m1.4708[0m  1.2677
      7        [36m1.4667[0m       [32m0.3425[0m        [35m1.4703[0m  1.2940
      8        [36m1.4601[0m       [32m0.3452[0m        [35m1.4397[0m  1.2410
      9        [36m1.4493[0m       [32m0.3473[0m        1.4531  1.5062
     10        [36m1.4399[0m       0.3468        [35m1.4342[0m  1.7814
     11        [36m1.4326[0m       [32m0.3563[0m        [35m1.4191[0m  2.4781
     12        

# **11. 최적의 성능을 가진 모델을 선택**

In [None]:
# Train every model with the manual loop and keep its validation metrics.
results = {}
model_f1_scores = []

# num_epochs and learning_rate are not defined by any earlier cell, so fall
# back to explicit defaults unless the session already provides them.
# NOTE(review): 20 mirrors max_epochs of the hyperparameter search and 1e-3 is
# a conventional Adam step size - confirm the intended values.
num_epochs = globals().get('num_epochs', 20)
learning_rate = globals().get('learning_rate', 1e-3)

for model in models:
    accuracy, f1, train_losses, val_losses, train_accuracies, val_accuracies = train_and_evaluate_model(
        model, train_loader, val_loader, num_epochs=num_epochs, learning_rate=learning_rate
    )
    results[model.__class__.__name__] = {'accuracy': accuracy, 'f1': f1}
    model_f1_scores.append(f1)  # reused as the weight of this model in the ensemble
    plot_learning_curves(train_losses, val_losses, train_accuracies, val_accuracies)

# Pick the model with the highest F1, using accuracy as the tie-breaker.
best_model_name = max(results, key=lambda x: (results[x]['f1'], results[x]['accuracy']))
best_model = [model for model in models if model.__class__.__name__ == best_model_name][0]

# **12. 최적 모델로 훈련 후 검증 데이터 평가**

In [None]:
X_val_np = X_val.astype(np.float32)
y_val_np = y_val.astype(np.int64)

# RandomizedSearchCV fits clones of `net`, so `net` itself is never trained
# and cannot score data. The refitted winner is rs.best_estimator_.
# NOTE(review): `rs` is the search of the last architecture in `models`;
# confirm that this is the model meant to be evaluated here.
fitted_net = rs.best_estimator_

train_acc = fitted_net.score(X_train_np, y_train_np)
val_acc = fitted_net.score(X_val_np, y_val_np)

print(f"Training Accuracy: {train_acc:.4f}")
print(f"Validation Accuracy: {val_acc:.4f}")

# **13. 모델 저장 및 불러오기**

In [None]:
# Save the weights of a trained model.
def save_model(model, filename):
    """Write ``model.state_dict()`` to ``filename``, creating its directory if needed.

    Args:
        model: torch module whose parameters are saved.
        filename: target path; missing parent directories are created.
    """
    import os  # local import: `os` is not imported at the top of the notebook

    parent_dir = os.path.dirname(filename)
    if parent_dir:
        # torch.save does not create directories on its own.
        os.makedirs(parent_dir, exist_ok=True)
    torch.save(model.state_dict(), filename)

# Save the weights of the selected best model.
save_model(best_model, './모델/best_model.pth')

# Load saved weights into a model.
def load_model(model, filename):
    """Load the state dict stored at ``filename`` into ``model`` and switch it to eval mode.

    Args:
        model: torch module with the same architecture as the saved one.
        filename: path of a file written by ``save_model``.
    """
    # map_location='cpu' lets weights saved on a GPU be read on a machine
    # without one; load_state_dict copies them into the existing parameters.
    state_dict = torch.load(filename, map_location='cpu')
    model.load_state_dict(state_dict)
    model.eval()  # inference mode: disables dropout

# Reload the saved best-model weights.
load_model(best_model, './모델/best_model.pth')

# **14. 가중치 기반 앙상블 예측**

In [None]:
# Weighted-vote ensemble prediction.
def weighted_ensemble_predict(models, features, model_f1_scores):
    """Combine the class predictions of several models by weighted voting.

    Every model votes for its arg-max class on each sample, and the vote
    counts with the weight ``model_f1_scores[i]`` of that model. The class
    with the largest total weight wins.

    Returns:
        1-D integer NumPy array with one predicted class per sample.
    """
    per_model_votes = []
    vote_weights = []

    for idx, member in enumerate(models):
        member.eval()
        with torch.no_grad():
            logits = member(features)
            _, winners = torch.max(logits.data, 1)
            per_model_votes.append(winners.cpu().numpy())
            vote_weights.append(model_f1_scores[idx])

    votes = np.array(per_model_votes)        # (n_models, n_samples)
    vote_weights = np.array(vote_weights)

    ensemble = np.zeros(votes.shape[1], dtype=int)
    # Transposing yields one row of per-model votes for every sample.
    for sample_idx, sample_votes in enumerate(votes.T):
        tally = Counter()
        for label, weight in zip(sample_votes, vote_weights):
            tally[label] += weight
        ensemble[sample_idx] = tally.most_common(1)[0][0]

    return ensemble

In [None]:
# Predict labels for the recorded gyro data with the reloaded best model.
# The models index their output as [:, -1, :], so they need a 3-D
# (batch, seq_len, features) input; add the length-1 sequence axis that the
# training data also has.
gyro_tensor = torch.tensor(X_gyro_features, dtype=torch.float32).unsqueeze(1)
best_model.eval()  # make sure dropout is disabled for inference
with torch.no_grad():
    predicted_labels = best_model(gyro_tensor).argmax(dim=1)
    gyro_sliced['predicted_label'] = predicted_labels.cpu().numpy()

# Save the labelled data.
gyro_sliced.to_csv('./원본 데이터/자이로 데이터_라벨링.csv', index=False)

In [None]:
# Ensemble prediction and visualisation of the result.
# The models need a 3-D (batch, seq_len, features) input, so add the
# length-1 sequence axis that the training data also has.
gyro_tensor = torch.tensor(X_gyro_features, dtype=torch.float32).unsqueeze(1)
predicted_labels = weighted_ensemble_predict(models, gyro_tensor, model_f1_scores)

gyro_sliced['predicted_label'] = predicted_labels

gyro_sliced.to_csv('./원본 데이터/자이로 데이터_라벨링.csv', index=False)

# Number of samples per predicted label.
plt.figure(figsize=(10, 6))
gyro_sliced['predicted_label'].value_counts().sort_index().plot(kind='bar')
plt.title('Predicted Activity Distribution')
plt.xlabel('Predicted Label')
plt.ylabel('Count')
plt.show()

# Predicted activity over time.
# NOTE(review): factorize() renumbers the labels in order of first appearance,
# so the plotted codes need not equal the predicted class ids - confirm.
plt.figure(figsize=(20, 8))
gyro_sliced['predicted_label_numeric'] = gyro_sliced['predicted_label'].factorize()[0]
plt.plot(gyro_sliced['RegisterDate'], gyro_sliced['predicted_label_numeric'], label='Predicted Label')
plt.title('Activity Prediction Over Time')
plt.xlabel('Time')
plt.ylabel('Predicted Activity (Numeric)')
plt.xticks(rotation=45)
plt.legend()
plt.show()

print("자이로 데이터에 예측된 라벨을 추가한 결과 파일이 저장되었습니다.")