In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
from scipy.signal import welch

import sys
sys.path.append('../../tool_code/function/') # "~~/tool_code/plot/" (상대 경로)

from DataPlot import Data_Load_Plot, Result_Plot, Train_Loss_Plot
from Scaling import time_scaling, time_inv_scaling

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

cuda:0


In [2]:
# 데이터 load & plot
fpath = "../../data/synthetic_data/"

Contaminated_data = np.load(fpath + "contaminated_by_realistic" + ".npy")
Clean_data = np.load(fpath + "clean_data" + ".npy")
Artifact_daata = Contaminated_data - Clean_data

# Data Standard Scaling
X, y, scaler_x, scaler_y = time_scaling(Contaminated_data, Clean_data, standard='x')

print(Contaminated_data.shape)
print(Clean_data.shape)

X: (1000, 4000)
y: (1000, 4000)
(1000, 4000)
(1000, 4000)


In [3]:
# Data preprocessing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("<Original>")
print("-----------------------------")
print(f"X_train shape: {X_train.shape}\ny_train shape: {y_train.shape}") # x : B x T, y : B x T
print("-----------------------------")
print(f"X_test shape: {X_test.shape}\ny_test shape: {y_test.shape}")
print("-----------------------------")

# 차원 추가 (LSTM은 세번째 차원 추가)
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1]) # Batch x length x 1
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
y_train = y_train.reshape(y_train.shape[0], 1, y_train.shape[1]) # Batch x length x 1
y_test = y_test.reshape(y_test.shape[0], 1, y_test.shape[1])

print("<Unsqueezed>")
print("-----------------------------")
print(f"X_train shape: {X_train.shape}\ny_train shape: {y_train.shape}") # x : B x T x 1 , y : B x T
print("-----------------------------")
print(f"X_test shape: {X_test.shape}\ny_test shape: {y_test.shape}")
print("-----------------------------")

train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32))
test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.float32))

#train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
#test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

<Original>
-----------------------------
X_train shape: (800, 4000)
y_train shape: (800, 4000)
-----------------------------
X_test shape: (200, 4000)
y_test shape: (200, 4000)
-----------------------------
<Unsqueezed>
-----------------------------
X_train shape: (800, 1, 4000)
y_train shape: (800, 1, 4000)
-----------------------------
X_test shape: (200, 1, 4000)
y_test shape: (200, 1, 4000)
-----------------------------


In [4]:
# 모델 정의
class CNN(nn.Module):
    def __init__(self, in_channels=1, out_channels=1, hidden_dim=16, kernel_size=3, dropout_rate=0.25):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv1d(in_channels, hidden_dim, kernel_size, padding=1),
            nn.GELU(),
            nn.Conv1d(hidden_dim, hidden_dim * 2, kernel_size, padding=1),
            nn.BatchNorm1d(hidden_dim * 2),
            nn.GELU(),
            nn.Conv1d(hidden_dim * 2, hidden_dim, kernel_size, padding=1),
            nn.GELU(),
            nn.Conv1d(hidden_dim, out_channels, kernel_size, padding=1)
        )
        
        self.drop = nn.Dropout1d(dropout_rate)

    def forward(self, x):  # x: B x 1 x T
        x = self.layer1(x)
        return x

model = CNN().to(device)
print(model)

CNN(
  (layer1): Sequential(
    (0): Conv1d(1, 16, kernel_size=(3,), stride=(1,), padding=(1,))
    (1): GELU(approximate='none')
    (2): Conv1d(16, 32, kernel_size=(3,), stride=(1,), padding=(1,))
    (3): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): GELU(approximate='none')
    (5): Conv1d(32, 16, kernel_size=(3,), stride=(1,), padding=(1,))
    (6): GELU(approximate='none')
    (7): Conv1d(16, 1, kernel_size=(3,), stride=(1,), padding=(1,))
  )
  (drop): Dropout1d(p=0.25, inplace=False)
)


In [5]:
import os
import ray
from ray import tune

def train_cnn(config, checkpoint_dir=None):
    
    train_loader = DataLoader(dataset=train_dataset, batch_size=["batch_size"], shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=["batch_size"], shuffle=False)

    model = CNN(
        in_channels=1,
        out_channels=1,
        hidden_dim=config["hidden_dim"],
        kernel_size=config["kernel_size"],
        dropout_rate=config["dropout_rate"]
    ).to(device)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=config["lr"])

    if checkpoint_dir:
        checkpoint = torch.load(os.path.join(checkpoint_dir, "checkpoint"))
        model.load_state_dict(checkpoint["model_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])


    model.train()
    for epoch in range(100):
        
        running_loss = 0.0
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)

            optimizer.zero_grad()
            y_pred = model(x)
            loss = criterion(y_pred, y)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * x.size(0)
        
        tune.report(loss=running_loss / len(train_loader))


In [29]:
import os
import ray
from ray import tune
from ray.tune.schedulers import ASHAScheduler

ray.shutdown()
ray.init()

search_space = {
    "hidden_dim": tune.choice([16, 32, 64]),
    "kernel_size": tune.choice([3, 5, 7]),
    "dropout_rate": tune.uniform(0.1, 0.5),
    "lr": tune.loguniform(1e-4, 1e-2),
    "batch_size": tune.choice([16, 32, 64])
}

scheduler = ASHAScheduler(
    metric="loss",
    mode="min",
    max_t=10,
    grace_period=1,
    reduction_factor=2
)

analysis = tune.run(
    train_cnn,
    resources_per_trial={"cpu": 2, "gpu": 1},
    config=search_space,
    num_samples=1000,  # 실험할 샘플 수
    scheduler=scheduler,
    storage_path="C:/ray_results",  # 짧은 경로 설정
    log_to_file=True
)

print("Best config: ", analysis.best_config)

best_config = analysis.get_best_config(metric="loss", mode="min")
print(f"Best config: {best_config}")


2024-08-16 06:32:48,745	INFO worker.py:1781 -- Started a local Ray instance.
2024-08-16 06:32:52,040	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949




FileNotFoundError: [WinError 3] 지정된 경로를 찾을 수 없습니다: 'C:\\Users\\stell\\AppData\\Local\\Temp\\ray\\session_2024-08-16_06-32-46_981402_16356\\artifacts\\2024-08-16_06-32-52\\train_cnn_2024-08-16_06-32-52\\driver_artifacts\\train_cnn_ed3b1_00000_0_batch_size=16,dropout_rate=0.3678,hidden_dim=16,kernel_size=7,lr=0.0087_2024-08-16_06-34-06'