In [1]:
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np

In [2]:
# 디바이스 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
dataset = np.load("../../KEMDy20_v1_1/Extract/Dataset.npz")

In [4]:
train_x = torch.from_numpy(dataset["train_x"]).float()
test_x = torch.from_numpy(dataset["test_x"]).float()
train_aug_x = torch.from_numpy(dataset["train_aug_x"]).float()

In [5]:
train_y = dataset["train_y"]
test_y = dataset["test_y"]
train_aug_y = dataset["train_aug_y"]

In [6]:
import gc
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Autoencoder 모델을 정의합니다.
class AutoEncoder(nn.Module):
    def __init__(self):
        super(AutoEncoder, self).__init__()
    
        self.fc1 = nn.Linear(100000, 4096, bias=False)
        self.fc2 = nn.Linear(4096, 2048, bias=False)
        self.fc3 = nn.Linear(2048, 1024, bias=False)
        
        self.defc1 = nn.Linear(1024, 2048, bias=False)
        self.defc2 = nn.Linear(2048, 4096, bias=False)
        self.defc3 = nn.Linear(4096, 100000, bias=False)

    def encoder(self, x):
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)

        return x
    
    def decoder(self, x):
        x = self.defc1(x)
        x = self.defc2(x)      
        x = self.defc3(x)

        return x
    
    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

# 모델을 생성합니다.
model = AutoEncoder().to(device)

# L1 Loss 함수를 정의합니다.
criterion = nn.L1Loss()

# 옵티마이저를 정의합니다.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# 하이퍼파라미터를 정의합니다.
num_epochs = 30
batch_size = 32

# 모델을 학습합니다.
for epoch in range(num_epochs):
    for i in range(0, len(train_x), batch_size):
        batch_x = train_x[i:i+batch_size]
        
        # 모델에 입력 데이터를 GPU 상으로 이동시킵니다.
        batch_x = batch_x.to(device)
        
        # 순전파 단계를 수행합니다.
        output = model(batch_x)
        
        # 손실을 계산하고 역전파 단계를 수행합니다.
        loss = criterion(output, batch_x)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    # 현재 epoch의 손실을 출력합니다.
    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

Epoch [1/30], Loss: 0.4733
Epoch [2/30], Loss: 1.9926
Epoch [3/30], Loss: 0.9184
Epoch [4/30], Loss: 0.6841
Epoch [5/30], Loss: 0.6169
Epoch [6/30], Loss: 0.5645
Epoch [7/30], Loss: 0.5370
Epoch [8/30], Loss: 0.5222
Epoch [9/30], Loss: 0.5085
Epoch [10/30], Loss: 0.5006
Epoch [11/30], Loss: 0.4921
Epoch [12/30], Loss: 0.4854
Epoch [13/30], Loss: 0.4806
Epoch [14/30], Loss: 0.4761
Epoch [15/30], Loss: 0.4728
Epoch [16/30], Loss: 0.4677
Epoch [17/30], Loss: 0.4647
Epoch [18/30], Loss: 0.4606
Epoch [19/30], Loss: 0.4566
Epoch [20/30], Loss: 0.4541
Epoch [21/30], Loss: 0.4506
Epoch [22/30], Loss: 0.4480
Epoch [23/30], Loss: 0.4450


In [9]:
# 학습된 Encoder 모델을 추출합니다.
encoder_model = nn.Sequential(*list(model.children())[:3])

# 배치 사이즈 정의
batch_size = 32

# train_x를 배치 단위로 분할하고 Autoencoder에 입력하여 feature 추출
encoded_train_x_list = []
for i in range(0, len(train_x), batch_size):
    batch_x = train_x[i:i+batch_size]
    encoded_train_x_list.append(model.encoder(batch_x.to(device)).cpu().detach().numpy())
encoded_train_x = np.concatenate(encoded_train_x_list, axis=0)

# test_x를 배치 단위로 분할하고 Autoencoder에 입력하여 feature 추출
encoded_test_x_list = []
for i in range(0, len(test_x), batch_size):
    batch_x = test_x[i:i+batch_size]
    encoded_test_x_list.append(model.encoder(batch_x.to(device)).cpu().detach().numpy())
encoded_test_x = np.concatenate(encoded_test_x_list, axis=0)

# train_x_aug를 배치 단위로 분할하고 Autoencoder에 입력하여 feature 추출
encoded_train_aug_x_list = []
for i in range(0, len(train_aug_x), batch_size):
    batch_x = train_aug_x[i:i+batch_size]
    encoded_train_aug_x_list.append(model.encoder(batch_x.to(device)).cpu().detach().numpy())
encoded_train_aug_x = np.concatenate(encoded_train_aug_x_list, axis=0)

In [28]:
save_path = "../../KEMDy20_v1_1/Extract/"
if not os.path.exists(save_path):
    os.mkdir(save_path)

In [29]:
np.savez(save_path+"Dataset_AE",
         train_x = encoded_train_x,
         train_y = train_y,
         test_x = encoded_test_x,
         test_y = test_y,
         train_aug_x = encoded_train_x,
         train_aug_y = train_aug_y
         )