# Initial Settings

In [1]:
from google.colab import drive
drive.mount('/content/gdrive/')
path = '/content/gdrive/My Drive/Outsourcing/DACON/ACOUSTIC/'

import os
os.chdir(path)

Mounted at /content/gdrive/


# Library

In [2]:
import random
import numpy as np
import os.path as osp
import warnings
import torch.optim as optim
import torch
import cv2
import torchvision.transforms as transforms

from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from scipy.signal import stft
from PIL import Image
from torch.utils.data import Dataset, DataLoader

warnings.filterwarnings(action='ignore')

# 파라미터 세팅

In [3]:
# CFG = {
#     'SR':16000,
#     'N_MFCC':128, # MFCC 벡터를 추출할 개수 (<=128)
#     'SEED':41
# }

def set_seeds(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


lr = 1e-4
random_seed = 41
batch_size = 16
epochs = 100
set_seeds(random_seed)

# 데이터 로더

In [4]:
transform = transforms.Compose([
    transforms.ToTensor(),
])


class STFTDataLoader(Dataset):
  def __init__(self, image, mode='train', transform = None):
    self.image = image
    self.mode = mode
    self.transform = transform

  def __len__(self):
    return self.image.shape[0]

  def __getitem__(self, idx):
    sample_image = self.image[idx]

    sample_image = cv2.cvtColor(sample_image, cv2.COLOR_BGR2RGB)
    sample_image = cv2.resize(sample_image, (128, 128))
    sample_image = transform(sample_image)
    if self.transform:
        sample_image = self.transform(image=sample_image)['image']

    return sample_image


In [5]:
raw_stft = np.load('stft_image_dataset.npy')
dataset = STFTDataLoader(image=raw_stft, mode='train')
data_loader = DataLoader(dataset, batch_size=16, shuffle=True, pin_memory=True)

# 아키텍처 정의

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Define the autoencoder model
class ConvAutoencoder(nn.Module):
    def __init__(self):
        super(ConvAutoencoder, self).__init__()
        # 인코더 정의
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1),  # 3x128x128 -> 16x64x64
            nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1),  # 16x64x64 -> 32x32x32
            nn.ReLU()
        )

        # 디코더 정의
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(32, 16, kernel_size=2, stride=2),  # 32x32x32 -> 16x64x64
            nn.ConvTranspose2d(16, 3, kernel_size=2, stride=2),  # 16x64x64 -> 3x128x128
            nn.Sigmoid()
            )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ConvAutoencoder()
model = model.to(device)

# 학습

In [8]:
# 손실 함수와 옵티마이저
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

# 모델 학습
for epoch in range(1, epochs+1):
    training_loss = 0
    for step, sample in enumerate(data_loader):
      sample = sample.to(device)

      optimizer.zero_grad()
      results = model(sample)
      loss = criterion(results, sample)
      loss.backward()
      optimizer.step()

      training_loss += loss.item()    # training_loss = training_loss + loss.item()
      if step % 10 == 0:    # 매 10 미니배치마다 출력
          avg_train_loss = training_loss/10
          print(f'Epoch: {epoch} \t | step: {step} \t | Loss: {avg_train_loss:.4f}')
          avg_train_loss = 0.

print('Finished Training')

Epoch: 1 	 | step: 0 	 | Loss: 0.0046
Epoch: 1 	 | step: 10 	 | Loss: 0.0487
Epoch: 1 	 | step: 20 	 | Loss: 0.0927
Epoch: 1 	 | step: 30 	 | Loss: 0.1368
Epoch: 1 	 | step: 40 	 | Loss: 0.1809
Epoch: 1 	 | step: 50 	 | Loss: 0.2233
Epoch: 1 	 | step: 60 	 | Loss: 0.2654
Epoch: 1 	 | step: 70 	 | Loss: 0.3064
Epoch: 2 	 | step: 0 	 | Loss: 0.0035
Epoch: 2 	 | step: 10 	 | Loss: 0.0414
Epoch: 2 	 | step: 20 	 | Loss: 0.0776
Epoch: 2 	 | step: 30 	 | Loss: 0.1109
Epoch: 2 	 | step: 40 	 | Loss: 0.1416
Epoch: 2 	 | step: 50 	 | Loss: 0.1700
Epoch: 2 	 | step: 60 	 | Loss: 0.1955
Epoch: 2 	 | step: 70 	 | Loss: 0.2188
Epoch: 3 	 | step: 0 	 | Loss: 0.0021
Epoch: 3 	 | step: 10 	 | Loss: 0.0223
Epoch: 3 	 | step: 20 	 | Loss: 0.0413
Epoch: 3 	 | step: 30 	 | Loss: 0.0593
Epoch: 3 	 | step: 40 	 | Loss: 0.0762
Epoch: 3 	 | step: 50 	 | Loss: 0.0924
Epoch: 3 	 | step: 60 	 | Loss: 0.1080
Epoch: 3 	 | step: 70 	 | Loss: 0.1231
Epoch: 4 	 | step: 0 	 | Loss: 0.0014
Epoch: 4 	 | step: 10 	 | Los

KeyboardInterrupt: 