<a href="https://colab.research.google.com/github/Taewon-Park/Dacon/blob/main/%EC%9D%8C%EC%84%B1_%EC%A4%91%EC%B2%A9_%EB%8D%B0%EC%9D%B4%ED%84%B0_%EB%B6%84%EB%A5%98.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Fetch a file from Google Drive by its id.
# NOTE(review): presumably this is the competition data archive that the next cell unpacks — confirm.
!gdown https://drive.google.com/uc?id=1LN3A6oXgv-uapNxyilKy3V0z3sRW8TON

In [None]:
# Unpack the downloaded archive. The file name contains spaces, so it must be
# quoted or the shell would pass it to unzip as several separate arguments.
!unzip "235616_음성 중첩 데이터 분류 AI 경진대회_data.zip"

In [None]:
# Libraries
import random
from glob import glob

import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import torchaudio
import torchaudio.transforms as AT
from torch import nn, optim
from torch.cuda.amp import autocast, GradScaler
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

In [None]:
# Seed and GPU setup
# Fix the seed of every random number generator used below (torch, NumPy, stdlib).
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda:0'
else:
    DEVICE = 'cpu'

In [None]:
# Feature extraction
class MRMS(nn.Module):
    """Mel-spectrogram front end computed at four window lengths.

    Four ``MelSpectrogram -> AmplitudeToDB`` pipelines share every setting
    except ``win_length`` (250, 500, 750 and 1000 samples). Their outputs are
    stacked into one 4-channel tensor.
    """

    def __init__(self):
        super().__init__()
        self.sr, self.n_fft, self.hop, self.pad, self.f_min, self.f_max, self.n_mels = \
            16000, 2048, 100, 50, 25, 7500, 160
        self.tf_0 = self.create_tf(250)
        self.tf_1 = self.create_tf(500)
        self.tf_2 = self.create_tf(750)
        self.tf_3 = self.create_tf(1000)
        # Linear ramp from -0.5 to 0.5 along the mel axis, added to every
        # frame in forward(). It is registered as a buffer so that it follows
        # the module through .to()/.cuda() instead of being pinned to the
        # global DEVICE at construction time. persistent=False keeps it out
        # of state_dict(), which therefore stays unchanged.
        self.register_buffer(
            'cali',
            torch.linspace(-0.5, 0.5, steps=self.n_mels).view(1, -1, 1),
            persistent=False,
        )

    def create_tf(self, win_length):
        """Build one mel-spectrogram-in-dB pipeline for the given window length."""
        tf = nn.Sequential(
            AT.MelSpectrogram(sample_rate=self.sr,
                              n_fft=self.n_fft,
                              win_length=win_length,
                              hop_length=self.hop,
                              pad=self.pad,
                              f_min=self.f_min,
                              f_max=self.f_max,
                              n_mels=self.n_mels),
            AT.AmplitudeToDB()
        )
        return tf

    def forward(self, x):
        """Return the stacked, normalised spectrograms for ``x``.

        Only index 0 along the first dimension of each spectrogram is used,
        and the first and last time frames are dropped.
        NOTE(review): assumes ``x`` is a (channel, time) waveform sampled at
        ``self.sr`` — confirm against the caller.
        """
        with torch.no_grad():
            spec_0 = self.tf_0(x)[0, :, 1:-1]
            spec_1 = self.tf_1(x)[0, :, 1:-1]
            spec_2 = self.tf_2(x)[0, :, 1:-1]
            spec_3 = self.tf_3(x)[0, :, 1:-1]
            out = torch.stack([spec_0, spec_1, spec_2, spec_3], dim=0)
            # Remove the mean of each resolution separately, rescale the dB
            # values by 1/20, then add the per-mel-bin ramp.
            out = (out - out.mean(dim=[1, 2], keepdim=True)) / 20 + self.cali
            return out

extractor = MRMS().to(DEVICE)

In [None]:
import os

# Run the feature extractor once over every wav file of both splits and cache
# the resulting tensors on disk so that later cells only need torch.load.
for split in ('train', 'test'):
    cache_dir = './Cache/{}/'.format(split)
    # torch.save does not create missing directories.
    os.makedirs(cache_dir, exist_ok=True)

    for file_name in tqdm(np.sort(glob('./Data/{}/*.wav'.format(split)))):
        x, _ = torchaudio.load(file_name)  # the returned sample rate is not used
        spec = extractor(x.to(DEVICE))
        # Keep the file stem only; os.path handles the platform separator and
        # only strips the final extension.
        stem = os.path.splitext(os.path.basename(file_name))[0]
        torch.save(spec.to('cpu'), cache_dir + stem + '.pt')

In [None]:
# Data augmentation
class CustomDataset(Dataset):
    """Dataset over cached feature files with optional augmentation.

    ``file_list[i]`` is a path readable by ``torch.load`` and ``gt_list[i]``
    is the matching target. When ``augmentation`` is true each item goes
    through masking, a random shifted crop and, most of the time, a mixup
    with another randomly chosen item.
    """

    def __init__(self, file_list, gt_list, augmentation):
        self.file_list = file_list
        self.gt_list = gt_list
        self.augmentation = augmentation
        # One frequency mask (up to 32 bins) followed by two time masks
        # (up to 12 frames each).
        masks = [
            AT.FrequencyMasking(32, False),
            AT.TimeMasking(12, False),
            AT.TimeMasking(12, False),
        ]
        self.spec_augment = nn.Sequential(*masks)

    def __len__(self):
        return len(self.file_list)

    @torch.no_grad()
    def __getitem__(self, index):
        feature = torch.load(self.file_list[index])
        target = self.gt_list[index]

        if not self.augmentation:
            return feature, target

        feature = self.spec_augment(feature)

        # Random shift: zero-pad the last two dimensions by 32 on each side,
        # then cut a 160x160 window at a random offset in [0, 64).
        top = random.randrange(64)
        left = random.randrange(64)
        padded = F.pad(feature, [32, 32, 32, 32])
        feature = padded[:, top:top + 160, left:left + 160]

        # One draw in four skips the mixup step.
        if not random.random() > 0.25:
            return feature, target

        # Mixup: blend in 5-25% of another item (loaded without augmentation)
        # and mix the targets with the same weight.
        weight = random.uniform(0.05, 0.25)
        other = random.randrange(len(self))
        other_feature = torch.load(self.file_list[other])
        other_target = self.gt_list[other]
        feature = (1 - weight) * feature + weight * other_feature
        target = (1 - weight) * target + weight * other_target
        return feature, target

In [None]:
# Training hyper-parameters
N_MODEL = 16            # ensemble size; half is trained per pooling method
N_EPOCH = 200           # epochs per model, also the cosine schedule period
BATCH_SIZE = 128
MODEL_FACTOR = 24       # base channel count that the MAC layer widths scale from
LEARNING_RATE = 0.2     # initial SGD learning rate
MOMENTUM = 0.9
WEIGHT_DECAY = 0.0001

# Keyword arguments shared by the train and test DataLoader.
LOADER_PARAM = dict(
    batch_size=BATCH_SIZE,
    num_workers=3,
    pin_memory=True,
)

In [None]:
# Data loaders
train_x = np.sort(glob('./Cache/train/*.pt'))
test_x = np.sort(glob('./Cache/test/*.pt'))
# Drop the first CSV column and keep the remaining columns as float32 targets.
# NOTE(review): assumes the CSV rows are in the same order as the sorted
# cache files — confirm.
train_y = torch.tensor(pd.read_csv('./Data/train_answer.csv')
                       .to_numpy()[:, 1:], dtype=torch.float32)

train_loader = DataLoader(CustomDataset(train_x, train_y, augmentation=True),
                          shuffle=True, drop_last=True, **LOADER_PARAM)
# The test split has no targets, so the item index is passed as a placeholder.
# It is sized from the number of cached files rather than a hard-coded 10000,
# so a test set of any size works.
test_loader = DataLoader(CustomDataset(test_x, list(range(len(test_x))), augmentation=False),
                         shuffle=False, drop_last=False, **LOADER_PARAM)

In [None]:
# Model
class bn_relu_conv(nn.Module):
    """BatchNorm2d -> LeakyReLU(0.1) -> Conv2d, applied in that order.

    ``ks`` is the square kernel size; the convolution pads by ``ks // 2``,
    which keeps the spatial size unchanged for odd kernels.
    """

    def __init__(self, ks, n_in, n_out):
        super().__init__()
        self.bn = nn.BatchNorm2d(num_features=n_in)
        self.relu = nn.LeakyReLU(negative_slope=0.1)
        self.conv = nn.Conv2d(in_channels=n_in, out_channels=n_out,
                              kernel_size=ks, padding=ks // 2)

    def forward(self, x):
        normed = self.bn(x)
        activated = self.relu(normed)
        return self.conv(activated)


class MAC(nn.Module):
    """Convolutional classifier over 4-channel inputs with 30 outputs.

    ``pool_method`` selects average pooling when it equals ``'Avg'`` and max
    pooling for any other value, both for the 2x2 pooling between layer
    groups and for the final global pooling. ``forward`` returns
    log-probabilities (``log_softmax`` over dimension 1).
    """

    def __init__(self, pool_method):
        super().__init__()
        use_avg = pool_method == 'Avg'
        width = MODEL_FACTOR
        self.factor = width

        self.pool = nn.AvgPool2d(2) if use_avg else nn.MaxPool2d(2)
        # Stem: a strided 6x6 convolution.
        self.initialize = nn.Conv2d(4, 2 * width, 6, stride=2, padding=2)

        # Five layer groups with channel widths of 1, 2, 4, 6 and 8 times
        # the base factor.
        self.lg_0 = self._layer_group(2 * width, width)
        self.lg_1 = self._layer_group(width, 2 * width)
        self.lg_2 = self._layer_group(2 * width, 4 * width)
        self.lg_3 = self._layer_group(4 * width, 6 * width)
        self.lg_4 = self._layer_group(6 * width, 8 * width)

        # 1x1 expansion followed by global pooling down to one value per channel.
        self.finalize = nn.Sequential(
            bn_relu_conv(1, 8 * width, 16 * width),
            nn.AdaptiveAvgPool2d(1) if use_avg else nn.AdaptiveMaxPool2d(1),
        )

        self.dense = nn.Sequential(
            nn.Linear(16 * width, 8 * width),
            nn.Dropout(0.25),
            nn.BatchNorm1d(8 * width),
            nn.LeakyReLU(0.25),
            nn.Linear(8 * width, 30),
        )

    @staticmethod
    def _layer_group(n_in, n_out):
        """Return a 1x1 / 3x3 / 1x1 / 3x3 stack; only the first layer changes width."""
        return nn.Sequential(
            bn_relu_conv(1, n_in, n_out),
            bn_relu_conv(3, n_out, n_out),
            bn_relu_conv(1, n_out, n_out),
            bn_relu_conv(3, n_out, n_out),
        )

    def forward(self, x):
        x = self.initialize(x)
        # The first four groups are each followed by 2x2 pooling.
        for group in (self.lg_0, self.lg_1, self.lg_2, self.lg_3):
            x = self.pool(group(x))
        # The last group feeds the global pooling head instead.
        x = self.finalize(self.lg_4(x))
        logits = self.dense(x.view(x.shape[0], -1))
        return F.log_softmax(logits, dim=1)

In [None]:
# Training routine
def train(model_no, pool_method):
    """Train one MAC model and write its test-set predictions to a CSV file.

    Args:
        model_no: Zero-based index of the model; used (plus one) in the log
            line and in the output file name.
        pool_method: Passed to ``MAC`` and used as the output file prefix.

    The model is trained for ``N_EPOCH`` epochs on ``train_loader`` with SGD,
    a cosine learning-rate schedule and mixed precision. Predictions for
    ``test_loader`` are then written to
    ``./SubResult/<pool_method><model_no + 1>.csv``.
    """
    import os

    # Create the output directory up front so that a missing folder fails
    # immediately instead of after the whole training run.
    os.makedirs('./SubResult', exist_ok=True)

    model = MAC(pool_method).to(DEVICE)
    # The model emits log-probabilities, which is the input form KLDivLoss
    # expects; the targets may be soft (mixed) label vectors.
    criterion = nn.KLDivLoss(reduction='batchmean')
    optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE,
                          momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
    # T_0 equals the number of epochs, so no restart happens during training.
    scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, T_0=N_EPOCH, eta_min=LEARNING_RATE / 40)
    scaler = GradScaler()
    n_batches = len(train_loader)

    for epoch in range(N_EPOCH):
        model.train()
        running_loss, running_count = 0., 0

        # Wrapping the loader (rather than enumerate) lets tqdm see its length.
        for i, (xx, yy) in enumerate(tqdm(train_loader, leave=False)):
            optimizer.zero_grad()
            xx, yy = xx.to(DEVICE), yy.to(DEVICE)

            with autocast():
                pred = model(xx)
                loss = criterion(pred, yy)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            # Fractional epoch: the learning rate is annealed every batch.
            scheduler.step(epoch + i / n_batches)

            running_loss += loss.item() * len(yy)
            running_count += len(yy)

        print('{}Pool Model {:01d} Epoch {:03d} | Train {:7.5f}'
              .format(pool_method, model_no + 1, epoch + 1,
                      running_loss / max(running_count, 1)))

    model.eval()
    # Size the buffer from the dataset itself instead of a hard-coded 10000.
    prediction = torch.zeros((len(test_loader.dataset), 30), dtype=torch.float32)
    offset = 0

    with torch.no_grad():
        for xx, _ in test_loader:
            # exp() turns the log-probabilities back into probabilities.
            pred = model(xx.to(DEVICE)).exp().to('cpu')
            prediction[offset:offset + len(pred)] = pred
            offset += len(pred)

    df = pd.read_csv('./Data/submission.csv')
    df.iloc[:, 1:] = prediction.numpy()
    df.to_csv('./SubResult/{}{:01d}.csv'.format(pool_method, model_no + 1), index=False)

In [None]:
# Train
# Each iteration trains one average-pooling and one max-pooling model.
for m in range(N_MODEL // 2):
    train(m, 'Avg')
    train(m, 'Max')

# Ensemble: average the predictions of every per-model CSV.
# Sorting makes the summation order deterministic (glob order is arbitrary).
file_list = sorted(glob('./SubResult/*.csv'))
if not file_list:
    # Without this check the division below would silently produce NaN.
    raise FileNotFoundError('no prediction files found in ./SubResult/')

df = pd.read_csv('./Data/submission.csv')
# Take the shape from the submission template instead of hard-coding it.
out = np.zeros(df.iloc[:, 1:].shape, dtype=np.float32)

for file_name in file_list:
    print(file_name)
    # Select the prediction columns before converting, so the dtype of the
    # first (id) column cannot affect the numeric array.
    out += pd.read_csv(file_name).iloc[:, 1:].to_numpy(dtype=np.float64)
out /= len(file_list)

df.iloc[:, 1:] = out
df.to_csv('./PredictionFinal.csv', index=False)