## Import

In [1]:
import random
import pandas as pd
import numpy as np
import os
import librosa
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import IsolationForest
from sklearn.metrics import accuracy_score, f1_score

from tqdm.auto import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import warnings
warnings.filterwarnings(action='ignore') 

  from .autonotebook import tqdm as notebook_tqdm


# Hyperparameters

In [126]:
EPOCHS = 1000
LR = 1e-1
SR = 16000
SEED = 42
N_MFCC = 128
BATCH = 256
device = 'cuda'
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(SEED) # Seed 고정

## Data Pre-processing

In [3]:
path = 'open/'
train_df = pd.read_csv(path+'train.csv') # 모두 정상 Sample
test_df = pd.read_csv(path+'test.csv')

In [4]:
train_df

Unnamed: 0,SAMPLE_ID,SAMPLE_PATH,FAN_TYPE,LABEL
0,TRAIN_0000,./train/TRAIN_0000.wav,2,0
1,TRAIN_0001,./train/TRAIN_0001.wav,0,0
2,TRAIN_0002,./train/TRAIN_0002.wav,0,0
3,TRAIN_0003,./train/TRAIN_0003.wav,2,0
4,TRAIN_0004,./train/TRAIN_0004.wav,2,0
...,...,...,...,...
1274,TRAIN_1274,./train/TRAIN_1274.wav,2,0
1275,TRAIN_1275,./train/TRAIN_1275.wav,2,0
1276,TRAIN_1276,./train/TRAIN_1276.wav,2,0
1277,TRAIN_1277,./train/TRAIN_1277.wav,2,0


In [5]:
def get_mfcc_feature(df):
    features = []
    features2 = []
    for path in tqdm(df['SAMPLE_PATH']):
        # librosa패키지를 사용하여 wav 파일 load
        y, sr = librosa.load(path, sr=SR)
        
        # melspectrogram
        mels = librosa.feature.melspectrogram(y, sr=sr, n_mels=N_MFCC)
        mels = librosa.power_to_db(mels, ref=np.max)
        
        # librosa패키지를 사용하여 mfcc 추출
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=N_MFCC)
        
        y_feature2 = []
        # 추출된 MFCC들의 평균을 Feature로 사용
        for e in mels:
            y_feature2.append(np.mean(e))
        features2.append(y_feature2)
        
        y_feature = []
        # 추출된 MFCC들의 평균을 Feature로 사용
        for e in mfcc:
            y_feature.append(np.mean(e))
        features.append(y_feature)
    return features, features2

In [6]:
%%time

train_features, train_features2 = get_mfcc_feature(train_df)
test_features, test_features2 = get_mfcc_feature(test_df)

100%|██████████████████████████████████████████████████████████████████████████████| 1279/1279 [00:32<00:00, 39.07it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1514/1514 [00:38<00:00, 39.42it/s]

CPU times: total: 1min 27s
Wall time: 1min 11s





In [7]:
# 반복문으로 전체 컬럼명 변경하기
def rename(df):
    flag = 0
    for col_name in df.columns:
        if col_name == 0:
            flag = 1
        if flag == 1:
            df.rename(columns = {col_name : 128+col_name}, inplace = True)
    return df

In [8]:
tmp = pd.concat([train_df, pd.DataFrame(train_features)], axis=1)
tmp = rename(tmp)
tmp = pd.concat([tmp, pd.DataFrame(train_features2)], axis=1)

test = pd.concat([test_df, pd.DataFrame(test_features)], axis=1)
test = rename(test)
test = pd.concat([test, pd.DataFrame(test_features2)], axis=1)

scaler = MinMaxScaler()
tmp.iloc[:,range(4,len(tmp.columns))] = scaler.fit_transform(tmp.iloc[:,range(4,len(tmp.columns))])
test.iloc[:,range(3,len(test.columns))] = scaler.transform(test.iloc[:,range(3,len(test.columns))])
tmp

Unnamed: 0,SAMPLE_ID,SAMPLE_PATH,FAN_TYPE,LABEL,128,129,130,131,132,133,...,118,119,120,121,122,123,124,125,126,127
0,TRAIN_0000,./train/TRAIN_0000.wav,2,0,0.952273,0.284621,0.295843,0.363586,0.357530,0.294327,...,0.833846,0.842291,0.818590,0.817896,0.799984,0.799645,0.811261,0.816584,0.825692,0.805742
1,TRAIN_0001,./train/TRAIN_0001.wav,0,0,0.081981,0.935459,0.514880,0.613406,0.691659,0.825306,...,0.148846,0.144862,0.149996,0.165012,0.156853,0.160930,0.163661,0.162874,0.158788,0.084692
2,TRAIN_0002,./train/TRAIN_0002.wav,0,0,0.240260,0.664368,0.724483,0.354632,0.625930,0.695975,...,0.454258,0.520886,0.454087,0.465858,0.423989,0.497721,0.504808,0.502295,0.487844,0.409240
3,TRAIN_0003,./train/TRAIN_0003.wav,2,0,0.943729,0.295295,0.312391,0.371455,0.329344,0.268686,...,0.808817,0.827032,0.815166,0.787913,0.779337,0.781981,0.771998,0.779565,0.796365,0.798277
4,TRAIN_0004,./train/TRAIN_0004.wav,2,0,0.949608,0.188729,0.179787,0.154144,0.007228,0.055841,...,0.877542,0.909439,0.894097,0.863824,0.849838,0.867351,0.865360,0.861068,0.868706,0.875541
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1274,TRAIN_1274,./train/TRAIN_1274.wav,2,0,0.964989,0.096119,0.129592,0.222531,0.039493,0.075616,...,0.857574,0.875432,0.862135,0.853636,0.852105,0.854758,0.852087,0.851688,0.860304,0.881918
1275,TRAIN_1275,./train/TRAIN_1275.wav,2,0,0.959506,0.283203,0.293961,0.390142,0.322176,0.239990,...,0.842018,0.853391,0.822730,0.825484,0.822317,0.809976,0.820805,0.835689,0.836974,0.819587
1276,TRAIN_1276,./train/TRAIN_1276.wav,2,0,0.930908,0.223856,0.210020,0.151861,0.134018,0.149416,...,0.918239,0.932157,0.898576,0.905386,0.894843,0.879353,0.900221,0.910439,0.903620,0.895840
1277,TRAIN_1277,./train/TRAIN_1277.wav,2,0,0.932890,0.247222,0.265837,0.223214,0.170546,0.141445,...,0.922436,0.946884,0.920099,0.916849,0.900980,0.903257,0.919087,0.922190,0.920950,0.932029


In [9]:
tmp.dropna()

Unnamed: 0,SAMPLE_ID,SAMPLE_PATH,FAN_TYPE,LABEL,128,129,130,131,132,133,...,118,119,120,121,122,123,124,125,126,127
0,TRAIN_0000,./train/TRAIN_0000.wav,2,0,0.952273,0.284621,0.295843,0.363586,0.357530,0.294327,...,0.833846,0.842291,0.818590,0.817896,0.799984,0.799645,0.811261,0.816584,0.825692,0.805742
1,TRAIN_0001,./train/TRAIN_0001.wav,0,0,0.081981,0.935459,0.514880,0.613406,0.691659,0.825306,...,0.148846,0.144862,0.149996,0.165012,0.156853,0.160930,0.163661,0.162874,0.158788,0.084692
2,TRAIN_0002,./train/TRAIN_0002.wav,0,0,0.240260,0.664368,0.724483,0.354632,0.625930,0.695975,...,0.454258,0.520886,0.454087,0.465858,0.423989,0.497721,0.504808,0.502295,0.487844,0.409240
3,TRAIN_0003,./train/TRAIN_0003.wav,2,0,0.943729,0.295295,0.312391,0.371455,0.329344,0.268686,...,0.808817,0.827032,0.815166,0.787913,0.779337,0.781981,0.771998,0.779565,0.796365,0.798277
4,TRAIN_0004,./train/TRAIN_0004.wav,2,0,0.949608,0.188729,0.179787,0.154144,0.007228,0.055841,...,0.877542,0.909439,0.894097,0.863824,0.849838,0.867351,0.865360,0.861068,0.868706,0.875541
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1274,TRAIN_1274,./train/TRAIN_1274.wav,2,0,0.964989,0.096119,0.129592,0.222531,0.039493,0.075616,...,0.857574,0.875432,0.862135,0.853636,0.852105,0.854758,0.852087,0.851688,0.860304,0.881918
1275,TRAIN_1275,./train/TRAIN_1275.wav,2,0,0.959506,0.283203,0.293961,0.390142,0.322176,0.239990,...,0.842018,0.853391,0.822730,0.825484,0.822317,0.809976,0.820805,0.835689,0.836974,0.819587
1276,TRAIN_1276,./train/TRAIN_1276.wav,2,0,0.930908,0.223856,0.210020,0.151861,0.134018,0.149416,...,0.918239,0.932157,0.898576,0.905386,0.894843,0.879353,0.900221,0.910439,0.903620,0.895840
1277,TRAIN_1277,./train/TRAIN_1277.wav,2,0,0.932890,0.247222,0.265837,0.223214,0.170546,0.141445,...,0.922436,0.946884,0.920099,0.916849,0.900980,0.903257,0.919087,0.922190,0.920950,0.932029


In [10]:
cols = tmp.columns.drop(['SAMPLE_ID', 'SAMPLE_PATH', 'LABEL'])
cols

Index(['FAN_TYPE',        128,        129,        130,        131,        132,
              133,        134,        135,        136,
       ...
              118,        119,        120,        121,        122,        123,
              124,        125,        126,        127],
      dtype='object', length=257)

## Model Fit

In [11]:
class CustomDataset(Dataset):
    def __init__(self, df, eval_mode):
        self.eval_mode = eval_mode
        if self.eval_mode:
            self.labels = df['LABEL'].values
        self.df = df[cols].values
        
    def __getitem__(self, index):
        if self.eval_mode:
            self.x = self.df[index]
            self.y = self.labels[index]
            return torch.Tensor(self.x), self.y
        else:
            self.x = self.df[index]
            return torch.Tensor(self.x)
        
    def __len__(self):
        return len(self.df)

In [12]:
train_df, val_df = train_test_split(tmp, test_size=0.1, random_state=SEED)

In [13]:
train_dataset = CustomDataset(df=train_df, eval_mode=False)
train_loader = DataLoader(train_dataset, batch_size=BATCH, shuffle=True)

val_dataset = CustomDataset(df = val_df, eval_mode=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH, shuffle=False)

In [178]:
class ResBlock(nn.Module):
    def __init__(self, block):
        super().__init__()
        self.block = block
    def forward(self, x):
        return self.block(x) + x #f(x) + x
    
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.d = nn.Sequential(
            nn.Linear(257, 1024),
            nn.BatchNorm1d(1024),
            nn.LeakyReLU(),
            
            ResBlock(
                nn.Sequential(
                    nn.Linear(1024, 1024),
                    nn.BatchNorm1d(1024),
                    nn.LeakyReLU(),
                    
                    nn.Linear(1024, 1024),
                    nn.BatchNorm1d(1024),
                    nn.LeakyReLU(),
                )
            ),
            nn.Linear(1024, 512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(),
            
            nn.Linear(512, 257),
        )

    def forward(self, x):
        x = self.d(x)
        return x
    
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.g = nn.Sequential(
            nn.Linear(257, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            ResBlock(
                nn.Sequential(
                    nn.Linear(512, 512),
                    nn.BatchNorm1d(512),
                    nn.LeakyReLU(),
                )
            ),
            nn.Linear(512, 257),
        )

    def forward(self, x):
        x = self.g(x)
        return x

In [210]:
class Trainer():
    def __init__(self, discriminator, generator, d_optimizer, g_optimizer, train_loader, val_loader, device):
        self.discriminator = discriminator
        self.generator = generator
        
        self.d_optimizer = d_optimizer
        self.g_optimizer = g_optimizer
        
        self.train_loader = train_loader
        self.val_loader = val_loader
        
        self.device = device
        
        # Loss Function
        self.criterion = nn.KLDivLoss(reduction='batchmean', log_target=True).to(self.device)
        # self.criterion = nn.L1Loss().to(self.device)
        
    def fit(self):
        best_score = 0
        for epoch in range(EPOCHS):
            gloss,dloss = [], []
            for x in iter(self.train_loader):
                # Initialize grad
                self.g_optimizer.zero_grad()
                self.d_optimizer.zero_grad()
                
                x = x.float().to(self.device)
                # make ground truth (labels) -> 1 for real, 0 for fake
                # real_label = torch.full((BATCH, 1), 0, dtype=torch.float32).to(device)
                # fake_label = torch.full((len(x), 257), 1, dtype=torch.float32).to(device)
                
                # 
                # Generatior
                # 
                z = torch.randn(len(x), 257).to(device)
                fake_images = self.generator(z)
                
                g_loss = self.criterion(F.log_softmax(self.discriminator(fake_images),dim=1), 
                                        F.log_softmax(x,dim=1))
                
                # backpropagation를 통해 generator 학습
                g_loss.backward()
                self.g_optimizer.step()
                
                # 
                # Discriminator
                # 
                self.d_optimizer.zero_grad()
                self.g_optimizer.zero_grad()
                
                z = torch.randn(len(x), 257).to(device)
                fake_images = self.generator(z)
                
                fake_loss = self.criterion(F.log_softmax(self.discriminator(fake_images),dim=1), 
                                           F.log_softmax(fake_images,dim=1))
                real_loss = self.criterion(F.log_softmax(self.discriminator(x),dim=1),
                                           F.log_softmax(x,dim=1))
                
                d_loss = (fake_loss + real_loss) / 2
                d_loss.backward()
                self.d_optimizer.step()
                
                gloss.append(g_loss.item())
                dloss.append(d_loss.item())
                
            score = self.validation(self.discriminator, self.generator)
            print(f'Epoch : [{epoch}] Sum loss : [{np.mean(gloss) + np.mean(dloss)}] Val Score : [{score}])')

            if best_score < score:
                best_score = score
                torch.save(self.generator.state_dict(), './best_model.pth', _use_new_zipfile_serialization=False)
                            
    def validation(self, discriminator, generator, thr=0.98):
        cos = nn.CosineSimilarity(dim=1, eps=1e-6)
        pred = []
        true = []
        with torch.no_grad():
            for x, y in iter(self.val_loader):
                x = x.float().to(self.device)
                _x = self.generator(x)
                log_target = F.log_softmax(_x, dim=1)
                log_input = F.log_softmax(x, dim=1)
                # print(log_input, log_target)
                diff = cos(log_input, log_target).cpu().tolist()
                print(diff)
                batch_pred = np.where(np.array(diff) > thr, 0, 1).tolist()
                # pint(batch_pred)
                pred += batch_pred
                true += y.tolist()

        return f1_score(true, pred, average='macro')

In [211]:
discriminator = Discriminator().to(device)
generator = Generator().to(device)

d_optimizer = optim.SGD(discriminator.parameters(), lr=LR)
g_optimizer = optim.SGD(generator.parameters(), lr=LR)

trainer = Trainer(discriminator, generator, d_optimizer, g_optimizer, train_loader, val_loader, device)
trainer.fit()

[0.9924969673156738, 0.992084264755249, 0.9926764369010925, 0.9928351640701294, 0.9946681261062622, 0.9877535104751587, 0.9919318556785583, 0.990973711013794, 0.9923294186592102, 0.9932342767715454, 0.9952181577682495, 0.9925341606140137, 0.9934883117675781, 0.9919187426567078, 0.9952178001403809, 0.994168758392334, 0.9955893158912659, 0.9914930462837219, 0.9938791394233704, 0.9849106669425964, 0.9917218685150146, 0.9879419803619385, 0.9930429458618164, 0.9912294149398804, 0.9904138445854187, 0.9920161962509155, 0.992999792098999, 0.9935943484306335, 0.9931430220603943, 0.9906361103057861, 0.9944208860397339, 0.9827194213867188, 0.9902018904685974, 0.9908474087715149, 0.9925801753997803, 0.9956003427505493, 0.993108332157135, 0.9873900413513184, 0.9923640489578247, 0.9941469430923462, 0.993045449256897, 0.9921786785125732, 0.9941480159759521, 0.9925534725189209, 0.9844775199890137, 0.9884722232818604, 0.9912950396537781, 0.986769437789917, 0.9958282709121704, 0.9904835224151611, 0.9945

## Prediction

In [212]:
test_dataset = CustomDataset(test, False)
test_loader = DataLoader(test_dataset, batch_size=BATCH, shuffle=False)

In [231]:
def prediction(generator, test_loader, device, thr=0.997):
    generator.to(device)
    generator.eval()
    cos = nn.CosineSimilarity(dim=1, eps=1e-6)
    pred = []
    with torch.no_grad():
        for x in iter(test_loader):
            x = x.float().to(device)
            _x = generator(x)
            log_target = F.log_softmax(_x, dim=1)
            log_input = F.log_softmax(x, dim=1)
                
            diff = cos(log_input, log_target).cpu().tolist()
            print(diff)
            batch_pred = np.where(np.array(diff) > thr, 0, 1).tolist()
            pred += batch_pred
    return pred

In [232]:
preds = prediction(generator, test_loader, device)
sum(preds)

[0.9930853843688965, 0.9945313930511475, 0.9969683289527893, 0.9929805994033813, 0.9974288940429688, 0.9979478716850281, 0.9978541135787964, 0.9971587657928467, 0.9970815181732178, 0.9954466819763184, 0.9968195557594299, 0.9952965378761292, 0.9927602410316467, 0.9932861328125, 0.9977220296859741, 0.994010329246521, 0.9942312240600586, 0.9964574575424194, 0.9929571151733398, 0.9936128854751587, 0.9934980869293213, 0.9930330514907837, 0.9969207048416138, 0.9901865720748901, 0.9947214722633362, 0.9934585690498352, 0.9917967915534973, 0.993861198425293, 0.9970600605010986, 0.9979257583618164, 0.9979775547981262, 0.9913318157196045, 0.9978014230728149, 0.9941408038139343, 0.9937984943389893, 0.9956027865409851, 0.9978523254394531, 0.9969591498374939, 0.9967849254608154, 0.9976922869682312, 0.9970587491989136, 0.9977856874465942, 0.9940472841262817, 0.9972249269485474, 0.9892908334732056, 0.9976685047149658, 0.9971864223480225, 0.993069052696228, 0.9945075511932373, 0.9919261932373047, 0.997

984

## Submission

In [233]:
submit = pd.read_csv(path+'sample_submission.csv')

In [234]:
submit['LABEL'] = preds
submit

Unnamed: 0,SAMPLE_ID,LABEL
0,TEST_0000,1
1,TEST_0001,1
2,TEST_0002,1
3,TEST_0003,1
4,TEST_0004,0
...,...,...
1509,TEST_1509,1
1510,TEST_1510,1
1511,TEST_1511,0
1512,TEST_1512,0


In [235]:
submit.to_csv('submit.csv', index=False)

In [236]:
submit['LABEL'].sum()

984