## Import

In [1]:
import random
import pandas as pd
import numpy as np
import os
import librosa
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import IsolationForest
from sklearn.metrics import accuracy_score, f1_score

from tqdm.auto import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import warnings
warnings.filterwarnings(action='ignore') 

  from .autonotebook import tqdm as notebook_tqdm


# Hyperparameters

In [2]:
EPOCHS = 1000
LR = 1e-1
SR = 16000
SEED = 42
N_MFCC = 128
BATCH = 256
device = 'cuda'
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(SEED) # Seed 고정

## Data Pre-processing

In [3]:
path = 'open/'
train_df = pd.read_csv(path+'train.csv') # 모두 정상 Sample
test_df = pd.read_csv(path+'test.csv')

In [4]:
train_df

Unnamed: 0,SAMPLE_ID,SAMPLE_PATH,FAN_TYPE,LABEL
0,TRAIN_0000,./train/TRAIN_0000.wav,2,0
1,TRAIN_0001,./train/TRAIN_0001.wav,0,0
2,TRAIN_0002,./train/TRAIN_0002.wav,0,0
3,TRAIN_0003,./train/TRAIN_0003.wav,2,0
4,TRAIN_0004,./train/TRAIN_0004.wav,2,0
...,...,...,...,...
1274,TRAIN_1274,./train/TRAIN_1274.wav,2,0
1275,TRAIN_1275,./train/TRAIN_1275.wav,2,0
1276,TRAIN_1276,./train/TRAIN_1276.wav,2,0
1277,TRAIN_1277,./train/TRAIN_1277.wav,2,0


In [5]:
def get_mfcc_feature(df):
    features = []
    features2 = []
    for path in tqdm(df['SAMPLE_PATH']):
        # librosa패키지를 사용하여 wav 파일 load
        y, sr = librosa.load(path, sr=SR)
        
        # melspectrogram
        mels = librosa.feature.melspectrogram(y, sr=sr, n_mels=N_MFCC)
        mels = librosa.power_to_db(mels, ref=np.max)
        
        # librosa패키지를 사용하여 mfcc 추출
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=N_MFCC)
        
        y_feature2 = []
        # 추출된 MFCC들의 평균을 Feature로 사용
        for e in mels:
            y_feature2.append(np.mean(e))
        features2.append(y_feature2)
        
        y_feature = []
        # 추출된 MFCC들의 평균을 Feature로 사용
        for e in mfcc:
            y_feature.append(np.mean(e))
        features.append(y_feature)
    return features, features2

In [6]:
%%time

train_features, train_features2 = get_mfcc_feature(train_df)
test_features, test_features2 = get_mfcc_feature(test_df)

100%|██████████████████████████████████████████████████████████████████████████████| 1279/1279 [00:27<00:00, 46.36it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1514/1514 [00:32<00:00, 46.15it/s]

CPU times: total: 1min 21s
Wall time: 1min





In [7]:
# 반복문으로 전체 컬럼명 변경하기
def rename(df):
    flag = 0
    for col_name in df.columns:
        if col_name == 0:
            flag = 1
        if flag == 1:
            df.rename(columns = {col_name : 128+col_name}, inplace = True)
    return df

In [8]:
tmp = pd.concat([train_df, pd.DataFrame(train_features)], axis=1)
tmp = rename(tmp)
tmp = pd.concat([tmp, pd.DataFrame(train_features2)], axis=1)

test = pd.concat([test_df, pd.DataFrame(test_features)], axis=1)
test = rename(test)
test = pd.concat([test, pd.DataFrame(test_features2)], axis=1)

scaler = MinMaxScaler()
tmp.iloc[:,range(4,len(tmp.columns))] = scaler.fit_transform(tmp.iloc[:,range(4,len(tmp.columns))])
test.iloc[:,range(3,len(test.columns))] = scaler.transform(test.iloc[:,range(3,len(test.columns))])
tmp

Unnamed: 0,SAMPLE_ID,SAMPLE_PATH,FAN_TYPE,LABEL,128,129,130,131,132,133,...,118,119,120,121,122,123,124,125,126,127
0,TRAIN_0000,./train/TRAIN_0000.wav,2,0,0.952273,0.284621,0.295843,0.363586,0.357530,0.294327,...,0.833846,0.842291,0.818590,0.817896,0.799984,0.799645,0.811261,0.816584,0.825692,0.805742
1,TRAIN_0001,./train/TRAIN_0001.wav,0,0,0.081981,0.935459,0.514880,0.613406,0.691659,0.825306,...,0.148846,0.144862,0.149996,0.165012,0.156853,0.160930,0.163661,0.162874,0.158788,0.084692
2,TRAIN_0002,./train/TRAIN_0002.wav,0,0,0.240260,0.664368,0.724483,0.354632,0.625930,0.695975,...,0.454258,0.520886,0.454087,0.465858,0.423989,0.497721,0.504808,0.502295,0.487844,0.409240
3,TRAIN_0003,./train/TRAIN_0003.wav,2,0,0.943729,0.295295,0.312391,0.371455,0.329344,0.268686,...,0.808817,0.827032,0.815166,0.787913,0.779337,0.781981,0.771998,0.779565,0.796365,0.798277
4,TRAIN_0004,./train/TRAIN_0004.wav,2,0,0.949608,0.188729,0.179787,0.154144,0.007228,0.055841,...,0.877542,0.909439,0.894097,0.863824,0.849838,0.867351,0.865360,0.861068,0.868706,0.875541
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1274,TRAIN_1274,./train/TRAIN_1274.wav,2,0,0.964989,0.096119,0.129592,0.222531,0.039493,0.075616,...,0.857574,0.875432,0.862135,0.853636,0.852105,0.854758,0.852087,0.851688,0.860304,0.881918
1275,TRAIN_1275,./train/TRAIN_1275.wav,2,0,0.959506,0.283203,0.293961,0.390142,0.322176,0.239990,...,0.842018,0.853391,0.822730,0.825484,0.822317,0.809976,0.820805,0.835689,0.836974,0.819587
1276,TRAIN_1276,./train/TRAIN_1276.wav,2,0,0.930908,0.223856,0.210020,0.151861,0.134018,0.149416,...,0.918239,0.932157,0.898576,0.905386,0.894843,0.879353,0.900221,0.910439,0.903620,0.895840
1277,TRAIN_1277,./train/TRAIN_1277.wav,2,0,0.932890,0.247222,0.265837,0.223214,0.170546,0.141445,...,0.922436,0.946884,0.920099,0.916849,0.900980,0.903257,0.919087,0.922190,0.920950,0.932029


In [9]:
tmp.dropna()

Unnamed: 0,SAMPLE_ID,SAMPLE_PATH,FAN_TYPE,LABEL,128,129,130,131,132,133,...,118,119,120,121,122,123,124,125,126,127
0,TRAIN_0000,./train/TRAIN_0000.wav,2,0,0.952273,0.284621,0.295843,0.363586,0.357530,0.294327,...,0.833846,0.842291,0.818590,0.817896,0.799984,0.799645,0.811261,0.816584,0.825692,0.805742
1,TRAIN_0001,./train/TRAIN_0001.wav,0,0,0.081981,0.935459,0.514880,0.613406,0.691659,0.825306,...,0.148846,0.144862,0.149996,0.165012,0.156853,0.160930,0.163661,0.162874,0.158788,0.084692
2,TRAIN_0002,./train/TRAIN_0002.wav,0,0,0.240260,0.664368,0.724483,0.354632,0.625930,0.695975,...,0.454258,0.520886,0.454087,0.465858,0.423989,0.497721,0.504808,0.502295,0.487844,0.409240
3,TRAIN_0003,./train/TRAIN_0003.wav,2,0,0.943729,0.295295,0.312391,0.371455,0.329344,0.268686,...,0.808817,0.827032,0.815166,0.787913,0.779337,0.781981,0.771998,0.779565,0.796365,0.798277
4,TRAIN_0004,./train/TRAIN_0004.wav,2,0,0.949608,0.188729,0.179787,0.154144,0.007228,0.055841,...,0.877542,0.909439,0.894097,0.863824,0.849838,0.867351,0.865360,0.861068,0.868706,0.875541
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1274,TRAIN_1274,./train/TRAIN_1274.wav,2,0,0.964989,0.096119,0.129592,0.222531,0.039493,0.075616,...,0.857574,0.875432,0.862135,0.853636,0.852105,0.854758,0.852087,0.851688,0.860304,0.881918
1275,TRAIN_1275,./train/TRAIN_1275.wav,2,0,0.959506,0.283203,0.293961,0.390142,0.322176,0.239990,...,0.842018,0.853391,0.822730,0.825484,0.822317,0.809976,0.820805,0.835689,0.836974,0.819587
1276,TRAIN_1276,./train/TRAIN_1276.wav,2,0,0.930908,0.223856,0.210020,0.151861,0.134018,0.149416,...,0.918239,0.932157,0.898576,0.905386,0.894843,0.879353,0.900221,0.910439,0.903620,0.895840
1277,TRAIN_1277,./train/TRAIN_1277.wav,2,0,0.932890,0.247222,0.265837,0.223214,0.170546,0.141445,...,0.922436,0.946884,0.920099,0.916849,0.900980,0.903257,0.919087,0.922190,0.920950,0.932029


In [10]:
cols = tmp.columns.drop(['SAMPLE_ID', 'SAMPLE_PATH', 'LABEL'])
cols

Index(['FAN_TYPE',        128,        129,        130,        131,        132,
              133,        134,        135,        136,
       ...
              118,        119,        120,        121,        122,        123,
              124,        125,        126,        127],
      dtype='object', length=257)

## Model Fit

In [11]:
class CustomDataset(Dataset):
    def __init__(self, df, eval_mode):
        self.eval_mode = eval_mode
        if self.eval_mode:
            self.labels = df['LABEL'].values
        self.df = df[cols].values
        
    def __getitem__(self, index):
        if self.eval_mode:
            self.x = self.df[index]
            self.y = self.labels[index]
            return torch.Tensor(self.x).reshape(-1,1), self.y
        else:
            self.x = self.df[index]
            return torch.Tensor(self.x).reshape(-1,1)
        
    def __len__(self):
        return len(self.df)

In [12]:
class EarlyStopping:
    def __init__(self, tolerance=5, min_delta=0):
        self.tolerance = tolerance
        self.min_delta = min_delta
        self.counter = 0
        self.early_stop = False
        self.min_loss = np.inf

    def __call__(self, train_loss, validation_loss=None):
        if train_loss < self.min_loss:
            self.counter = 0
            self.min_loss = train_loss
            print(f'counter : set 0 min loss : {self.min_loss}')
        elif train_loss > self.min_loss:
            self.counter += 1
            print(f'counter : {self.counter}')
        if self.counter >= self.tolerance:  
            self.early_stop = True

In [13]:
train_df, val_df = train_test_split(tmp, test_size=0.1, random_state=SEED)

In [14]:
train_dataset = CustomDataset(df=train_df, eval_mode=False)
train_loader = DataLoader(train_dataset, batch_size=BATCH, shuffle=True)

val_dataset = CustomDataset(df = val_df, eval_mode=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH, shuffle=False)

In [15]:
class Trainer():
    def __init__(self, model, optimizer, train_loader, val_loader, scheduler, device):
        self.model = model
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.scheduler = scheduler
        self.device = device
        self.early_stopping = EarlyStopping(tolerance=100, min_delta=10)
        
        # Loss Function
        self.criterion = nn.KLDivLoss(reduction='batchmean', log_target=True).to(self.device)
        # self.criterion = nn.L1Loss().to(self.device)
        
    def fit(self):
        self.model.to(self.device)
        best_score = 0
        for epoch in range(EPOCHS):
            self.model.train()
            train_loss = []
            for x in iter(self.train_loader):
                self.optimizer.zero_grad()
                x = x.float().to(self.device)
                _x = self.model(x)

                log_target = F.log_softmax(_x, dim=1)
                log_input = F.log_softmax(x.reshape(-1,257), dim=1)
                
                loss = self.criterion(log_input, log_target)
                loss.backward()
                self.optimizer.step()
                
                train_loss.append(loss.item())
            
            score = self.validation(self.model)
            print(f'Epoch : [{epoch}] Train loss : [{np.mean(train_loss)}] Val Score : [{score}])')
            
            if best_score < score:
                best_score = score
                torch.save(model.state_dict(), './best_model.pth', _use_new_zipfile_serialization=False)
                
            # early stopping
            self.early_stopping(np.mean(train_loss))
            if self.early_stopping.early_stop:
                print("early_stopping:", epoch)
                break
            
            if self.scheduler is not None:
                self.scheduler.step(score)
                            
    def validation(self, eval_model, thr=0.999):
        cos = nn.CosineSimilarity(dim=1, eps=1e-6)
        eval_model.eval()
        pred = []
        true = []
        with torch.no_grad():
            for x, y in iter(self.val_loader):
                x = x.float().to(self.device)
                _x = self.model(x)
                
                log_target = F.log_softmax(_x, dim=1)
                log_input = F.log_softmax(x.reshape(-1,257), dim=1)
                
                diff = cos(log_input, log_target).cpu().tolist()
                # print(diff)
                batch_pred = np.where(np.array(diff) > thr, 0, 1).tolist()

                pred += batch_pred
                true += y.tolist()

        return f1_score(true, pred, average='macro')

In [70]:
class ResBlock(nn.Module):
    def __init__(self, block):
        super().__init__()
        self.block = block
    def forward(self, x):
        return self.block(x) + x #f(x) + x
    
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        def Conv(in_channels, out_channels):
            layers = []
            layers += [nn.Conv1d(in_channels = in_channels, out_channels = out_channels, 
                                 kernel_size = 1, stride = 1)]
            layers += [nn.BatchNorm1d(num_features = out_channels)]
            layers += [nn.GELU()]
            return nn.Sequential(*layers)
        in_channels, out_channels = out_channels, 
                                 kernel_size = 1, stride = 1)filters[0]),
            nn.in_channels, out_channels = out_channels, 
                                 kernel_size = 1, stride = 1self.input_skip = nn.Sequential(
            nn.Conv
        self.Encoder = nn.Sequential(
            Conv(257,512),
            nn.MaxPool1d(kernel_size=1, stride=1),
            Conv(512,512),
            nn.MaxPool1d(kernel_size=1, stride=1),
            
            Conv(512,1024),
            nn.MaxPool1d(kernel_size=1, stride=1),
            Conv(1024,1024),
        )
        self.pool1 = nn.MaxPool1d(kernel_size=1, stride=1)
        self.unpool1 = nn.ConvTranspose1d(in_channels=1024, out_channels=1024, kernel_size=1, stride=1)

        self.Decoder1 = nn.Sequential(
            Conv(2048,1024),
            nn.MaxPool1d(kernel_size=1, stride=1),
            Conv(1024,512),
        )
        self.pool2 = nn.MaxPool1d(kernel_size=1, stride=1)
        self.unpool2 = nn.ConvTranspose1d(in_channels=512, out_channels=512, kernel_size=1, stride=1)
        
        self.Decoder2 = nn.Sequential(
            Conv(1024,512),
            nn.MaxPool1d(kernel_size=1, stride=1),
            Conv(512,512),
        )
        self.pool3 = nn.MaxPool1d(kernel_size=1, stride=1)
        self.unpool3 = nn.ConvTranspose1d(in_channels=512, out_channels=512, kernel_size=1, stride=1)
        
        self.flat = nn.Flatten()
        self.fc = nn.Conv1d(in_channels = 1024, out_channels = 257, kernel_size = 1, stride = 1)
        
    def forward(self, x):
        self_x = self.Encoder(x)
        cat1 = torch.cat((self.unpool1(_x),self.pool1(_x)), dim=1)
        
        _x = self.Decoder1(cat1)
        cat2 = torch.cat((self.unpool2(_x),self.pool2(_x)), dim=1)
        
        _x = self.Decoder2(cat2)
        cat3 = torch.cat((self.unpool3(_x),self.pool3(_x)), dim=1)
        
        x = self.flat(self.fc(cat3))
        return x

In [71]:
model = CNN()
model.eval()
optimizer = optim.SGD(model.parameters(), lr=LR)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=10, threshold_mode='abs', min_lr=1e-12, verbose=True)

trainer = Trainer(model, optimizer, train_loader, val_loader, scheduler, device)
trainer.fit()

torch.Size([256, 512, 1])


RuntimeError: The size of tensor a (512) must match the size of tensor b (256) at non-singleton dimension 1

In [72]:
model = CNN()
model.load_state_dict(torch.load('./best_model.pth'))
model.eval()

CNN(
  (Encoder): Sequential(
    (0): Sequential(
      (0): Conv1d(257, 512, kernel_size=(1,), stride=(1,))
      (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): GELU(approximate='none')
    )
    (1): MaxPool1d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
    (2): Sequential(
      (0): Conv1d(512, 512, kernel_size=(1,), stride=(1,))
      (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): GELU(approximate='none')
    )
    (3): MaxPool1d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Conv1d(512, 1024, kernel_size=(1,), stride=(1,))
      (1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): GELU(approximate='none')
    )
    (5): MaxPool1d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
    (6): Sequential(
      (0): Conv1d(1024, 1024, kernel_size=(1,), strid

## Prediction

In [73]:
test_dataset = CustomDataset(test, False)
test_loader = DataLoader(test_dataset, batch_size=BATCH, shuffle=False)

In [74]:
def prediction(model, test_loader, device, thr=0.999):
    model.to(device)
    model.eval()
    cos = nn.CosineSimilarity(dim=1, eps=1e-6)
    pred = []
    with torch.no_grad():
        for x in iter(test_loader):
            x = x.float().to(device)
            _x = model(x)
            log_target = F.log_softmax(_x, dim=1)
            log_input = F.log_softmax(x.reshape(-1,257), dim=1)
                
            diff = cos(log_input, log_target).cpu().tolist()
            print(diff)
            batch_pred = np.where(np.array(diff) > thr, 0, 1).tolist()
            pred += batch_pred
    return pred

In [75]:
preds = prediction(model, test_loader, device)
sum(preds)

[0.9995537400245667, 0.9999197125434875, 0.9994228482246399, 0.9963836669921875, 0.9992743730545044, 0.9999178647994995, 0.99991774559021, 0.9996844530105591, 0.9982609748840332, 0.9993952512741089, 0.9990406036376953, 0.99971604347229, 0.9992411136627197, 0.9988492727279663, 0.9998130798339844, 0.9993667602539062, 0.9999433159828186, 0.9994677305221558, 0.9997826218605042, 0.9959637522697449, 0.9954495429992676, 0.9992697238922119, 0.9992820024490356, 0.9989156723022461, 0.9996443390846252, 0.9996000528335571, 0.9989124536514282, 0.9996083974838257, 0.9989909529685974, 0.9998980760574341, 0.9998732209205627, 0.9997262954711914, 0.9998555183410645, 0.9995408654212952, 0.9989401698112488, 0.9999208450317383, 0.9998863935470581, 0.9989365339279175, 0.9991800785064697, 0.9998806715011597, 0.9990048408508301, 0.9998503923416138, 0.9998046159744263, 0.999340295791626, 0.9995889663696289, 0.9996730089187622, 0.9996721148490906, 0.9998587369918823, 0.9999300241470337, 0.9983798265457153, 0.99

427

## Submission

In [76]:
submit = pd.read_csv(path+'sample_submission.csv')

In [77]:
submit['LABEL'] = preds
submit

Unnamed: 0,SAMPLE_ID,LABEL
0,TEST_0000,0
1,TEST_0001,0
2,TEST_0002,0
3,TEST_0003,1
4,TEST_0004,0
...,...,...
1509,TEST_1509,1
1510,TEST_1510,1
1511,TEST_1511,0
1512,TEST_1512,0


In [78]:
submit.to_csv('submit.csv', index=False)

In [79]:
submit['LABEL'].sum()

427