## Import

In [1]:
import random
import pandas as pd
import numpy as np
import os
import librosa
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import IsolationForest
from sklearn.metrics import accuracy_score, f1_score
from scipy.spatial import distance

from tqdm.auto import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import warnings
warnings.filterwarnings(action='ignore') 

  from .autonotebook import tqdm as notebook_tqdm


# Hyperparameters

In [2]:
EPOCHS = 1000
LR = 1e-1
SR = 16000
SEED = 42
N_MFCC = 128
BATCH = 256
device = 'cuda'
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(SEED) # Seed 고정

## Data Pre-processing

In [3]:
path = 'open/'
train_df = pd.read_csv(path+'train.csv') # 모두 정상 Sample
test_df = pd.read_csv(path+'test.csv')

In [4]:
train_df

Unnamed: 0,SAMPLE_ID,SAMPLE_PATH,FAN_TYPE,LABEL
0,TRAIN_0000,./train/TRAIN_0000.wav,2,0
1,TRAIN_0001,./train/TRAIN_0001.wav,0,0
2,TRAIN_0002,./train/TRAIN_0002.wav,0,0
3,TRAIN_0003,./train/TRAIN_0003.wav,2,0
4,TRAIN_0004,./train/TRAIN_0004.wav,2,0
...,...,...,...,...
1274,TRAIN_1274,./train/TRAIN_1274.wav,2,0
1275,TRAIN_1275,./train/TRAIN_1275.wav,2,0
1276,TRAIN_1276,./train/TRAIN_1276.wav,2,0
1277,TRAIN_1277,./train/TRAIN_1277.wav,2,0


In [5]:
def get_mfcc_feature(df):
    features = []
    features2 = []
    for path in tqdm(df['SAMPLE_PATH']):
        # librosa패키지를 사용하여 wav 파일 load
        y, sr = librosa.load(path, sr=SR)
        
        # melspectrogram
        mels = librosa.feature.melspectrogram(y, sr=sr, n_mels=N_MFCC)
        mels = librosa.power_to_db(mels, ref=np.max)
        
        # librosa패키지를 사용하여 mfcc 추출
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=N_MFCC)
        
        y_feature2 = []
        # 추출된 MFCC들의 평균을 Feature로 사용
        for e in mels:
            y_feature2.append(np.mean(e))
        features2.append(y_feature2)
        
        y_feature = []
        # 추출된 MFCC들의 평균을 Feature로 사용
        for e in mfcc:
            y_feature.append(np.mean(e))
        features.append(y_feature)
    return features, features2

In [6]:
%%time

train_features, train_features2 = get_mfcc_feature(train_df)
test_features, test_features2 = get_mfcc_feature(test_df)

100%|██████████████████████████████████████████████████████████████████████████████| 1279/1279 [00:31<00:00, 40.11it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1514/1514 [00:38<00:00, 39.64it/s]

CPU times: total: 1min 24s
Wall time: 1min 10s





In [7]:
# 반복문으로 전체 컬럼명 변경하기
def rename(df):
    flag = 0
    for col_name in df.columns:
        if col_name == 0:
            flag = 1
        if flag == 1:
            df.rename(columns = {col_name : 128+col_name}, inplace = True)
    return df

In [8]:
tmp = pd.concat([train_df, pd.DataFrame(train_features)], axis=1)
tmp = rename(tmp)
tmp = pd.concat([tmp, pd.DataFrame(train_features2)], axis=1)

test = pd.concat([test_df, pd.DataFrame(test_features)], axis=1)
test = rename(test)
test = pd.concat([test, pd.DataFrame(test_features2)], axis=1)

scaler = MinMaxScaler()
tmp.iloc[:,range(4,len(tmp.columns))] = scaler.fit_transform(tmp.iloc[:,range(4,len(tmp.columns))])
test.iloc[:,range(3,len(test.columns))] = scaler.transform(test.iloc[:,range(3,len(test.columns))])
tmp

Unnamed: 0,SAMPLE_ID,SAMPLE_PATH,FAN_TYPE,LABEL,128,129,130,131,132,133,...,118,119,120,121,122,123,124,125,126,127
0,TRAIN_0000,./train/TRAIN_0000.wav,2,0,0.952273,0.284621,0.295843,0.363586,0.357530,0.294327,...,0.833846,0.842291,0.818590,0.817896,0.799984,0.799645,0.811261,0.816584,0.825692,0.805742
1,TRAIN_0001,./train/TRAIN_0001.wav,0,0,0.081981,0.935459,0.514880,0.613406,0.691659,0.825306,...,0.148846,0.144862,0.149996,0.165012,0.156853,0.160930,0.163661,0.162874,0.158788,0.084692
2,TRAIN_0002,./train/TRAIN_0002.wav,0,0,0.240260,0.664368,0.724483,0.354632,0.625930,0.695975,...,0.454258,0.520886,0.454087,0.465858,0.423989,0.497721,0.504808,0.502295,0.487844,0.409240
3,TRAIN_0003,./train/TRAIN_0003.wav,2,0,0.943729,0.295295,0.312391,0.371455,0.329344,0.268686,...,0.808817,0.827032,0.815166,0.787913,0.779337,0.781981,0.771998,0.779565,0.796365,0.798277
4,TRAIN_0004,./train/TRAIN_0004.wav,2,0,0.949608,0.188729,0.179787,0.154144,0.007228,0.055841,...,0.877542,0.909439,0.894097,0.863824,0.849838,0.867351,0.865360,0.861068,0.868706,0.875541
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1274,TRAIN_1274,./train/TRAIN_1274.wav,2,0,0.964989,0.096119,0.129592,0.222531,0.039493,0.075616,...,0.857574,0.875432,0.862135,0.853636,0.852105,0.854758,0.852087,0.851688,0.860304,0.881918
1275,TRAIN_1275,./train/TRAIN_1275.wav,2,0,0.959506,0.283203,0.293961,0.390142,0.322176,0.239990,...,0.842018,0.853391,0.822730,0.825484,0.822317,0.809976,0.820805,0.835689,0.836974,0.819587
1276,TRAIN_1276,./train/TRAIN_1276.wav,2,0,0.930908,0.223856,0.210020,0.151861,0.134018,0.149416,...,0.918239,0.932157,0.898576,0.905386,0.894843,0.879353,0.900221,0.910439,0.903620,0.895840
1277,TRAIN_1277,./train/TRAIN_1277.wav,2,0,0.932890,0.247222,0.265837,0.223214,0.170546,0.141445,...,0.922436,0.946884,0.920099,0.916849,0.900980,0.903257,0.919087,0.922190,0.920950,0.932029


In [9]:
tmp.dropna()

Unnamed: 0,SAMPLE_ID,SAMPLE_PATH,FAN_TYPE,LABEL,128,129,130,131,132,133,...,118,119,120,121,122,123,124,125,126,127
0,TRAIN_0000,./train/TRAIN_0000.wav,2,0,0.952273,0.284621,0.295843,0.363586,0.357530,0.294327,...,0.833846,0.842291,0.818590,0.817896,0.799984,0.799645,0.811261,0.816584,0.825692,0.805742
1,TRAIN_0001,./train/TRAIN_0001.wav,0,0,0.081981,0.935459,0.514880,0.613406,0.691659,0.825306,...,0.148846,0.144862,0.149996,0.165012,0.156853,0.160930,0.163661,0.162874,0.158788,0.084692
2,TRAIN_0002,./train/TRAIN_0002.wav,0,0,0.240260,0.664368,0.724483,0.354632,0.625930,0.695975,...,0.454258,0.520886,0.454087,0.465858,0.423989,0.497721,0.504808,0.502295,0.487844,0.409240
3,TRAIN_0003,./train/TRAIN_0003.wav,2,0,0.943729,0.295295,0.312391,0.371455,0.329344,0.268686,...,0.808817,0.827032,0.815166,0.787913,0.779337,0.781981,0.771998,0.779565,0.796365,0.798277
4,TRAIN_0004,./train/TRAIN_0004.wav,2,0,0.949608,0.188729,0.179787,0.154144,0.007228,0.055841,...,0.877542,0.909439,0.894097,0.863824,0.849838,0.867351,0.865360,0.861068,0.868706,0.875541
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1274,TRAIN_1274,./train/TRAIN_1274.wav,2,0,0.964989,0.096119,0.129592,0.222531,0.039493,0.075616,...,0.857574,0.875432,0.862135,0.853636,0.852105,0.854758,0.852087,0.851688,0.860304,0.881918
1275,TRAIN_1275,./train/TRAIN_1275.wav,2,0,0.959506,0.283203,0.293961,0.390142,0.322176,0.239990,...,0.842018,0.853391,0.822730,0.825484,0.822317,0.809976,0.820805,0.835689,0.836974,0.819587
1276,TRAIN_1276,./train/TRAIN_1276.wav,2,0,0.930908,0.223856,0.210020,0.151861,0.134018,0.149416,...,0.918239,0.932157,0.898576,0.905386,0.894843,0.879353,0.900221,0.910439,0.903620,0.895840
1277,TRAIN_1277,./train/TRAIN_1277.wav,2,0,0.932890,0.247222,0.265837,0.223214,0.170546,0.141445,...,0.922436,0.946884,0.920099,0.916849,0.900980,0.903257,0.919087,0.922190,0.920950,0.932029


In [10]:
cols = tmp.columns.drop(['SAMPLE_ID', 'SAMPLE_PATH', 'LABEL'])
cols

Index(['FAN_TYPE',        128,        129,        130,        131,        132,
              133,        134,        135,        136,
       ...
              118,        119,        120,        121,        122,        123,
              124,        125,        126,        127],
      dtype='object', length=257)

## Model Fit

In [11]:
class CustomDataset(Dataset):
    def __init__(self, df, eval_mode):
        self.eval_mode = eval_mode
        if self.eval_mode:
            self.labels = df['LABEL'].values
        self.df = df[cols].values
        
    def __getitem__(self, index):
        if self.eval_mode:
            self.x = self.df[index]
            self.y = self.labels[index]
            return torch.Tensor(self.x).reshape(-1,1), self.y
        else:
            self.x = self.df[index]
            return torch.Tensor(self.x).reshape(-1,1)
        
    def __len__(self):
        return len(self.df)

In [12]:
class EarlyStopping:
    def __init__(self, tolerance=5, min_delta=0):
        self.tolerance = tolerance
        self.min_delta = min_delta
        self.counter = 0
        self.early_stop = False
        self.min_loss = np.inf

    def __call__(self, train_loss, validation_loss=None):
        if train_loss < self.min_loss:
            self.counter = 0
            self.min_loss = train_loss
            print(f'counter : set 0 min loss : {self.min_loss}')
        elif train_loss > self.min_loss:
            self.counter += 1
            print(f'counter : {self.counter}')
        if self.counter >= self.tolerance:  
            self.early_stop = True

In [13]:
train_df, val_df = train_test_split(tmp, test_size=0.1, random_state=SEED)

In [14]:
train_dataset = CustomDataset(df=train_df, eval_mode=False)
train_loader = DataLoader(train_dataset, batch_size=BATCH, shuffle=True)

val_dataset = CustomDataset(df = val_df, eval_mode=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH, shuffle=False)

In [165]:
class Trainer():
    def __init__(self, model, optimizer, train_loader, val_loader, scheduler, device):
        self.model = model
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.scheduler = scheduler
        self.device = device
        self.early_stopping = EarlyStopping(tolerance=100, min_delta=10)
        
        # Loss Function
        self.criterion = nn.KLDivLoss(reduction='batchmean', log_target=True).to(self.device)
        # self.criterion = nn.L1Loss().to(self.device)
        
    def fit(self):
        self.model.to(self.device)
        best_score = float('-inf')
        for epoch in range(EPOCHS):
            self.model.train()
            train_loss = []
            for x in iter(self.train_loader):
                self.optimizer.zero_grad()
                x = x.float().to(self.device)
                _x = self.model(x)

                log_target = F.log_softmax(_x, dim=1).to(self.device)
                log_input = F.log_softmax(x.reshape(-1,257), dim=1).to(self.device)
                # print(log_input[0], log_target[0])
                loss = self.criterion(log_input, log_target)
                loss.backward()
                self.optimizer.step()
                
                train_loss.append(loss.item())
            
            score = self.validation(self.model)
            print(f'Epoch : [{epoch}] Train loss : [{np.mean(train_loss)}] Val Score : [{score}])')
            
            if best_score <= score:
                best_score = score
                torch.save(model.state_dict(), './best_model.pth', _use_new_zipfile_serialization=False)
            torch.save(model.state_dict(), './model.pth', _use_new_zipfile_serialization=False)
            # early stopping
            self.early_stopping(np.mean(train_loss))
            if self.early_stopping.early_stop:
                print("early_stopping:", epoch)
                break
            
            if self.scheduler is not None:
                self.scheduler.step(score)
                            
    def validation(self, eval_model, thr=0.999):
        cos = nn.CosineSimilarity(dim=1, eps=1e-6)
        eval_model.eval()
        pred = []
        true = []
        with torch.no_grad():
            for x, y in iter(self.val_loader):
                x = x.float().to(self.device)
                _x = self.model(x)
                
                log_target = F.log_softmax(_x, dim=1)
                log_input = F.log_softmax(x.reshape(-1,257), dim=1)
                
                diff = cos(log_input, log_target).cpu().tolist()
                # print(diff)
                batch_pred = np.where(np.array(diff) > thr, 0, 1).tolist()

                pred += batch_pred
                true += y.tolist()

        return f1_score(true, pred, average='macro')

In [166]:
class ResConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(ResConv, self).__init__()

        self.conv_block = nn.Sequential(
            nn.Conv1d(in_channels = in_channels, out_channels = out_channels, 
                                 kernel_size = 1, stride = 1),
            nn.BatchNorm1d(num_features = out_channels),
            nn.GELU(),
        )
        self.conv_skip = nn.Sequential(
            nn.Conv1d(in_channels = in_channels, out_channels = out_channels, 
                                 kernel_size = 1, stride = 1),
            nn.BatchNorm1d(num_features = out_channels),
        )

    def forward(self, x):
        return self.conv_block(x) + self.conv_skip(x)

class Upsample(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(Upsample, self).__init__()

        self.upsample = nn.ConvTranspose1d(
            in_channels, out_channels, kernel_size=1, stride=1
        )

    def forward(self, x):
        return self.upsample(x)
    

class Model(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.input_layer = nn.Sequential(
            nn.Conv1d(in_channels = 257, out_channels = 512, 
                                 kernel_size = 1, stride = 1),
            nn.BatchNorm1d(512),
            nn.GELU(),
            nn.Conv1d(in_channels = 512, out_channels = 512, 
                                 kernel_size = 1, stride = 1),
        )
        
        self.input_skip = nn.Sequential(
            nn.Conv1d(in_channels = 257, out_channels = 512, 
                                 kernel_size = 1, stride = 1)
        )
        
        self.resconv1 = ResConv(512, 512)
        self.resconv2 = ResConv(512, 1024)
        
        self.bridge = ResConv(1024, 1024)
        
        
        # Decoder        
        self.upsample_1 = Upsample(1024, 1024)
        self.upresconv1 = ResConv(1024*2, 512)
        
        self.upsample_2 = Upsample(1024, 512)
        self.upresconv2 = ResConv(1024, 512)
        
        self.upsample_3 = Upsample(512, 512)
        self.upresconv3 = ResConv(512*2, 512)

        self.flat = nn.Flatten()
        self.fc = nn.Conv1d(in_channels = 512, out_channels = 257, kernel_size = 1, stride = 1)
        
    def forward(self, x):
        # Encode
        x1 = self.input_layer(x) + self.input_skip(x)
        x2 = self.resconv1(x1) + self.input_skip(x)
        x3 = self.resconv2(x2)
        
        # Bridge
        x4 = self.bridge(x3)
        
        # Decode
        # x4 = self.upsample_1(x4)
        x5 = torch.cat([x4, x3], dim=1)
        x6 = self.upresconv1(x5)
        
        # x6 = self.upsample_2(x6)
        x7 = torch.cat([x6, x2], dim=1)
        
        x8 = self.upresconv2(x7)

        # x8 = self.upsample_3(x8)
        x9 = torch.cat([x8, x1], dim=1)

        x10 = self.upresconv3(x9)

        output = self.fc(x10)
        return self.flat(output)

In [None]:
model = Model()
optimizer = optim.SGD(model.parameters(), lr=LR)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=10, threshold_mode='abs', min_lr=1e-12, verbose=True)

trainer = Trainer(model, optimizer, train_loader, val_loader, scheduler, device)
trainer.fit()

Epoch : [0] Train loss : [0.17930056452751159] Val Score : [0.4074074074074074])
counter : set 0 min loss : 0.17930056452751159
Epoch : [1] Train loss : [0.09885907024145127] Val Score : [0.43859649122807015])
counter : set 0 min loss : 0.09885907024145127
Epoch : [2] Train loss : [0.07162650823593139] Val Score : [0.4458874458874459])
counter : set 0 min loss : 0.07162650823593139
Epoch : [3] Train loss : [0.05666284039616585] Val Score : [0.45064377682403434])
counter : set 0 min loss : 0.05666284039616585
Epoch : [4] Train loss : [0.046596828103065493] Val Score : [0.459915611814346])
counter : set 0 min loss : 0.046596828103065493
Epoch : [5] Train loss : [0.039390365779399875] Val Score : [0.46443514644351463])
counter : set 0 min loss : 0.039390365779399875
Epoch : [6] Train loss : [0.03425737842917442] Val Score : [0.459915611814346])
counter : set 0 min loss : 0.03425737842917442
Epoch : [7] Train loss : [0.030386653169989584] Val Score : [0.4576271186440678])
counter : set 0 m

In [155]:
model = CNN()
model.load_state_dict(torch.load('./best_model.pth'))
model.eval()

# model = CNN()
# model.load_state_dict(torch.load('./model.pth'))
# model.eval()

CNN(
  (input_layer): Sequential(
    (0): Conv1d(257, 512, kernel_size=(1,), stride=(1,))
    (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): GELU(approximate='none')
    (3): Conv1d(512, 512, kernel_size=(1,), stride=(1,))
  )
  (input_skip): Sequential(
    (0): Conv1d(257, 512, kernel_size=(1,), stride=(1,))
  )
  (resconv1): ResConv(
    (conv_block): Sequential(
      (0): Conv1d(512, 512, kernel_size=(1,), stride=(1,))
      (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): GELU(approximate='none')
    )
    (conv_skip): Sequential(
      (0): Conv1d(512, 512, kernel_size=(1,), stride=(1,))
      (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (resconv2): ResConv(
    (conv_block): Sequential(
      (0): Conv1d(512, 1024, kernel_size=(1,), stride=(1,))
      (1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats

## Prediction

In [156]:
test_dataset = CustomDataset(test, False)
test_loader = DataLoader(test_dataset, batch_size=BATCH, shuffle=False)

In [157]:
def prediction(model, test_loader, device, thr=0.999):
    model.to(device)
    model.eval()
    cos = nn.CosineSimilarity(dim=1, eps=1e-6)
    pred = []
    with torch.no_grad():
        for x in iter(test_loader):
            x = x.float().to(device)
            _x = model(x)
            log_target = F.log_softmax(_x, dim=1)
            log_input = F.log_softmax(x.reshape(-1,257), dim=1)
                
            diff = cos(log_input, log_target).cpu().tolist()
            print(diff)
            batch_pred = np.where(np.array(diff) > thr, 0, 1).tolist()
            pred += batch_pred
    return pred

In [158]:
preds = prediction(model, test_loader, device)
sum(preds)

[0.9996505975723267, 0.999908447265625, 0.9990004301071167, 0.9973986148834229, 0.9991329312324524, 0.9999409914016724, 0.9999246597290039, 0.999808669090271, 0.9969272613525391, 0.9992584586143494, 0.9985594749450684, 0.9997560977935791, 0.9988080263137817, 0.9989929795265198, 0.9998619556427002, 0.9991437196731567, 0.999944806098938, 0.9997737407684326, 0.9997348785400391, 0.9975021481513977, 0.9974305033683777, 0.9990506172180176, 0.9991956353187561, 0.9987855553627014, 0.9997345209121704, 0.9997130036354065, 0.9990031123161316, 0.9996923208236694, 0.9986710548400879, 0.9999092221260071, 0.9998365640640259, 0.9997600317001343, 0.9998252391815186, 0.9996917843818665, 0.999175488948822, 0.9999167919158936, 0.9998964667320251, 0.9985170364379883, 0.9984459280967712, 0.9999053478240967, 0.999008297920227, 0.9998723268508911, 0.9998548030853271, 0.9993114471435547, 0.9996931552886963, 0.9997081160545349, 0.9993782043457031, 0.9998681545257568, 0.9999277591705322, 0.9985414743423462, 0.99

535

## Submission

In [152]:
submit = pd.read_csv(path+'sample_submission.csv')

In [153]:
submit['LABEL'] = preds
submit

Unnamed: 0,SAMPLE_ID,LABEL
0,TEST_0000,0
1,TEST_0001,0
2,TEST_0002,0
3,TEST_0003,1
4,TEST_0004,0
...,...,...
1509,TEST_1509,1
1510,TEST_1510,1
1511,TEST_1511,0
1512,TEST_1512,0


In [102]:
submit.to_csv('submit.csv', index=False)

In [103]:
submit['LABEL'].sum()

535