In [1]:
import torch
import torch.nn as nn
from transformers import AutoFeatureExtractor, ASTModel

import librosa
import numpy as np

from glob import glob
import random
from tqdm import tqdm

from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.decomposition import PCA
from sklearn import svm
import joblib

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
### 데이터셋 로드 ###
data_paths = glob('./train/*비명*')[:200]+glob('./train/*자동차*')[:200]+glob('./train/*동물*')[:200]+glob('./train/*경적*')[:200]+glob('./train/*대화*')[:200]
random.shuffle(data_paths)
data_paths = data_paths+glob('./test_data_1second/*')

In [3]:
### 비정형 데이터를 OC-SVM에 학습시키기 위해 피쳐 추출 모델로 AudioSet으로 학습된 distil-ast 모델을 가져옴 ###
feature_extractor = AutoFeatureExtractor.from_pretrained("bookbot/distil-ast-audioset")
model = ASTModel.from_pretrained("bookbot/distil-ast-audioset").to('cuda')

In [4]:
### 모델로 특징 추출 ###
def feature_extraction(data_paths):
    y, sr = librosa.load(data_paths[0], sr=16000)
    input_tensor = feature_extractor(y, sampling_rate=sr, return_tensors="pt")
    with torch.no_grad():
        datasets = model(**input_tensor.to('cuda')).last_hidden_state

    for data_path in tqdm(data_paths[1:]):
        y, sr = librosa.load(data_path, sr=16000)
        input_tensor = feature_extractor(y, sampling_rate=sr, return_tensors="pt")
        with torch.no_grad():
            datasets = torch.cat((datasets, model(**input_tensor.to('cuda')).last_hidden_state.detach()))
    
    return datasets

datasets = feature_extraction(data_paths)

  return F.conv2d(input, weight, bias, self.stride,
100%|██████████| 1179/1179 [02:04<00:00,  9.47it/s]


In [5]:
### 학습용으로 820개, 테스트용으로 360개로 나눔 ###
### 학습 셋은 분류 모델이 학습한 클래스의 샘플만 포함 ###
train_set = datasets[:-360]
test_set = datasets[-360:]

train_set = train_set.cpu().numpy().reshape(len(train_set), -1)
test_set = test_set.cpu().numpy().reshape(len(test_set), -1)

In [6]:
### RobustScaler로 학습 셋을 정규화 ###
### ※ StandardScaler의 경우 성능이 많이 떨어짐 ###
def fit_transform_scaler(train_set, test_set):
    scaler = RobustScaler()
    scaler = scaler.fit(train_set)

    train_set = scaler.transform(train_set)
    test_set = scaler.transform(test_set)
    
    return train_set, test_set, scaler

train_set, test_set, scaler = fit_transform_scaler(train_set, test_set)

In [7]:
### 차원의 저주를 방지하고 연산량을 줄이기 위해 차원 축소 ##
def fit_transform_pca(train_set, test_set):
    pca = PCA(n_components=128, whiten=True)
    pca = pca.fit(train_set)

    print('Explained variance percentage = %0.2f' % sum(pca.explained_variance_ratio_))

    train_set = pca.transform(train_set)
    test_set = pca.transform(test_set)
    
    return train_set, test_set, pca

train_set, test_set, pca = fit_transform_pca(train_set, test_set)

Explained variance percentage = 0.89


In [9]:
### OC-SVM 학습 ###
oc_svm_clf = svm.OneClassSVM(gamma=0.001, kernel='rbf', nu=0.156)
oc_svm_clf.fit(train_set)

In [10]:
### 모델 평가 ###
preds = oc_svm_clf.predict(test_set)
gt_labels = [1]*180+[-1]*180

tp, tn, fp, fn = 0, 0, 0, 0

for i in range(len(preds)):
    if (gt_labels[i]==1) & (preds[i] == 1):
        tp += 1
    elif (gt_labels[i]==1) & (preds[i] == -1):
        fn += 1
    elif (gt_labels[i]==-1) & (preds[i] == 1):
        fp += 1
    elif (gt_labels[i]==-1) & (preds[i] == -1):
        tn += 1

accuracy = (tp + tn) / (tp + tn + fp + fn)
recall = tp / (tp + fn)
precision = tp / (tp + fp)

F1_score = 2 * precision * recall / (precision + recall)

print('accuracy : {0:.2f}, recall : {1:.2f}, precision : {2:.2f}, F1 Score : {3:.2f}'.format(accuracy, recall, precision, F1_score))

accuracy : 0.94, recall : 0.97, precision : 0.92, F1 Score : 0.95


In [15]:
### 하이퍼 파라미터 튜닝 ###

# 데이터를 분류하는 선이 얼마나 멀리 떨어져 있는 데이터까지 계산에 고려할 것인가. 높을수록 선에 가까운 데이터들만 계산에 고려함.
gamma = np.linspace(0.0001, 99, 20)
best_gamma = 0.0001

# nu와 변수 C와의 차이점은 nu는 0보다 크고 1보다 작거나 같은 값을 가진다는 점. nu는 데이터 분류할 때 이상치 허용 정도를 의미하며 낮을수록 과적합 가능성 높아짐. 
nu = np.linspace(0.00001, 0.99, 20)
best_nu = 0.00001

# 클래스들 사이의 비선형 경계를 수용하기 위해 변수 공간을 확장할 때 사용하는 계산 기법
#kernel = ['linear', 'poly', 'rbf', 'sigmoid']
kernel = ['rbf']
best_kernel = 'rbf'

def param_op(gamma, kernel, nu):
    best_score = 0

    for k in kernel:
        for g in tqdm(gamma, desc="gamma"):
            for n in tqdm(nu, desc="nu"):
                accuracy = 0
                recall = 0
                precision = 0
                score = 0

                oc_svm_clf = svm.OneClassSVM(gamma=g, kernel=k, nu=n)
                oc_svm_clf.fit(train_set)

                preds = oc_svm_clf.predict(test_set)

                tp, tn, fp, fn = 0, 0, 0, 0
                for i in range(len(preds)):
                    if (gt_labels[i]==1) & (preds[i] == 1):
                        tp += 1
                    elif (gt_labels[i]==1) & (preds[i] == -1):
                        fn += 1
                    elif (gt_labels[i]==-1) & (preds[i] == 1):
                        fp += 1
                    elif (gt_labels[i]==-1) & (preds[i] == -1):
                        tn += 1
                if (tp + tn + fp + fn != 0 and tp + fn != 0 and tp + fp != 0):
                    accuracy = (tp + tn) / (tp + tn + fp + fn)
                    recall = tp / (tp + fn)
                    precision = tp / (tp + fp)
                if (precision + recall != 0):
                    score = 2 * precision * recall / (precision + recall)

                if (score > best_score):
                    best_score = score
                    best_gamma = g
                    best_nu = n
                    best_kernel = k
                    print('갱신된 Best_Score: ', best_score)

    print('학습된 모델 BestScore:', best_score)
    print('하이퍼 파라미터=> kernel:', best_kernel,', gamma: ', best_gamma, ', nu: ', best_nu,'\n')
    return best_kernel, best_gamma, best_nu

In [16]:
best_kernel, best_gamma, best_nu = param_op(gamma, kernel, nu)

gamma:   0%|          | 0/20 [00:00<?, ?it/s]
nu:   0%|          | 0/20 [00:00<?, ?it/s][A
nu:  25%|██▌       | 5/20 [00:00<00:00, 34.49it/s][A

갱신된 Best_Score:  0.8866995073891626
갱신된 Best_Score:  0.9254498714652957
갱신된 Best_Score:  0.9459459459459458



nu:  45%|████▌     | 9/20 [00:00<00:00, 18.61it/s][A
nu:  60%|██████    | 12/20 [00:00<00:00, 13.32it/s][A
nu:  70%|███████   | 14/20 [00:01<00:00, 11.69it/s][A
nu:  80%|████████  | 16/20 [00:01<00:00,  9.74it/s][A
nu:  90%|█████████ | 18/20 [00:01<00:00,  9.09it/s][A
nu:  95%|█████████▌| 19/20 [00:01<00:00,  7.93it/s][A
nu: 100%|██████████| 20/20 [00:01<00:00, 10.37it/s][A
gamma:   5%|▌         | 1/20 [00:01<00:36,  1.93s/it]
nu:   0%|          | 0/20 [00:00<?, ?it/s][A
nu:  10%|█         | 2/20 [00:00<00:01, 13.91it/s][A
nu:  20%|██        | 4/20 [00:00<00:02,  6.02it/s][A
nu:  25%|██▌       | 5/20 [00:00<00:02,  5.31it/s][A
nu:  30%|███       | 6/20 [00:01<00:02,  5.53it/s][A
nu:  35%|███▌      | 7/20 [00:01<00:02,  5.74it/s][A
nu:  40%|████      | 8/20 [00:01<00:02,  5.40it/s][A
nu:  45%|████▌     | 9/20 [00:01<00:01,  5.55it/s][A
nu:  50%|█████     | 10/20 [00:01<00:01,  5.46it/s][A
nu:  55%|█████▌    | 11/20 [00:01<00:01,  5.44it/s][A
nu:  60%|██████    | 12/20 

학습된 모델 BestScore: 0.9459459459459458
하이퍼 파라미터=> kernel: rbf , gamma:  0.0001 , nu:  0.15632421052631582 






In [17]:
### 고려한 감마가 가장 작을 때 최고의 성능이기 때문에 더 낮추었을 때 성능이 개선되는지 확인 ###
best_kernel, best_gamma, best_nu = param_op(np.linspace(0.000001, 0.00001, 20), ['rbf'], nu)

gamma:   0%|          | 0/20 [00:00<?, ?it/s]
nu:   0%|          | 0/20 [00:00<?, ?it/s][A
nu:  20%|██        | 4/20 [00:00<00:00, 39.77it/s][A

갱신된 Best_Score:  0.878048780487805
갱신된 Best_Score:  0.9254498714652957
갱신된 Best_Score:  0.9369863013698629



nu:  40%|████      | 8/20 [00:00<00:00, 24.87it/s][A
nu:  55%|█████▌    | 11/20 [00:00<00:00, 15.84it/s][A
nu:  65%|██████▌   | 13/20 [00:00<00:00, 14.08it/s][A
nu:  75%|███████▌  | 15/20 [00:01<00:00, 11.38it/s][A
nu:  85%|████████▌ | 17/20 [00:01<00:00, 10.18it/s][A
nu:  95%|█████████▌| 19/20 [00:01<00:00,  8.67it/s][A
nu: 100%|██████████| 20/20 [00:01<00:00, 11.31it/s][A
gamma:   5%|▌         | 1/20 [00:01<00:33,  1.77s/it]
nu:   0%|          | 0/20 [00:00<?, ?it/s][A
nu:  25%|██▌       | 5/20 [00:00<00:00, 41.84it/s][A

갱신된 Best_Score:  0.9408602150537634



nu:  50%|█████     | 10/20 [00:00<00:00, 18.67it/s][A
nu:  65%|██████▌   | 13/20 [00:00<00:00, 13.80it/s][A
nu:  75%|███████▌  | 15/20 [00:01<00:00, 10.29it/s][A
nu:  85%|████████▌ | 17/20 [00:01<00:00,  9.10it/s][A
nu:  95%|█████████▌| 19/20 [00:01<00:00,  8.32it/s][A
nu: 100%|██████████| 20/20 [00:01<00:00, 10.49it/s][A
gamma:  10%|█         | 2/20 [00:03<00:33,  1.86s/it]
nu:   0%|          | 0/20 [00:00<?, ?it/s][A
nu:  25%|██▌       | 5/20 [00:00<00:00, 44.24it/s][A

갱신된 Best_Score:  0.9459459459459458



nu:  50%|█████     | 10/20 [00:00<00:00, 18.35it/s][A
nu:  65%|██████▌   | 13/20 [00:00<00:00, 14.40it/s][A
nu:  75%|███████▌  | 15/20 [00:01<00:00, 11.98it/s][A
nu:  85%|████████▌ | 17/20 [00:01<00:00, 10.52it/s][A
nu: 100%|██████████| 20/20 [00:01<00:00, 11.61it/s][A
gamma:  15%|█▌        | 3/20 [00:05<00:30,  1.80s/it]
nu:   0%|          | 0/20 [00:00<?, ?it/s][A
nu:  25%|██▌       | 5/20 [00:00<00:00, 35.06it/s][A
nu:  45%|████▌     | 9/20 [00:00<00:00, 21.47it/s][A
nu:  60%|██████    | 12/20 [00:00<00:00, 15.21it/s][A
nu:  70%|███████   | 14/20 [00:00<00:00, 13.02it/s][A
nu:  80%|████████  | 16/20 [00:01<00:00, 10.71it/s][A
nu:  90%|█████████ | 18/20 [00:01<00:00,  9.42it/s][A
nu: 100%|██████████| 20/20 [00:01<00:00, 11.04it/s][A
gamma:  20%|██        | 4/20 [00:07<00:28,  1.81s/it]
nu:   0%|          | 0/20 [00:00<?, ?it/s][A
nu:  25%|██▌       | 5/20 [00:00<00:00, 43.27it/s][A
nu:  50%|█████     | 10/20 [00:00<00:00, 18.68it/s][A
nu:  65%|██████▌   | 13/20 [00:0

학습된 모델 BestScore: 0.9459459459459458
하이퍼 파라미터=> kernel: rbf , gamma:  1.9473684210526315e-06 , nu:  0.15632421052631582 






In [18]:
oc_svm_clf = svm.OneClassSVM(gamma=best_gamma, kernel=best_kernel, nu=best_nu)
oc_svm_clf.fit(train_set)

In [None]:
joblib.dump(oc_svm_clf, './ocsvm_AST.pkl')
joblib.dump(scaler, './robustscaler_AST.pkl')
joblib.dump(pca, './pca_AST.pkl')