In [123]:
import torch
import numpy as np
import pandas as pd
import torchaudio
from sklearn.metrics import roc_curve, auc
from sklearn.mixture import GaussianMixture
from sklearn.decomposition import PCA
from torch.utils.data import random_split, DataLoader
from sklearn.model_selection import train_test_split



from customDatasets.audioDataset import AudioDataset

In [124]:
# free gpu
if torch.cuda.is_available():
    torch.cuda.empty_cache()

In [125]:
#Load training and test dataset

def set_seed(seed = 42):
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

set_seed(42)

data_path = "./data/train/"
data_path_test = "./data/test/"


meta_train_df = pd.read_csv("./data/train.csv")
meta_test_df = pd.read_csv("./data/test.csv")

train_df = meta_train_df[['filename', 'is_normal', 'machine_id']]
range_train, range_test = train_test_split(range(len(train_df)), test_size=0.2, train_size=0.8, random_state=None, shuffle=True, stratify=meta_train_df['machine_id'])

val_df = train_df.iloc[range_test].reset_index(drop=True)

train_df = train_df.iloc[range_train].reset_index(drop=True)

test_df = meta_test_df[['filename', 'is_normal', 'machine_id']]

train_audios = []
val_audios = []
test_audios = []
test_labels = []


for idx in range(len(train_df)):
    audio_file = data_path + train_df.loc[idx, 'filename']
    aud, sr = torchaudio.load(audio_file) 
    train_audios.append(aud)

for idx in range(len(val_df)):
    audio_file = data_path + val_df.loc[idx, 'filename']
    aud, sr = torchaudio.load(audio_file)
    val_audios.append(aud)

for idx in range(len(test_df)):
    audio_file = data_path_test + test_df.loc[idx, 'filename']
    aud, sr = torchaudio.load(audio_file) 
    test_audios.append(aud)
    test_labels.append(test_df.loc[idx, 'is_normal'])

In [126]:
def split_sgram (sgram, n_split=10):
    sgrams = []
    shape = sgram.shape[0]
    for idx in range(n_split):
        len = int(shape/10)
        new_sgram = sgram[len*idx:len*idx + len, :]
        sgrams.append(new_sgram)
    return sgrams

In [127]:
from torchaudio import transforms
spectrogram = False
if spectrogram:
    mel = transforms.MelSpectrogram(n_mels=128, n_fft=1000, hop_length=501)
    ampl = transforms.AmplitudeToDB(top_db=80)

    train_dataset = []
    val_dataset = []
    test_dataset = []



    #Data preprocessing
    for idx in range(len(train_audios)):
        spec = mel(train_audios[idx])
        spec = ampl(spec).mT
        specs = split_sgram(spec.squeeze(0))
        for spec in specs:
            train_dataset.append(spec.reshape(-1))

    for idx in range(len(val_audios)):
        spec = mel(val_audios[idx])
        spec = ampl(spec).mT
        specs = split_sgram(spec.squeeze(0))
        for spec in specs:
            val_dataset.append(spec.reshape(-1))

    for idx in range(len(test_audios)):
        spec = mel(test_audios[idx])
        spec = ampl(spec).mT
        specs = split_sgram(spec.squeeze(0))
        test_dataset.append([spec.reshape(-1) for spec in specs])


In [128]:
import librosa
import numpy as np

def extract_mfccs(audio, n_mfcc=13, sr=16000, hop_length=512, n_fft=2048):
    
    # Extract MFCCs
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc, hop_length=hop_length, n_fft=n_fft)
    
    # Compute mean and variance for each MFCC coefficient over all frames
    mfccs_mean = np.mean(mfccs, axis=1)
    mfccs_var = np.var(mfccs, axis=1)
    
    # Combine mean and variance into a single feature vector
    mfccs_features = np.concatenate((mfccs_mean, mfccs_var))
    
    return mfccs_features

In [129]:
mfcc = True
if mfcc:

    train_dataset = []
    val_dataset = []
    test_dataset = []

    for idx in range(len(train_audios)):
        features = extract_mfccs(np.array(train_audios[idx].squeeze(0)))
        train_dataset.append(features)

    for idx in range(len(val_audios)):
        features = extract_mfccs(np.array(val_audios[idx].squeeze(0)))
        val_dataset.append(features)

    for idx in range(len(test_audios)):
        features = extract_mfccs(np.array(test_audios[idx].squeeze(0)))
        test_dataset.append(features)

In [130]:
train_dataset = np.array(train_dataset)
val_dataset = np.array(val_dataset)
test_dataset = np.array(test_dataset)
test_labels = np.array(test_labels)

print(train_dataset.shape)
print(val_dataset.shape)
print(test_dataset.shape)

(2370, 52)
(0,)
(1101, 52)


In [131]:
#pca = PCA(n_components=10)
#pca.fit(train_dataset)

In [132]:
#train_dataset = pca.transform(train_dataset)
#test_dataset = pca.transform(test_dataset)

In [133]:
frames = False
if not frames:
    for covariance_type in ['full', 'tied', 'diag']:
        for n_components in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]:
            scores = []
            gmm = GaussianMixture(n_components=n_components)
            gmm.fit(train_dataset)
            scores = gmm.score_samples(test_dataset)
            print(scores.shape)
            fpr, tpr, _ = roc_curve(test_labels, scores, pos_label=1)
            roc_auc = auc(fpr, tpr)
            val_likelihood = gmm.score(val_dataset)
            print(f'{covariance_type}, {n_components}, AUC: {roc_auc}, Val Likelihood: {val_likelihood}')

(1101,)
full, 1, AUC: 0.858501872659176
(1101,)
full, 2, AUC: 0.8548938826466919
(1101,)
full, 3, AUC: 0.8588472742405328
(1101,)
full, 4, AUC: 0.857070328755722
(1101,)
full, 5, AUC: 0.8718934665002082
(1101,)
full, 6, AUC: 0.8696088223054514
(1101,)
full, 7, AUC: 0.871552226383687
(1101,)
full, 8, AUC: 0.8820640865584687
(1101,)
full, 9, AUC: 0.8535330836454432
(1101,)
full, 10, AUC: 0.8600499375780276
(1101,)
full, 11, AUC: 0.8474864752392841
(1101,)
full, 12, AUC: 0.8638160632542656
(1101,)
full, 13, AUC: 0.8415272575946735
(1101,)
full, 14, AUC: 0.8326799833541407
(1101,)
full, 15, AUC: 0.8374739908447774
(1101,)
full, 16, AUC: 0.8489596337910943
(1101,)
tied, 1, AUC: 0.858501872659176
(1101,)
tied, 2, AUC: 0.8620016645859342
(1101,)
tied, 3, AUC: 0.8592925509779443
(1101,)
tied, 4, AUC: 0.857070328755722
(1101,)
tied, 5, AUC: 0.8730586766541824
(1101,)
tied, 6, AUC: 0.8737786100707448
(1101,)
tied, 7, AUC: 0.8747523928422805
(1101,)
tied, 8, AUC: 0.8574157303370787
(1101,)
tied, 

In [134]:
if frames:
    for covariance_type in ['full', 'tied', 'diag']:
        for n_components in [1, 2, 3, 4]:
            scores = []
            gmm = GaussianMixture(n_components=n_components)
            gmm.fit(train_dataset)
            for test_sample in test_dataset:
                test = np.array(test_sample)
                #test = pca.transform(test)
                predictions = gmm.score_samples(test)
                scores.append(np.min(predictions))
            print(scores)
            fpr, tpr, _ = roc_curve(test_labels, scores, pos_label=1)
            roc_auc = auc(fpr, tpr)
        print(f'{covariance_type}, {n_components}, AUC: {roc_auc}')