In [1]:
import librosa
import librosa.feature
import librosa.display

import matplotlib.pyplot as plt

import numpy as np
from scipy import signal
import scipy.stats
from sklearn.mixture import GaussianMixture, BayesianGaussianMixture
import sklearn.metrics
import sklearn
from scipy.spatial.distance import  mahalanobis
import cv2

from moviepy.editor import VideoClip
from moviepy.video.io.bindings import mplfig_to_npimage

import torch
import torchaudio

import seaborn as sns

import pandas as pd
import os
import re

from FSDDComparisonHelp import FSDD, getMFCC, getTwoMFCCs, showMFCC, compareFeatures

from sklearn import preprocessing
import python_speech_features as mfcc

# Notebook-wide matplotlib defaults: figure resolution of 100 dpi and grid
# lines switched on for every axes.
plt.rcParams.update({
    'figure.dpi': 100,
    'axes.grid': True,
})

In [2]:
def extractMFCCs(y, sr):
    """Compute MFCCs for one signal using this notebook's fixed settings.

    Parameters
    ----------
    y : forwarded unchanged as the ``y`` argument of ``librosa.feature.mfcc``.
    sr : forwarded unchanged as the ``sr`` argument of ``librosa.feature.mfcc``.

    Returns
    -------
    The value returned by ``librosa.feature.mfcc`` when called with
    ``n_mfcc=12``, ``dct_type=3``, ``n_fft=512`` and ``hop_length=256``.
    """
    # Every feature extractor in the notebook goes through this function, so
    # the analysis settings live in exactly one place.
    mfccSettings = {
        'n_mfcc': 12,
        'dct_type': 3,
        'n_fft': 512,
        'hop_length': 256,
    }
    return librosa.feature.mfcc(y=y, sr=sr, **mfccSettings)

def getFeatureList(speakerFilesList : list):
    """Return the MFCCs of every audio file in ``speakerFilesList``.

    Each entry is read with ``librosa.load`` (default arguments) and the
    resulting signal and sampling rate are handed to ``extractMFCCs``.

    Returns
    -------
    list
        One ``extractMFCCs`` result per input file, in input order.
    """
    def _mfccFromFile(path):
        samples, rate = librosa.load(path)
        return extractMFCCs(samples, rate)

    return [_mfccFromFile(path) for path in speakerFilesList]

def getDeltasList(speakerFilesList : list):
    """Return delta features of the standardised MFCCs of every audio file.

    For each file: load it with ``librosa.load`` (default arguments), compute
    its MFCCs with ``extractMFCCs``, standardise them with
    ``sklearn.preprocessing.scale(..., axis=1)`` and take
    ``librosa.feature.delta(..., width=3)`` of the result.

    Returns
    -------
    list
        One delta matrix per input file, in input order.
    """
    def _deltasFromFile(path):
        samples, rate = librosa.load(path)
        # Scaling happens before differentiation, so the deltas describe the
        # standardised coefficients rather than the raw ones.
        standardised = sklearn.preprocessing.scale(extractMFCCs(samples, rate), axis=1)
        return librosa.feature.delta(standardised, width=3)

    return [_deltasFromFile(path) for path in speakerFilesList]

def trimFeatures(collection : list):
    """Crop every feature matrix to the smallest column count in ``collection``.

    Parameters
    ----------
    collection : list
        A list of feature lists; every feature must expose ``shape[1]`` and
        support ``feature[:, :k]`` slicing.

    Returns
    -------
    list
        A new list of lists with the same nesting and order, where each
        feature is sliced to the first ``k`` columns and ``k`` is the minimum
        ``shape[1]`` over all features.

    Raises
    ------
    ValueError
        From ``min`` when ``collection`` or any of its feature lists is empty.
    """
    # Shortest feature inside each group first, then the shortest overall.
    shortestPerGroup = [
        min(matrix.shape[1] for matrix in group)
        for group in collection
    ]
    commonLen = min(shortestPerGroup)

    return [
        [matrix[:, :commonLen] for matrix in group]
        for group in collection
    ]

def meanDist(features, gmm):
    """Average Mahalanobis distance from each feature vector to a single Gaussian.

    Parameters
    ----------
    features : array-like, shape (n_samples, n_dims)
        One feature vector per row.
    gmm : object exposing ``means_`` and ``covariances_``
        Must describe exactly one component with a full covariance matrix
        (for example a fitted ``GaussianMixture(n_components=1)``). Both
        attributes are squeezed to a mean vector and a square matrix.

    Returns
    -------
    float
        Mean of the per-row distances; NaN when ``features`` is empty.

    Raises
    ------
    ValueError
        If the model is not a single full-covariance Gaussian, or if
        ``features`` is not two-dimensional with one column per model
        dimension.
    """
    # atleast_* keeps the one-dimensional case usable after squeeze() has
    # collapsed the arrays to scalars.
    means = np.atleast_1d(np.squeeze(gmm.means_))
    cov = np.atleast_2d(np.squeeze(gmm.covariances_))
    if means.ndim != 1 or cov.shape != (means.size, means.size):
        raise ValueError(
            "meanDist expects a single-component model with a full covariance "
            f"matrix; got means of shape {np.shape(gmm.means_)} and "
            f"covariances of shape {np.shape(gmm.covariances_)}"
        )

    samples = np.asarray(features, dtype=float)
    if samples.size == 0:
        # No vectors to score; the mean of nothing is undefined.
        return np.nan
    if samples.ndim != 2 or samples.shape[1] != means.size:
        raise ValueError(
            f"features must have shape (n_samples, {means.size}); "
            f"got {samples.shape}"
        )

    # The inverse depends only on the model, so compute it once instead of
    # once per feature vector.
    covInv = np.linalg.inv(cov)
    centered = samples - means
    # Row-wise quadratic form (x - mu)^T Sigma^-1 (x - mu) for all rows at once.
    squared = np.einsum('ij,jk,ik->i', centered, covInv, centered)
    # Round-off can push a value that should be zero slightly below it;
    # clamp so the square root never yields NaN for that reason.
    return np.mean(np.sqrt(np.maximum(squared, 0.0)))

In [3]:
# Indices into fsdd.speakers of the two voices being compared.
spkA, spkB = 3, 5
fsdd = FSDD('../Datasets/FSDD/recordings')
print(fsdd.speakers)

# File lists for both speakers; the first argument is presumably the spoken
# digit (0) -- confirm against FSDDComparisonHelp.
filesA, filesB = fsdd.getFilesForDigit(0, spkA=spkA, spkB=spkB)
print(fsdd.speakers[spkA], fsdd.speakers[spkB], sep='\n')

# Static MFCCs, cropped to one common column count across both speakers.
mfccsA, mfccsB = trimFeatures([getFeatureList(filesA), getFeatureList(filesB)])

# Delta features, cropped the same way.
deltasA, deltasB = trimFeatures([getDeltasList(filesA), getDeltasList(filesB)])

['nicolas', 'theo', 'lucas', 'george', 'jackson', 'yweweler']
george
yweweler


In [4]:
# Per recording, stack the static MFCCs on top of the delta features
# (np.vstack concatenates rows, so the column count is unchanged).
featuresListA = [np.vstack(pair) for pair in zip(mfccsA, deltasA)]
featuresListB = [np.vstack(pair) for pair in zip(mfccsB, deltasB)]

# train test split: the last `testsize` recordings of each speaker are held out.
testsize = 5

# Validate before slicing: `[:-0]` would silently produce an empty training
# set, and a value >= the number of recordings would leave nothing to train on.
fewestRecordings = min(len(featuresListA), len(featuresListB))
if not 0 < testsize < fewestRecordings:
    raise ValueError(
        f"testsize must be between 1 and {fewestRecordings - 1}, got {testsize}"
    )

# Recordings are joined column-wise and transposed, so every row of the
# resulting matrices is one column of the stacked per-recording features.
train4A = np.hstack(featuresListA[:-testsize]).T
train4B = np.hstack(featuresListB[:-testsize]).T

test4A = np.hstack(featuresListA[-testsize:]).T
test4B = np.hstack(featuresListB[-testsize:]).T

print(train4A.shape)
print(train4B.shape)
print(test4A.shape)
print(test4B.shape)

# Fixed seed so the fits are reproducible on Restart & Run All, including
# when n_components is later raised above 1.
gmmSeed = 0
gm4A = GaussianMixture(n_components=1, max_iter=200, n_init=3, random_state=gmmSeed).fit(train4A)
gm4B = GaussianMixture(n_components=1, max_iter=200, n_init=3, random_state=gmmSeed).fit(train4B)

print('Means')
print(gm4A.means_.shape)
print(gm4B.means_.shape)

print('Covariances')
print(gm4A.covariances_.shape)
print(gm4B.covariances_.shape)

(1080, 24)
(1080, 24)
(120, 24)
(120, 24)
Means
(1, 24)
(1, 24)
Covariances
(1, 24, 24)
(1, 24, 24)


In [6]:
# Mean Mahalanobis distance for every features/model pairing, reported for the
# training split first and the held-out split second.
# A label "X-Y" means: features of speaker X scored against the model of speaker Y.
for heading, framesA, framesB in (
    ('Mahalanobis train', train4A, train4B),
    ('Mahalanobis test', test4A, test4B),
):
    print(heading)
    print('A-A: ', meanDist(framesA, gm4A))
    print('B-A: ', meanDist(framesB, gm4A))
    print('A-B: ', meanDist(framesA, gm4B))
    print('B-B: ', meanDist(framesB, gm4B))


Mahalanobis train
A-A:  4.5987666266370555
B-A:  9.449667751255243
A-B:  11.097810035631527
B-B:  4.621195746324019
Mahalanobis test
A-A:  4.323347672986163
B-A:  9.14573714765535
A-B:  10.350384495543427
B-B:  4.091851017812931
