In [None]:
import numpy as np
import scipy as sp
from scipy.stats import mode
import pandas as pd
import matplotlib.pyplot as plt
from time import time

import librosa
import librosa.display

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.ensemble import BaggingClassifier
from sklearn import linear_model
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC

In [None]:
def load_sound_file(file_path):
    """Decode an audio file and return only its sample array.

    The file is read with ``librosa.load(..., sr=None)``; the sample rate that
    librosa returns next to the samples is discarded.
    """
    samples = librosa.load(file_path, sr=None)[0]
    return samples

def extract_feature(file_name): # Late fusion
    """Compute frame-level MFCCs and their first-order deltas for one file.

    The number of coefficients is read from the module-level ``n_mfcc``.

    Args:
        file_name: path of the audio file, passed to ``librosa.load``.

    Returns:
        ``(mfcc, mfcc_delta)``: two arrays with one row per analysis frame and
        one column per MFCC coefficient.

    NOTE(review): with librosa's default delta mode, clips with fewer than
    ``width`` (5) frames are expected to be rejected -- confirm against the
    installed librosa version.
    """
    X, sample_rate = librosa.load(file_name, sr=None)
    mfcc = librosa.feature.mfcc(y=X, sr=sample_rate, n_fft=4096, hop_length=4096, n_mfcc=n_mfcc).T
    # After the transpose, frames run along axis 0. The delta must be taken
    # along that (time) axis; the default axis=-1 would difference neighbouring
    # coefficients instead of neighbouring frames.
    mfcc_delta = librosa.feature.delta(mfcc, width=5, order=1, axis=0)
    return mfcc, mfcc_delta

def parse_audio_files(file_names, file_labels, extractor=None):
    """Stack frame-level features and labels for a list of audio files.

    Args:
        file_names: iterable of audio file paths.
        file_labels: iterable of numeric labels, paired with ``file_names``
            by position (``zip`` stops at the shorter of the two).
        extractor: callable ``extractor(file_name) -> (mfcc, mfcc_delta)``.
            Defaults to ``extract_feature``.

    Returns:
        ``(features, features_delta, labels)``: the MFCC rows and delta rows
        of every successfully parsed file stacked vertically (float64), and
        an integer vector that repeats each file's label once per frame.
        With no parsed files the feature arrays have shape ``(0, n_mfcc)``.

    Files whose extraction raises are reported on stdout and skipped.
    """
    if extractor is None:
        extractor = extract_feature

    mfcc_chunks, delta_chunks, label_chunks = [], [], []
    for fn, fl in zip(file_names, file_labels):
        try:
            mfcc, mfcc_delta = extractor(fn)
        except Exception as e:
            # Best-effort parsing: report the failure (with its cause) and move on.
            print("Error encountered while parsing file: ", fn, "-", e)
            continue
        mfcc_chunks.append(mfcc)
        delta_chunks.append(mfcc_delta)
        label_chunks.append(fl * np.ones(mfcc.shape[0]))

    if mfcc_chunks:
        mfcc_width = mfcc_chunks[0].shape[1]
        delta_width = delta_chunks[0].shape[1]
    else:
        mfcc_width = delta_width = n_mfcc

    # A leading empty float64 block keeps the result float64 and well-shaped
    # even when nothing was parsed; stacking once avoids repeated copying.
    features = np.vstack([np.empty((0, mfcc_width))] + mfcc_chunks)
    features_delta = np.vstack([np.empty((0, delta_width))] + delta_chunks)
    labels = np.concatenate([np.empty(0)] + label_chunks).astype(int)
    return features, features_delta, labels

def predict_proba(clf1, clf2, X_val, extractor=None):
    """Build one late-fusion score row per file from two frame classifiers.

    For every file, the class probabilities that ``clf1`` assigns to the MFCC
    frames and that ``clf2`` assigns to the delta frames are summed over
    frames and concatenated into one row.

    Args:
        clf1: fitted classifier exposing ``predict_proba`` (MFCC frames).
        clf2: fitted classifier exposing ``predict_proba`` (delta frames).
        X_val: iterable of audio file paths.
        extractor: callable ``extractor(path) -> (mfcc, mfcc_delta)``.
            Defaults to ``extract_feature``.

    Returns:
        Integer array with one row per file; the summed scores are truncated
        to integers. For empty ``X_val`` the shape is ``(0, 30)``.

    Note: a later cell defines another function with this same name;
    whichever definition ran last is the one in effect.
    """
    if extractor is None:
        extractor = extract_feature

    rows = []
    for x in X_val:
        x_mfcc, x_mfcc_delta = extractor(x)
        y_pred_proba1 = np.sum(clf1.predict_proba(x_mfcc), axis=0).reshape(-1)
        y_pred_proba2 = np.sum(clf2.predict_proba(x_mfcc_delta), axis=0).reshape(-1)
        rows.append(np.hstack([y_pred_proba1, y_pred_proba2]))

    if not rows:
        return np.empty((0, 30), dtype=int)
    # The built-in int replaces the deprecated np.int alias; truncation is unchanged.
    return np.vstack(rows).astype(int)




In [None]:
def plot_wave(title, raw_sound, sr=16000):
    """Plot the waveform of ``raw_sound`` in a fresh 20x6 figure.

    Args:
        title: figure title.
        raw_sound: sequence of audio samples.
        sr: sample rate used for the time axis (defaults to 16000).
    """
    plt.close('all')
    plt.figure(figsize=(20,6))
    plt.title(title)
    # Newer librosa releases provide waveshow in place of waveplot; use
    # whichever the installed version offers.
    draw_wave = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot
    draw_wave(np.array(raw_sound), sr=sr)
    plt.show()
    
def plot_specgram(title, raw_sound, sr=16000):
    """Plot a matplotlib spectrogram of ``raw_sound`` in a fresh 20x6 figure.

    Args:
        title: figure title.
        raw_sound: sequence of audio samples.
        sr: sampling frequency passed to ``plt.specgram`` (defaults to 16000).
    """
    plt.close('all')
    plt.figure(figsize=(20,6))
    plt.title(title)
    plt.specgram(np.array(raw_sound), Fs=sr)
    plt.show()

def plot_log_power_specgram(title, raw_sound, sr=16000):
    """Plot the log-power STFT spectrogram of ``raw_sound``.

    Args:
        title: figure title.
        raw_sound: sequence of audio samples.
        sr: sample rate used to scale the plot axes (defaults to 16000,
            the rate the other plotting helpers assume).
    """
    plt.close('all')
    plt.figure(figsize=(20,6))
    plt.title(title)
    # Power spectrogram in dB relative to its peak value.
    power = np.abs(librosa.stft(np.asarray(raw_sound)))**2
    D = librosa.power_to_db(power, ref=np.max)
    librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='log')
    # The former extra plt.specgram(np.array(f), ...) call referenced an
    # undefined name `f` and drew over the plot above, so it is gone.
    plt.show()

In [None]:
from functools import partial
from pathlib import Path
from multiprocessing import Pool
import os
import shutil
import numpy as np
import pandas as pd
import librosa
from scipy.io import wavfile
from tqdm import tqdm_notebook as tqdm
import torch.nn.functional as F
from fastai.basic_data import DatasetType


In [None]:
# Root of the training release and the two per-class audio folders.
DATA = Path('/content/drive/MyDrive/ADReSS-IS2020-train')
NSYNTH_AUDIO = DATA / 'ADReSS-IS2020-data'
TRAIN_AUDIO_PATH = NSYNTH_AUDIO / 'train/Full_wave_enhanced_audio/cd'
TRAIN_AUDIO_PATH2 = NSYNTH_AUDIO / 'train/Full_wave_enhanced_audio/cc'

# Destination folders for image exports.
NSYNTH_IMAGES = DATA / 'ADReSS2020_images'
TRAIN_IMAGE_PATH = NSYNTH_IMAGES / 'train'

# Names of the entries in the cd folder whose file name contains an 'S'.
train_acoustic_fnames = [
    entry_name
    for entry_name in (entry.name for entry in TRAIN_AUDIO_PATH.iterdir())
    if 'S' in entry_name
]

len(train_acoustic_fnames)

54

In [None]:
# Rebuild the combined list (cd names followed by cc names) from scratch.
# list.append would insert the whole cc list as one nested element, and
# mutating the existing list would add the cc names again on every re-run.
train_acoustic_fnames = (
    [f.name for f in TRAIN_AUDIO_PATH.iterdir() if 'S' in f.name]
    + [f.name for f in TRAIN_AUDIO_PATH2.iterdir() if 'S' in f.name]
)


In [None]:
# File names found in the cd training folder (names containing 'S').
fcd = [
    entry_name
    for entry_name in (entry.name for entry in TRAIN_AUDIO_PATH.iterdir())
    if 'S' in entry_name
]

len(fcd)

# File names found in the cc training folder (names containing 'S').
fcc = [
    entry_name
    for entry_name in (entry.name for entry in TRAIN_AUDIO_PATH2.iterdir())
    if 'S' in entry_name
]

len(fcc)

54

In [None]:
# Disabled: this cell called fnames_cc.append(fnames_cd), but neither name is
# defined in any visible cell, so it raised NameError on a fresh kernel.
# The cc and cd file lists are combined into train_fnames in a later cell.

In [None]:
# Number of file names collected from the cc training folder.
len(fcc)

54

In [None]:
# Control (cc) file names first, then dementia (cd) file names. The label
# cell builds its 'Control' / 'Dementia' list in the same order.
train_fnames = [*fcc, *fcd]

In [None]:
# Display the combined list of training file names.
train_fnames

['S001.wav',
 'S002.wav',
 'S003.wav',
 'S004.wav',
 'S005.wav',
 'S006.wav',
 'S007.wav',
 'S009.wav',
 'S011.wav',
 'S012.wav',
 'S013.wav',
 'S015.wav',
 'S016.wav',
 'S017.wav',
 'S018.wav',
 'S019.wav',
 'S020.wav',
 'S021.wav',
 'S024.wav',
 'S025.wav',
 'S027.wav',
 'S028.wav',
 'S029.wav',
 'S030.wav',
 'S032.wav',
 'S033.wav',
 'S034.wav',
 'S035.wav',
 'S036.wav',
 'S038.wav',
 'S039.wav',
 'S040.wav',
 'S041.wav',
 'S043.wav',
 'S048.wav',
 'S049.wav',
 'S051.wav',
 'S052.wav',
 'S055.wav',
 'S056.wav',
 'S058.wav',
 'S059.wav',
 'S061.wav',
 'S062.wav',
 'S063.wav',
 'S064.wav',
 'S067.wav',
 'S068.wav',
 'S070.wav',
 'S071.wav',
 'S072.wav',
 'S073.wav',
 'S076.wav',
 'S077.wav',
 'S079.wav',
 'S080.wav',
 'S081.wav',
 'S082.wav',
 'S083.wav',
 'S084.wav',
 'S086.wav',
 'S087.wav',
 'S089.wav',
 'S090.wav',
 'S092.wav',
 'S093.wav',
 'S094.wav',
 'S095.wav',
 'S096.wav',
 'S097.wav',
 'S100.wav',
 'S101.wav',
 'S103.wav',
 'S104.wav',
 'S107.wav',
 'S108.wav',
 'S110.wav',

In [None]:
def read_file(filename, path='', sample_rate=None, trim=False):
    filename = Path(path) / filename
    file_sr, data = wavfile.read(filename)
    if data.dtype == np.int16:
        data = np.float32(data) / np.iinfo(np.int16).max
    elif data.dtype != np.float32:
        raise OSError('Encounted unexpected dtype: {}'.format(data.dtype))
    if sample_rate is not None and sample_rate != file_sr:
        if len(data) > 0:
            data = librosa.core.resample(data, file_sr, sample_rate, res_type='kaiser_fast')
        file_sr = sample_rate
    if trim and len(data) > 1:
        data = librosa.effects.trim(data, top_db=40)[0]
    return data, file_sr

In [None]:
def mfcc_extraction(fname, src_path):
    """Compute frame-level MFCCs and their first-order deltas for one WAV file.

    The file is loaded with ``read_file(fname, src_path)`` and the number of
    coefficients is read from the module-level ``n_mfcc``.

    Args:
        fname: WAV file name.
        src_path: directory containing ``fname``.

    Returns:
        ``(mfcc, mfcc_delta)``: two arrays with one row per analysis frame and
        one column per MFCC coefficient.

    NOTE(review): with librosa's default delta mode, clips with fewer than
    ``width`` (5) frames are expected to be rejected -- confirm against the
    installed librosa version.
    """
    X, sample_rate = read_file(fname, src_path)
    mfcc = librosa.feature.mfcc(y=X, sr=sample_rate, n_fft=4096, hop_length=4096, n_mfcc=n_mfcc).T
    # After the transpose, frames run along axis 0. The delta must be taken
    # along that (time) axis; the default axis=-1 would difference neighbouring
    # coefficients instead of neighbouring frames.
    mfcc_delta = librosa.feature.delta(mfcc, width=5, order=1, axis=0)
    return mfcc, mfcc_delta

In [None]:
import glob
import numpy as np
control = glob.glob('/content/drive/MyDrive/ADReSS-IS2020-train/ADReSS-IS2020-data/train/Full_wave_enhanced_audio/cc/*.*')
dementia = glob.glob('/content/drive/MyDrive/ADReSS-IS2020-train/ADReSS-IS2020-data/train/Full_wave_enhanced_audio/cd/*.*')

import tensorflow as tf

# The labels are paired with train_fnames purely by position (cc files first,
# then cd files). The glob pattern here differs from the "'S' in name" filter
# used for the file names, so check that both views see the same file counts.
assert len(control) == len(fcc) and len(dementia) == len(fcd), (
    'label counts (%d control, %d dementia) do not match file lists (%d cc, %d cd)'
    % (len(control), len(dementia), len(fcc), len(fcd))
)

# One label per file: every control file first, then every dementia file.
labels = ['Control'] * len(control) + ['Dementia'] * len(dementia)

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Display the per-file label list.
labels

['Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Control',
 'Dementia',
 'Dementia',
 'Dementia',
 'Dementia',
 'Dementia',
 'Dementia',
 'Dementia',
 'Dementia',
 'Dementia',
 'Dementia',
 'Dementia',
 'Dementia',
 'Dementia',
 'Dementia',
 'Dementia',
 'Dementia',
 'Dementia',
 'Dementia',
 'Dementia',
 'Dementia',
 'Dementia',
 'Dementia',
 'Dementia',
 'Dementia',
 'Dementia',
 'Dementia',
 'Dementia',
 

In [None]:
# Integer-encode the string labels; only the codes array (element 0 of the
# factorize result) is kept.
labels_train = pd.factorize(labels)[0]

In [None]:
# Display the integer-encoded training labels.
labels_train

array([0, 0, 0, 0, ..., 1, 1, 1, 1])

In [None]:
# String labels as an array, then integer-encoded and one-hot expanded.
train_labels = np.array(labels)

import keras
from sklearn import preprocessing

lb = preprocessing.LabelEncoder()
y_train = tf.keras.utils.to_categorical(
    lb.fit_transform(train_labels)
)

In [None]:
def extract_feature_means(audio_file_path: str) -> pd.DataFrame:
    """Summarise one audio file as a single-row frame of MFCC statistics.

    The file is loaded with ``librosa.load`` (no explicit ``sr``, so librosa's
    default applies), leading and trailing silence is trimmed, and the per-
    coefficient means produced by ``extract_mfcc_feature_means`` are merged
    onto the file name.

    Args:
        audio_file_path: path of the audio file.

    Returns:
        One-row DataFrame with a ``file_name`` column followed by the
        ``mfccN`` / ``mfcc_delta_N`` mean columns.
    """
    # config settings
    number_of_mfcc = 40

    y, sr = librosa.load(audio_file_path)

    # Trim leading and trailing silence from the signal.
    signal, _ = librosa.effects.trim(y)

    # File-level features other than MFCCs. Only the join key is populated;
    # previously this dict was never created (and the STFT setup read an
    # undefined config object `c`), so the function raised NameError.
    audio_features = {
        "file_name": audio_file_path,
    }

    # extract mfcc feature
    mfcc_df = extract_mfcc_feature_means(audio_file_path,
                                         signal,
                                         sample_rate=sr,
                                         number_of_mfcc=number_of_mfcc)

    df = pd.DataFrame.from_records(data=[audio_features])
    df = pd.merge(df, mfcc_df, on='file_name')
    return df

def extract_mfcc_feature_means(audio_file_name: str,
                          signal: np.ndarray,
                          sample_rate: int,
                          number_of_mfcc: int) -> pd.DataFrame:
    """Return the mean of each MFCC and MFCC-delta coefficient for one signal.

    Args:
        audio_file_name: stored in the ``file_name`` column of the result.
        signal: audio samples passed to ``librosa.feature.mfcc``.
        sample_rate: sample rate of ``signal``.
        number_of_mfcc: number of MFCC coefficients to compute.

    Returns:
        One-row DataFrame with ``file_name`` followed, for each coefficient
        index ``i``, by ``mfcc<i>`` and ``mfcc_delta_<i>``.
    """
    mfcc_alt = librosa.feature.mfcc(y=signal, sr=sample_rate,
                                    n_mfcc=number_of_mfcc)
    delta = librosa.feature.delta(mfcc_alt)
    # The second-order delta was computed here before but never used, so it
    # is no longer calculated.

    mfcc_features = {
        "file_name": audio_file_name,
    }

    for i in range(number_of_mfcc):
        # Row i of each matrix is coefficient i across all frames.
        mfcc_features["mfcc{}".format(i)] = np.mean(mfcc_alt[i])
        mfcc_features["mfcc_delta_{}".format(i)] = np.mean(delta[i])

    return pd.DataFrame.from_records(data=[mfcc_features])

In [None]:
# TODO: this loop was left without a body, which is a SyntaxError. `pass`
# keeps the cell runnable until the intended per-file work is written.
for i in train_fnames:
    pass

In [None]:
def get_tdata(file_names, file_labels,
              src_path='/content/drive/MyDrive/ADReSS-IS2020-train/ADReSS-IS2020-data/train/Full_wave_enhanced_audio/all',
              extractor=None):
    """Stack frame-level MFCC / delta features and labels for a set of files.

    Args:
        file_names: iterable of WAV file names located in ``src_path``.
        file_labels: iterable of numeric labels, paired with ``file_names``
            by position (``zip`` stops at the shorter of the two).
        src_path: directory holding the audio files.
        extractor: callable ``extractor(file_name, src_path) -> (mfcc, mfcc_delta)``.
            Defaults to ``mfcc_extraction``.

    Returns:
        ``(features, features_delta, labels)``: the MFCC rows and delta rows
        of all files stacked vertically (float64), and an integer vector that
        repeats each file's label once per frame. With no input files the
        feature arrays have shape ``(0, n_mfcc)``.
    """
    if extractor is None:
        extractor = mfcc_extraction

    mfcc_chunks, delta_chunks, label_chunks = [], [], []
    for fn, fl in zip(file_names, file_labels):
        mfcc, mfcc_delta = extractor(fn, src_path)
        mfcc_chunks.append(mfcc)
        delta_chunks.append(mfcc_delta)
        label_chunks.append(fl * np.ones(mfcc.shape[0]))

    if mfcc_chunks:
        mfcc_width = mfcc_chunks[0].shape[1]
        delta_width = delta_chunks[0].shape[1]
    else:
        mfcc_width = delta_width = n_mfcc

    # A leading empty float64 block keeps the result float64 and well-shaped
    # for empty input; stacking once avoids re-copying the growing array.
    features = np.vstack([np.empty((0, mfcc_width))] + mfcc_chunks)
    features_delta = np.vstack([np.empty((0, delta_width))] + delta_chunks)
    # The built-in int replaces the deprecated np.int alias.
    labels = np.concatenate([np.empty(0)] + label_chunks).astype(int)
    return features, features_delta, labels

In [None]:
# Number of MFCC coefficients; the feature-extraction functions read this
# module-level name when they are called.
n_mfcc=40

# Frame-level MFCC matrix, delta matrix and per-frame labels for the training files.
x_train1, x_train2,ytrain=get_tdata(train_fnames,labels_train)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  # Remove the CWD from sys.path while we load stuff.


In [None]:
# Shape of the stacked MFCC matrix (rows are frames across all training files).
x_train1.shape

(83886, 40)

In [None]:
# Shape of the stacked MFCC-delta matrix.
x_train2.shape

(83886, 40)

In [None]:
# Shape of the per-frame label vector.
ytrain.shape

(83886,)

In [None]:
# Spot-check a single per-frame label.
ytrain[5]

0

In [None]:
# Train classifier - MLP
# One frame-level MLP per feature stream: clf1 on MFCCs, clf2 on MFCC deltas.
# random_state pins the weight initialisation and shuffling so that a
# Restart & Run All reproduces the same models.
# NOTE(review): the features are fed in unscaled although StandardScaler is
# imported -- confirm whether scaling was intended.
print ("Training classifier...")

clf1 = MLPClassifier(hidden_layer_sizes=(40,), alpha=0.01, random_state=0)
clf1.fit(x_train1, ytrain)

clf2 = MLPClassifier(hidden_layer_sizes=(40,), alpha=1, random_state=0)
clf2.fit(x_train2, ytrain)

Training classifier...


MLPClassifier(alpha=1, hidden_layer_sizes=40)

In [None]:
testp='/content/drive/MyDrive/ADReSS-IS2020-test/ADReSS-IS2020-data/test/Full_wave_enhanced_audio/all'
def predict_proba(clf1, clf2, X_val, src_path=None, extractor=None):
    """Build one two-column fusion row per file from two frame classifiers.

    For every file, ``clf1`` labels each MFCC frame and ``clf2`` labels each
    delta frame; the predicted labels are summed over frames. Despite the
    function's name, the hard ``predict`` outputs are used, so with 0/1
    labels each value is the number of frames assigned to class 1.

    Args:
        clf1: fitted classifier applied to MFCC frames.
        clf2: fitted classifier applied to MFCC-delta frames.
        X_val: iterable of WAV file names.
        src_path: directory holding the files; defaults to the module-level
            ``testp``.
        extractor: callable ``extractor(file_name, src_path) -> (mfcc, mfcc_delta)``.
            Defaults to ``mfcc_extraction``.

    Returns:
        Integer array of shape ``(len(X_val), 2)``.
    """
    if src_path is None:
        src_path = testp
    if extractor is None:
        extractor = mfcc_extraction

    rows = []
    for x in X_val:
        x_mfcc, x_mfcc_delta = extractor(x, src_path)
        y_pred_proba1 = np.sum(clf1.predict(x_mfcc), axis=0).reshape(-1)
        y_pred_proba2 = np.sum(clf2.predict(x_mfcc_delta), axis=0).reshape(-1)
        rows.append(np.hstack([y_pred_proba1, y_pred_proba2]))

    if not rows:
        return np.empty((0, 2), dtype=int)
    # The built-in int replaces the deprecated np.int alias.
    return np.vstack(rows).astype(int)

In [None]:
#pred_proba_val = predict_proba(clf1, clf2, files_val)
# Per-file fusion features for the test recordings.
# NOTE: test_fnames is assigned in a cell further down this notebook, so that
# cell has to run before this one.
pred_proba_test = predict_proba(clf1, clf2, test_fnames)
print (pred_proba_test.shape)
#print (labels_val.shape)

(48, 2)


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  # Remove the CWD from sys.path while we load stuff.


In [None]:
# Score calibration: a logistic model fuses the two per-file frame counts.
# It is fitted on the *training* recordings. Fitting it on pred_proba_test /
# labels_test would mean the reported test accuracy is measured on the very
# data (and labels) the model was trained on.
# NOTE(review): the MLPs were trained on these same recordings; a held-out
# validation split would give cleaner fusion inputs.
t0 = time()
train_dir = '/content/drive/MyDrive/ADReSS-IS2020-train/ADReSS-IS2020-data/train/Full_wave_enhanced_audio/all'
fusion_rows = []
for fn in train_fnames:
    fn_mfcc, fn_mfcc_delta = mfcc_extraction(fn, train_dir)
    # Same per-file features that predict_proba computes for the test files.
    fusion_rows.append([np.sum(clf1.predict(fn_mfcc)), np.sum(clf2.predict(fn_mfcc_delta))])
pred_proba_train = np.array(fusion_rows, dtype=int)

logistic = linear_model.LogisticRegression(C=0.0001)
logistic.fit(pred_proba_train, labels_train)
print("Done in %0.3fs." % (time() - t0))

LogisticRegression(C=0.0001)

In [None]:
# Predict with the logistic model fitted in the calibration cell. Creating a
# new LogisticRegression here would discard that fit, and predict() on an
# unfitted model raises.
y_test_pred = logistic.predict(pred_proba_test)
np.savetxt('y_test_pred_mfcc_delta_mfcc_mlp_logistic.txt', y_test_pred, fmt='%d')
print(y_test_pred)

In [None]:
# File-level class predictions from the fusion model.
y_test_pred = logistic.predict(pred_proba_test)

In [None]:

from sklearn.metrics import accuracy_score
# NOTE(review): plot_confusion_matrix is not used in this cell and is believed
# to be absent from recent scikit-learn releases -- confirm the target version.
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import classification_report
# Fraction of test files whose predicted label matches labels_test.
print('Accuracy: {:.2f}'.format(accuracy_score(labels_test, y_test_pred)))

Accuracy: 0.69


In [None]:
# Root of the test release and its two per-class audio folders.
# Note: this rebinds DATA, which an earlier cell pointed at the training release.
DATA = Path('/content/drive/MyDrive/ADReSS-IS2020-test')
TEST_AUDIO_PATH = DATA / 'ADReSS-IS2020-data/test/Full_wave_enhanced_audio/cd'
TEST_AUDIO_PATH2 = DATA / 'ADReSS-IS2020-data/test/Full_wave_enhanced_audio/cc'
TEST_IMAGE_PATH = DATA / 'ADReSS2020_images/test'

# File names in the cd test folder (names containing 'S').
test_cd = [
    entry_name
    for entry_name in (entry.name for entry in TEST_AUDIO_PATH.iterdir())
    if 'S' in entry_name
]
len(test_cd)

# File names in the cc test folder (names containing 'S').
test_cc = [
    entry_name
    for entry_name in (entry.name for entry in TEST_AUDIO_PATH2.iterdir())
    if 'S' in entry_name
]


24

In [None]:
# Number of file names collected from the cc test folder.
len(test_cc)

24

In [None]:
# Control (cc) file names first, then dementia (cd) file names. The test-label
# cell builds its 'Control' / 'Dementia' list in the same order.
test_fnames = [*test_cc, *test_cd]

In [None]:
# Display the combined list of test file names.
test_fnames


['S177.wav',
 'S161.wav',
 'S180.wav',
 'S201.wav',
 'S199.wav',
 'S166.wav',
 'S197.wav',
 'S193.wav',
 'S186.wav',
 'S175.wav',
 'S184.wav',
 'S178.wav',
 'S174.wav',
 'S163.wav',
 'S202.wav',
 'S170.wav',
 'S204.wav',
 'S160.wav',
 'S207.wav',
 'S196.wav',
 'S206.wav',
 'S183.wav',
 'S172.wav',
 'S195.wav',
 'S189.wav',
 'S185.wav',
 'S168.wav',
 'S179.wav',
 'S203.wav',
 'S173.wav',
 'S200.wav',
 'S190.wav',
 'S165.wav',
 'S194.wav',
 'S192.wav',
 'S187.wav',
 'S182.wav',
 'S181.wav',
 'S167.wav',
 'S164.wav',
 'S176.wav',
 'S162.wav',
 'S198.wav',
 'S191.wav',
 'S169.wav',
 'S188.wav',
 'S171.wav',
 'S205.wav']

In [None]:
import glob
import numpy as np
tcontrol = glob.glob('/content/drive/MyDrive/ADReSS-IS2020-test/ADReSS-IS2020-data/test/Full_wave_enhanced_audio/cc/*.*')
tdementia = glob.glob('/content/drive/MyDrive/ADReSS-IS2020-test/ADReSS-IS2020-data/test/Full_wave_enhanced_audio/cd/*.*')

import tensorflow as tf

# The labels are paired with test_fnames purely by position (cc files first,
# then cd files). The glob pattern here differs from the "'S' in name" filter
# used for the file names, so check that both views see the same file counts.
assert len(tcontrol) == len(test_cc) and len(tdementia) == len(test_cd), (
    'label counts (%d control, %d dementia) do not match file lists (%d cc, %d cd)'
    % (len(tcontrol), len(tdementia), len(test_cc), len(test_cd))
)

# One label per file: every control file first, then every dementia file.
labelst = ['Control'] * len(tcontrol) + ['Dementia'] * len(tdementia)

In [None]:
# Integer-encode the test labels; only the codes array (element 0 of the
# factorize result) is kept.
labels_test = pd.factorize(labelst)[0]

In [None]:
# Shape of the integer-encoded test label vector.
labels_test.shape

(48,)