# Imports:

In [1]:
from sklearn.tree import DecisionTreeClassifier 
from sklearn.model_selection import train_test_split 
import os
import pandas as pd
import librosa
import numpy as np
from sklearn import metrics
import scipy
import scipy.signal
import scipy.fftpack

# Data Frames:

## Dina Hussam

In [2]:
# File table for speaker "Dina Hussam": one row per recording, class label '0'.
filelist = os.listdir('voice_dataset//Dina Hussam')
df_dina = (
    pd.DataFrame(filelist)
    .rename(columns={0: 'file'})   # the single unnamed column holds the file names
    .assign(label='0')
)
df_dina

Unnamed: 0,file,label
0,dina_10.wav,0
1,dina_11.wav,0
2,dina_12.wav,0
3,dina_13.wav,0
4,dina_14.wav,0
5,dina_15.wav,0
6,dina_16.wav,0
7,dina_17.wav,0
8,dina_18.wav,0
9,dina_19.wav,0


## Romisaa Elsaidy

In [3]:
# File table for speaker "Romisaa Elsaidy": one row per recording, class label '1'.
filelist = os.listdir('voice_dataset//Romisaa Elsaidy')
df_romisaa = (
    pd.DataFrame(filelist)
    .rename(columns={0: 'file'})   # the single unnamed column holds the file names
    .assign(label='1')
)
df_romisaa

Unnamed: 0,file,label
0,close (14).wav,1
1,close (15).wav,1
2,close (16).wav,1
3,open (1).wav,1
4,open (10).wav,1
5,open (11).wav,1
6,open (12).wav,1
7,open (13).wav,1
8,open (14).wav,1
9,open (15).wav,1


## Youssef Shaban

In [4]:
# File table for speaker "Youssef Shaban": one row per recording, class label '2'.
filelist = os.listdir('voice_dataset//Youssef Shaban')
df_youssef = (
    pd.DataFrame(filelist)
    .rename(columns={0: 'file'})   # the single unnamed column holds the file names
    .assign(label='2')
)
df_youssef

Unnamed: 0,file,label
0,youssef_1.wav,2
1,youssef_10.wav,2
2,youssef_11.wav,2
3,youssef_12.wav,2
4,youssef_13.wav,2
5,youssef_14.wav,2
6,youssef_15.wav,2
7,youssef_16.wav,2
8,youssef_17.wav,2
9,youssef_18.wav,2


## Others

In [5]:
# File table for every other speaker: one row per recording, class label '3'.
filelist = os.listdir('voice_dataset//Other')
df_others = (
    pd.DataFrame(filelist)          # read the file names into pandas
    .rename(columns={0: 'file'})
    .assign(label='3')
)
df_others

Unnamed: 0,file,label
0,abdelrahmans1.wav,3
1,abdelrahmans2.wav,3
2,abdelrahmans28.wav,3
3,abdelrahmans29.wav,3
4,abdelrahmans30.wav,3
5,AuphonicRecording_27.wav,3
6,AuphonicRecording_28.wav,3
7,AuphonicRecording_29.wav,3
8,AuphonicRecording_30.wav,3
9,close12.wav,3


# Concatenate the Data Frames:

In [6]:
# Stack the four per-speaker tables into one frame; ignore_index renumbers the rows 0..n-1.
df = pd.concat(
    [df_dina, df_romisaa, df_youssef, df_others],
    ignore_index=True,
)
df.head()

Unnamed: 0,file,label
0,dina_10.wav,0
1,dina_11.wav,0
2,dina_12.wav,0
3,dina_13.wav,0
4,dina_14.wav,0


# Extract Features:

## MFCC Features

In [7]:
def power_to_db(S, ref=1.0, amin=1e-10, top_db=80.0):
    """Convert a power spectrogram to decibel (dB) units.

    Computes ``10 * log10(max(amin, S)) - 10 * log10(max(amin, ref))`` and
    optionally clips the result to ``top_db`` below its own maximum.

    Parameters
    ----------
    S : array-like
        Input power values.
    ref : scalar or callable, default 1.0
        Reference power. If callable, it is evaluated on ``S``
        (e.g. ``ref=np.max``); otherwise its absolute value is used.
    amin : float > 0, default 1e-10
        Floor applied to ``S`` and ``ref`` before taking the logarithm.
    top_db : float >= 0 or None, default 80.0
        Dynamic range to keep below the peak. ``None`` disables clipping.

    Returns
    -------
    np.ndarray
        ``S`` expressed in dB relative to ``ref``.

    Raises
    ------
    ValueError
        If ``amin`` is not strictly positive or ``top_db`` is negative.
    """
    if amin <= 0:
        raise ValueError("amin must be strictly positive")
    if top_db is not None and top_db < 0:
        raise ValueError("top_db must be non-negative")

    magnitude = np.asarray(S)

    # Honour the caller-supplied reference instead of a hard-coded 1.0.
    ref_value = ref(magnitude) if callable(ref) else np.abs(ref)

    log_spec = 10.0 * np.log10(np.maximum(amin, magnitude))
    log_spec = log_spec - 10.0 * np.log10(np.maximum(amin, ref_value))

    if top_db is not None:
        # Limit the dynamic range relative to the loudest value.
        log_spec = np.maximum(log_spec, log_spec.max() - top_db)

    return log_spec

In [8]:
def expand_to(x, ndim, axes):
    """Reshape ``x`` to ``ndim`` dimensions for broadcasting.

    The i-th dimension of ``x`` is placed at position ``axes[i]`` of the
    result; every other dimension has length 1. ``axes`` may be a single
    index or an iterable of indices.
    """
    # Normalise a lone axis index into a one-element tuple.
    target_axes = tuple(axes) if np.iterable(axes) else (axes,)

    new_shape = [1] * ndim
    for src_dim, dst_axis in enumerate(target_axes):
        new_shape[dst_axis] = x.shape[src_dim]

    return x.reshape(new_shape)

In [9]:
def mfcc(y=None, sr=22050, S=None, n_mfcc=20, dct_type=2, norm="ortho", lifter=0, **kwargs):
    """Compute Mel-frequency cepstral coefficients (MFCCs).

    Parameters
    ----------
    y : np.ndarray or None
        Audio time series; used only when ``S`` is not given.
    sr : number
        Sampling rate of ``y``.
    S : np.ndarray or None
        Precomputed log-power mel spectrogram. The DCT is taken along
        axis ``-2`` of this array.
    n_mfcc : int
        Number of coefficients to keep (leading entries along axis ``-2``).
    dct_type, norm
        Passed to ``scipy.fftpack.dct`` as ``type`` and ``norm``.
    lifter : number >= 0
        If positive, apply sinusoidal liftering with this parameter;
        ``0`` leaves the coefficients untouched.
    **kwargs
        Forwarded to ``melspectrogram`` when ``S`` is None.

    Returns
    -------
    np.ndarray
        The MFCC matrix, with ``n_mfcc`` entries along axis ``-2``.

    Raises
    ------
    ValueError
        If ``lifter`` is negative (previously this silently returned None).
    """
    # Validate up front so a bad argument fails before any heavy computation.
    if lifter < 0:
        raise ValueError("MFCC lifter={} must be a non-negative number".format(lifter))

    if S is None:
        # multichannel behavior may be different due to relative noise floor differences between channels
        S = power_to_db(melspectrogram(y=y, sr=sr, **kwargs))

    M = scipy.fftpack.dct(S, axis=-2, type=dct_type, norm=norm)[..., :n_mfcc, :]

    if lifter > 0:
        # Shape the lifter so it broadcasts along the coefficient axis.
        LI = np.sin(np.pi * np.arange(1, 1 + n_mfcc, dtype=M.dtype) / lifter)
        LI = expand_to(LI, ndim=S.ndim, axes=-2)
        M *= 1 + (lifter / 2) * LI

    return M

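## Chroma STFT Features

Chroma features are not re-implemented in this notebook; `extract_features` computes them with `librosa.feature.chroma_stft`.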

## Melspectrogram Features

In [10]:
def _spectrogram(y=None, S=None, n_fft=2048, hop_length=512, power=1, win_length=None, window="hann", center=True,
                 pad_mode="constant"):

    if S is not None:
        # Infer n_fft from spectrogram shape, but only if it mismatches
        if n_fft // 2 + 1 != S.shape[-2]:
            n_fft = 2 * (S.shape[-2] - 1)
    else:
        # Otherwise, compute a magnitude spectrogram from input
        S = (
            np.abs(
                librosa.stft(
                    y,
                    n_fft=n_fft,
                    hop_length=hop_length,
                    win_length=win_length,
                    center=center,
                    window=window,
                    pad_mode=pad_mode,
                )
            )
            ** power
        )

    return S, n_fft

In [11]:
def fft_frequencies(sr=22050, n_fft=2048):
    """Return the frequencies of the real-FFT bins for a window of ``n_fft`` samples at rate ``sr``."""
    sample_spacing = 1.0 / sr
    return np.fft.rfftfreq(n_fft, sample_spacing)

In [12]:
def mel_frequencies(n_mels=128, fmin=0.0, fmax=11025.0, htk=False):
    """Return ``n_mels`` frequencies in Hz, evenly spaced on the mel scale from ``fmin`` to ``fmax``."""
    # Map both band limits onto the mel axis.
    mel_lo, mel_hi = (hz_to_mel(edge, htk=htk) for edge in (fmin, fmax))

    # Space the points uniformly in mel, then convert them back to Hz.
    mel_grid = np.linspace(mel_lo, mel_hi, n_mels)
    return mel_to_hz(mel_grid, htk=htk)

In [13]:
def hz_to_mel(frequencies, htk=False):
    """Convert frequencies in Hz to mels.

    With ``htk=True`` the closed-form ``2595 * log10(1 + f / 700)`` is used.
    Otherwise the scale is linear below 1000 Hz and logarithmic at and
    above it. Accepts a scalar or an array-like.
    """
    hz = np.asanyarray(frequencies)

    if htk:
        return 2595.0 * np.log10(1.0 + hz / 700.0)

    # Constants of the piecewise scale.
    f_min = 0.0
    f_sp = 200.0 / 3
    min_log_hz = 1000.0                          # start of the log region (Hz)
    min_log_mel = (min_log_hz - f_min) / f_sp    # same point in mels
    logstep = np.log(6.4) / 27.0                 # step size in the log region

    # Linear part, applied everywhere first.
    mels = (hz - f_min) / f_sp

    # Log part, overwriting the entries at or above the breakpoint.
    if hz.ndim:
        # Array input: vectorised masked update.
        in_log_region = hz >= min_log_hz
        mels[in_log_region] = min_log_mel + np.log(hz[in_log_region] / min_log_hz) / logstep
    elif hz >= min_log_hz:
        # Scalar input: check directly.
        mels = min_log_mel + np.log(hz / min_log_hz) / logstep

    return mels

In [14]:
def mel_to_hz(mels, htk=False):
    """Convert mel values to frequencies in Hz.

    With ``htk=True`` the closed-form ``700 * (10 ** (m / 2595) - 1)`` is
    used. Otherwise the scale is linear below the 1000 Hz breakpoint and
    exponential at and above it. Accepts a scalar or an array-like.
    """
    mel_vals = np.asanyarray(mels)

    if htk:
        return 700.0 * (10.0 ** (mel_vals / 2595.0) - 1.0)

    # Constants of the piecewise scale.
    f_min = 0.0
    f_sp = 200.0 / 3
    min_log_hz = 1000.0                          # start of the log region (Hz)
    min_log_mel = (min_log_hz - f_min) / f_sp    # same point in mels
    logstep = np.log(6.4) / 27.0                 # step size in the log region

    # Linear part, applied everywhere first.
    freqs = f_min + f_sp * mel_vals

    # Nonlinear part, overwriting the entries at or above the breakpoint.
    if mel_vals.ndim:
        # Array input: vectorised masked update.
        in_log_region = mel_vals >= min_log_mel
        freqs[in_log_region] = min_log_hz * np.exp(logstep * (mel_vals[in_log_region] - min_log_mel))
    elif mel_vals >= min_log_mel:
        # Scalar input: check directly.
        freqs = min_log_hz * np.exp(logstep * (mel_vals - min_log_mel))

    return freqs

In [15]:
def mel(sr, n_fft, n_mels=128, fmin=0.0, fmax=None, htk=False, norm="slaney", dtype=np.float32):
    """Build a mel filter bank that maps FFT bins onto mel bands.

    Parameters
    ----------
    sr : number
        Sampling rate of the signal.
    n_fft : int
        FFT size; the bank has ``1 + n_fft // 2`` columns.
    n_mels : int
        Number of mel bands (rows of the bank).
    fmin, fmax : float
        Lowest / highest frequency in Hz. ``fmax`` defaults to ``sr / 2``.
    htk : bool
        Passed through to ``mel_frequencies`` to select the mel formula.
    norm : "slaney", None, or a norm accepted by ``librosa.util.normalize``
        "slaney" scales each filter by ``2 / bandwidth``; ``None`` leaves the
        triangles unnormalised; anything else is handed to
        ``librosa.util.normalize`` along the frequency axis.
    dtype : numpy dtype
        dtype of the weight matrix allocated for the bank.

    Returns
    -------
    np.ndarray, shape (n_mels, 1 + n_fft // 2)
    """
    if fmax is None:
        fmax = float(sr) / 2

    # Initialize the weights
    n_mels = int(n_mels)
    weights = np.zeros((n_mels, int(1 + n_fft // 2)), dtype=dtype)

    # Center freqs of each FFT bin
    fftfreqs = fft_frequencies(sr=sr, n_fft=n_fft)

    # 'Center freqs' of mel bands - uniformly spaced between limits.
    # Two extra points give every triangle a left and a right edge.
    mel_f = mel_frequencies(n_mels + 2, fmin=fmin, fmax=fmax, htk=htk)

    fdiff = np.diff(mel_f)
    ramps = np.subtract.outer(mel_f, fftfreqs)

    for i in range(n_mels):
        # lower and upper slopes for all bins
        lower = -ramps[i] / fdiff[i]
        upper = ramps[i + 2] / fdiff[i + 1]

        # .. then intersect them with each other and zero
        weights[i] = np.maximum(0, np.minimum(lower, upper))

    if norm == "slaney":
        # Slaney-style mel is scaled to be approx constant energy per channel
        enorm = 2.0 / (mel_f[2 : n_mels + 2] - mel_f[:n_mels])
        weights *= enorm[:, np.newaxis]
    elif norm is not None:
        # The original called a bare `normalize`, which is not defined anywhere
        # in this notebook and raised NameError; librosa's utility is the
        # function that name refers to.
        weights = librosa.util.normalize(weights, norm=norm, axis=-1)

    return weights

In [16]:
def melspectrogram(y=None, sr=22050, S=None, n_fft=2048, hop_length=512, win_length=None, window="hann", center=True,
                   pad_mode="constant", power=2.0, **kwargs):
    """Compute a mel-scaled spectrogram.

    Obtains a spectrogram via ``_spectrogram`` (from ``y``, or ``S`` as
    given), builds a mel filter bank with ``mel`` using ``**kwargs``, and
    projects the frequency axis of the spectrogram onto the mel bands.
    """
    stft_options = dict(
        n_fft=n_fft,
        hop_length=hop_length,
        power=power,
        win_length=win_length,
        window=window,
        center=center,
        pad_mode=pad_mode,
    )
    S, n_fft = _spectrogram(y=y, S=S, **stft_options)

    # Mel filter bank matching the (possibly re-derived) FFT size.
    filter_bank = mel(sr=sr, n_fft=n_fft, **kwargs)

    # Contract over the frequency axis; leading axes are carried through.
    return np.einsum("...ft,mf->...mt", S, filter_bank, optimize=True)

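## Spectral Contrast Features

Spectral contrast is not re-implemented in this notebook; `extract_features` computes it with `librosa.feature.spectral_contrast`.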

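## Tonnetz Features

Tonnetz (tonal centroid) features are computed in `extract_features` with `librosa.feature.tonnetz`, applied to the harmonic component of the signal.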

## Main Features:

In [17]:
def extract_features(files, dataset=None):
    """Load one audio file and return its time-averaged feature vectors.

    Parameters
    ----------
    files : mapping / pandas row
        Must provide ``files['file']`` (the file name). When the data set is
        ``"all"``, ``files.label`` is read as well.
    dataset : str or None
        Sub-folder of ``voice_dataset`` that holds the file. ``None``
        (the default) falls back to the notebook-level variable ``name``,
        so existing ``df.apply(extract_features, axis=1)`` calls keep working.

    Returns
    -------
    tuple
        ``(mfccs, chroma, mel, contrast, tonnetz)``, each averaged over the
        time axis, followed by ``label`` when the data set is ``"all"``.

    Raises
    ------
    Exception
        Whatever ``librosa.load`` raises. The file name is printed first and
        the error is re-raised; previously it was swallowed and the function
        then failed on an unbound ``X``.
    """
    if dataset is None:
        dataset = name

    # Build the path from components: no hard-coded '\\' separator, and no
    # str.format applied to the absolute working-directory path.
    file_name = os.path.join(os.path.abspath('voice_dataset'), str(dataset), str(files['file']))

    try:
        # Loads the audio file as a floating point time series.
        # The sample rate is librosa's default (22050 Hz).
        X, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
    except Exception:
        # Report which file failed, then let the caller see the real error.
        print(files['file'])
        raise

    # Mel-frequency cepstral coefficients (MFCCs), averaged over time
    mfccs = np.mean(mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)

    # Magnitude STFT, shared by the chroma and spectral-contrast features
    stft = np.abs(librosa.stft(X))

    # Chromagram from the magnitude spectrogram
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)

    # Mel-scaled spectrogram (local name avoids shadowing the `mel` function)
    mel_spec = np.mean(melspectrogram(X, sr=sample_rate).T, axis=0)

    # Spectral contrast
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)

    # Tonal centroid features (tonnetz) of the harmonic component
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)

    # The training set also carries the class of each file as a trailing label
    if dataset == "all":
        label = files.label
        return mfccs, chroma, mel_spec, contrast, tonnetz, label
    return mfccs, chroma, mel_spec, contrast, tonnetz

# Concatenate Feature Function:

In [18]:
def feat(features_label):
    """Flatten each entry's first five feature vectors into one 1-D vector.

    ``features_label`` is indexed by position ``0..len-1``; any element after
    the fifth in an entry (e.g. a label) is ignored. Returns a list with one
    concatenated array per entry.
    """
    def _flatten(entry):
        # Join MFCC, chroma, mel, contrast and tonnetz end to end.
        return np.concatenate((entry[0], entry[1], entry[2], entry[3], entry[4]), axis=0)

    return [_flatten(features_label[idx]) for idx in range(len(features_label))]

In [19]:
# `extract_features` reads the notebook-level `name`: it selects the sub-folder
# of voice_dataset/ to load from and, when "all", makes the label part of the result.
name = "all"
# One tuple of per-file feature vectors (plus label) for every row of `df`.
features_label = df.apply(extract_features, axis=1)
# Concatenate each tuple's five feature vectors into a single vector per file.
features = feat(features_label)

  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **

  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **

  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


# Decision Tree Model:

In [20]:
# Hold out 30% of the files for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    df['label'],
    test_size=0.3,
    random_state=1,
)

In [21]:
# Fix random_state so that a Restart & Run All reproduces the same tree
# (without it the fitted tree, and so the reported accuracy, can vary between runs).
clf = DecisionTreeClassifier(criterion="entropy", max_depth=20, random_state=1)
clf = clf.fit(X_train, y_train)
# Predictions on the held-out split, scored in the next cell.
y_pred = clf.predict(X_test)

In [22]:
# Fraction of held-out files whose predicted speaker matches the true label.
accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9803921568627451


# Test Model:

## Data Frame Test

In [23]:
# File table for the unlabeled test recordings (no label column).
filelist = os.listdir('voice_dataset//test')

df_test = (
    pd.DataFrame(filelist)          # read the file names into pandas
    .rename(columns={0: 'file'})
)
df_test

Unnamed: 0,file
0,dina_out_dataset.wav
1,other1.wav
2,other2.wav
3,romisaa_out_dataset.wav
4,romisaa_whats.wav
5,youssef_whats1.wav
6,youssef_whats2.wav


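## Extract Features Test Function

The test files reuse `extract_features`: setting `name = "test"` makes it read from `voice_dataset/test` and return the feature vectors without a label.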

## Prediction

In [24]:
# Switch `extract_features` to the test folder; with name != "all" it returns no label.
name = "test"
# One tuple of five feature vectors for every row of `df_test`.
features_label2 = df_test.apply(extract_features, axis=1)
# NOTE: this rebinds `features` (previously the training features) to the test features.
features=feat(features_label2)
# NOTE: this rebinds `y_pred` (previously the held-out predictions) to the test-file predictions.
y_pred = clf.predict(features)
y_pred

  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


array(['0', '3', '3', '1', '2', '2', '2'], dtype=object)