# Custom recordings: man / woman voice classification

## Loading the packages

In [77]:
import numpy as np
from matplotlib import pyplot as pl
import os
import scipy.io.wavfile as wav

# For model selection
import sys
from ipywidgets import interact, widgets

%matplotlib inline

# Directory (relative to the notebook) that holds the training recordings read by create_dataset()
PATH = 'vowels/'

In [78]:
!pip install python_speech_features
from python_speech_features import mfcc

Defaulting to user installation because normal site-packages is not writeable


## Create the dataset

In [79]:

def create_dataset():
    """Build the labelled feature matrix from the recordings found in ``PATH``.

    Files whose name starts with 'nam' are labelled 1 (man), files whose name
    starts with 'naf' are labelled -1 (woman); every other entry is skipped.
    For each kept file the MFCC matrix is computed and averaged over axis 0
    (presumably the frame axis), giving one feature vector per recording.

    The number of recordings per class is printed as a side effect.

    Returns:
        A 2-D numpy array with one row per recording: the averaged MFCC
        values followed by the label in the last column.
    """
    # File-name prefix -> target value expected by the network
    label_by_prefix = {'nam': 1, 'naf': -1}

    features = []
    labels = []

    for filename in os.listdir(PATH):
        label = label_by_prefix.get(filename[:3])
        if label is None:
            # Neither a man nor a woman recording
            continue
        labels.append(label)

        # Load the audio samples and their sample rate
        sample_rate, signal = wav.read(os.path.join(PATH, filename))

        # MFCC coefficients of the recording
        coefficients = mfcc(signal, samplerate=sample_rate, nfft=1024)

        # Only one summary statistic is used as feature: the mean
        # (the standard deviation would be the alternative).
        features.append(np.mean(coefficients, axis=0))

    print("Number of men   : ", labels.count(1))
    print("Number of woman : ", labels.count(-1))

    # Stack the features row-wise and the labels as a column, then glue them side by side
    feature_matrix = np.vstack(features)
    label_column = np.vstack(labels)
    return np.concatenate((feature_matrix, label_column), axis=1)
    
        
        
# Build the feature/label matrix once; it is passed to the cross-validation call further down.
dataset = create_dataset()

Number of men   :  36
Number of woman :  36


## Model selection

In [80]:
# MLP
import mlp_backprop_momentum as mlp

In [81]:
import k_fold_cross_validation as cv

In [82]:
# Hyper-parameters of the experiments.
# NOTE(review): N_INITS, EPOCHS and N_NEURONS are not referenced by any cell visible
# in this notebook (the final run passes epochs=70 literally) — presumably left over
# from the model-selection sweep; confirm before removing.
N_INITS = 10
EPOCHS = 200
N_NEURONS = [2, 4, 8, 16, 32]
# Passed as learning_rate= to cv.k_fold_cross_validation
LEARNING_RATE = 0.001
# Passed as momentum= to cv.k_fold_cross_validation
MOMENTUM = 0.5
# Passed as k= to cv.k_fold_cross_validation (presumably the number of folds)
K = 5

### Final model

In [83]:
# Network used for the final model; it is evaluated by cross-validation below and
# reused by man_or_woman() for prediction.
# Presumably [13, 15, 1] are the layer sizes (input, hidden, output) and 'tanh' the
# activation — TODO confirm against mlp_backprop_momentum.
nn = mlp.MLP([13,15,1], 'tanh')

In [84]:
# Cross-validate `nn` on `dataset`; the three returned values are reported in the next cell.
# epochs is given literally (70) rather than through the EPOCHS constant.
# NOTE(review): `nn` is reused afterwards for prediction; whether it ends up trained on
# the whole dataset or only on the last fold depends on cv.k_fold_cross_validation — confirm.
MSE_train, MSE_test, conf_mat = cv.k_fold_cross_validation(
    nn,
    dataset,
    k=K,
    learning_rate=LEARNING_RATE,
    momentum=MOMENTUM,
    epochs=70,
    threshold=0.0,
)

In [85]:
# Report the cross-validation results: training MSE, test MSE and the confusion matrix.
# Compare the two MSE values to judge how well the model generalises.
print('MSE training: ', MSE_train)
print('MSE test: ', MSE_test)
print('Confusion matrix:')
print(conf_mat)

MSE training:  0.0094680435979656
MSE test:  0.18840022707394613
Confusion matrix:
[[33.  3.]
 [ 2. 34.]]


In [86]:
# Directory (relative to the notebook) that holds the custom recordings classified by man_or_woman()
PATH_CUSTOM = 'custom_vowels/'


def man_or_woman(filename):
    """Print whether the recording ``filename`` is classified as a man or a woman.

    The file is read from ``PATH_CUSTOM``, its MFCC matrix is averaged over
    axis 0 and the resulting vector is fed to the global network ``nn``.
    A negative network output is reported as a woman, anything else as a man.

    Args:
        filename: Name of a wav file located inside ``PATH_CUSTOM``.

    Returns:
        None; the file name and the verdict are printed.
    """
    wav_path = os.path.join(PATH_CUSTOM, filename)
    sample_rate, signal = wav.read(wav_path)

    # NOTE(review): nfft is 1200 here but 1024 in create_dataset; presumably raised
    # because these recordings have a longer analysis window (higher sample rate) —
    # confirm the features remain comparable with the training data.
    features = np.mean(mfcc(signal, samplerate=sample_rate, nfft=1200), axis=0)

    result = nn.predict(features)

    print("Filename :", filename)

    # Training labels are -1 for women and 1 for men, so the sign decides
    verdict = "is a woman \n" if result < 0 else "is a man \n"
    print(verdict)

        
# Classify every custom recording.
# os.listdir returns entries in arbitrary order and also lists non-audio entries
# (hidden files, checkpoint directories) that wav.read cannot open, so the listing
# is sorted for a stable output order and restricted to .wav files.
for filename in sorted(os.listdir(PATH_CUSTOM)):
    if filename.lower().endswith('.wav'):
        man_or_woman(filename)
    




Filename : man_axel3.wav
is a man 

Filename : woman_jess2.wav
is a woman 

Filename : man_axel1.wav
is a man 

Filename : woman_jess3.wav
is a woman 

Filename : woman_jess1.wav
is a woman 

Filename : man_nico1.wav
is a woman 

Filename : man_axel2.wav
is a man 

