In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import sklearn

import librosa
import librosa.display
import IPython.display as ipd
import warnings
warnings.filterwarnings('ignore')
from tqdm import tqdm


# **EXTRACTING AUDIO FEATURES**

In [2]:
def code_extractor(filename):
    cnt=0
    str = ''
    for ch in filename:
        if(ch=='_'):
            cnt+=1;
        elif(cnt==2):
            str += ch
    return str

In [3]:
def feature_extractor(y, sr):
    
    S = np.abs(librosa.stft(y))
    
    tonnetz = librosa.feature.tonnetz(y=y, sr=sr)
    tonnetz_mean = np.mean(tonnetz.T, axis=0)
    tonnetz_var = np.var(tonnetz.T, axis=0)
    features = np.append(tonnetz_mean, tonnetz_var)
    
    #zero_crossing_rate = librosa.feature.zero_crossing_rate(y)
    #zero_crossing_rate_mean = np.mean(zero_crossing_rate)
    #zero_crossing_rate_var = np.var(zero_crossing_rate)
    #features = np.append(features, [zero_crossing_rate_mean, zero_crossing_rate_var])
    
    spec_centroid = librosa.feature.spectral_centroid(sr, S=S)
    spec_centroid_mean = np.mean(spec_centroid, axis = 1)
    spec_centroid_var = np.var(spec_centroid, axis = 1)
    features = np.append(features, [spec_centroid_mean, spec_centroid_var])

    mfcc = librosa.feature.mfcc(sr, S=S)
    mfcc_mean = np.mean(mfcc.T, axis = 0)
    mfcc_var = np.var(mfcc.T, axis = 0)
    features = np.append(features, mfcc_mean)
    features = np.append(features, mfcc_var)

    spec_width = librosa.feature.spectral_bandwidth(sr, S=S)
    spec_width_mean = np.mean(spec_width)
    spec_width_var = np.var(spec_width)
    features = np.append(features, [spec_width_mean, spec_width_var])
    
    spec_contrast = librosa.feature.spectral_contrast(sr, S=S)
    spec_contrast_mean = np.mean(spec_contrast.T, axis = 0)
    spec_contrast_var= np.var(spec_contrast.T, axis = 0)
    features = np.append(features, spec_contrast_mean)
    features = np.append(features, spec_contrast_var)
    
    return features
    
    

In [4]:
import os

xl = pd.read_excel('/kaggle/input/voice/BVC_Voice_Bio_Public.xlsx')
data = pd.DataFrame()
label = pd.DataFrame()

for dirname, _, filenames in os.walk('/kaggle/input/voice/multiple_sentences'):
    for filename in filenames:
        y, sr = librosa.load(os.path.join(dirname, filename))
        y = librosa.effects.harmonic(y)
        data = data.append([feature_extractor(y, sr)])
        label = label.append([xl[xl['New_ID']==int(code_extractor(filename))].Sex.item()])
data.insert(70, 'Sex', label[0])

In [5]:
columns = ['tonnetz1_mean', 'tonnetz2_mean', 'tonnetz3_mean', 'tonnetz4_mean', 
           'tonnetz5_mean', 'tonnetz6_mean', 'tonnetz1_var', 'tonnetz2_var', 
           'tonnetz3_var', 'tonnetz4_var', 'tonnetz5_var', 'tonnetz6_var', 
           'spec_centroid_mean', 'spec_centroid_var', 
           'mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean', 'mfcc4_mean', 'mfcc5_mean', 
           'mfcc6_mean', 'mfcc7_mean', 'mfcc8_mean', 'mfcc9_mean', 'mfcc10_mean', 
           'mfcc11_mean', 'mfcc12_mean', 'mfcc13_mean', 'mfcc14_mean', 'mfcc15_mean', 
           'mfcc16_mean', 'mfcc17_mean', 'mfcc18_mean', 'mfcc19_mean', 'mfcc20_mean',
           'mfcc1_var', 'mfcc2_var', 'mfcc3_var', 'mfcc4_var', 'mfcc5_var', 
           'mfcc6_var', 'mfcc7_var', 'mfcc8_var', 'mfcc9_var', 'mfcc10_var', 
           'mfcc11_var', 'mfcc12_var', 'mfcc13_var', 'mfcc14_var', 'mfcc15_var', 
           'mfcc16_var', 'mfcc17_var', 'mfcc18_var', 'mfcc19_var', 'mfcc20_var',
           'spec_width_mean', 'spec_width_var', 'spec_contrast1_mean', 'spec_contrast2_mean', 
           'spec_contrast3_mean', 'spec_contrast4_mean', 'spec_contrast5_mean', 'spec_contrast6_mean', 
           'spec_contrast7_mean', 'spec_contrast_var1', 'spec_contrast2_var', 'spec_contrast3_var', 
           'spec_contrast4_var', 'spec_contrast5_var', 'spec_contrast6_var', 'spec_contrast7_var', 'Sex'
          ]
columns = np.asarray(columns)
data.columns = columns;
data.head()

Unnamed: 0,tonnetz1_mean,tonnetz2_mean,tonnetz3_mean,tonnetz4_mean,tonnetz5_mean,tonnetz6_mean,tonnetz1_var,tonnetz2_var,tonnetz3_var,tonnetz4_var,...,spec_contrast6_mean,spec_contrast7_mean,spec_contrast_var1,spec_contrast2_var,spec_contrast3_var,spec_contrast4_var,spec_contrast5_var,spec_contrast6_var,spec_contrast7_var,Sex
0,-0.00941,0.053376,0.026745,-0.044762,0.03128,0.01468,0.012448,0.012119,0.049109,0.028018,...,18.628574,48.671432,15.539253,54.773673,48.444731,26.35616,7.896053,11.567927,17.568051,'Female'
0,-0.04291,0.004391,0.176053,0.13949,-0.042872,-0.064765,0.01627,0.017637,0.04816,0.024027,...,17.982232,48.076976,10.717954,45.402982,40.660772,46.326461,30.462287,10.307349,25.963423,'Female'
0,0.017443,0.064028,0.068876,0.050504,0.018636,-0.041725,0.011385,0.016506,0.064162,0.032702,...,18.246863,48.626715,17.902991,44.00962,42.72202,43.383776,3.149878,1.974796,34.722975,'Female'
0,-0.05131,0.078133,0.147172,0.109493,-0.009415,-0.062249,0.028554,0.031754,0.065372,0.061447,...,17.772994,47.649859,14.81402,48.167825,62.377584,68.938358,18.895308,6.682546,30.315143,'Female'
0,-0.025493,0.000314,-0.031613,0.074925,-0.02365,-0.008657,0.011211,0.041288,0.025275,0.059714,...,20.186235,45.969501,14.739871,47.084556,65.262772,36.726145,29.365609,13.467612,54.582597,'Female'


In [6]:
data.to_csv('sample', encoding='utf-8', index=False)
data.to_excel('sampleXL.xlsx', sheet_name='sheet1', index=False)

# **MACHINE LEARNING CLASSIFICATION**

In [7]:
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import SGDClassifier, LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, roc_curve
from sklearn import preprocessing
from sklearn.model_selection import train_test_split


In [8]:
y = data['Sex']
X = data.loc[:, data.columns != 'Sex']

col = X.columns
X = preprocessing.MinMaxScaler().fit_transform(X)
X = pd.DataFrame(X, columns = col)


In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [10]:
def model_assess(model, title = "Default"):
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    print('Accuracy', title, ':', round(accuracy_score(y_test, preds), 5), '\n')

In [11]:
nb = GaussianNB()
model_assess(nb, "Naive Bayes")

sgd = SGDClassifier(max_iter=5000, random_state=0)
model_assess(sgd, "Stochastic Gradient Descent")

knn = KNeighborsClassifier(n_neighbors=19)
model_assess(knn, "KNN")

tree = DecisionTreeClassifier()
model_assess(tree, "Decission trees")

rforest = RandomForestClassifier(n_estimators=1000, max_depth=10, random_state=0)
model_assess(rforest, "Random Forest")

svm = SVC()
model_assess(svm, "Support Vector Machine")

lg = LogisticRegression()
model_assess(lg, "Logistic Regression")


Accuracy Naive Bayes : 0.8015 

Accuracy Stochastic Gradient Descent : 0.92697 

Accuracy KNN : 0.91011 

Accuracy Decission trees : 0.84644 

Accuracy Random Forest : 0.897 

Accuracy Support Vector Machine : 0.92135 

Accuracy Logistic Regression : 0.92322 

