In [1]:
import numpy as np
import pandas as pd
from scipy.io import wavfile
from glob import glob
from matplotlib import pyplot as plt
plt.style.use('seaborn')
from tqdm import tqdm
from IPython.display import clear_output

# Feature extractors
import pyloudnorm as pyln
from librosa.core import lpc
from librosa.feature import (mfcc, rms, spectral_centroid, spectral_bandwidth,
                             spectral_contrast, spectral_flatness, spectral_rolloff,
                             tonnetz, zero_crossing_rate)
from Signal_Analysis.features.signal import (get_F_0, get_Jitter, get_Pulses, get_HNR)

In [2]:
# Collect all wavefiles
male_files = glob('../data/gender_voice/male/wav/*.wav')
female_files = glob('../data/gender_voice/female/wav/*.wav')

In [13]:
# Feature extractor

class FeatureExtractor():
    
    def __init__(self, filepaths):
        self.paths = filepaths
        self.wave = None
        self.fs = None
        
    def _mfcc(self):
        return mfcc(self.wave, self.fs).mean(axis=1)
    
    def _rms(self):
        mean_rms, std_rms = rms(self.wave).mean(), rms(self.wave).std()
        return mean_rms, std_rms
    
    def _spectral_centroid(self):
        mean_sc = spectral_centroid(self.wave, self.fs).mean()
        std_sc = spectral_centroid(self.wave, self.fs).std()
        return mean_sc, std_sc
    
    def _spectral_bandwidth(self):
        mean_sb = spectral_bandwidth(self.wave, self.fs).mean()
        std_sb = spectral_bandwidth(self.wave, self.fs).std()
        return mean_sb, std_sb
    
    def _spectral_flatness(self):
        mean_sf = spectral_flatness(self.wave).mean()
        std_sf = spectral_flatness(self.wave).std()
        return mean_sf, std_sf
    
    def _spectral_rolloff(self, roll_percent):
        mean_ro = spectral_rolloff(self.wave, self.fs, roll_percent=roll_percent).mean()
        std_ro = spectral_rolloff(self.wave, self.fs, roll_percent=roll_percent).std()
        return mean_ro, std_ro
    
    def _zero_crossing_rate(self):
        mean_zcr = zero_crossing_rate(self.wave).mean()
        std_zcr = zero_crossing_rate(self.wave).std()
        return mean_zcr, std_zcr
    
    def _spectral_contrast(self):
        return spectral_contrast(self.wave, self.fs).mean(axis=1)
    
    def _f0(self):
        return get_F_0(self.wave, self.fs)[0]
    
    def _jitter(self):
        loc, loc_abs, rap, ppq5, ddp = list(get_Jitter(self.wave, self.fs).values())
        return loc, loc_abs, rap, ppq5, ddp
    
    def _hnr(self):
        return get_HNR(self.wave, self.fs)
    
    def _pulses(self):
        mean_pulses = get_Pulses(self.wave, self.fs).mean()
        std_pulses = get_Pulses(self.wave, self.fs).std()
        return mean_pulses, std_pulses
    
    def generate(self):
        all_features = []
        
        for path in tqdm(self.paths, position=0, leave=True):
            feat = []
            fs, wave = wavfile.read(path)
            self.wave = np.asarray(wave).astype('float')
            self.fs = np.asarray(fs).astype('int')
            
            # Create features
            mfcc_feat = self._mfcc()
            rms_mean, rms_std = self._rms()
            sc_mean, sc_std = self._spectral_centroid()
            sb_mean, sb_std = self._spectral_bandwidth()
            sf_mean, sf_std = self._spectral_flatness()
            ro_mean_25, ro_std_25 = self._spectral_rolloff(roll_percent=0.25)
            ro_mean_50, ro_std_50 = self._spectral_rolloff(roll_percent=0.5)
            ro_mean_75, ro_std_75 = self._spectral_rolloff(roll_percent=0.75)
            ro_mean_90, ro_std_90 = self._spectral_rolloff(roll_percent=0.9)
            zcr_mean, zcr_std = self._zero_crossing_rate()
            spectral_contrast = self._spectral_contrast()
            F0 = self._f0()
            loc, loc_abs, rap, ppq5, ddp = self._jitter()
            hn_ratio = self._hnr() 
            mean_pulses, std_pulses = self._pulses()
            
            # Add all the features
            assert len(mfcc_feat) > 1
            assert len(spectral_contrast) > 1
            
            for i in range(len(mfcc_feat)):
                feat.append(mfcc_feat[i])
            for i in range(len(spectral_contrast)):
                feat.append(spectral_contrast[i])
            feat.append(rms_mean); feat.append(rms_std)
            feat.append(sc_mean); feat.append(sc_std)
            feat.append(sb_mean); feat.append(sb_std)
            feat.append(sf_mean); feat.append(sf_std)
            feat.append(ro_mean_25); feat.append(ro_std_25)
            feat.append(ro_mean_50); feat.append(ro_std_50)
            feat.append(ro_mean_75); feat.append(ro_std_75)
            feat.append(ro_mean_90); feat.append(ro_std_90)
            feat.append(zcr_mean); feat.append(zcr_std)
            feat.append(F0)
            feat.append(loc); feat.append(loc_abs); feat.append(rap)
            feat.append(ppq5); feat.append(ddp)
            feat.append(hn_ratio)
            feat.append(mean_pulses); feat.append(std_pulses)
            
            # Append to all features
            all_features.append(feat)
            clear_output()
            
        return np.array(all_features)

In [14]:
# Get features
male_fe = FeatureExtractor(male_files)
male_features = male_fe.generate()

female_fe = FeatureExtractor(female_files)
female_features = female_fe.generate()

100%|██████████| 1132/1132 [22:42<00:00,  1.20s/it]


In [21]:
# Male df
male_cols = ['f_'+str(i) for i in range(male_features.shape[1])]
female_cols = ['f_'+str(i) for i in range(female_features.shape[1])]

male_df = pd.DataFrame(male_features, columns=male_cols)
male_df['target'] = 0
female_df = pd.DataFrame(female_features, columns=female_cols)
female_df['target'] = 1

In [22]:
male_df.to_csv('.././gen_data/male_df.csv', index=False)
female_df.to_csv('.././gen_data/female_df.csv', index=False)

# Vanilla Models

In [49]:
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression

In [33]:
data = pd.concat([male_df, female_df], axis=0)
X, y = data.drop(['target'], axis=1), data['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True, stratify=y)

In [68]:
clf = LogisticRegression(C=1e-24)
clf.fit(X_train, y_train)

LogisticRegression(C=1e-24, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [69]:
test_preds = clf.predict(X_test)

In [70]:
accuracy_score(y_test, test_preds)

0.5