# Voice Activity

In [14]:
import os
import librosa
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Flatten, Dropout, Activation, Conv1D, MaxPooling1D, Dense
from sklearn import preprocessing
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,Activation,Flatten
from tensorflow.keras.optimizers import Adam
from sklearn import metrics

In [2]:
# Seed value
# Apparently you may use different seed values at each stage
seed_value= 0

# 1. Set the `PYTHONHASHSEED` environment variable at a fixed value
import os
os.environ['PYTHONHASHSEED']=str(seed_value)

# 2. Set the `python` built-in pseudo-random generator at a fixed value
import random
random.seed(seed_value)

# 3. Set the `numpy` pseudo-random generator at a fixed value
import numpy as np
np.random.seed(seed_value)

# 4. Set the `tensorflow` pseudo-random generator at a fixed value
import tensorflow as tf
tf.random.set_seed(seed_value)
# for later versions: 
# tf.compat.v1.set_random_seed(seed_value)


### Relative Path

In [3]:
cur_path = os.getcwd()

# 1. Read Sample File

In [4]:
def features_extractor(file_name):
    audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast') 
    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    mfccs_scaled_features = np.mean(mfccs_features.T,axis=0)
    
    return mfccs_scaled_features

In [29]:
recordNames = os.listdir("./ses/")
SampleNumbers = {}
recordNames.remove('.DS_Store')

In [37]:
recordDatasets = {}
numberOfSamples = []
for recordName in recordNames:
    path_0 = os.path.relpath(f'./ses/{recordName}/samples_0', cur_path)
    path_1 = os.path.relpath(f'./ses/{recordName}/samples_1', cur_path)
    AudioFiles_0 = [f for f in os.listdir(path_0) if os.path.isfile(os.path.join(path_0, f))]
    AudioFiles_1 = [f for f in os.listdir(path_1) if os.path.isfile(os.path.join(path_1, f))]
    SampleNumbers[recordName] = {
        "Child" : len(AudioFiles_1),
        "NonChild" : len(AudioFiles_0)
    }
    features = []
    labels = []
    n = min(len(AudioFiles_0), len(AudioFiles_1))
    selectedSamples_0 = random.sample(AudioFiles_0, n)
    selectedSamples_1 = random.sample(AudioFiles_1, n)
    for i in range(n):
        features.append(features_extractor(path_0 + "/" + selectedSamples_0[i]))
        labels.append(0)
        features.append(features_extractor(path_1 + "/" + selectedSamples_1[i]))
        labels.append(1)
    print(f"Dataset: {recordName} Number of Samples: {len(labels)}")
    recordDatasets[f"{recordName}"] = {
        "features" : features,
        "labels" : labels
    }
    numberOfSamples.append(len(labels))


Dataset: ENG Number of Samples: 0
Dataset: 4CH072I Number of Samples: 722
Dataset: 4CH071I Number of Samples: 218
Dataset: 4CH069I Number of Samples: 1106
Dataset: 4CH067I Number of Samples: 858
Dataset: MAAP3 Number of Samples: 2184
Dataset: MAAP5 Number of Samples: 2222
Dataset: MAAP2 Number of Samples: 1964
Dataset: 4CH065I Number of Samples: 976
Dataset: 4CH066I Number of Samples: 4544
Dataset: 4CH068I Number of Samples: 3696


KeyboardInterrupt: 

In [31]:
X, y = [], []
for recordName in recordNames:
    X.extend(recordDatasets[recordName]['features'])
    y.extend(recordDatasets[recordName]['labels'])
X = np.array(X)
y = np.array(y)

In [32]:
### Train Test Split
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=0)

In [33]:
model=Sequential()
###first layer
model.add(Dense(100,input_shape=(40,)))
model.add(Activation('relu'))
model.add(Dropout(0.5))
###second layer
model.add(Dense(200))
model.add(Activation('relu'))
model.add(Dropout(0.5))
###third layer
model.add(Dense(100))
model.add(Activation('relu'))
model.add(Dropout(0.5))

###final layer
model.add(Dense(1))
model.add(Activation('sigmoid'))

In [34]:
model.compile(loss='binary_crossentropy',metrics=['accuracy'],optimizer='adam')

In [35]:
## Trianing my model
from tensorflow.keras.callbacks import ModelCheckpoint
from datetime import datetime 

num_epochs = 150
num_batch_size = 32

checkpointer = ModelCheckpoint(filepath='saved_models/audio_classification.hdf5', 
                               verbose=1, save_best_only=True)
start = datetime.now()

model.fit(X_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(X_test, y_test), callbacks=[checkpointer], verbose=1)


duration = datetime.now() - start
print("Training completed in time: ", duration)

Epoch 1/150
Epoch 1: val_loss improved from inf to 0.67834, saving model to saved_models/audio_classification.hdf5
Epoch 2/150
Epoch 2: val_loss improved from 0.67834 to 0.67436, saving model to saved_models/audio_classification.hdf5
Epoch 3/150
Epoch 3: val_loss improved from 0.67436 to 0.64170, saving model to saved_models/audio_classification.hdf5
Epoch 4/150
Epoch 4: val_loss improved from 0.64170 to 0.61353, saving model to saved_models/audio_classification.hdf5
Epoch 5/150
Epoch 5: val_loss improved from 0.61353 to 0.50333, saving model to saved_models/audio_classification.hdf5
Epoch 6/150
Epoch 6: val_loss improved from 0.50333 to 0.50216, saving model to saved_models/audio_classification.hdf5
Epoch 7/150
Epoch 7: val_loss improved from 0.50216 to 0.45863, saving model to saved_models/audio_classification.hdf5
Epoch 8/150
Epoch 8: val_loss improved from 0.45863 to 0.44172, saving model to saved_models/audio_classification.hdf5
Epoch 9/150
Epoch 9: val_loss did not improve from 0

In [36]:
test_accuracy=model.evaluate(X_test,y_test,verbose=0)
print(test_accuracy[1])

0.855461835861206
