In [1]:
import pandas as pd
import numpy as np
import os
import tqdm
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping
from sklearn.model_selection import train_test_split

import librosa
import librosa.display
import IPython.display as ipd
import matplotlib.pyplot as plt


In [2]:
df = pd.read_csv("balanced-all.csv")
df.head()

Unnamed: 0,filename,gender
0,data/cv-other-train/sample-069205.npy,female
1,data/cv-valid-train/sample-063134.npy,female
2,data/cv-other-train/sample-080873.npy,female
3,data/cv-other-train/sample-105595.npy,female
4,data/cv-valid-train/sample-144613.npy,female


In [3]:
# get total samples
n_samples = len(df)
# get total male samples
n_male_samples = len(df[df['gender'] == 'male'])
# get total female samples
n_female_samples = len(df[df['gender'] == 'female'])
print("Total samples:", n_samples)
print("Total male samples:", n_male_samples)
print("Total female samples:", n_female_samples)

Total samples: 66938
Total male samples: 33469
Total female samples: 33469


In [4]:
label2int = {
    "male": 1,
    "female": 0
}

def load_data(vector_length=128):
    """A function to load gender recognition dataset from `data` folder
    After the second run, this will load from results/features.npy and results/labels.npy files
    as it is much faster!"""
    # make sure results folder exists
    if not os.path.isdir("results"):
        os.mkdir("results")
    # if features & labels already loaded individually and bundled, load them from there instead
    if os.path.isfile("results/features.npy") and os.path.isfile("results/labels.npy"):
        X = np.load("results/features.npy")
        y = np.load("results/labels.npy")
        return X, y
    # read dataframe
    df = pd.read_csv("balanced-all.csv")
    # get total samples
    n_samples = len(df)
    # get total male samples
    n_male_samples = len(df[df['gender'] == 'male'])
    # get total female samples
    n_female_samples = len(df[df['gender'] == 'female'])
    print("Total samples:", n_samples)
    print("Total male samples:", n_male_samples)
    print("Total female samples:", n_female_samples)
    # initialize an empty array for all audio features
    X = np.zeros((n_samples, vector_length))
    # initialize an empty array for all audio labels (1 for male and 0 for female)
    y = np.zeros((n_samples, 1))
    for i, (filename, gender) in tqdm.tqdm(enumerate(zip(df['filename'], df['gender'])), "Loading data", total=n_samples):
        features = np.load(filename)
        X[i] = features
        y[i] = label2int[gender]
    # save the audio features and labels into files
    # so we won't load each one of them next run
    np.save("results/features", X)
    np.save("results/labels", y)
    return X, y

In [5]:
def split_data(X, y, test_size=0.1, valid_size=0.1):
    # split training set and testing set
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=7)
    # split training set and validation set
    X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=valid_size, random_state=7)
    # return a dictionary of values
    return {
        "X_train": X_train,
        "X_valid": X_valid,
        "X_test": X_test,
        "y_train": y_train,
        "y_valid": y_valid,
        "y_test": y_test
    }

In [6]:
# load the dataset
X, y = load_data()
# split the data into training, validation and testing sets
data = split_data(X, y, test_size=0.1, valid_size=0.1)

In [7]:
from tensorflow import keras
from tensorflow.keras import layers
from kerastuner import HyperParameters
import tensorflow as tf

def build_model(hp,vector_length=128):
    drop_out_rate = hp.Float('dropout_rate', 0.2, 0.5,0.1)
    model = Sequential()
    model.add(layers.Dense(units=hp.Int('units_input', 32, 512, 32),
                               input_shape=(vector_length,),
                               activation=hp.Choice('activation_input', ['relu','tanh','sigmoid']
                               )))
    model.add(Dropout(drop_out_rate))
    
    for i in range(hp.Int('num_layers_dense', 1, 5, 1)):
        model.add(Dense(units=hp.Int(f"units_dense_{i}", 32, 512, 32),
                              activation=hp.Choice(f"activation_dense_{i}", [ 'relu','tanh','sigmoid'])))
        model.add(Dropout(drop_out_rate))

    model.add(layers.Dense(1,activation="sigmoid"))

    model.compile(optimizer=hp.Choice('optimizer', ['adam', 'rmsprop']),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

  from kerastuner import HyperParameters


In [8]:
from kerastuner import RandomSearch

tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=10,
    executions_per_trial=1,
    directory='my_dir',
    overwrite=True,
    project_name='helloworld')

In [9]:
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=1)

In [10]:
tuner.search(data["X_train"], data["y_train"],epochs=50,validation_data=(data["X_valid"], data["y_valid"]), callbacks=[stop_early])

Trial 10 Complete [00h 03m 45s]
val_accuracy: 0.9268049597740173

Best val_accuracy So Far: 0.9415767788887024
Total elapsed time: 00h 46m 55s
INFO:tensorflow:Oracle triggered exit


In [11]:
tuner.results_summary()

Results summary
Results in my_dir\helloworld
Showing 10 best trials
<keras_tuner.engine.objective.Objective object at 0x000001C853FEA310>
Trial summary
Hyperparameters:
dropout_rate: 0.2
units_input: 288
activation_input: relu
num_layers_dense: 3
units_dense_0: 192
activation_dense_0: tanh
optimizer: adam
units_dense_1: 288
activation_dense_1: relu
units_dense_2: 32
activation_dense_2: relu
Score: 0.9415767788887024
Trial summary
Hyperparameters:
dropout_rate: 0.30000000000000004
units_input: 448
activation_input: relu
num_layers_dense: 4
units_dense_0: 288
activation_dense_0: tanh
optimizer: adam
units_dense_1: 224
activation_dense_1: sigmoid
units_dense_2: 224
activation_dense_2: tanh
units_dense_3: 320
activation_dense_3: tanh
Score: 0.940248966217041
Trial summary
Hyperparameters:
dropout_rate: 0.2
units_input: 352
activation_input: tanh
num_layers_dense: 1
units_dense_0: 384
activation_dense_0: sigmoid
optimizer: adam
units_dense_1: 320
activation_dense_1: relu
units_dense_2: 96
a

In [12]:
# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=10)[0]

In [24]:
# Build the model with the optimal hyperparameters and train it on the data for 50 epochs
model = tuner.hypermodel.build(best_hps)
history = model.fit(data["X_train"], data["y_train"], epochs=50, validation_data=(data["X_valid"], data["y_valid"]))

val_acc_per_epoch = history.history['val_accuracy']
best_epoch = val_acc_per_epoch.index(max(val_acc_per_epoch)) + 1
print('Best epoch: %d' % (best_epoch,))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Best epoch: 48


In [14]:
hypermodel = tuner.hypermodel.build(best_hps)

# Retrain the model
hypermodel.fit(data["X_train"], data["y_train"], epochs=best_epoch, validation_split=0.2)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x1c85d271b80>

In [15]:
eval_result = hypermodel.evaluate(data["X_train"], data["y_train"])
print("[test loss, test accuracy]:", eval_result)

[test loss, test accuracy]: [0.12227419018745422, 0.955495297908783]


In [16]:
# test

In [17]:
import numpy
p = numpy.load('sample-000005.npy');
# print(p)

# predict the gender!
male_prob = model.predict(p.reshape(1,-1))[0][0]
female_prob = 1 - male_prob
gender = "male" if male_prob > female_prob else "female"
# show the result!
print("Result:", gender)
print(f"Probabilities::: Male: {male_prob*100:.2f}%    Female: {female_prob*100:.2f}%")

Result: male
Probabilities::: Male: 99.64%    Female: 0.36%


In [18]:
def extract_feature(file_name, **kwargs):
    """
    Extract feature from audio file `file_name`
        Features supported:
            - MFCC (mfcc)
            - Chroma (chroma)
            - MEL Spectrogram Frequency (mel)
            - Contrast (contrast)
            - Tonnetz (tonnetz)
        e.g:
        `features = extract_feature(path, mel=True, mfcc=True)`
    """
    mfcc = kwargs.get("mfcc")
    chroma = kwargs.get("chroma")
    mel = kwargs.get("mel")
    contrast = kwargs.get("contrast")
    tonnetz = kwargs.get("tonnetz")
    X, sample_rate = librosa.core.load(file_name)
    if chroma or contrast:
        stft = np.abs(librosa.stft(X))
    result = np.array([])
    if mfcc:
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        result = np.hstack((result, mfccs))
    if chroma:
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
        result = np.hstack((result, chroma))
    if mel:
        mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T,axis=0)
        result = np.hstack((result, mel))
    if contrast:
        contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)
        result = np.hstack((result, contrast))
    if tonnetz:
        tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T,axis=0)
        result = np.hstack((result, tonnetz))
    return result

In [20]:
# import numpy
# h=extract_feature('test_data_before_masking/male/5639-40744-0008.flac',mel=True)
# print(len(h))
# # predict the gender!
# male_prob = model.predict(h.reshape(1,-1))[0][0]
# female_prob = 1 - male_prob
# gender = "male" if male_prob > female_prob else "female"
# # show the result!
# print("Result:", gender)
# print(f"Probabilities::: Male: {male_prob*100:.2f}%    Female: {female_prob*100:.2f}%")

In [None]:
import os
import csv
import re

def read_text_gt(datapath):
    
    gt_path = os.path.join(datapath, "SPEAKERS.TXT")
    with open(gt_path, 'r') as file:
        lines = file.readlines()
    speaker_dict = {}
    for num, line in enumerate(lines):
        if num<12:
            continue
        items = line.strip().split(' ')
        key = items[0]
        gender = next((char for char in line if char in ["F", "M"]), None)
        speaker_dict[key] = gender

    audio_dict = {}
    for root, dirs, files in os.walk(datapath):
        for file in files:
            if file.endswith('.flac'):
                key = file.split('-')[0].split('_')[-1]
                audio_dict[os.path.join(root, file)]=speaker_dict[key]
    
    return audio_dict

In [None]:
a=read_text_gt(r'C:\Users\ntyzx\Security and Privacy\test-clean\LibriSpeech',)

# test-clean without masking
male_1 = {}
female_1 = {}
for k, v in a.items():
    if v == 'M':
        male_1[k] = v
    elif v == 'F':
        female_1[k] = v
print(len(male_1))
print(len(female_1))
      
# get all keys
keys_male_1 = male_1.keys()
keys_female_1 = female_1.keys()

# keys to list
key_list_male_1 = list(keys_male_1)
key_list_female_1 = list(keys_female_1)

# print list
print(len(key_list_male_1))  
print(len(key_list_female_1))  

In [None]:
b=read_text_gt(r'C:\Users\ntyzx\Security and Privacy\dev-clean\LibriSpeech',)

# dev-clean without masking
male_2 = {}
female_2 = {}
for k, v in b.items():
    if v == 'M':
        male_2[k] = v
    elif v == 'F':
        female_2[k] = v
print(len(male_2))
print(len(female_2))
      
# get all keys
keys_male_2 = male_2.keys()
keys_female_2 = female_2.keys()

# keys to list
key_list_male_2 = list(keys_male_2)
key_list_female_2 = list(keys_female_2)

# print list
print(len(key_list_male_2))  
print(len(key_list_female_2))  

In [None]:
#combine two data set into one lists
male_1_2 = key_list_male_1+key_list_male_2
female_1_2 = key_list_female_1+key_list_female_2
print(len(male_1_2))
print(len(female_1_2))

In [None]:
h_male =[]
male_prob =[]
female_prob =[]
gender_male=[]
for i in range(len(male_1_2)):
    h_male.append(extract_feature(male_1_2[i], mel=True))

    # predict the gender!
    male_prob.append(model.predict(h_male[i].reshape(1,-1))[0][0])

    female_prob.append(1 - male_prob[i])
    gender_male.append("male" if male_prob[i] > female_prob[i] else "female")

count_of_male = gender_male.count('male')
print("Number of 'male' in the list:", count_of_male)

In [None]:
h_female =[]
male_prob =[]
female_prob =[]
gender_female=[]
for i in range(len(male_1_2)):
    h_female.append(extract_feature(female_1_2[i], mel=True))

    # predict the gender!
    male_prob.append(loaded_model.predict(h_female[i].reshape(1,-1))[0][0])

    female_prob.append(1 - male_prob[i])
    gender_female.append("male" if male_prob[i] > female_prob[i] else "female")

count_of_female = gender_female.count('female')
print("Number of 'female' in the list:", count_of_female)

In [None]:
####################################################################################################################

In [None]:
c=read_text_gt(r'C:\Users\ntyzx\Security and Privacy\test-clean_1\LibriSpeech',)

# test-clean with masking
male_3 = {}
female_3 = {}
for k, v in c.items():
    if v == 'M':
        male_3[k] = v
    elif v == 'F':
        female_3[k] = v
print(len(male_3))
print(len(female_3))
      
# get all keys
keys_male_3 = male_3.keys()
keys_female_3 = female_3.keys()

# keys to list
key_list_male_3 = list(keys_male_3)
key_list_female_3 = list(keys_female_3)

# print list
print(len(key_list_male_3))  
print(len(key_list_female_3))  

In [None]:
d=read_text_gt(r'C:\Users\ntyzx\Security and Privacy\dev-clean_2\LibriSpeech',)

# dev-clean without masking
male_4 = {}
female_4 = {}
for k, v in d.items():
    if v == 'M':
        male_4[k] = v
    elif v == 'F':
        female_4[k] = v
print(len(male_4))
print(len(female_4))
      
# get all keys
keys_male_4 = male_4.keys()
keys_female_4 = female_4.keys()

# keys to list
key_list_male_4 = list(keys_male_4)
key_list_female_4 = list(keys_female_4)

# print list
print(len(key_list_male_4))  
print(len(key_list_female_4))  

In [None]:
#combine two data set into one lists
male_3_4 = key_list_male_3+key_list_male_4
female_3_4 = key_list_female_3+key_list_female_4
print(len(male_3_4))
print(len(female_3_4))

In [None]:
h_male_2 =[]
male_prob =[]
female_prob =[]
gender_male_2=[]
for i in range(len(male_3_4)):
    h_male_2.append(extract_feature(male_3_4[i], mel=True))

    # predict the gender!
    male_prob.append(model.predict(h_male_2[i].reshape(1,-1))[0][0])

    female_prob.append(1 - male_prob[i])
    gender_male_2.append("male" if male_prob[i] > female_prob[i] else "female")

count_of_male = gender_female.count('male')
print("Number of 'male' in the list:", count_of_male)

In [None]:
h_female_2 =[]
male_prob =[]
female_prob =[]
gender_female_2=[]
for i in range(len(male_3_4)):
    h_female_2.append(extract_feature(female_3_4[i], mel=True))

    # predict the gender!
    male_prob.append(model.predict(h_female_2[i].reshape(1,-1))[0][0])

    female_prob.append(1 - male_prob[i])
    gender_female_2.append("male" if male_prob[i] > female_prob[i] else "female")

print(gender_female_2)

count_of_female = gender_female_2.count('female')
print("Number of 'female' in the list:", count_of_female)