In [None]:
# Import all modules
import os
import librosa
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from pydub import AudioSegment
from time import time
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import GridSearchCV, train_test_split
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.preprocessing import StandardScaler

# Read the clip metadata table (one row per recording) into a DataFrame
METADATA_CSV = '../input/common-voice/cv-valid-train.csv'
df = pd.read_csv(METADATA_CSV)

In [None]:
# Install python_speech_features module
!pip install python_speech_features
from python_speech_features import mfcc

In [None]:
# Preview the first few rows of the metadata DataFrame
df.head()

In [None]:
# Tally the rows labelled "male" and "female"; any other value in the
# 'gender' column is left out of both counts.
gender_labels = df['gender']
male_count = sum(1 for label in gender_labels if label == "male")
female_count = sum(1 for label in gender_labels if label == "female")

print("No.of Male voices : ", male_count)
print("No.of Female voices : ", female_count)

In [None]:
# Show the (rows, columns) dimensions of the metadata DataFrame
df.shape

In [None]:
# Split the metadata into one frame per gender label
is_male = df['gender'] == 'male'
is_female = df['gender'] == 'female'
df_male = df[is_male]
df_female = df[is_female]

# Report the size of each subset
# (an earlier run printed (55029, 8) for male and (18249, 8) for female)
print(df_male.shape)
print(df_female.shape)

# Keep only the first 300 rows of each subset
df_male = df_male.iloc[:300]
df_female = df_female.iloc[:300]

print(df_male.shape)
print(df_female.shape)

In [None]:
# Row count of the male subset
print(len(df_male))

In [None]:
# Define the audio path
TRAIN_PATH = '../input/common-voice/cv-valid-train/'

# The function to convert mp3 to wav
def convert_to_wav(df, m_f, path=TRAIN_PATH):
    """Convert every mp3 listed in ``df['filename']`` to a wav file.

    Each wav is written to the current working directory as
    ``<m_f>-<stem>.wav``, where ``<stem>`` is the part of the source file's
    base name that precedes its first dot.

    Parameters
    ----------
    df : object with a ``'filename'`` column
        Relative mp3 paths (``/``-separated) to convert.
    m_f : str
        ``'male'`` or ``'female'``; used as the output filename prefix.
    path : str
        Directory the entries of ``df['filename']`` are relative to.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``m_f`` is neither ``'male'`` nor ``'female'``. Previously such a
        call decoded every mp3 and then wrote nothing.
    """
    if m_f not in ('male', 'female'):
        raise ValueError(f"m_f must be 'male' or 'female', got {m_f!r}")

    for file in tqdm(df['filename']):
        # os.path.join gives the same path as path+file when `path` ends with
        # a separator, and also works when it does not.
        sound = AudioSegment.from_mp3(os.path.join(path, file))

        # Create new wav files based on existing mp3 files
        stem = file.split('/')[-1].split('.')[0]
        sound.export(f'{m_f}-{stem}.wav', format='wav')

# Convert both gender subsets; each output file is prefixed with its label
for gender_label, gender_df in (('male', df_male), ('female', df_female)):
    convert_to_wav(gender_df, m_f=gender_label)

In [None]:
# Re-check the male subset's row count after the mp3-to-wav conversion
print(len(df_male))

In [None]:
# Define a function to load the raw audio files
def _pack_clips(clips):
    """Pack ``(waveform, sample_rate)`` pairs into an ``(n, 2)`` object array."""
    packed = np.empty((len(clips), 2), dtype=object)
    for row, (wave, rate) in enumerate(clips):
        packed[row, 0] = wave
        packed[row, 1] = rate
    return packed


def load_audio(audio_files):
    """Load the wav files whose names start with ``male-`` or ``female-``.

    Parameters
    ----------
    audio_files : iterable of str
        File names to inspect. A name is loaded with ``librosa.load`` when the
        text before its first ``-`` is exactly ``'male'`` or ``'female'``;
        every other name is skipped.

    Returns
    -------
    (male_voices, female_voices) : tuple of numpy.ndarray
        Two object arrays of shape ``(n, 2)``. Column 0 holds what
        ``librosa.load`` returned first (the waveform) and column 1 what it
        returned second (the sample rate).
    """
    # Allocate empty list for male and female voices
    male_voices = []
    female_voices = []

    for file in tqdm(audio_files):
        prefix = file.split('-')[0]
        if prefix == 'male':
            male_voices.append(librosa.load(file))
        elif prefix == 'female':
            female_voices.append(librosa.load(file))

    # The waveforms differ in length, so np.array(list_of_pairs) is a ragged
    # construction: recent NumPy raises ValueError for it. Filling an object
    # array explicitly yields the same (n, 2) layout on every NumPy version
    # and keeps that layout when a list is empty.
    return _pack_clips(male_voices), _pack_clips(female_voices)

# How to use load_audio() function
# os.listdir() returns entries in arbitrary order; sorting fixes the clip
# order so the stacked features and the later train/test split are the same
# on every run.
male_voices, female_voices = load_audio(sorted(os.listdir()))

In [None]:
# The function to extract audio features
def extract_features(audio_data):
    """Compute an MFCC matrix (``numcep=26``) for every clip in ``audio_data``.

    Parameters
    ----------
    audio_data : numpy.ndarray
        2-D array indexed as ``[clip, field]``: column 0 is the waveform and
        column 1 is that clip's sample rate.

    Returns
    -------
    numpy.ndarray
        One MFCC matrix per clip, in input order. When all matrices share a
        shape this is the regular stacked array ``np.array`` produces;
        otherwise it is a 1-D object array with one matrix per element.
    """
    audio_waves = audio_data[:, 0]
    samplerates = audio_data[:, 1]

    features = []
    # Pair each waveform with its own sample rate. The previous code read a
    # single rate from index 1 (the second clip), which raised IndexError
    # whenever only one clip was supplied.
    for audio_wave, samplerate in zip(tqdm(audio_waves), samplerates):
        features.append(mfcc(audio_wave, samplerate=samplerate, numcep=26))

    # Equal-shaped matrices can be stacked directly.
    if len({feature.shape for feature in features}) <= 1:
        return np.array(features)

    # Clips of different length give matrices with different row counts;
    # np.array() on such a list raises ValueError on recent NumPy, so fill an
    # object array element by element instead.
    ragged = np.empty(len(features), dtype=object)
    for index, feature in enumerate(features):
        ragged[index] = feature
    return ragged

# Run extract_features() on the male clips, then on the female clips
male_features, female_features = (
    extract_features(voices) for voices in (male_voices, female_voices)
)

In [None]:
# The function used to concatenate all audio features forming a long 2-dimensional array
def concatenate_features(audio_features):
    """Stack per-clip feature matrices row-wise into one 2-D array.

    Parameters
    ----------
    audio_features : sequence of numpy.ndarray
        Feature matrices that all have the same number of columns. A list, a
        1-D object array of matrices, or a regular 3-D array all work.

    Returns
    -------
    numpy.ndarray
        All rows of all matrices, in input order, each matrix appearing
        exactly once.

    Raises
    ------
    ValueError
        If ``audio_features`` is empty.
    """
    matrices = list(audio_features)
    if not matrices:
        raise ValueError("audio_features must contain at least one feature matrix")

    # A single vstack replaces the old accumulate-in-a-loop approach, which
    # seeded the result with element 0 and then stacked element 0 again
    # (duplicating the first clip's rows) and re-copied the growing array on
    # every iteration.
    return np.vstack(matrices)

# Stack the per-clip features of each gender into one long 2-D array
male_concatenated = concatenate_features(male_features)
female_concatenated = concatenate_features(female_features)

# Print each stacked array followed by its shape (male first, then female).
# Shapes noted from an earlier run: male (117576, 26), female (124755, 26)
for stacked in (male_concatenated, female_concatenated):
    print(stacked)
    print(stacked.shape)



In [None]:
# Put the male rows on top of the female rows
X = np.vstack([male_concatenated, female_concatenated])

# One label per row: 0 for every male row, 1 for every female row
male_labels = np.asarray([0] * len(male_concatenated))
female_labels = np.asarray([1] * len(female_concatenated))
y = np.concatenate((male_labels, female_labels))

# Hold out 20% of the rows for testing.
# NOTE(review): the split is over individual rows of X, so rows that came from
# the same recording can presumably end up in both train and test — confirm
# whether a per-clip split is wanted.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=22,
)

In [None]:
# Number of rows in the training split, then in the test split
print(len(X_train))
print(len(X_test))

In [None]:
# Class balance of the training labels: label 0 is counted as male, every
# other label as female. (The *_test variable names are kept unchanged even
# though this cell tallies y_train.)
male_test = int(np.count_nonzero(np.asarray(y_train) == 0))
female_test = len(X_train) - male_test
print(f"Male samples : {male_test} and Female samples : {female_test} Total : {male_test + female_test}")


In [None]:
# Class balance of the test labels: label 0 is counted as male, every other
# label as female.
male_test = int(np.count_nonzero(np.asarray(y_test) == 0))
female_test = len(X_test) - male_test
print(f"Male samples : {male_test} and Female samples : {female_test} Total : {male_test + female_test}")


In [None]:
import tensorflow as tf
import numpy as np

# Seed TensorFlow's global random generator so that TensorFlow-side randomness
# (such as the initial layer weights) repeats from run to run. 22 is the same
# value used as random_state for the train/test split. Full bit-for-bit
# determinism may need additional settings depending on the TF version.
tf.random.set_seed(22)

# Define the model architecture: 26 input features -> 128 -> 64 -> 64 -> 32 -> 1
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, input_shape=(26,), activation='relu'),  # First hidden layer (takes the 26 features)
    tf.keras.layers.Dense(64, activation='relu'),  # Second hidden layer
    tf.keras.layers.Dense(64, activation='relu'),  # Third hidden layer
    tf.keras.layers.Dense(32, activation='relu'),  # Fourth hidden layer
    tf.keras.layers.Dense(1, activation='sigmoid')  # Output layer for binary classification
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
# NOTE(review): the test split doubles as validation data here, so the
# validation curves and the final test score are measured on the same rows.
history = model.fit(X_train, y_train, epochs=25, batch_size=32, validation_data=(X_test, y_test))

# Access the loss and accuracy at each epoch from the history object
loss_values = history.history['loss']  # Training loss at each epoch
accuracy_values = history.history['accuracy']  # Training accuracy at each epoch
val_loss_values = history.history['val_loss']  # Validation loss at each epoch
val_accuracy_values = history.history['val_accuracy']  # Validation accuracy at each epoch

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss : {loss}")
print(f'Test accuracy: {accuracy}') 

In [None]:
from sklearn.ensemble import RandomForestClassifier
# accuracy_score is imported here because the only other import of it sits in
# a later cell, so a top-to-bottom run would hit a NameError below.
from sklearn.metrics import accuracy_score

# Random forest with 100 trees, fitted on the same training rows as the network
clf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier
clf.fit(X_train, y_train)

# Make predictions on the test set
y_pred = clf.predict(X_test)

# Calculate accuracy
# (this rebinds `accuracy`, which previously held the network's test accuracy)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

In [None]:
# Print the per-epoch training and validation metrics taken from the fit history
print(f"Training loss: {loss_values}\nTraining accuracy:{accuracy_values}\nValidation loss: {val_loss_values}\nValidation accuracy: {val_accuracy_values}")

In [None]:
import matplotlib.pyplot as plt

# One x position per recorded epoch, numbered from 1
epochs = range(1, len(loss_values) + 1)

# Training loss in red, validation loss in blue, on a single set of axes
fig, ax = plt.subplots()
ax.plot(epochs, loss_values, 'r', label='Training Loss')
ax.plot(epochs, val_loss_values, 'b', label='Validation Loss')
ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()


In [None]:
import matplotlib.pyplot as plt

# One x position per recorded epoch, numbered from 1
epochs = range(1, len(accuracy_values) + 1)

# Training accuracy in red, validation accuracy in blue, on a single set of axes
fig, ax = plt.subplots()
ax.plot(epochs, accuracy_values, 'r', label='Training Accuracy')
ax.plot(epochs, val_accuracy_values, 'b', label='Validation Accuracy')
ax.set_title('Training and Validation Accuracy')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()


In [None]:
import matplotlib.pyplot as plt

# One x position per recorded epoch, numbered from 1
epochs = range(1, len(accuracy_values) + 1)

# Training accuracy (red) and training loss (blue) drawn on the same axes
fig, ax = plt.subplots()
ax.plot(epochs, accuracy_values, 'r', label='Training Accuracy')
ax.plot(epochs, loss_values, 'b', label='Training loss')
ax.set_title('Accuracy VS Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy X Loss')
ax.legend()
plt.show()


In [None]:
from sklearn.metrics import f1_score, recall_score, precision_score, accuracy_score


# Score every test row with the network, then turn each score into a 0/1
# label: scores above 0.5 become 1, everything else 0
ann_predictions = model.predict(X_test)
binary_results = (ann_predictions > 0.5).astype(int).ravel().tolist()

# Confusion matrix and summary metrics of the thresholded predictions
cm = confusion_matrix(y_test, binary_results)
f1 = f1_score(y_test, binary_results)
recall = recall_score(y_test, binary_results)
prec = precision_score(y_test, binary_results)
acc = accuracy_score(y_test, binary_results)

# Draw the confusion matrix as an annotated heatmap
fig, ax = plt.subplots(figsize=(6, 6))
ax.set_title('Confusion matrix on test data')
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap=plt.cm.Blues,
    cbar=False,
    annot_kws={'size': 14},
    ax=ax,
)
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
plt.show()

print(f"Evaluation the model on TESTING data : \nf1_score : {f1}\nrecall_score : {recall}\nprecision_score : {prec}\naccuracy_score : {acc}")

In [None]:
# Print the raw counts of the confusion matrix computed from y_test and binary_results
print(cm)

In [None]:
# Class balance of the training labels: label 0 is counted as male, every
# other label as female. (The *_test variable names are kept unchanged even
# though this cell tallies y_train.)
male_test = int(np.count_nonzero(np.asarray(y_train) == 0))
female_test = len(X_train) - male_test
print(f"Male samples : {male_test} and Female samples : {female_test} Total : {male_test + female_test}")
