In [1]:
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
# Function to extract features from audio file
def extract_features(file_name):
    y, sr = librosa.load(file_name, sr=None)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    return np.mean(mfccs.T, axis=0)

In [3]:
# Load your dataset from a directory
def load_dataset(directory):
    file_paths = []
    labels = []
    for file in os.listdir(directory):
        if file.endswith('.wav'):
            file_paths.append(os.path.join(directory, file))
            # Assuming file names contain gender information, e.g., 'male_01.wav' or 'female_01.wav'
            if 'male' in file:
                labels.append(0)
            elif 'female' in file:
                labels.append(1)
    return file_paths, labels

In [4]:
# Directory containing your audio files
directory = "C:\\Users\\OneDrive\\Documents\\Assignment\\data"

In [5]:
# Extract features and prepare dataset
file_paths, labels = load_dataset(directory)
features = [extract_features(file) for file in file_paths]
x = np.array(features)
y = np.array(labels)

In [6]:
# Split the dataset into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42)

In [7]:
x_train

array([[-3.66325226e+02,  1.06735100e+02,  2.56273746e+01,
        -5.98508596e+00,  7.59117782e-01, -5.00790167e+00,
        -1.63180275e+01,  3.28889036e+00, -8.09356022e+00,
        -1.47490282e+01, -2.84078389e-01, -4.48425150e+00,
        -8.27618504e+00],
       [-3.57809204e+02,  1.09204872e+02,  2.21662188e+00,
         8.12708950e+00,  1.84637337e+01,  5.80751276e+00,
        -9.91049767e-01,  1.66921937e+00, -2.09607334e+01,
         2.98674178e+00, -8.40781307e+00, -9.75141430e+00,
        -5.78062248e+00],
       [-3.49400085e+02,  6.82376938e+01,  1.02700148e+01,
         2.78332025e-01,  1.35588818e+01,  3.13366604e+00,
        -3.02405033e+01, -9.89907074e+00,  4.72461492e-01,
        -1.46522732e+01, -8.06063747e+00,  8.77877930e-04,
        -1.32307415e+01],
       [-3.36805389e+02,  1.44349670e+02,  3.50577974e+00,
         3.41411514e+01,  6.34977722e+00,  8.86483097e+00,
         1.10280848e+00, -7.91195631e+00,  3.52339721e+00,
        -8.14878178e+00, -9.55342388e

In [8]:
x_test

array([[-3.67140900e+02,  1.30940338e+02, -1.90175343e+01,
         2.53299789e+01,  1.18814926e+01,  2.11127186e+01,
        -1.55705005e-01, -1.39177742e+01,  7.73438931e-01,
        -1.33710661e+01, -4.13833857e+00, -4.30739021e+00,
        -4.61977673e+00],
       [-3.65570465e+02,  1.33795303e+02,  6.10747194e+00,
         2.00895958e+01, -3.87295699e+00, -1.12761173e+01,
        -7.76271629e+00,  4.24013996e+00,  5.03311825e+00,
        -1.63360100e+01, -5.38347721e+00, -8.86538410e+00,
        -1.40298195e+01],
       [-3.43253510e+02,  1.54372284e+02,  1.98473701e+01,
         2.59261131e+01,  1.11825094e+01, -1.35007346e+00,
        -6.91724873e+00, -3.89115667e+00,  7.65869379e+00,
        -7.32425451e+00, -5.83440399e+00, -2.35522795e+00,
        -1.40109301e+01]], dtype=float32)

In [9]:
from sklearn.naive_bayes import GaussianNB

In [10]:
clf = GaussianNB() 
clf.fit(x_train, y_train)

In [11]:
from sklearn.metrics import accuracy_score

In [12]:
y_pred = clf.predict(x_test) 
accuracy = accuracy_score(y_test, y_pred) 
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 100.00%
