In [5]:
import warnings
warnings.filterwarnings('ignore')

In [1]:
import numpy as np
import sklearn
from torch.utils.data import Dataset, DataLoader
import os
from pathlib import Path
import pandas as pd

## Preprocessing the data

In [17]:
import librosa
def aggregate_2d(feature):
    return np.concatenate([
        np.mean(feature, axis=1),  
        np.std(feature, axis=1),   
        np.max(feature, axis=1),    
        np.min(feature, axis=1),    
    ])
def aggregate(feature):
    return np.array([
        np.mean(feature),
        np.std(feature),
        np.median(feature),
        np.max(feature),
        np.min(feature),
        np.percentile(feature, 25),  
        np.percentile(feature, 75)
    ])
def preprocess_wav(wav_file_path,sample_rate=16000):
    audio, sr = librosa.load(wav_file_path, sr=sample_rate)
    audio = librosa.effects.preemphasis(audio, coef=0.97)
    
    spectrogram = np.abs(librosa.stft(audio, n_fft=1024, hop_length=256))**2
    
    centroid = librosa.feature.spectral_centroid(S=spectrogram, sr=sr)
    centroid = aggregate(centroid)
   
    contrast = librosa.feature.spectral_contrast(S=spectrogram, sr=sr)
    contrast = aggregate_2d(contrast)
    
    flatness = librosa.feature.spectral_flatness(S=spectrogram)
    flatness = aggregate(flatness)
    
    rolloff = librosa.feature.spectral_rolloff(S=spectrogram, sr=sr)
    rolloff = aggregate(rolloff)

    mfccs = librosa.feature.mfcc(S=librosa.power_to_db(spectrogram), n_mfcc=13)
    mfccs = aggregate_2d(mfccs)
    
    features = np.concatenate([mfccs, centroid, contrast, flatness, rolloff],axis=0)
    return features


In [205]:
(preprocess_wav('Dataset/males/0.wav')).shape

(101,)

In [206]:
class GenderDataset(Dataset):
    def __init__(self, root,sample_rate=16000):
        self.root = root
        self.sample_rate = sample_rate
        self.audio_files = list(self.root.glob('*/*.wav'))

    def __len__(self): 
        return len(self.audio_files)
        
    def __getitem__(self,idx):
        audio_file = self.audio_files[idx]
        label = audio_file.parent.stem
        audio_features = preprocess_wav(audio_file,self.sample_rate)
        return audio_features, label
        


In [207]:
dataset = GenderDataset(Path('Dataset'))
dataset[0][0].shape

(101,)

In [208]:
import pandas as pd
label_to_num_map = {'males':0,'females':1}
num_to_label_map = {0:'males',1:'females'}
data=[]
labels=[]
for i in range(len(dataset)):
    features,label = dataset[i]
    data.append(features)
    labels.append(label_to_num_map[label])
df = pd.DataFrame(data)
df['label'] = labels
df.head()



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,92,93,94,95,96,97,98,99,100,label
0,-492.583435,5.590181,-85.012947,-29.718378,13.982943,-15.483821,-1.473186,22.367838,-20.045767,-24.53491,...,4e-05,0.00342,4517.796986,1258.97465,4421.875,7781.25,0.0,3843.75,5343.75,1
1,-624.739197,41.501122,-72.664757,7.006831,-36.303776,-32.625492,-16.470577,-2.263682,-38.48645,-22.437748,...,4.6e-05,0.001125,4062.229671,1837.402135,4046.875,7671.875,671.875,2671.875,5656.25,1
2,-888.857483,78.258888,-33.335213,-11.95757,-13.451708,10.004904,-15.609121,-14.173033,-16.972511,3.926111,...,7e-05,0.013684,3575.753891,1769.505889,3265.625,7265.625,562.5,2218.75,5187.5,1
3,-513.988708,38.329533,-84.357986,-54.898586,-18.513611,-20.977715,-33.619297,-1.87151,-2.335996,-15.545382,...,1.4e-05,0.00277,4317.977941,1768.822872,3984.375,7765.625,0.0,2906.25,5359.375,1
4,-449.6521,55.489613,-45.95636,-41.597935,-19.258604,-1.030979,-12.622212,7.296183,-13.935996,-15.501382,...,2.9e-05,0.005256,4162.197232,1531.697944,3687.5,7718.75,1046.875,2968.75,5203.125,1


In [209]:
labels = np.array(labels)
series = pd.Series(labels)
series.isna().sum()

0

In [210]:
df.columns = df.columns.astype(str)
df.to_csv('data.csv',index=False)


In [211]:
df['label'].isna().sum()

0

## Building the base model

In [2]:

def likelihood(x, mean, variance):
    return (1 / np.sqrt(2 * np.pi * variance)) * np.exp(-0.5 * ((x - mean) ** 2) / variance)

class NaiveBayesClassifier():
    def __init__(self):
        self.variances= {0:{},1:{}}
        self.means= {0:{},1:{}}
        self.priors= {0:0,1:0}
    
    def fit(self,df,y=None):
        self.priors[0] = (df[df['label']==0].shape[0]) / df.shape[0]
        self.priors[1] = (df[df['label']==1].shape[0]) / df.shape[0]
        for i in range(len(df.columns)- 1):
            self.variances[0][i] = df[df['label']==0].iloc[:,i].var()
            self.variances[1][i] = df[df['label']==1].iloc[:,i].var()
            self.means[0][i] = df[df['label']==0].iloc[:,i].mean()
            self.means[1][i] = df[df['label']==1].iloc[:,i].mean()
    def predict_sample(self,x):
        likelihoods = {0:[],1:[]}
        for i in range(len(x)):
            likelihoods[0].append(likelihood(x[i],self.means[0][i],self.variances[0][i]))
            likelihoods[1].append(likelihood(x[i],self.means[1][i],self.variances[1][i]))
        posteriors = {0:0,1:0}
        for i in range(len(likelihoods[0])):
            posteriors[0] += np.log(likelihoods[0][i])
            posteriors[1] += np.log(likelihoods[1][i])
        posteriors[0] += np.log(self.priors[0])
        posteriors[1] += np.log(self.priors[1])
        if(posteriors[0] > posteriors[1]):
            return 0
        return 1
    def predict(self,X):
        predictions = []
        for x in X:
            predictions.append(self.predict_sample(x))
        return np.array(predictions)
    
    
    


## Training the model

In [9]:
df = pd.read_csv('data.csv')

In [10]:

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
scaler = StandardScaler()
df_train, df_test = train_test_split(df,test_size=0.2,random_state=42)
y_train = df_train['label'].values
X= scaler.fit_transform(df_train.drop(columns=['label']))
df_scaled = pd.DataFrame(X,columns=df_train.columns[:-1])
df_scaled['label'] = y_train




In [11]:
naive_bayes = NaiveBayesClassifier()
naive_bayes.fit(df_scaled)



In [12]:
X_test = scaler.transform(df_test.drop(columns=['label']))
y_test = df_test['label']
y_pred =naive_bayes.predict(X_test)


In [13]:
accuracy = np.sum(y_pred == y_test) / len(y_test)
print(f"From Scratch Naive-Bayes Accuracy: {accuracy:.2f}")




From Scratch Naive-Bayes Accuracy: 0.82


In [14]:
from sklearn.metrics import classification_report
individual_naive_report = classification_report(y_test,y_pred)
print(individual_naive_report)


              precision    recall  f1-score   support

           0       0.85      0.85      0.85       711
           1       0.77      0.77      0.77       488

    accuracy                           0.82      1199
   macro avg       0.81      0.81      0.81      1199
weighted avg       0.82      0.82      0.82      1199



In [15]:
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
X_train = df_scaled.drop(columns=['label'])
y_train = df_scaled['label']
gnb.fit(X_train,y_train)

y_pred = gnb.predict(X_test)
accuracy = np.sum(y_pred == y_test) / len(y_test)
print(f"Sklearn Naive Bayes Accuracy: {accuracy:.2f}")



Sklearn Naive Bayes Accuracy: 0.82


## Testing with Custom samples

In [369]:
import sounddevice as sd
from scipy.io.wavfile import write

fs = 44100  
duration = 5 

print("Recording...")
audio = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='int16',device=2)
sd.wait()  
print("Recording finished.")

write("output.wav", fs, audio)

Recording...
Recording finished.


In [19]:
def predict_custom_sample(wav_path,model):
    features = preprocess_wav(wav_path)
    features = scaler.transform(features.reshape(1,-1))
    output = model.predict_sample(features.flatten())
    gender = 'Male' if output == 0 else 'Female'
    return gender
prediction = predict_custom_sample('output.wav',naive_bayes)

print(f"Predicted Gender: {prediction}")


Predicted Gender: Male


## Using Bagging ensemble

In [408]:
import copy
from scipy.stats import mode

def random_subset(X,y=None):
    n = X.shape[0]
    indices = np.random.choice(n,size=n,replace=True)
    if y is not None:
        return X[indices], y[indices]
    return X.iloc[indices], None
    
    
class BaggingClassifier():
    def __init__(self,base_model,n_models):
        self.base_model = base_model
        self.n_models = n_models
        self.models = []
    def fit(self,X,y=None):
        for i in range(self.n_models):
            model = copy.deepcopy(self.base_model)
            X_train, y_train = random_subset(X,y)
            model.fit(X_train,y_train)
            self.models.append(model)

    def predict(self,X):
        predictions = []
        for model in self.models:
            prediction = model.predict(X)
            predictions.append(prediction)
        return mode(predictions)[0]


In [411]:
base_naive = NaiveBayesClassifier()
bagging_naive = BaggingClassifier(base_model=base_naive,n_models=10)
bagging_naive.fit(df_scaled)

In [412]:
y_pred_bagging = bagging_naive.predict(X_test)
bagging_naive_report = classification_report(y_test,y_pred_bagging)
print(bagging_naive_report)


              precision    recall  f1-score   support

           0       0.84      0.85      0.85       711
           1       0.78      0.77      0.77       488

    accuracy                           0.82      1199
   macro avg       0.81      0.81      0.81      1199
weighted avg       0.82      0.82      0.82      1199



In [409]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
bagging_lr = BaggingClassifier(base_model=lr,n_models=10)
bagging_lr.fit((df_scaled.drop(columns=['label'])).values,df_scaled['label'].values)

In [410]:
y_pred_bagging_lr = bagging_lr.predict(X_test)
bagging_lr_report = classification_report(y_test,y_pred_bagging_lr)
print(bagging_lr_report)

              precision    recall  f1-score   support

           0       0.89      0.93      0.91       711
           1       0.89      0.84      0.87       488

    accuracy                           0.89      1199
   macro avg       0.89      0.89      0.89      1199
weighted avg       0.89      0.89      0.89      1199



## Comparison between different approaches

In [417]:
print('Individual Naive Bayes')
print(individual_naive_report)

Individual Naive Bayes
              precision    recall  f1-score   support

           0       0.85      0.85      0.85       711
           1       0.77      0.77      0.77       488

    accuracy                           0.82      1199
   macro avg       0.81      0.81      0.81      1199
weighted avg       0.82      0.82      0.82      1199



In [418]:
print('Bagging Naive Bayes')
print(bagging_naive_report)

Bagging Naive Bayes
              precision    recall  f1-score   support

           0       0.84      0.85      0.85       711
           1       0.78      0.77      0.77       488

    accuracy                           0.82      1199
   macro avg       0.81      0.81      0.81      1199
weighted avg       0.82      0.82      0.82      1199



In [419]:
print('Bagging Logistic Regression')
print(bagging_lr_report)

Bagging Logistic Regression
              precision    recall  f1-score   support

           0       0.89      0.93      0.91       711
           1       0.89      0.84      0.87       488

    accuracy                           0.89      1199
   macro avg       0.89      0.89      0.89      1199
weighted avg       0.89      0.89      0.89      1199

