#     Voice Gender Classification Project 

### This cell imports all necessary Python libraries for the project:
- **NumPy** and **Pandas** for numerical operations and data handling
- **Scikit-learn** utilities for model training, evaluation, and preprocessing
- **Librosa** for audio feature extraction (MFCCs, spectral features)
- **Noisereduce** for audio denoising
- **Sounddevice** and **Scipy** for audio recording and file I/O

In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

import librosa
import noisereduce as nr
from sklearn.preprocessing import StandardScaler


import os
import sounddevice as sd
from scipy.io.wavfile import write



### This cell defines a custom `GaussianNaiveBayes` classifier from scratch:
- **Key Methods**:
  - `fit()`: Computes class-wise mean, variance, and priors from training data.
  - `_log_gaussian_pdf()`: Calculates log probabilities using Gaussian PDF (stable log-space implementation).
  - `predict()`: Classifies samples by maximizing log posterior probabilities.
- **Optimizations**: 
  - Adds `1e-9` to variances to avoid division by zero.
  - Uses log probabilities to prevent numerical underflow.

In [2]:
class GaussianNaiveBayes:
    """Gaussian Naive Bayes classifier implemented with NumPy.

    Every feature is modelled as an independent normal distribution per
    class.  After ``fit`` the instance exposes:

    - ``classes``: sorted array of the distinct labels found in ``y``
    - ``means`` / ``variances``: dicts mapping each class label to a
      per-feature array
    - ``priors``: dict mapping each class label to its relative frequency
    """

    def get_params(self, deep=True):
        """Return the (empty) hyper-parameter dict expected by scikit-learn."""
        return {}  # The model has no tunable parameters

    def set_params(self, **params):
        """Accept and ignore parameters (the model has none); return ``self``."""
        return self

    def fit(self, X, y):
        """Estimate per-class feature means, variances and priors.

        Args:
            X: 2-D array-like, one row per sample.
            y: 1-D array-like of class labels, aligned with the rows of ``X``.

        Returns:
            ``self``, so calls can be chained like scikit-learn estimators.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        self.classes = np.unique(y)
        self.means = {}
        self.variances = {}
        self.priors = {}

        for current_class in self.classes:
            X_cls = X[y == current_class]
            # The sample variance (ddof=1) is undefined for a single row and
            # would yield NaN; fall back to the population variance there.
            ddof = 1 if len(X_cls) > 1 else 0
            self.means[current_class] = np.mean(X_cls, axis=0)
            # 1e-9 keeps constant features from producing a zero variance.
            self.variances[current_class] = np.var(X_cls, axis=0, ddof=ddof) + 1e-9
            self.priors[current_class] = len(X_cls) / len(X)

        return self

    def _log_gaussian_pdf(self, x, mean, var):
        """Return the element-wise log of the normal density N(x; mean, var).

        The expression is evaluated directly in log space.  Computing the
        density first and taking its log afterwards underflows to ``log(0)``
        (``-inf``) as soon as ``x`` lies far from ``mean``.
        """
        return -0.5 * (np.log(2 * np.pi * var) + (x - mean) ** 2 / var)

    def predict(self, X):
        """Return, for each row of ``X``, the class with the highest log posterior.

        Ties are resolved in favour of the class that comes first in
        ``self.classes``.
        """
        # The log priors do not depend on the sample, so compute them once.
        log_priors = {c: np.log(self.priors[c]) for c in self.classes}

        predictions = []
        for x in X:
            # log posterior (up to a constant) = log prior + sum of the
            # per-feature log likelihoods
            class_log_probs = {
                c: log_priors[c]
                + np.sum(self._log_gaussian_pdf(x, self.means[c], self.variances[c]))
                for c in self.classes
            }
            predictions.append(max(class_log_probs, key=class_log_probs.get))
        return np.array(predictions)

### `extract_features()` Function
Processes audio files to extract discriminative features:
1. **Preprocessing**:
   - Noise reduction using spectral gating.
   - Silence trimming (30dB threshold).
   - Amplitude normalization.
2. **Feature Extraction**:
   - 13 MFCCs (mean across frames).
   - Spectral centroid, rolloff, and zero-crossing rate.

#### `build_dataset()` Function
Constructs a labeled dataset from audio files:
- Organizes data by gender (`male=0`, `female=1`).
- Handles missing directories gracefully.
- Returns features and labels as NumPy arrays.

In [14]:
def extract_features(file_path):
    """Turn one audio file into a 16-element feature vector.

    The signal is loaded at its native sample rate, denoised, trimmed and
    peak-normalised before the features are computed.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        1-D NumPy array: 13 frame-averaged MFCCs followed by the mean
        spectral centroid, mean spectral rolloff and mean zero-crossing rate.
    """
    signal, sample_rate = librosa.load(file_path, sr=None)

    # Denoise, using the first half second of the clip as the noise profile
    # (this assumes the recording starts with background noise only).
    noise_profile = signal[:int(0.5 * sample_rate)]
    cleaned = nr.reduce_noise(y=signal, sr=sample_rate, y_noise=noise_profile)

    # Drop leading/trailing silence (30 dB threshold), then normalise.
    voiced, _ = librosa.effects.trim(cleaned, top_db=30)
    scaled = librosa.util.normalize(voiced)

    # Frame-level features, each collapsed to its mean over time.
    mfcc_means = librosa.feature.mfcc(y=scaled, sr=sample_rate, n_mfcc=13).mean(axis=1)
    centroid_mean = librosa.feature.spectral_centroid(y=scaled, sr=sample_rate).mean()
    rolloff_mean = librosa.feature.spectral_rolloff(y=scaled, sr=sample_rate).mean()
    zcr_mean = librosa.feature.zero_crossing_rate(scaled).mean()

    spectral_tail = [centroid_mean, rolloff_mean, zcr_mean]
    return np.concatenate([mfcc_means, spectral_tail])



def build_dataset(folder_path):
    """Extract features for every ``.wav`` file under ``male/`` and ``female/``.

    Args:
        folder_path: Directory expected to contain ``male`` and ``female``
            sub-folders.  A missing sub-folder is reported and skipped.

    Returns:
        Tuple ``(features, labels)`` of NumPy arrays; labels are 0 for male
        and 1 for female.  Both arrays are empty when no ``.wav`` file is found.
    """
    print("Building dataset...")
    print("Folder path:", folder_path)
    features = []
    labels = []
    label_map = {'male': 0, 'female': 1}

    for label_name, label_value in label_map.items():
        subfolder = os.path.join(folder_path, label_name)
        if not os.path.isdir(subfolder):
            print(f"Skipping: {subfolder} (not a folder)")
            continue
        # os.listdir() yields entries in arbitrary order; sorting keeps the
        # row order (and therefore any seeded train/test split) reproducible.
        for file_name in sorted(os.listdir(subfolder)):
            if not file_name.endswith('.wav'):
                continue
            path = os.path.join(subfolder, file_name)
            features.append(extract_features(path))
            labels.append(label_value)
    return np.array(features), np.array(labels)


def save_features_to_csv(features, labels, output_file):
    """
    Write feature vectors and their labels to a CSV file.

    The file has one column per feature (``mfcc_0`` .. ``mfcc_12``,
    ``spectral_centroid``, ``spectral_rolloff``, ``zcr``) followed by a
    ``label`` column, and no index column.

    Args:
        features (np.array): Array of feature vectors, 16 values per row
        labels (np.array): Labels aligned with the rows of ``features``
        output_file (str): Destination path of the CSV file
    """
    # Column headers: 13 MFCCs first, then the three spectral statistics
    header = ['mfcc_%d' % idx for idx in range(13)]
    header.extend(['spectral_centroid', 'spectral_rolloff', 'zcr'])

    table = pd.DataFrame(features)
    table.columns = header
    table['label'] = labels

    table.to_csv(output_file, index=False)
    print(f"Features saved to {output_file}")

### Utility Functions
- **`print_features_with_labels()`**:  
  Displays formatted feature vectors with headers (MFCCs, spectral stats) and labels for debugging.

- **`record_voice()`**:  
  Captures audio via microphone (10s duration, 22.05kHz sample rate) and saves as `input.wav`.

- **`evaluate_model()`**:  
  Computes and prints classification metrics (Accuracy, Precision, Recall, F1).

In [12]:
def print_features_with_labels(X, y):
    """Print every feature vector with its label as a tab-separated table.

    Args:
        X: Iterable of feature rows (numeric values).
        y: Iterable of labels, aligned with the rows of ``X``.
    """
    # Column titles: 13 MFCCs, three spectral statistics, then the label
    column_names = ["MFCC_%02d" % (idx + 1) for idx in range(13)]
    column_names += ['Spectral_Centroid', 'Spectral_Rolloff', 'ZCR', 'Label']

    rule = 80 * "-"

    print("\n" + rule)
    print(format('Features with Labels', '^80'))
    print(rule)
    print("\t".join(format(name, '<15') for name in column_names))

    for row, target in zip(X, y):
        # Right-align each value in a 15-character cell, 4 decimal places
        cells = [format(value, '>15.4f') for value in row]
        cells.append(format(target, '>15'))
        print("\t".join(cells))

    print(rule)
        


def record_voice(filename='input.wav', duration=10, fs=22050):
    """Record a single-channel clip with sounddevice and save it as a WAV file.

    Args:
        filename: Path of the WAV file to write.
        duration: Length of the recording in seconds.
        fs: Sample rate in Hz, used for both the capture and the file.
    """
    print("Recording...")
    frame_count = int(duration * fs)
    captured = sd.rec(frame_count, samplerate=fs, channels=1)
    sd.wait()  # wait for the capture to finish before writing the buffer
    write(filename, fs, captured)
    print("Recording saved as", filename)

def evaluate_model(y_true, y_pred):
    """Print accuracy, precision, recall and F1 score for a set of predictions.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.
    """
    scorers = (
        ("Accuracy:", accuracy_score),
        ("Precision:", precision_score),
        ("Recall:", recall_score),
        ("F1 Score:", f1_score),
    )
    for title, scorer in scorers:
        print(title, scorer(y_true, y_pred))

### 1. **Dataset Construction**:
   - Loads audio data from `./audio_data`.
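   - Splits into 70% train / 30% test sets (`random_state=15`). Note that the split is not stratified: `stratify` is not passed to `train_test_split`, so the class balance of each split is not guaranteed.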

### 2. **Feature Scaling**:
   - Standardizes features (zero mean, unit variance) using `StandardScaler`.

### 3. **Model Training & Evaluation**:
   - **Custom GNB**: Matches scikit-learn's `GaussianNB` (80% accuracy).
   - **Logistic Regression**: Achieves 86.67% accuracy (best performer).
   - Prints detailed feature-label pairs for transparency.

In [None]:
if __name__ == "__main__":
    # End-to-end training run: build the dataset, split it, scale the
    # features, then fit and evaluate three classifiers on the same split.

    # Feature matrix X (one row per .wav file) and label vector y
    # (0 = male, 1 = female) from the male/ and female/ sub-folders.
    X, y = build_dataset("./audio_data")  
    
    # Optional: cache the extracted features so later runs can skip the audio processing.
    # save_features_to_csv(X, y, "features.csv")
    
    # Optional: reload the cached features instead of calling build_dataset().
    # df = pd.read_csv('features.csv')
    # X = df.drop('label', axis=1).values
    # y = df['label'].values
    
    print ("Feature shape:", X.shape)
    # Debug aid: preview the first feature rows.
    # print ("First 10 samples of features:", X[:10])
    print ("Label shape:", y.shape)
    # Debug aid: preview the first labels.
    # print ("First 10 samples of labels:", y[:20])



    
    
    # 70/30 hold-out split with a fixed seed for repeatability.
    # NOTE: no `stratify=` argument is passed, so the male/female ratio of
    # the two splits is whatever the shuffle happens to produce.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=15)
    
    print ("Train set shape:", X_train.shape)
    print ("Test set shape:", X_test.shape)
    print ("Train labels shape:", y_train.shape)
    print ("Test labels shape:", y_test.shape)
    print("\n" + "="*50)

    
    # Dump the full (unscaled) dataset for inspection.
    print_features_with_labels(X, y)
    
    # Standardise the features. The scaler is fitted on the training split
    # only and then applied to the test split, so no test statistics leak
    # into training.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    # Model 1: the from-scratch Gaussian Naive Bayes defined in this file.
    model = GaussianNaiveBayes()
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    print("From Scratch Model")
    evaluate_model(y_test, y_pred)
    

    # Model 2: scikit-learn's GaussianNB, used as a reference for the custom model.
    from sklearn.naive_bayes import GaussianNB
    gnb = GaussianNB()
    gnb.fit(X_train_scaled, y_train)
    sklearn_preds = gnb.predict(X_test_scaled)
    print("Sklearn Model")
    evaluate_model(y_test, sklearn_preds)

    # Model 3: logistic regression with scikit-learn's default settings.
    from sklearn.linear_model import LogisticRegression

    logistic_model = LogisticRegression()
    logistic_model.fit(X_train_scaled, y_train)

    logistic_preds = logistic_model.predict(X_test_scaled)

    print("Logistic Regression Model")
    evaluate_model(y_test, logistic_preds)


Building dataset...
Folder path: ./audio_data
Features saved to features.csv
Feature shape: (100, 16)
Label shape: (100,)
Train set shape: (70, 16)
Test set shape: (30, 16)
Train labels shape: (70,)
Test labels shape: (30,)


--------------------------------------------------------------------------------
                              Features with Labels                              
--------------------------------------------------------------------------------
MFCC_01        	MFCC_02        	MFCC_03        	MFCC_04        	MFCC_05        	MFCC_06        	MFCC_07        	MFCC_08        	MFCC_09        	MFCC_10        	MFCC_11        	MFCC_12        	MFCC_13        	Spectral_Centroid	Spectral_Rolloff	ZCR            	Label          
      -318.0206	        70.8188	        16.9755	        37.0357	       -11.1962	        -4.3173	        -8.5741	       -26.4434	       -12.6614	        -6.0836	       -13.9412	         2.8369	        -6.9908	      2335.8841	      3772.7200	         0.2281	

### 1. **Live Demo**:
   - Records audio → extracts features → scales them.
   - Predicts gender using all trained models (Custom NB, scikit-learn NB, Logistic Regression).

### 2. **Output**:
   - Displays extracted feature vector.

In [6]:
# Uncomment to capture a fresh clip, or to analyse a different file:
# record_voice()
# new_features = extract_features("AUD-20240503-WA0030.wav")

# Features of the saved clip, scaled with the scaler fitted on the training split.
new_features = extract_features("input.wav")
new_features_scaled = scaler.transform([new_features])
print("Extracted features from input.wav:", new_features_scaled)

# One prediction per fitted model (label 0 = male, anything else = female).
custom_pred = model.predict(new_features_scaled)[0]
sklearn_pred = gnb.predict(new_features_scaled)[0]
logistic_model_pred = logistic_model.predict(new_features_scaled)[0]

banner = "=" * 50
print("\n" + banner)
print("VOICE GENDER PREDICTION RESULTS".center(50))
print(banner)

# (leading text, row title, predicted label) for each report line
report_rows = (
    ("\n", 'Custom Model Prediction:', custom_pred),
    ("", 'scikit-learn Prediction:', sklearn_pred),
    ("", 'logistic  Prediction:', logistic_model_pred),
)
for lead, title, predicted in report_rows:
    print(lead + title.ljust(25), "Male" if predicted == 0 else "Female")
    
    

Extracted features from input.wav: [[-0.34639775  1.41266773 -0.19444673  0.17144206  0.06786369 -1.29168165
   0.40729831 -0.05474055 -0.94623946 -0.58338589 -0.52048584  0.48419222
   0.79416779 -0.82338409 -1.02862825  0.20687238]]

         VOICE GENDER PREDICTION RESULTS          

Custom Model Prediction:  Male
scikit-learn Prediction:  Male
logistic  Prediction:     Male


### Evaluates ensemble performance using scikit-learn's `BaggingClassifier`:
1. **Custom NB Ensemble**: 80% accuracy.
2. **GaussianNB Ensemble**: 83.33% accuracy.
3. **Logistic Regression Ensemble**: 86.67% accuracy (best F1-score: 0.846).


In [7]:
from sklearn.ensemble import BaggingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression

print("Scikit-learn Bagging Ensemble Evaluation".center(50))

# Every ensemble uses the same size and seed, so only the base estimator differs.
ensemble_settings = {"n_estimators": 10, "random_state": 42}

# Bagging around the from-scratch Naive Bayes
print("Scikit-learn Bagging with Custom Naïve Bayes:")
bagging_custom_nb = BaggingClassifier(estimator=GaussianNaiveBayes(), **ensemble_settings)
bagging_custom_nb_preds = bagging_custom_nb.fit(X_train_scaled, y_train).predict(X_test_scaled)
evaluate_model(y_test, bagging_custom_nb_preds)

# Bagging around scikit-learn's GaussianNB
print("\nScikit-learn Bagging with GaussianNB:")
bagging_sk_nb = BaggingClassifier(estimator=GaussianNB(), **ensemble_settings)
bagging_sk_nb_preds = bagging_sk_nb.fit(X_train_scaled, y_train).predict(X_test_scaled)
evaluate_model(y_test, bagging_sk_nb_preds)

# Bagging around logistic regression
print("\nScikit-learn Bagging with Logistic Regression:")
bagging_lr = BaggingClassifier(estimator=LogisticRegression(), **ensemble_settings)
bagging_lr_preds = bagging_lr.fit(X_train_scaled, y_train).predict(X_test_scaled)
evaluate_model(y_test, bagging_lr_preds)




     Scikit-learn Bagging Ensemble Evaluation     
Scikit-learn Bagging with Custom Naïve Bayes:
Accuracy: 0.8
Precision: 0.8461538461538461
Recall: 0.7333333333333333
F1 Score: 0.7857142857142857

Scikit-learn Bagging with GaussianNB:
Accuracy: 0.8333333333333334
Precision: 0.8571428571428571
Recall: 0.8
F1 Score: 0.8275862068965517

Scikit-learn Bagging with Logistic Regression:
Accuracy: 0.8666666666666667
Precision: 1.0
Recall: 0.7333333333333333
F1 Score: 0.8461538461538461


### Combines the predictions of all bagging models by majority vote via `scipy.stats.mode`.

In [8]:
from scipy.stats import mode

# Show the three bagging models' test-set predictions side by side.
print (bagging_custom_nb_preds)
print (bagging_sk_nb_preds)
print (bagging_lr_preds)

# One row per test sample, one column per bagging model. This is stored under
# its own name so the dataset label vector `y` is not overwritten, which
# would break re-running the earlier cells.
stacked_preds = np.column_stack((bagging_custom_nb_preds, bagging_sk_nb_preds, bagging_lr_preds))

# Majority vote: the most frequent label in each row.
final_y_predict = mode(stacked_preds, axis=1).mode.flatten()
print(final_y_predict)

print (final_y_predict.shape)
print("Ensemble Model Evaluation")
evaluate_model(y_test, final_y_predict)






[1 0 1 1 0 1 0 0 0 1 1 0 1 1 0 0 1 0 0 1 1 0 0 1 0 0 0 1 0 0]
[1 0 1 1 0 1 0 0 0 1 1 0 1 1 0 0 1 0 0 1 1 0 0 1 0 1 0 1 0 0]
[1 0 1 1 0 1 0 0 0 1 1 0 0 1 0 0 0 0 0 1 1 0 0 1 0 1 0 0 0 0]
[1 0 1 1 0 1 0 0 0 1 1 0 1 1 0 0 1 0 0 1 1 0 0 1 0 1 0 1 0 0]
(30,)
Ensemble Model Evaluation
Accuracy: 0.8333333333333334
Precision: 0.8571428571428571
Recall: 0.8
F1 Score: 0.8275862068965517


### Model Comparison Summary

In [9]:

# Side-by-side metrics table for every model evaluated on the test split.
rule = "=" * 50
print("\n" + rule)
print("Model Comparison Summary".center(50))
print(rule)

# Display name -> test-set predictions
models = {
    "Custom NB": y_pred,
    "Sklearn NB": sklearn_preds,
    "Logistic Reg": logistic_preds,
    "Bagging Custom NB": bagging_custom_nb_preds,
    "Bagging Sklearn NB": bagging_sk_nb_preds,
    "Bagging Logistic Reg": bagging_lr_preds
}

print(f"\n{'Model':<25} {'Accuracy':<10} {'Precision':<10} {'Recall':<10} {'F1':<10}")
print("-" * 65)

# Metric functions in the same order as the table columns
column_scorers = (accuracy_score, precision_score, recall_score, f1_score)

for name, preds in models.items():
    scores = [scorer(y_test, preds) for scorer in column_scorers]
    print(f"{name:<25} " + " ".join(f"{score:<10.4f}" for score in scores))




             Model Comparison Summary             

Model                     Accuracy   Precision  Recall     F1        
-----------------------------------------------------------------
Custom NB                 0.8000     0.8000     0.8000     0.8000    
Sklearn NB                0.8000     0.8000     0.8000     0.8000    
Logistic Reg              0.8667     0.9231     0.8000     0.8571    
Bagging Custom NB         0.8000     0.8462     0.7333     0.7857    
Bagging Sklearn NB        0.8333     0.8571     0.8000     0.8276    
Bagging Logistic Reg      0.8667     1.0000     0.7333     0.8462    


### Demonstrates end-to-end gender prediction on a new recording:

In [10]:

# The bagged logistic regression stands in for the ensemble on the new clip.
ensemble_pred = bagging_lr.predict(new_features_scaled)[0]

banner = "=" * 50
print("\n" + banner)
print("VOICE GENDER PREDICTION RESULTS".center(50))
print(banner)

# (leading text, row title, predicted label); label 0 is reported as male
report_rows = (
    ("\n", 'Custom Model Prediction:', custom_pred),
    ("", 'scikit-learn Prediction:', sklearn_pred),
    ("", 'Logistic Regression:', logistic_model_pred),
    ("", 'Bagging Ensemble:', ensemble_pred),
)
for lead, title, predicted in report_rows:
    print(lead + title.ljust(25), "Male" if predicted == 0 else "Female")


         VOICE GENDER PREDICTION RESULTS          

Custom Model Prediction:  Male
scikit-learn Prediction:  Male
Logistic Regression:      Male
Bagging Ensemble:         Male
