In [1]:
import os
import librosa
import numpy as np
import joblib
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

In [2]:
def extract_features(file_path):
    try:
        y, sr = librosa.load(file_path, sr=16000)  # Load audio file
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)  # Extract MFCCs
        pitch = librosa.yin(y, fmin=50, fmax=500).mean() 
        zcr = librosa.feature.zero_crossing_rate(y).mean() 
        return np.hstack((np.mean(mfcc, axis=1), pitch, zcr))  # Combine features
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None

In [None]:
# Function to load dataset from "male" and "female" folders
def load_dataset(directory):
    X, y = [], []
    
    for label, gender in enumerate(["male", "female"]):  # 0: male, 1: female
        class_dir = os.path.join(directory, gender)
        if not os.path.isdir(class_dir):
            print(f"Warning: Directory {class_dir} not found")
            continue
            
        for file in os.listdir(class_dir):
            file_path = os.path.join(class_dir, file)
            if file_path.endswith(".wav"):  
                features = extract_features(file_path)
                if features is not None:
                    X.append(features)
                    y.append(label)  

    return np.array(X), np.array(y)

# Specify dataset path 
# Dataset can be downloaded from https://www.kaggle.com/datasets/murtadhanajim/gender-recognition-by-voiceoriginal
dataset_path = "add the path to your data set"  # Ensure "dataset/male" and "dataset/female" exist

# Load dataset
print("Loading dataset...")
X, y = load_dataset(dataset_path)

print("Dataset loaded successfully!")
print(f"Total samples: {len(y)}")
print(f"Feature shape: {X.shape}")

Loading dataset...
Dataset loaded successfully!
Total samples: 16148
Feature shape: (16148, 15)


In [4]:

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train an SVM classifier
print("Training model...")
model = SVC(kernel='linear', probability=True)
model.fit(X_train, y_train)

# Save model and scaler
joblib.dump(model, "gender_recognition_svm.pkl")
joblib.dump(scaler, "scaler.pkl")

# Evaluate model
y_pred = model.predict(X_test)
print("\nModel Evaluation:")
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=['Male', 'Female']))

# Function to predict gender from a new voice sample
def predict_gender(file_path, model_path="gender_recognition_svm.pkl", scaler_path="scaler.pkl"):
    model = joblib.load(model_path)
    scaler = joblib.load(scaler_path)
    
    features = extract_features(file_path)
    if features is None:
        return "Error extracting features"
    
    features = scaler.transform([features])  # Standardize features
    prediction = model.predict(features)
    return "Male" if prediction[0] == 0 else "Female"

# Example usage
test_audio = "test_voice.wav"  # Replace with an actual audio file path
print("\nPredicted Gender:", predict_gender(test_audio))


Training model...

Model Evaluation:
Accuracy: 0.9786377708978328
              precision    recall  f1-score   support

        Male       0.99      0.98      0.98      2114
      Female       0.97      0.97      0.97      1116

    accuracy                           0.98      3230
   macro avg       0.98      0.98      0.98      3230
weighted avg       0.98      0.98      0.98      3230

Error processing test_voice.wav: [Errno 2] No such file or directory: 'test_voice.wav'

Predicted Gender: Error extracting features


  y, sr = librosa.load(file_path, sr=16000)  # Load audio file
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
