In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
import librosa
import numpy as np
import time

# Read the dataset from a CSV file
df = pd.read_csv('C:/Users/sufiy/Downloads/voice.csv')

# Extract features and labels
X = df.drop('label', axis=1)  # Features
y = df['label']  # Labels

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the model on the training set
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train_scaled, y_train)

# Predictions on the testing set
y_pred = rf_classifier.predict(X_test_scaled)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Display classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))

def extract_features(audio_file_path, n_mfcc=20):
    y, sr = librosa.load(audio_file_path, sr=None)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    mean_mfccs = np.mean(mfccs, axis=1)
    return mean_mfccs

# Function to predict gender based on a user-provided audio file
def predict_gender(audio_file_path, threshold=0.4):
    try:
        # Extract features from the user-provided audio file
        features = pd.DataFrame([extract_features(audio_file_path)])
        print("Extracted Features:", features)  # Print the extracted features
        features_scaled = scaler.transform(features)

        # Predict the probability of being class 1 (female)
        probability_female = rf_classifier.predict_proba(features_scaled)[:, 1]

        # Make prediction based on the threshold
        gender_label = "female" if probability_female >= threshold else "male"

        return gender_label
    except Exception as e:
        return f"Error: {str(e)}"

# User input: Path to the audio file with a timeout
timeout_seconds = 60  # Set a timeout (adjust as needed)
start_time = time.time()

while True:
    if time.time() - start_time > timeout_seconds:
        print("Timeout reached. Exiting.")
        break

    try:
        # Prompt user for input with a timeout
        audio_path = input("Enter the path to the audio file: ")

        # Check if the user provided a path
        if audio_path:
            # Predict gender based on the user input
            predicted_gender = predict_gender(audio_path)

            # Display the predicted gender
            print(f"Predicted gender: {predicted_gender}")
            break  # Break out of the loop after successful prediction
        else:
            print("Please provide a valid audio file path.")
    except KeyboardInterrupt:
        print("\nUser interrupted. Exiting.")
        break
    except Exception as e:
        print(f"Error: {str(e)}")

Accuracy: 0.98
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.99      0.98       297
           1       0.99      0.98      0.99       337

    accuracy                           0.98       634
   macro avg       0.98      0.98      0.98       634
weighted avg       0.98      0.98      0.98       634

Enter the path to the audio file: C:/NullClass_Internship_projects(Sufiyan)/Voice_Gender_Detection/dataset and audio samples to test/voicesample2.wav
Extracted Features:            0          1          2          3         4         5         6   \
0 -275.752136  52.380169  11.664045  37.136749  6.917902  4.147127  7.154928   

         7         8         9         10        11        12        13  \
0  1.713875  4.838797  5.021072  1.789734  1.705184  2.447904  4.329894   

         14        15        16        17        18        19  
0  0.254191  2.968092  0.885584  2.259485 -2.599371  2.096371  
Predicted gender: male


