In [5]:
import numpy as np
import sounddevice as sd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Function to capture real-time audio
def capture_audio(duration=5, sampling_rate=44100):
    """Record one mono clip with ``sounddevice`` and return it as a 1-D array.

    Args:
        duration: Clip length in seconds.
        sampling_rate: Samples per second handed to ``sd.rec``.

    Returns:
        The recorded ``int16`` buffer flattened to one dimension;
        ``int(sampling_rate * duration)`` frames are requested.
    """
    print("Recording...")
    frame_count = int(sampling_rate * duration)
    recording = sd.rec(
        frame_count,
        samplerate=sampling_rate,
        channels=1,
        dtype='int16',
    )
    # Block until the recording has finished before touching the buffer.
    sd.wait()
    return recording.flatten()



# Function to collect training data
def collect_training_data(num_samples=10, label=0):
    """Interactively record ``num_samples`` clips that all share one label.

    Before each clip the user is prompted to press Enter, then a clip is
    recorded with ``capture_audio()`` using its default arguments.

    Args:
        num_samples: How many clips to record.
        label: Class label attached to every recorded clip.

    Returns:
        A tuple ``(X, y)``: the clips stacked row-wise with ``np.vstack``
        and an array holding ``label`` once per clip.
    """
    def _record_after_prompt():
        # Wait for the user so each recording starts on demand.
        input("Press Enter and speak for a few seconds...")
        return capture_audio()

    recordings = [_record_after_prompt() for _ in range(num_samples)]
    labels = [label] * len(recordings)

    return np.vstack(recordings), np.array(labels)

# Thin wrapper around the scikit-learn model used for voice / non-voice classification
class VoiceClassifier:
    """Minimal train/predict wrapper around a random-forest model.

    The wrapped estimator is stored on ``self.model`` and is created with
    ``RandomForestClassifier()`` (no arguments).
    """

    def __init__(self):
        forest = RandomForestClassifier()
        self.model = forest

    def train(self, X_train, y_train):
        """Fit the wrapped model on the given samples and labels."""
        estimator = self.model
        estimator.fit(X_train, y_train)

    def predict(self, X_test):
        """Return the wrapped model's predictions for ``X_test``."""
        predicted = self.model.predict(X_test)
        return predicted

# Collect positive samples (voice)
positive_X, positive_y = collect_training_data(num_samples=10, label=1)

# Collect negative samples (background noise or non-voice)
negative_X, negative_y = collect_training_data(num_samples=10, label=0)

# Combine both classes into one dataset (shuffling is done by train_test_split below)
X = np.vstack([positive_X, negative_X])
y = np.concatenate([positive_y, negative_y])

# Split the data into training and testing sets.
# With 20 clips and test_size=0.2 the test set has only 4 clips, so the split is
# stratified on y to guarantee both classes appear in it (2 voice / 2 non-voice);
# an unstratified split can leave the test set dominated by a single class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Train the voice classifier model
# NOTE(review): the features are the raw recorded samples of each clip; the
# recorded run of this cell scored 50%, so a feature-extraction step is
# probably needed before the model can generalise — confirm.
voice_classifier = VoiceClassifier()
voice_classifier.train(X_train, y_train)

# Make predictions on the test set
predictions = voice_classifier.predict(X_test)

# Evaluate the model (with 4 test clips, accuracy moves in steps of 25%)
accuracy = accuracy_score(y_test, predictions)
print(f"Model Accuracy: {accuracy * 100:.2f}%")


Recording...
Recording...
Recording...
Recording...
Recording...
Recording...
Recording...
Recording...
Recording...
Recording...
Model Accuracy: 50.00%


In [6]:
pip install joblib sounddevice scikit-learn


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [7]:
import joblib
# Persist the fitted classifier object to disk so it can be reloaded later
joblib.dump(value=voice_classifier, filename="voice_classifier_model.pkl")


['voice_classifier_model.pkl']

In [8]:
import numpy as np
import sounddevice as sd
from sklearn.ensemble import RandomForestClassifier
import joblib  # For model persistence; imported here so this cell does not rely on an earlier cell

# Load the pre-trained model.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code —
# only load model files you created yourself or otherwise trust.
# NOTE(review): the file was saved from a VoiceClassifier instance, so that
# class presumably has to be defined in the session before loading — confirm.
voice_classifier = joblib.load("voice_classifier_model.pkl")

# Function to capture real-time audio
def capture_audio(duration=5, sampling_rate=44100):
    """Record one mono clip with ``sounddevice`` and return it as a 1-D array.

    Args:
        duration: Clip length in seconds.
        sampling_rate: Samples per second handed to ``sd.rec``.

    Returns:
        The recorded ``int16`` buffer flattened to one dimension;
        ``int(sampling_rate * duration)`` frames are requested.
    """
    print("Recording...")
    frame_count = int(sampling_rate * duration)
    recording = sd.rec(
        frame_count,
        samplerate=sampling_rate,
        channels=1,
        dtype='int16',
    )
    # Block until the recording has finished before touching the buffer.
    sd.wait()
    return recording.flatten()

# Function to predict voice using the trained model
def predict_voice(audio_sample):
    """Classify a single audio sample with the loaded ``voice_classifier``.

    The sample is wrapped in a one-element list before being passed to
    ``voice_classifier.predict``, and the first (only) entry of the result
    is returned.
    """
    batch = [audio_sample]
    labels = voice_classifier.predict(batch)
    return labels[0]

# Main program for real-time voice classification
def real_time_voice_classification(max_iterations=None):
    """Repeatedly record a clip and report whether the model hears a voice.

    Each round waits for the user to press Enter, records a clip with
    ``capture_audio()``, classifies it with ``predict_voice()`` and prints
    the verdict.

    Args:
        max_iterations: Optional cap on the number of rounds. ``None``
            (the default) keeps looping until the session is interrupted,
            which matches the original behaviour.

    Returns:
        None. The loop ends after ``max_iterations`` rounds, or cleanly when
        the kernel is interrupted (``KeyboardInterrupt``) or input is closed
        (``EOFError``), instead of surfacing a traceback.
    """
    completed = 0
    try:
        while max_iterations is None or completed < max_iterations:
            input("Press Enter and speak for a few seconds...")

            # Capture new audio
            new_audio_sample = capture_audio()

            # Predict if it's voice or non-voice
            result = predict_voice(new_audio_sample)

            if result == 1:
                print("Voice detected!")
            else:
                print("Non-voice detected.")

            completed += 1
    except (KeyboardInterrupt, EOFError):
        # Interrupting the kernel or closing stdin is the only way to leave
        # an unbounded session, so treat it as a normal stop.
        print("\nStopped real-time classification.")

# Entry point: start the interactive classification loop only when this code
# runs as the main module (not when it is imported).
if __name__ == "__main__":
    real_time_voice_classification()


Recording...
Non-voice detected.
Recording...
Non-voice detected.
Recording...
Non-voice detected.
Recording...
Non-voice detected.
Recording...
Non-voice detected.
Recording...
Non-voice detected.
Recording...
Non-voice detected.
Recording...
Non-voice detected.
Recording...
Non-voice detected.
Recording...
Non-voice detected.
Recording...
