# Import the required libraries:

In [22]:
import sounddevice as sd
import soundfile as sf
import librosa
import numpy as np


In [23]:
#Set the duration and sample rate for recording:
duration = 5  # Set the duration of the recording (in seconds)
sample_rate = 22050  # Set the sample rate (commonly used for audio)


In [24]:
print("Recording started. Speak into the microphone...")
recording = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1)
sd.wait()  # Wait until recording is finished
print("Recording completed.")


Recording started. Speak into the microphone...
Recording completed.


In [25]:
#Save the recorded audio to a file:
audio_file = "recorded_audio.wav"  # Choose a filename for the recorded audio
sf.write(audio_file, recording, sample_rate)
print(f"Audio saved to {audio_file}.")


Audio saved to recorded_audio.wav.


In [26]:
#Extract audio features for classification:
# Load the audio file
audio, sr = librosa.load(audio_file, sr=sample_rate)

# Extract audio features
features = librosa.feature.mfcc(y=audio, sr=sr)
features = features.reshape(1, -1)  # Reshape features to match the expected input shape of the MLP classifier


In [27]:
import joblib
# Load the saved model from a file
loaded_model = joblib.load('model_file.pkl')



In [28]:
import numpy as np

# Assuming 'features' is your extracted features with shape (n_samples, 128)
expected_shape = 180  # Expected shape of the MLPClassifier input

if features.shape[1] < expected_shape:
    padding_width = expected_shape - features.shape[1]
    padded_features = np.pad(features, ((0, 0), (0, padding_width)), mode='constant')
elif features.shape[1] > expected_shape:
    padded_features = features[:, :expected_shape]  # Truncate to expected shape
else:
    padded_features = features  # No padding or truncation needed

# Reshape features to match the expected input shape of the MLP classifier
reshaped_features = padded_features.reshape(padded_features.shape[0], -1)

# Use the loaded MLP classifier to predict the emotion
emotion = loaded_model.predict(reshaped_features)
print("Predicted emotion:", emotion)


Predicted emotion: ['calm']


In [35]:
import soundfile as sf
import numpy as np
import librosa

def extract_feature(file_name, mfcc, chroma, mel):
    with sf.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
        if chroma:
            stft = np.abs(librosa.stft(X))
        result = np.array([])
        if mfcc:
            mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result = np.hstack((result, mfccs))
        if chroma:
            chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
            result = np.hstack((result, chroma))
        if mel:
            mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
            result = np.hstack((result, mel))
        return result

# Specify the path to the recorded audio file
file_path = "recorded_audio.wav"

# Extract features from the recorded audio
features = extract_feature(file_path, mfcc=True, chroma=True, mel=True)

# Pad or truncate the features to match the expected shape
expected_shape = 180  # Expected shape of the MLPClassifier input
padded_features = np.pad(features, (0, max(0, expected_shape - len(features))), mode='constant')

# Reshape the features to match the expected input shape of the MLP classifier
reshaped_features = padded_features.reshape(1, -1)

# Use the loaded MLP classifier to predict the emotion
emotion = loaded_model.predict(reshaped_features)
print("Predicted emotion:", emotion)


Predicted emotion: ['happy']


In [41]:
import soundfile as sf
import numpy as np
import librosa
import sounddevice as sd

def extract_feature(file_name, mfcc, chroma, mel):
    with sf.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
        if chroma:
            stft = np.abs(librosa.stft(X))
        result = np.array([])
        if mfcc:
            mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result = np.hstack((result, mfccs))
        if chroma:
            chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
            result = np.hstack((result, chroma))
        if mel:
            mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
            result = np.hstack((result, mel))
        return result

# Specify the path to save the recorded audio file
file_path = "recorded_audio.wav"

# Specify the duration and sample rate of the recording
duration = 5  # Duration of the recording in seconds
sample_rate = 22050  # Sample rate of the recording

# Start recording
print("Recording started. Speak into the microphone...")
recording = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1)
sd.wait()  # Wait until recording is finished
print("Recording completed.")

# Save the recorded audio
sf.write(file_path, recording, sample_rate)

# Extract features from the recorded audio
features = extract_feature(file_path, mfcc=True, chroma=True, mel=True)
""
# Pad or truncate the features to match the expected shape
expected_shape = 180  # Expected shape of the MLPClassifier input
padded_features = np.pad(features, (0, max(0, expected_shape - len(features))), mode='constant')

# Reshape the features to match the expected input shape of the MLP classifier
reshaped_features = padded_features.reshape(1, -1)

# Use the loaded MLP classifier to predict the emotion
emotion = loaded_model.predict(reshaped_features)
print("Predicted emotion:", emotion)


Recording started. Speak into the microphone...
Recording completed.
Predicted emotion: ['fearful']
