In [11]:
import sounddevice as sd
import librosa
import numpy as np

In [12]:

def extract_features(signal, sr):
    features = {}
    
    # Fundamental frequency features
    pitches = librosa.yin(signal, fmin=50, fmax=300, sr=sr)
    features['MDVP:Fo(Hz)'] = pitches.mean()
    features['MDVP:Fhi(Hz)'] = pitches.max()
    features['MDVP:Flo(Hz)'] = pitches.min()
    
    # Jitter features
    features['MDVP:Jitter(%)'] = np.sqrt(np.mean(signal ** 2))
    features['MDVP:Jitter(Abs)'] = np.mean(np.abs(signal))
    features['MDVP:RAP'] = np.sqrt(np.mean(signal ** 2))
    features['MDVP:PPQ'] = np.sqrt(np.mean(signal ** 2))
    features['Jitter:DDP'] = features['MDVP:RAP'] * 3
    
    # Shimmer features
    features['MDVP:Shimmer'] = np.sqrt(np.mean(signal ** 2))
    features['MDVP:Shimmer(dB)'] = np.sqrt(np.mean(signal ** 2))
    features['MDVP:APQ'] = np.sqrt(np.mean(signal ** 2))
    features['Shimmer:DDA'] = features['MDVP:APQ'] * 3
    features['shimmer:APQ3'] = np.sqrt(np.mean(signal ** 2))
    features['shimmer:APQ5'] = np.sqrt(np.mean(signal ** 2))

    # Other features
    features['NHR'] = np.sqrt(np.mean(signal ** 2))
    features['HNR'] = np.sqrt(np.mean(signal ** 2))
    features['RPDE'] = np.sqrt(np.mean(signal ** 2))
    features['DFA'] = np.sqrt(np.mean(signal ** 2))
    features['spread1'] = np.sqrt(np.mean(signal ** 2))
    features['spread2'] = np.sqrt(np.mean(signal ** 2))
    features['D2'] = np.sqrt(np.mean(signal ** 2))
    features['PPE'] = np.sqrt(np.mean(signal ** 2))
    
    return features

In [13]:
# Set the sample rate for recording and processing the audio
sample_rate = 22050  # Sample rate in Hz
duration = 5  # Duration in seconds

In [14]:
# Record audio from the microphone
print('Recording audio...')
recording = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1)
sd.wait()  # Wait for the recording to complete


Recording audio...


In [15]:
# Flatten the recording array to one-dimensional
signal = recording.flatten()


In [16]:
# Extract features from the recorded audio
extracted_features = extract_features(signal, sample_rate)

In [17]:
# Print the extracted features
for feature, value in extracted_features.items():
    print(f'{feature}: {value}')

MDVP:Fo(Hz): 98.3370288235719
MDVP:Fhi(Hz): 302.05479452054794
MDVP:Flo(Hz): 50.0
MDVP:Jitter(%): 0.07193707674741745
MDVP:Jitter(Abs): 0.04775933176279068
MDVP:RAP: 0.07193707674741745
MDVP:PPQ: 0.07193707674741745
Jitter:DDP: 0.21581123024225235
MDVP:Shimmer: 0.07193707674741745
MDVP:Shimmer(dB): 0.07193707674741745
MDVP:APQ: 0.07193707674741745
Shimmer:DDA: 0.21581123024225235
shimmer:APQ3: 0.07193707674741745
shimmer:APQ5: 0.07193707674741745
NHR: 0.07193707674741745
HNR: 0.07193707674741745
RPDE: 0.07193707674741745
DFA: 0.07193707674741745
spread1: 0.07193707674741745
spread2: 0.07193707674741745
D2: 0.07193707674741745
PPE: 0.07193707674741745


In [19]:
feature_vector = []
feature_order = ['MDVP:Fo(Hz)', 'MDVP:Fhi(Hz)', 'MDVP:Flo(Hz)', 'MDVP:Jitter(%)', 'MDVP:Jitter(Abs)', 'MDVP:RAP',
                 'MDVP:PPQ', 'Jitter:DDP', 'MDVP:Shimmer', 'MDVP:Shimmer(dB)', 'MDVP:APQ', 'Shimmer:DDA','shimmer:APQ3','shimmer:APQ5',
                 'NHR', 'HNR', 'RPDE', 'DFA', 'spread1', 'spread2', 'D2', 'PPE']

for feature in feature_order:
    feature_vector.append(extracted_features[feature])
print(feature_vector)

[98.3370288235719, 302.05479452054794, 50.0, 0.07193708, 0.04775933, 0.07193708, 0.07193708, 0.21581123024225235, 0.07193708, 0.07193708, 0.07193708, 0.21581123024225235, 0.07193708, 0.07193708, 0.07193708, 0.07193708, 0.07193708, 0.07193708, 0.07193708, 0.07193708, 0.07193708, 0.07193708]
